[mips] Enhance command line option "-mno-ldc1-sdc1" to expand base+index double

precision loads and stores as well as reg+imm double precision loads and stores.

Previously, expansion of loads and stores was done after register allocation, but now it takes place during legalization. As a result, users will see double precision stores and loads being emitted to spill and restore 64-bit FP registers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190235 91177308-0d34-0410-b5e6-96231b3b80d8
2024-07-21 02:29:22 +00:00 · 2013-09-07 00:52:30 +00:00 · 2013-09-07 00:52:30 +00:00 · 3e6758541b
commit 3e6758541b
parent d65d2fde4e
7 changed files with 147 additions and 89 deletions
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@ -329,6 +329,9 @@ namespace llvm {
      SmallVector<ByValArgInfo, 2> ByValArgs;
    };
  protected:
+    SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+
    // Subtarget Info
    const MipsSubtarget *Subtarget;

@ -366,8 +369,6 @@ namespace llvm {
    SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
    SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
                                 bool IsSRA) const;
-    SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
-    SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;

--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@ -368,12 +368,8 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
 }

 let Predicates = [NotFP64bit, HasStdEnc] in {
-  let isPseudo = 1, isCodeGenOnly = 1 in {
-    def PseudoLDC1 : LW_FT<"", AFGR64Opnd, IIFLoad, load>;
-    def PseudoSDC1 : SW_FT<"", AFGR64Opnd, IIFStore, store>;
-  }
-  def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad>, LW_FM<0x35>;
-  def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore>, LW_FM<0x3d>;
+  def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad, load>, LW_FM<0x35>;
+  def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore, store>, LW_FM<0x3d>;
 }

 // Indexed loads and stores.
@ -595,7 +591,7 @@ let AddedComplexity = 40 in {
  }

  let Predicates = [NotFP64bit, HasStdEnc] in {
-    def : LoadRegImmPat<PseudoLDC1, f64, load>;
-    def : StoreRegImmPat<PseudoSDC1, f64>;
+    def : LoadRegImmPat<LDC1, f64, load>;
+    def : StoreRegImmPat<SDC1, f64>;
  }
 }
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@ -25,6 +25,11 @@ static cl::opt<bool>
 EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
                    cl::desc("MIPS: Enable tail calls."), cl::init(false));

+// Backs the "-mno-ldc1-sdc1" command line option (off by default).  When it
+// is set, the MipsSETargetLowering constructor marks f64 LOAD and STORE as
+// Custom, and lowerLOAD/lowerSTORE split each such access into two i32
+// accesses.
+static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
+                                   cl::desc("Expand double precision loads and "
+                                            "stores to their single precision "
+                                            "counterparts"));
+
 MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
  : MipsTargetLowering(TM) {
  // Set up the register classes
@ -129,6 +134,11 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

+  if (NoDPLoadStore) {
+    setOperationAction(ISD::LOAD, MVT::f64, Custom);
+    setOperationAction(ISD::STORE, MVT::f64, Custom);
+  }
+
  computeRegisterProperties();
 }

@ -168,6 +178,8 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
 SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch(Op.getOpcode()) {
+  case ISD::LOAD:  return lowerLOAD(Op, DAG);
+  case ISD::STORE: return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
@ -611,6 +623,68 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
                                  InternalLinkage, CLI, Callee, Chain);
 }

+/// Custom-lower a load when -mno-ldc1-sdc1 is in effect.
+///
+/// A load whose memory type is not f64, or any load seen while the option is
+/// off, is forwarded to MipsTargetLowering::lowerLOAD.  Otherwise the f64 load
+/// is replaced by two chained i32 loads (from Ptr and from Ptr + 4) whose
+/// results are recombined with MipsISD::BuildPairF64.
+///
+/// \param Op  the ISD::LOAD node being lowered.
+/// \param DAG the DAG in which replacement nodes are created.
+/// \returns the merged {f64 value, output chain} pair.
+SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  LoadSDNode &Nd = *cast<LoadSDNode>(Op);
+
+  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
+    return MipsTargetLowering::lowerLOAD(Op, DAG);
+
+  // Replace a double precision load with two i32 loads and a buildpair64.
+  SDLoc DL(Op);
+  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
+  EVT PtrVT = Ptr.getValueType();
+
+  // The word at Ptr + 4 can never be assumed to be more than 4-byte aligned,
+  // whatever the alignment of the original f64 access was.
+  unsigned BaseAlign = Nd.getAlignment();
+  unsigned OffsetAlign = BaseAlign < 4 ? BaseAlign : 4;
+
+  // i32 load from lower address.
+  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr,
+                           MachinePointerInfo(), Nd.isVolatile(),
+                           Nd.isNonTemporal(), Nd.isInvariant(),
+                           BaseAlign);
+
+  // i32 load from higher address.
+  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+  SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr,
+                           MachinePointerInfo(), Nd.isVolatile(),
+                           Nd.isNonTemporal(), Nd.isInvariant(),
+                           OffsetAlign);
+
+  // Take the output chain from the second load before the endianness swap:
+  // it is chained after the first load, so it is the only chain result that
+  // orders both accesses.  Reading it after the swap would pick the first
+  // load's chain on big-endian targets.
+  SDValue OutChain = Hi.getValue(1);
+
+  if (!Subtarget->isLittle())
+    std::swap(Lo, Hi);
+
+  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+  SDValue Ops[2] = {BP, OutChain};
+  return DAG.getMergeValues(Ops, 2, DL);
+}
+
+/// Custom-lower a store when -mno-ldc1-sdc1 is in effect.
+///
+/// A store whose memory type is not f64, or any store seen while the option
+/// is off, is forwarded to MipsTargetLowering::lowerSTORE.  Otherwise the f64
+/// value is split with two MipsISD::ExtractElementF64 nodes and written with
+/// two chained i32 stores (to Ptr and to Ptr + 4).
+///
+/// \param Op  the ISD::STORE node being lowered.
+/// \param DAG the DAG in which replacement nodes are created.
+/// \returns the second i32 store, which is chained after the first one.
+SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  StoreSDNode &Nd = *cast<StoreSDNode>(Op);
+
+  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
+    return MipsTargetLowering::lowerSTORE(Op, DAG);
+
+  // Replace a double precision store with two extractelement64s and i32 stores.
+  SDLoc DL(Op);
+  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
+  EVT PtrVT = Ptr.getValueType();
+  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+                           Val, DAG.getConstant(0, MVT::i32));
+  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+                           Val, DAG.getConstant(1, MVT::i32));
+
+  // On big-endian targets the high word of the double goes to the lower
+  // address.
+  if (!Subtarget->isLittle())
+    std::swap(Lo, Hi);
+
+  // The word at Ptr + 4 can never be assumed to be more than 4-byte aligned,
+  // whatever the alignment of the original f64 access was.
+  unsigned BaseAlign = Nd.getAlignment();
+  unsigned OffsetAlign = BaseAlign < 4 ? BaseAlign : 4;
+
+  // i32 store to lower address.
+  Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(),
+                       Nd.isVolatile(), Nd.isNonTemporal(), BaseAlign,
+                       Nd.getTBAAInfo());
+
+  // i32 store to higher address.
+  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
+                      Nd.isVolatile(), Nd.isNonTemporal(), OffsetAlign,
+                      Nd.getTBAAInfo());
+}
+
 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@ -58,6 +58,9 @@ namespace llvm {
                bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
                CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;

+    SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+
    SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
                        SelectionDAG &DAG) const;

--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@ -24,11 +24,6 @@

 using namespace llvm;

-static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
-                                   cl::desc("Expand double precision loads and "
-                                            "stores to their single precision "
-                                            "counterparts."));
-
 MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm)
  : MipsInstrInfo(tm,
                  tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J),
@ -294,12 +289,6 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  case Mips::ExtractElementF64_64:
    expandExtractElementF64(MBB, MI, true);
    break;
-  case Mips::PseudoLDC1:
-    expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1);
-    break;
-  case Mips::PseudoSDC1:
-    expandDPLoadStore(MBB, MI, Mips::SDC1, Mips::SWC1);
-    break;
  case Mips::MIPSeh_return32:
  case Mips::MIPSeh_return64:
    expandEhReturn(MBB, MI);
@ -484,56 +473,6 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
      .addReg(HiReg);
 }

-/// Add 4 to the displacement of operand MO.
-static void fixDisp(MachineOperand &MO) {
-  switch (MO.getType()) {
-  default:
-    llvm_unreachable("Unhandled operand type.");
-  case MachineOperand::MO_Immediate:
-    MO.setImm(MO.getImm() + 4);
-    break;
-  case MachineOperand::MO_GlobalAddress:
-  case MachineOperand::MO_ConstantPoolIndex:
-  case MachineOperand::MO_BlockAddress:
-  case MachineOperand::MO_TargetIndex:
-  case MachineOperand::MO_ExternalSymbol:
-    MO.setOffset(MO.getOffset() + 4);
-    break;
-  }
-}
-
-void MipsSEInstrInfo::expandDPLoadStore(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator I,
-                                        unsigned OpcD, unsigned OpcS) const {
-  // If NoDPLoadStore is false, just change the opcode.
-  if (!NoDPLoadStore) {
-    genInstrWithNewOpc(OpcD, I);
-    return;
-  }
-
-  // Expand a double precision FP load or store to two single precision
-  // instructions.
-
-  const TargetRegisterInfo &TRI = getRegisterInfo();
-  const MachineOperand &ValReg = I->getOperand(0);
-  unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_lo);
-  unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_hi);
-
-  if (!TM.getSubtarget<MipsSubtarget>().isLittle())
-    std::swap(LoReg, HiReg);
-
-  // Create an instruction which loads from or stores to the lower memory
-  // address.
-  MachineInstrBuilder MIB = genInstrWithNewOpc(OpcS, I);
-  MIB->getOperand(0).setReg(LoReg);
-
-  // Create an instruction which loads from or stores to the higher memory
-  // address.
-  MIB = genInstrWithNewOpc(OpcS, I);
-  MIB->getOperand(0).setReg(HiReg);
-  fixDisp(MIB->getOperand(2));
-}
-
 void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I) const {
  // This pseudo instruction is generated as part of the lowering of
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@ -104,9 +104,6 @@ private:
                               MachineBasicBlock::iterator I, bool FP64) const;
  void expandBuildPairF64(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, bool FP64) const;
-  void expandDPLoadStore(MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator I, unsigned OpcD,
-                         unsigned OpcS) const;
  void expandEhReturn(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator I) const;
 };
--- a/test/CodeGen/Mips/mno-ldc1-sdc1.ll
+++ b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
@ -1,22 +1,31 @@
-; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 < %s | \
-; RUN: FileCheck %s -check-prefix=LE-PIC
+; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 -mcpu=mips32r2 \
+; RUN: < %s | FileCheck %s -check-prefix=LE-PIC
 ; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 < %s | \
 ; RUN: FileCheck %s -check-prefix=LE-STATIC
 ; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 < %s | \
 ; RUN: FileCheck %s -check-prefix=BE-PIC
-; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=CHECK-LDC1-SDC1
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | \
+; RUN: FileCheck %s -check-prefix=CHECK-LDC1-SDC1

@g0 = common global double 0.000000e+00, align 8

 ; LE-PIC-LABEL: test_ldc1:
-; LE-PIC: lwc1 $f0, 0(${{[0-9]+}})
-; LE-PIC: lwc1 $f1, 4(${{[0-9]+}})
+; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; LE-PIC-DAG: mtc1 $[[R0]], $f0
+; LE-PIC-DAG: mtc1 $[[R1]], $f1
 ; LE-STATIC-LABEL: test_ldc1:
-; LE-STATIC: lwc1 $f0, %lo(g0)(${{[0-9]+}})
-; LE-STATIC: lwc1 $f1, %lo(g0+4)(${{[0-9]+}})
+; LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
+; LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
+; LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
+; LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
+; LE-STATIC-DAG: mtc1 $[[R1]], $f0
+; LE-STATIC-DAG: mtc1 $[[R3]], $f1
 ; BE-PIC-LABEL: test_ldc1:
-; BE-PIC: lwc1 $f1, 0(${{[0-9]+}})
-; BE-PIC: lwc1 $f0, 4(${{[0-9]+}})
+; BE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; BE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; BE-PIC-DAG: mtc1 $[[R1]], $f0
+; BE-PIC-DAG: mtc1 $[[R0]], $f1
 ; CHECK-LDC1-SDC1-LABEL: test_ldc1:
 ; CHECK-LDC1-SDC1: ldc1 $f{{[0-9]+}}

@ -27,14 +36,22 @@ entry:
 }

 ; LE-PIC-LABEL: test_sdc1:
-; LE-PIC: swc1 $f12, 0(${{[0-9]+}})
-; LE-PIC: swc1 $f13, 4(${{[0-9]+}})
+; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
+; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
 ; LE-STATIC-LABEL: test_sdc1:
-; LE-STATIC: swc1 $f12, %lo(g0)(${{[0-9]+}})
-; LE-STATIC: swc1 $f13, %lo(g0+4)(${{[0-9]+}})
+; LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; LE-STATIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
+; LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
+; LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
+; LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
 ; BE-PIC-LABEL: test_sdc1:
-; BE-PIC: swc1 $f13, 0(${{[0-9]+}})
-; BE-PIC: swc1 $f12, 4(${{[0-9]+}})
+; BE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; BE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; BE-PIC-DAG: sw $[[R1]], 0(${{[0-9]+}})
+; BE-PIC-DAG: sw $[[R0]], 4(${{[0-9]+}})
 ; CHECK-LDC1-SDC1-LABEL: test_sdc1:
 ; CHECK-LDC1-SDC1: sdc1 $f{{[0-9]+}}

@ -43,3 +60,34 @@ entry:
  store double %a, double* @g0, align 8
  ret void
 }
+
+
+; Indexed (base+index) f64 load: expect two lw + mtc1 when the option is on,
+; and a single ldxc1 when it is off.
+; LE-PIC-LABEL: test_ldxc1:
+; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; LE-PIC-DAG: mtc1 $[[R0]], $f0
+; LE-PIC-DAG: mtc1 $[[R1]], $f1
+; CHECK-LDC1-SDC1-LABEL: test_ldxc1:
+; CHECK-LDC1-SDC1: ldxc1 $f{{[0-9]+}}
+
+define double @test_ldxc1(double* nocapture readonly %a, i32 %i) {
+entry:
+  %arrayidx = getelementptr inbounds double* %a, i32 %i
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+; Indexed (base+index) f64 store: expect two mfc1 + sw when the option is on,
+; and a single sdxc1 when it is off.
+; LE-PIC-LABEL: test_sdxc1:
+; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
+; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
+; CHECK-LDC1-SDC1-LABEL: test_sdxc1:
+; CHECK-LDC1-SDC1: sdxc1 $f{{[0-9]+}}
+
+define void @test_sdxc1(double %b, double* nocapture %a, i32 %i) {
+entry:
+  %arrayidx = getelementptr inbounds double* %a, i32 %i
+  store double %b, double* %arrayidx, align 8
+  ret void
+}