diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 4cc5a6a6ec3..85aa162fb41 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -329,6 +329,11 @@ namespace llvm {
       SmallVector<ByValArgInfo, 2> ByValArgs;
     };
   protected:
+    // Declared in the protected section so that MipsSETargetLowering can fall
+    // back to them for loads and stores it does not custom-lower itself.
+    SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+
     // Subtarget Info
     const MipsSubtarget *Subtarget;
 
@@ -366,8 +371,6 @@ namespace llvm {
     SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
     SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
                                  bool IsSRA) const;
-    SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
-    SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 536dff610c3..7aa080ff64a 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -368,12 +368,8 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
 }
 
 let Predicates = [NotFP64bit, HasStdEnc] in {
-  let isPseudo = 1, isCodeGenOnly = 1 in {
-    def PseudoLDC1 : LW_FT<"", AFGR64Opnd, IIFLoad, load>;
-    def PseudoSDC1 : SW_FT<"", AFGR64Opnd, IIFStore, store>;
-  }
-  def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad>, LW_FM<0x35>;
-  def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore>, LW_FM<0x3d>;
+  def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad, load>, LW_FM<0x35>;
+  def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore, store>, LW_FM<0x3d>;
 }
 
 // Indexed loads and stores.
@@ -595,7 +591,7 @@ let AddedComplexity = 40 in {
   }
 
   let Predicates = [NotFP64bit, HasStdEnc] in {
-    def : LoadRegImmPat<PseudoLDC1, f64, load>;
-    def : StoreRegImmPat<PseudoSDC1, f64, store>;
+    def : LoadRegImmPat<LDC1, f64, load>;
+    def : StoreRegImmPat<SDC1, f64, store>;
   }
 }
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index fae294ce40a..f32e146368c 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -25,6 +25,11 @@ static cl::opt<bool>
 EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
                     cl::desc("MIPS: Enable tail calls."), cl::init(false));
 
+static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
+                                   cl::desc("Expand double precision loads and "
+                                            "stores to their single precision "
+                                            "counterparts"));
+
 MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
   : MipsTargetLowering(TM) {
   // Set up the register classes
@@ -129,6 +134,11 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
+  if (NoDPLoadStore) {
+    setOperationAction(ISD::LOAD, MVT::f64, Custom);
+    setOperationAction(ISD::STORE, MVT::f64, Custom);
+  }
+
   computeRegisterProperties();
 }
 
@@ -168,6 +178,8 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
 SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
   switch(Op.getOpcode()) {
+  case ISD::LOAD:  return lowerLOAD(Op, DAG);
+  case ISD::STORE: return lowerSTORE(Op, DAG);
   case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
   case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
   case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
@@ -611,6 +623,79 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
                                     InternalLinkage, CLI, Callee, Chain);
 }
 
+/// Lower an f64 load as two i32 loads plus a BuildPairF64 when -mno-ldc1-sdc1
+/// is given; every other load is handed to MipsTargetLowering::lowerLOAD.
+SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  LoadSDNode &Nd = *cast<LoadSDNode>(Op);
+
+  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
+    return MipsTargetLowering::lowerLOAD(Op, DAG);
+
+  // Replace a double precision load with two i32 loads and a buildpair64.
+  SDLoc DL(Op);
+  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
+  EVT PtrVT = Ptr.getValueType();
+
+  // i32 load from lower address.
+  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr,
+                           MachinePointerInfo(), Nd.isVolatile(),
+                           Nd.isNonTemporal(), Nd.isInvariant(),
+                           Nd.getAlignment());
+
+  // i32 load from higher address. Ptr + 4 is only known to be aligned to
+  // min(original alignment, 4), so do not inherit a larger alignment.
+  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+  SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr,
+                           MachinePointerInfo(), Nd.isVolatile(),
+                           Nd.isNonTemporal(), Nd.isInvariant(),
+                           std::min(Nd.getAlignment(), 4U));
+
+  // Take the output chain from the second load before Lo/Hi may be swapped, so
+  // the returned chain covers both loads on big endian targets as well.
+  SDValue OutChain = Hi.getValue(1);
+
+  if (!Subtarget->isLittle())
+    std::swap(Lo, Hi);
+
+  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+  SDValue Ops[2] = {BP, OutChain};
+  return DAG.getMergeValues(Ops, 2, DL);
+}
+
+/// Lower an f64 store as two ExtractElementF64 nodes plus two i32 stores when
+/// -mno-ldc1-sdc1 is given; every other store is handed to
+/// MipsTargetLowering::lowerSTORE.
+SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  StoreSDNode &Nd = *cast<StoreSDNode>(Op);
+
+  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
+    return MipsTargetLowering::lowerSTORE(Op, DAG);
+
+  // Replace a double precision store with two extractelement64s and i32 stores.
+  SDLoc DL(Op);
+  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
+  EVT PtrVT = Ptr.getValueType();
+  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+                           Val, DAG.getConstant(0, MVT::i32));
+  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+                           Val, DAG.getConstant(1, MVT::i32));
+
+  if (!Subtarget->isLittle())
+    std::swap(Lo, Hi);
+
+  // i32 store to lower address.
+  Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(),
+                       Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
+                       Nd.getTBAAInfo());
+
+  // i32 store to higher address. Ptr + 4 is only known to be aligned to
+  // min(original alignment, 4), so do not inherit a larger alignment.
+  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
+                      Nd.isVolatile(), Nd.isNonTemporal(),
+                      std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo());
+}
+
 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                           bool HasLo, bool HasHi,
                                           SelectionDAG &DAG) const {
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index d1a18e1fa60..dde0c23c35d 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -58,6 +58,9 @@ namespace llvm {
                 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
                 CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
 
+    SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+
     SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
                         SelectionDAG &DAG) const;
 
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 374837e37d2..9c31254cf47 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -24,11 +24,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
-                                   cl::desc("Expand double precision loads and "
-                                            "stores to their single precision "
-                                            "counterparts."));
-
 MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm)
   : MipsInstrInfo(tm,
                   tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J),
@@ -294,12 +289,6 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   case Mips::ExtractElementF64_64:
     expandExtractElementF64(MBB, MI, true);
     break;
-  case Mips::PseudoLDC1:
-    expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1);
-    break;
-  case Mips::PseudoSDC1:
-    expandDPLoadStore(MBB, MI, Mips::SDC1, Mips::SWC1);
-    break;
   case Mips::MIPSeh_return32:
   case Mips::MIPSeh_return64:
     expandEhReturn(MBB, MI);
@@ -484,56 +473,6 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
     .addReg(HiReg);
 }
 
-/// Add 4 to the displacement of operand MO.
-static void fixDisp(MachineOperand &MO) {
-  switch (MO.getType()) {
-  default:
-    llvm_unreachable("Unhandled operand type.");
-  case MachineOperand::MO_Immediate:
-    MO.setImm(MO.getImm() + 4);
-    break;
-  case MachineOperand::MO_GlobalAddress:
-  case MachineOperand::MO_ConstantPoolIndex:
-  case MachineOperand::MO_BlockAddress:
-  case MachineOperand::MO_TargetIndex:
-  case MachineOperand::MO_ExternalSymbol:
-    MO.setOffset(MO.getOffset() + 4);
-    break;
-  }
-}
-
-void MipsSEInstrInfo::expandDPLoadStore(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator I,
-                                        unsigned OpcD, unsigned OpcS) const {
-  // If NoDPLoadStore is false, just change the opcode.
-  if (!NoDPLoadStore) {
-    genInstrWithNewOpc(OpcD, I);
-    return;
-  }
-
-  // Expand a double precision FP load or store to two single precision
-  // instructions.
-
-  const TargetRegisterInfo &TRI = getRegisterInfo();
-  const MachineOperand &ValReg = I->getOperand(0);
-  unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_lo);
-  unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_hi);
-
-  if (!TM.getSubtarget<MipsSubtarget>().isLittle())
-    std::swap(LoReg, HiReg);
-
-  // Create an instruction which loads from or stores to the lower memory
-  // address.
-  MachineInstrBuilder MIB = genInstrWithNewOpc(OpcS, I);
-  MIB->getOperand(0).setReg(LoReg);
-
-  // Create an instruction which loads from or stores to the higher memory
-  // address.
-  MIB = genInstrWithNewOpc(OpcS, I);
-  MIB->getOperand(0).setReg(HiReg);
-  fixDisp(MIB->getOperand(2));
-}
-
 void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const {
   // This pseudo instruction is generated as part of the lowering of
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index 6b4f89a732a..a2dfd9579d1 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -104,9 +104,6 @@ private:
                                MachineBasicBlock::iterator I, bool FP64) const;
   void expandBuildPairF64(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, bool FP64) const;
-  void expandDPLoadStore(MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator I, unsigned OpcD,
-                         unsigned OpcS) const;
   void expandEhReturn(MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator I) const;
 };
diff --git a/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
index be9d0b6b68c..9533f2f421c 100644
--- a/test/CodeGen/Mips/mno-ldc1-sdc1.ll
+++ b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
@@ -1,22 +1,31 @@
-; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 < %s | \
-; RUN: FileCheck %s -check-prefix=LE-PIC
+; RUN: llc -march=mipsel -relocation-model=pic -mno-ldc1-sdc1 -mcpu=mips32r2 \
+; RUN: < %s | FileCheck %s -check-prefix=LE-PIC
 ; RUN: llc -march=mipsel -relocation-model=static -mno-ldc1-sdc1 < %s | \
 ; RUN: FileCheck %s -check-prefix=LE-STATIC
 ; RUN: llc -march=mips -relocation-model=pic -mno-ldc1-sdc1 < %s | \
 ; RUN: FileCheck %s -check-prefix=BE-PIC
-; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=CHECK-LDC1-SDC1
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | \
+; RUN: FileCheck %s -check-prefix=CHECK-LDC1-SDC1
 
 @g0 = common global double 0.000000e+00, align 8
 
 ; LE-PIC-LABEL: test_ldc1:
-; LE-PIC: lwc1 $f0, 0(${{[0-9]+}})
-; LE-PIC: lwc1 $f1, 4(${{[0-9]+}})
+; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; LE-PIC-DAG: mtc1 $[[R0]], $f0
+; LE-PIC-DAG: mtc1 $[[R1]], $f1
 ; LE-STATIC-LABEL: test_ldc1:
-; LE-STATIC: lwc1 $f0, %lo(g0)(${{[0-9]+}})
-; LE-STATIC: lwc1 $f1, %lo(g0+4)(${{[0-9]+}})
+; LE-STATIC-DAG: lui $[[R0:[0-9]+]], %hi(g0)
+; LE-STATIC-DAG: lw $[[R1:[0-9]+]], %lo(g0)($[[R0]])
+; LE-STATIC-DAG: addiu $[[R2:[0-9]+]], $[[R0]], %lo(g0)
+; LE-STATIC-DAG: lw $[[R3:[0-9]+]], 4($[[R2]])
+; LE-STATIC-DAG: mtc1 $[[R1]], $f0
+; LE-STATIC-DAG: mtc1 $[[R3]], $f1
 ; BE-PIC-LABEL: test_ldc1:
-; BE-PIC: lwc1 $f1, 0(${{[0-9]+}})
-; BE-PIC: lwc1 $f0, 4(${{[0-9]+}})
+; BE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; BE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; BE-PIC-DAG: mtc1 $[[R1]], $f0
+; BE-PIC-DAG: mtc1 $[[R0]], $f1
 ; CHECK-LDC1-SDC1-LABEL: test_ldc1:
 ; CHECK-LDC1-SDC1: ldc1 $f{{[0-9]+}}
 
@@ -27,14 +36,22 @@ entry:
 }
 
 ; LE-PIC-LABEL: test_sdc1:
-; LE-PIC: swc1 $f12, 0(${{[0-9]+}})
-; LE-PIC: swc1 $f13, 4(${{[0-9]+}})
+; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
+; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
 ; LE-STATIC-LABEL: test_sdc1:
-; LE-STATIC: swc1 $f12, %lo(g0)(${{[0-9]+}})
-; LE-STATIC: swc1 $f13, %lo(g0+4)(${{[0-9]+}})
+; LE-STATIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; LE-STATIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; LE-STATIC-DAG: lui $[[R2:[0-9]+]], %hi(g0)
+; LE-STATIC-DAG: sw $[[R0]], %lo(g0)($[[R2]])
+; LE-STATIC-DAG: addiu $[[R3:[0-9]+]], $[[R2]], %lo(g0)
+; LE-STATIC-DAG: sw $[[R1]], 4($[[R3]])
 ; BE-PIC-LABEL: test_sdc1:
-; BE-PIC: swc1 $f13, 0(${{[0-9]+}})
-; BE-PIC: swc1 $f12, 4(${{[0-9]+}})
+; BE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; BE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; BE-PIC-DAG: sw $[[R1]], 0(${{[0-9]+}})
+; BE-PIC-DAG: sw $[[R0]], 4(${{[0-9]+}})
 ; CHECK-LDC1-SDC1-LABEL: test_sdc1:
 ; CHECK-LDC1-SDC1: sdc1 $f{{[0-9]+}}
 
@@ -43,3 +60,34 @@ entry:
   store double %a, double* @g0, align 8
   ret void
 }
+
+
+; LE-PIC-LABEL: test_ldxc1:
+; LE-PIC-DAG: lw $[[R0:[0-9]+]], 0(${{[0-9]+}})
+; LE-PIC-DAG: lw $[[R1:[0-9]+]], 4(${{[0-9]+}})
+; LE-PIC-DAG: mtc1 $[[R0]], $f0
+; LE-PIC-DAG: mtc1 $[[R1]], $f1
+; CHECK-LDC1-SDC1-LABEL: test_ldxc1:
+; CHECK-LDC1-SDC1: ldxc1 $f{{[0-9]+}}
+
+define double @test_ldxc1(double* nocapture readonly %a, i32 %i) {
+entry:
+  %arrayidx = getelementptr inbounds double* %a, i32 %i
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+; LE-PIC-LABEL: test_sdxc1:
+; LE-PIC-DAG: mfc1 $[[R0:[0-9]+]], $f12
+; LE-PIC-DAG: mfc1 $[[R1:[0-9]+]], $f13
+; LE-PIC-DAG: sw $[[R0]], 0(${{[0-9]+}})
+; LE-PIC-DAG: sw $[[R1]], 4(${{[0-9]+}})
+; CHECK-LDC1-SDC1-LABEL: test_sdxc1:
+; CHECK-LDC1-SDC1: sdxc1 $f{{[0-9]+}}
+
+define void @test_sdxc1(double %b, double* nocapture %a, i32 %i) {
+entry:
+  %arrayidx = getelementptr inbounds double* %a, i32 %i
+  store double %b, double* %arrayidx, align 8
+  ret void
+}