diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 281399e84ee..f60a15e9e56 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -91,7 +92,7 @@ private:
   SDNode *Select(SDNode *N);
 
   // Complex Pattern.
-  bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
+  bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset);
 
   // getImm - Return a target constant with the specified value.
   inline SDValue getImm(const SDNode *Node, unsigned Imm) {
@@ -197,7 +198,7 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
 /// ComplexPattern used on MipsInstrInfo
 /// Used on Mips Load/Store instructions
 bool MipsDAGToDAGISel::
-SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
   EVT ValTy = Addr.getValueType();
 
   // if Address is FI, get the TargetFrameIndex.
@@ -256,6 +257,12 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
         return true;
       }
     }
+
+    // If an indexed load/store can be emitted, return false.
+    if (const LSBaseSDNode* LS = dyn_cast<LSBaseSDNode>(Parent))
+      if ((LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+          Subtarget.hasMips32r2Or64())
+        return false;
   }
 
   Base = Addr;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 968a78b1f2a..e0944c01676 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -261,7 +261,18 @@ MipsTargetLowering(MipsTargetMachine &TM)
 
 bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
   MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
-  return SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16;
+
+  switch (SVT) {
+  case MVT::i64:
+  case MVT::i32:
+  case MVT::i16:
+    return true;
+  case MVT::f32:
+  case MVT::f64:
+    return Subtarget->hasMips32r2Or64();
+  default:
+    return false;
+  }
 }
 
 EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 055540b6188..46fd278e8b4 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -94,6 +94,24 @@ class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
       !strconcat(opstr, "\t$ft, $addr"), [(store RC:$ft, addr:$addr)],
       IIStore>;
 
+// FP indexed load.
+class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC,
+                RegisterClass PRC, PatFrag FOp>:
+  FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index),
+           !strconcat(opstr, "\t$fd, $index($base)"),
+           [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> {
+  let fs = 0;
+}
+
+// FP indexed store.
+class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
+                 RegisterClass PRC, PatFrag FOp>:
+  FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index),
+           !strconcat(opstr, "\t$fs, $index($base)"),
+           [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> {
+  let fd = 0;
+}
+
 // Instructions that convert an FP value to 32-bit fixed point.
 multiclass FFR1_W_M<bits<6> funct, string opstr> {
   def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
@@ -240,6 +258,34 @@ let Predicates = [NotN64, NotMips64] in {
   def SDC1 : FPStore<0x3d, "sdc1", AFGR64, mem>;
 }
 
+// Indexed loads and stores.
+let Predicates = [HasMips32r2Or64] in {
+  def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>;
+  def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>;
+  def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>;
+  def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>;
+}
+
+let Predicates = [HasMips32r2, NotMips64] in {
+  def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load_a>;
+  def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store_a>;
+}
+
+let Predicates = [HasMips64, NotN64] in {
+  def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load_a>;
+  def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store_a>;
+}
+
+// n64
+let Predicates = [IsN64] in {
+  def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>;
+  def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>;
+  def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>;
+  def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>;
+  def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>;
+  def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>;
+}
+
 /// Floating-point Aritmetic
 defm FADD : FFR2P_M<0x00, "add", fadd, 1>;
 defm FDIV : FFR2P_M<0x03, "div", fdiv>;
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 6bf8668fca0..de744b7ef99 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -308,3 +308,23 @@ class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
   let Inst{5-3} = funct;
   let Inst{2-0} = fmt;
 }
+
+// FP indexed load/store instructions.
+class FFMemIdx<bits<6> _funct, dag outs, dag ins, string asmstr,
+               list<dag> pattern> :
+  MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFI>
+{
+  bits<5> base;
+  bits<5> index;
+  bits<5> fs;
+  bits<5> fd;
+  bits<6> funct;
+
+  let Opcode = 0x13;
+
+  let Inst{25-21} = base;
+  let Inst{20-16} = index;
+  let Inst{15-11} = fs;
+  let Inst{10-6} = fd;
+  let Inst{5-0} = funct;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 3d58030c024..7b3308b5825 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -128,6 +128,7 @@ def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
 def HasMips32 : Predicate<"Subtarget.hasMips32()">;
 def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">;
 def HasMips64 : Predicate<"Subtarget.hasMips64()">;
+def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">;
 def NotMips64 : Predicate<"!Subtarget.hasMips64()">;
 def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">;
 def IsN64 : Predicate<"Subtarget.isABI_N64()">;
@@ -231,7 +232,7 @@ def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
 
 // Mips Address Mode! SDNode frameindex could possibily be a match
 // since load and store instructions from stack used it.
-def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>;
 
 //===----------------------------------------------------------------------===//
 // Pattern fragment for load/store
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 0c6dc5264aa..b501ade338b 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -112,6 +112,8 @@ public:
   bool hasMips64() const { return MipsArchVersion >= Mips64; }
   bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
 
+  bool hasMips32r2Or64() const { return hasMips32r2() || hasMips64(); }
+
   bool isLittle() const { return IsLittle; }
   bool isFP64bit() const { return IsFP64bit; }
   bool isGP64bit() const { return IsGP64bit; }
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
new file mode 100644
index 00000000000..997aa9dc83c
--- /dev/null
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -0,0 +1,59 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
+
+%struct.S = type <{ [4 x float] }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+  %arrayidx = getelementptr inbounds float* %b, i32 %o
+  %0 = load float* %arrayidx, align 4
+  ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+  %arrayidx = getelementptr inbounds double* %b, i32 %o
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+  %0 = load float* %arrayidx1, align 1
+  ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+  %0 = load float* @gf, align 4
+  %arrayidx = getelementptr inbounds float* %b, i32 %o
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+  %0 = load double* @gd, align 8
+  %arrayidx = getelementptr inbounds double* %b, i32 %o
+  store double %0, double* %arrayidx, align 8
+  ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+  %0 = load float* @gf, align 4
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+  store float %0, float* %arrayidx1, align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
new file mode 100644
index 00000000000..edeb285f92a
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
@@ -0,0 +1,67 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+%struct.S = type <{ [4 x float] }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+  %0 = load float* %arrayidx, align 4
+  ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  %0 = load float* %arrayidx2, align 1
+  ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+  %0 = load float* @gf, align 4
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+  %0 = load double* @gd, align 8
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+  store double %0, double* %arrayidx, align 8
+  ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+  %0 = load float* @gf, align 4
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  store float %0, float* %arrayidx2, align 1
+  ret void
+}