diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index b38f64f0609..782d2037043 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -202,6 +202,21 @@ bool MipsDAGToDAGISel:: SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { EVT ValTy = Addr.getValueType(); + // If Parent is an unaligned f32 load or store, select a (base + index) + // floating point load/store instruction (luxc1 or suxc1). + const LSBaseSDNode* LS = 0; + + if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) { + EVT VT = LS->getMemoryVT(); + + if (VT.getSizeInBits() / 8 > LS->getAlignment()) { + assert(TLI.allowsUnalignedMemoryAccesses(VT) && + "Unaligned loads/stores not supported for this type."); + if (VT == MVT::f32) + return false; + } + } + // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); @@ -259,11 +274,10 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { } } - // If an indexed load/store can be emitted, return false. - if (const LSBaseSDNode* LS = dyn_cast<LSBaseSDNode>(Parent)) - if ((LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasMips32r2Or64()) - return false; + // If an indexed floating point load/store can be emitted, return false. 
+ if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + Subtarget.hasMips32r2Or64()) + return false; } Base = Addr; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index f1b100d1bd5..725e4a61a96 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -268,7 +268,6 @@ bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { case MVT::i16: return true; case MVT::f32: - case MVT::f64: return Subtarget->hasMips32r2Or64(); default: return false; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index cb97a506f79..fe5eaeccb07 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -85,13 +85,13 @@ def fpimm0neg : PatLeaf<(fpimm), [{ // FP load. class FPLoad op, string opstr, RegisterClass RC, Operand MemOpnd>: FMem; // FP store. class FPStore op, string opstr, RegisterClass RC, Operand MemOpnd>: FMem; // FP indexed load. @@ -433,3 +433,16 @@ let Predicates = [IsFP64bit] in { def : Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; def : Pat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } + +// Patterns for unaligned floating point loads and stores. 
+let Predicates = [HasMips32r2Or64, NotN64] in { + def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; + def : Pat<(store_u FGR32:$src, CPURegs:$addr), + (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; +} + +let Predicates = [IsN64] in { + def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; + def : Pat<(store_u FGR32:$src, CPU64Regs:$addr), + (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; +} diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 3292e867095..455530389eb 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -310,7 +310,7 @@ class FFMADDSUB funct, bits<3> fmt, dag outs, dag ins, string asmstr, } // FP indexed load/store instructions. -class FFMemIdx _funct, dag outs, dag ins, string asmstr, +class FFMemIdx funct, dag outs, dag ins, string asmstr, list pattern> : MipsInst { @@ -318,7 +318,6 @@ class FFMemIdx _funct, dag outs, dag ins, string asmstr, bits<5> index; bits<5> fs; bits<5> fd; - bits<6> funct; let Opcode = 0x13; diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll index 997aa9dc83c..08bd6e72ae7 100644 --- a/test/CodeGen/Mips/fp-indexed-ls.ll +++ b/test/CodeGen/Mips/fp-indexed-ls.ll @@ -1,10 +1,14 @@ ; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s %struct.S = type <{ [4 x float] }> +%struct.S2 = type <{ [4 x double] }> +%struct.S3 = type <{ i8, float }> @s = external global [4 x %struct.S] @gf = external global float @gd = external global double +@s2 = external global [4 x %struct.S2] +@s3 = external global %struct.S3 define float @foo0(float* nocapture %b, i32 %o) nounwind readonly { entry: @@ -57,3 +61,38 @@ entry: ret void } +define double @foo6(i32 %b, i32 %c) nounwind readonly { +entry: +; CHECK: foo6 +; CHECK-NOT: ldxc1 + %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c + %0 = load double* %arrayidx1, align 1 + ret double %0 +} + 
+define void @foo7(i32 %b, i32 %c) nounwind { +entry: +; CHECK: foo7 +; CHECK-NOT: sdxc1 + %0 = load double* @gd, align 8 + %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c + store double %0, double* %arrayidx1, align 1 + ret void +} + +define float @foo8() nounwind readonly { +entry: +; CHECK: foo8 +; CHECK: luxc1 + %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1 + ret float %0 +} + +define void @foo9(float %f) nounwind { +entry: +; CHECK: foo9 +; CHECK: suxc1 + store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1 + ret void +} + diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll index edeb285f92a..09745fb8f61 100644 --- a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll +++ b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll @@ -1,10 +1,14 @@ ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s %struct.S = type <{ [4 x float] }> +%struct.S2 = type <{ [4 x double] }> +%struct.S3 = type <{ i8, float }> @s = external global [4 x %struct.S] @gf = external global float @gd = external global double +@s2 = external global [4 x %struct.S2] +@s3 = external global %struct.S3 define float @foo0(float* nocapture %b, i32 %o) nounwind readonly { entry: @@ -65,3 +69,42 @@ entry: ret void } +define double @foo6(i32 %b, i32 %c) nounwind readonly { +entry: +; CHECK: foo6 +; CHECK-NOT: ldxc1 + %idxprom = zext i32 %c to i64 + %idxprom1 = zext i32 %b to i64 + %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom + %0 = load double* %arrayidx2, align 1 + ret double %0 +} + +define void @foo7(i32 %b, i32 %c) nounwind { +entry: +; CHECK: foo7 +; CHECK-NOT: sdxc1 + %0 = load double* @gd, align 8 + %idxprom = zext i32 %c to i64 + %idxprom1 = zext i32 %b to i64 + %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom + store double %0, 
double* %arrayidx2, align 1 + ret void +} + +define float @foo8() nounwind readonly { +entry: +; CHECK: foo8 +; CHECK: luxc1 + %0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1 + ret float %0 +} + +define void @foo9(float %f) nounwind { +entry: +; CHECK: foo9 +; CHECK: suxc1 + store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1 + ret void +} +