diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 79bde29cd85..7d26b9dd62d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -581,7 +581,6 @@ def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; let AddedComplexity = 10 in { -let canFoldAsLoad = 1 in def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr", [(set GPR:$dst, (load addrmodepc:$addr))]>; @@ -801,13 +800,14 @@ let isBranch = 1, isTerminator = 1 in { // // Load -let canFoldAsLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, + mayHaveSideEffects = 1 in def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", []>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index d1831d1e488..b5956a32c58 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -296,7 +296,7 @@ let isBranch = 1, isTerminator = 1 in { // Load Store Instructions. // -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; @@ -332,13 +332,14 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", ".n\t$dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, + mayHaveSideEffects = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 1bb9bfd6f5e..43ecebe7738 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -471,7 +471,7 @@ def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), // // Load -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in defm t2LDR : T2I_ld<"ldr", UnOpFrag<(load node:$Src)>>; // Loads with zero extension @@ -1183,7 +1183,7 @@ def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, // Pseudo instruction that combines ldr from constpool and add pc. This should // be expanded into two instructions late to allow if-conversion and // scheduling. -let isReMaterializable = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index ba341f487e5..f44fe65a4fe 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -54,7 +54,7 @@ def vfp_f64imm : Operand, // Load / store Instructions. // -let canFoldAsLoad = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), IIC_fpLoad64, "vldr", ".64\t$dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; diff --git a/test/CodeGen/ARM/remat-2.ll b/test/CodeGen/ARM/remat-2.ll new file mode 100644 index 00000000000..1a871d258e3 --- /dev/null +++ b/test/CodeGen/ARM/remat-2.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -stats -info-output-file - | grep "Number of re-materialization" + +define arm_apcscc i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +entry: + br i1 undef, label %smvp.exit, label %bb.i3 + +bb.i3: ; preds = %bb.i3, %bb134 + br i1 undef, label %smvp.exit, label %bb.i3 + +smvp.exit: ; preds = %bb.i3 + %0 = fmul double undef, 2.400000e-03 ; [#uses=2] + br i1 undef, label %bb138.preheader, label %bb159 + +bb138.preheader: ; preds = %smvp.exit + br label %bb138 + +bb138: ; preds = %bb138, %bb138.preheader + br i1 undef, label %bb138, label %bb145.loopexit + +bb142: ; preds = %bb.nph218.bb.nph218.split_crit_edge, %phi0.exit + %1 = fmul double undef, -1.200000e-03 ; [#uses=1] + %2 = fadd double undef, %1 ; [#uses=1] + %3 = fmul double %2, undef ; [#uses=1] + %4 = fsub double 0.000000e+00, %3 ; [#uses=1] + br i1 %14, label %phi1.exit, label %bb.i35 + +bb.i35: ; preds = %bb142 + %5 = call arm_apcscc double @sin(double %15) nounwind readonly ; [#uses=1] + %6 = fmul double %5, 0x4031740AFA84AD8A ; [#uses=1] + %7 = fsub double 1.000000e+00, undef ; [#uses=1] + %8 = fdiv double %7, 6.000000e-01 ; [#uses=1] + br label %phi1.exit + +phi1.exit: ; preds = %bb.i35, %bb142 + %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; [#uses=0] + %9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; [#uses=1] + %10 = fmul double undef, %9 ; [#uses=0] + br i1 %14, label %phi0.exit, label %bb.i + +bb.i: ; preds = %phi1.exit + unreachable + +phi0.exit: ; preds = %phi1.exit + %11 = fsub double %4, undef ; [#uses=1] + %12 = fadd double 0.000000e+00, %11 ; [#uses=1] + store double %12, double* undef, align 4 + br label %bb142 + +bb145.loopexit: ; preds = %bb138 + br i1 undef, label %bb.nph218.bb.nph218.split_crit_edge, label %bb159 + +bb.nph218.bb.nph218.split_crit_edge: ; preds = %bb145.loopexit + %13 = fmul double %0, 0x401921FB54442D18 ; [#uses=1] + %14 = fcmp ugt double %0, 6.000000e-01 ; [#uses=2] + %15 = fdiv double %13, 6.000000e-01 ; [#uses=1] + br label %bb142 + +bb159: ; preds = %bb145.loopexit, %smvp.exit, %bb134 + unreachable + +bb166: ; preds = %bb127 + unreachable +} + +declare arm_apcscc double @sin(double) nounwind readonly diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll index eefbae53e72..607012799d9 100644 --- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll +++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 4 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 6 define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind { entry: diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index 7cbe26097b5..47d85b1aa0e 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -22,8 +22,7 @@ define arm_apcscc %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind { entry: -; CHECK: ldr.w r9, [r7, #+32] -; CHECK-NEXT : str.w r9, [sp, #+28] +; CHECK: ldr.w r9, [r7, #+28] %xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] %ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] br i1 false, label %bb, label %bb20 @@ -53,7 +52,6 @@ bb420: ; preds = %bb20, %bb20 ; CHECK: str r{{[0-7]}}, [sp] ; CHECK: str r{{[0-7]}}, [sp, #+4] ; CHECK: str r{{[0-7]}}, [sp, #+8] -; CHECK: ldr r{{[0-7]}}, [sp, #+28] ; CHECK: str r{{[0-7]}}, [sp, #+24] store %union.rec* null, %union.rec** @zz_hold, align 4 store %union.rec* null, %union.rec** @zz_res, align 4