diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index ea0cfe2b940..1cd42ca2ae0 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -362,6 +362,15 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
   const TargetInstrInfo &TII = *TM.getInstrInfo();
   const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
 
+  // A sub-register definition can only be rematerialized if the instruction
+  // doesn't read the other parts of the register. Otherwise it is really a
+  // read-modify-write operation on the full virtual register which cannot be
+  // moved safely.
+  unsigned Reg = MI->getOperand(0).getReg();
+  if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+      MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(Reg))
+    return false;
+
   // A load from a fixed stack slot can be rematerialized. This may be
   // redundant with subsequent checks, but it's target-independent,
   // simple, and a common case.
diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll
new file mode 100644
index 00000000000..cf45c034514
--- /dev/null
+++ b/test/CodeGen/ARM/subreg-remat.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 -pre-RA-sched=source | FileCheck %s
+target triple = "thumbv7-apple-ios"
+;
+;
+; The vector %v2 is built like this:
+;
+;   %vreg6:ssub_1<def> = VMOVSR %vreg0<kill>, pred:14, pred:%noreg, %vreg6<imp-def>; DPR_VFP2:%vreg6 GPR:%vreg0
+;   %vreg6:ssub_0<def> = VLDRS <cp#0>, 0, pred:14, pred:%noreg; mem:LD4[ConstantPool] DPR_VFP2:%vreg6
+;
+; When %vreg6 spills, the VLDRS constant pool load cannot be rematerialized
+; since it implicitly reads the ssub_1 sub-register.
+;
+; CHECK: f1
+; CHECK: vmov s1, r0
+; CHECK: vldr.32 s0, LCPI
+; The vector must be spilled:
+; CHECK: vstr.64 d0,
+; CHECK: asm clobber d0
+; And reloaded after the asm:
+; CHECK: vldr.64 [[D16:d[0-9]+]],
+; CHECK: vstr.64 [[D16]], [r1]
+define void @f1(float %x, <2 x float>* %p) {
+  %v1 = insertelement <2 x float> undef, float %x, i32 1
+  %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 0
+  %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind
+  store <2 x float> %v2, <2 x float>* %p, align 8
+  ret void
+}
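
The guard added above rejects rematerialization whenever the destination operand is a sub-register def that still reads its own virtual register. Below is a minimal standalone C++ sketch of that predicate under simplified assumptions: Operand, Instr, and passesSubRegRematCheck are hypothetical stand-ins, not LLVM's MachineOperand/MachineInstr API, and the real isReallyTriviallyReMaterializableGeneric performs many additional checks beyond this one.

    // Standalone sketch; Operand/Instr are hypothetical, not LLVM types.
    #include <cstdio>
    #include <vector>

    struct Operand {
      unsigned Reg;    // virtual register number
      unsigned SubReg; // sub-register index, 0 = full register
      bool IsDef;      // definition rather than use
      bool IsUndef;    // sub-reg def: the untouched lanes are undefined
    };

    struct Instr {
      std::vector<Operand> Operands; // Operands[0] is the destination

      // Loosely mirrors MachineInstr::readsVirtualRegister: a use reads the
      // register, and so does a sub-register def unless the rest of the
      // register is marked undef (then no live lanes are read back).
      bool readsVirtualRegister(unsigned Reg) const {
        for (const Operand &MO : Operands)
          if (MO.Reg == Reg && (!MO.IsDef || (MO.SubReg != 0 && !MO.IsUndef)))
            return true;
        return false;
      }
    };

    // The guard from the patch: a partial redefinition is really a
    // read-modify-write of the full register and must not be moved.
    bool passesSubRegRematCheck(const Instr &MI) {
      const Operand &Def = MI.Operands[0];
      return !(Def.SubReg != 0 && MI.readsVirtualRegister(Def.Reg));
    }

    int main() {
      // First write to %vreg6 (ssub_1), modeled here with an undef flag on
      // the remaining lanes: nothing live is read back.
      Instr VMOVSR{{{/*Reg=*/6, /*SubReg=*/2, /*IsDef=*/true, /*IsUndef=*/true}}};
      // Partial redef of %vreg6 (ssub_0) while ssub_1 is live: a
      // read-modify-write, like the VLDRS in the test above.
      Instr VLDRS{{{/*Reg=*/6, /*SubReg=*/1, /*IsDef=*/true, /*IsUndef=*/false}}};

      std::printf("VMOVSR-style def passes: %d\n", passesSubRegRematCheck(VMOVSR)); // 1
      std::printf("VLDRS-style redef passes: %d\n", passesSubRegRematCheck(VLDRS)); // 0
    }

Under these assumptions, the VMOVSR-style first write passes the guard, while the VLDRS-style partial redefinition is flagged as a read-modify-write and kept out of the rematerialization candidate set, matching the spill-and-reload behaviour the FileCheck lines in subreg-remat.ll pin down.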