diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6c97261cf90..17ff7192b1e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1916,6 +1916,25 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, if (!MRI->hasOneNonDBGUse(Reg)) return false; + const MCInstrDesc &DefMCID = DefMI->getDesc(); + if (DefMCID.hasOptionalDef()) { + unsigned NumOps = DefMCID.getNumOperands(); + const MachineOperand &MO = DefMI->getOperand(NumOps-1); + if (MO.getReg() == ARM::CPSR && !MO.isDead()) + // If DefMI defines CPSR and it is not dead, it's obviously not safe + // to delete DefMI. + return false; + } + + const MCInstrDesc &UseMCID = UseMI->getDesc(); + if (UseMCID.hasOptionalDef()) { + unsigned NumOps = UseMCID.getNumOperands(); + if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR) + // If the instruction sets the flag, do not attempt this optimization + // since it may change the semantics of the code. + return false; + } + unsigned UseOpc = UseMI->getOpcode(); unsigned NewUseOpc = 0; uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); diff --git a/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll new file mode 100644 index 00000000000..0ff4f510eb3 --- /dev/null +++ b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s + +; ARM has a peephole optimization which looks for a def / use pair. The def +; produces a 32-bit immediate which is consumed by the use. It tries to +; fold the immediate by breaking it into two parts and folding them into the +; immediate fields of two uses. e.g. +; movw r3, #40885 +; movt r3, #46540 +; add r0, r0, r3 +; => +; add.w r0, r0, #3019898880 +; add.w r0, r0, #30146560 +; +; However, this transformation is incorrect if the user produces a flag. e.g. 
+; movw r3, #40885 +; movt r3, #46540 +; adds r0, r0, r3 +; => +; add.w r0, r0, #3019898880 +; adds.w r0, r0, #30146560 +; Note the adds.w may not set the carry flag even if the original sequence +; would. +; +; rdar://11116189 +define i64 @t(i64 %aInput) nounwind { +; CHECK: t: +; CHECK: movs [[REG:(r[0-9]+)]], #0 +; CHECK: movt [[REG]], #46540 +; CHECK: adds r{{[0-9]+}}, r{{[0-9]+}}, [[REG]] + %1 = mul i64 %aInput, 1000000 + %2 = add i64 %1, -7952618389194932224 + ret i64 %2 +}