diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c9de2a7d670..2d9223fe7e4 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -97,10 +97,6 @@ namespace { void findUsesOfImpDef(SmallVectorImpl &UsesOfImpDefs, const MemOpQueue &MemOps, unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd); - void UpdateBaseRegUses(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc dl, unsigned Base, unsigned WordOffset, - ARMCC::CondCodes Pred, unsigned PredReg); bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, @@ -311,101 +307,6 @@ static bool isi32Store(unsigned Opc) { return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); } -static unsigned getImmScale(unsigned Opc) { - switch (Opc) { - default: llvm_unreachable("Unhandled opcode!"); - case ARM::tLDRi: - case ARM::tSTRi: - return 1; - case ARM::tLDRHi: - case ARM::tSTRHi: - return 2; - case ARM::tLDRBi: - case ARM::tSTRBi: - return 4; - } -} - -/// Update future uses of the base register with the offset introduced -/// due to writeback. This function only works on Thumb1. -void -ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc dl, unsigned Base, - unsigned WordOffset, - ARMCC::CondCodes Pred, unsigned PredReg) { - assert(isThumb1 && "Can only update base register uses for Thumb1!"); - - // Start updating any instructions with immediate offsets. Insert a sub before - // the first non-updateable instruction (if any). - for (; MBBI != MBB.end(); ++MBBI) { - if (MBBI->readsRegister(Base)) { - unsigned Opc = MBBI->getOpcode(); - int Offset; - bool InsertSub = false; - - if (Opc == ARM::tLDRi || Opc == ARM::tSTRi || - Opc == ARM::tLDRHi || Opc == ARM::tSTRHi || - Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) { - // Loads and stores with immediate offsets can be updated, but only if - // the new offset isn't negative. - // The MachineOperand containing the offset immediate is the last one - // before predicates. - MachineOperand &MO = - MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); - // The offsets are scaled by 1, 2 or 4 depending on the Opcode - Offset = MO.getImm() - WordOffset * getImmScale(Opc); - if (Offset >= 0) - MO.setImm(Offset); - else - InsertSub = true; - - } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) { - // SUB/ADD using this register. Merge it with the update. - // If the merged offset is too large, insert a new sub instead. - MachineOperand &MO = - MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); - Offset = (Opc == ARM::tSUBi8) ? - MO.getImm() + WordOffset * 4 : - MO.getImm() - WordOffset * 4 ; - if (TL->isLegalAddImmediate(Offset)) { - MO.setImm(Offset); - // The base register has now been reset, so exit early. - return; - } else { - InsertSub = true; - } - - } else { - // Can't update the instruction. - InsertSub = true; - } - - if (InsertSub) { - // An instruction above couldn't be updated, so insert a sub. - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base)) - .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) - .addImm(Pred).addReg(PredReg); - return; - } - } - - if (MBBI->killsRegister(Base)) - // Register got killed. Stop updating. - return; - } - - // The end of the block was reached. This means register liveness escapes the - // block, and it's necessary to insert a sub before the last instruction. - if (MBB.succ_size() > 0) - // But only insert the SUB if there is actually a successor block. - // FIXME: Check more carefully if register is live at this point, e.g. by - // also examining the successor block's register liveness information. - AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base)) - .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) - .addImm(Pred).addReg(PredReg); -} - /// MergeOps - Create and insert a LDM or STM with Base as base register and /// registers in Regs as the register operands that would be loaded / stored. /// It returns true if the transformation is done. @@ -512,6 +413,14 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, break; } + // If the merged instruction has writeback and the base register is not killed + // it's not safe to do the merge on Thumb1. This is because resetting the base + // register writeback by inserting a SUBS sets the condition flags. + // FIXME: Try something clever here to see if resetting the base register can + // be avoided, e.g. by updating a later ADD/SUB of the base register with the + // writeback. + if (isThumb1 && Writeback && !BaseKill) return false; + MachineInstrBuilder MIB; if (Writeback) { @@ -524,12 +433,6 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // Thumb1: we might need to set base writeback when building the MI. MIB.addReg(Base, getDefRegState(true)) .addReg(Base, getKillRegState(BaseKill)); - - // The base isn't dead after a merged instruction with writeback. Update - // future uses of the base with the added offset (if possible), or reset - // the base register as necessary. - if (!BaseKill) - UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); } else { // No writeback, simply build the MachineInstr. MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); @@ -1740,12 +1643,6 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; - // FIXME: Temporarily disabling for Thumb-1 due to miscompiles - if (isThumb1) { - delete RS; - return false; - } - bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { diff --git a/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll b/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll index ae663697ebe..18e17766313 100644 --- a/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll +++ b/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll @@ -1,12 +1,11 @@ -; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s -; XFAIL: * +; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs -o - | FileCheck %s define void @foo(i32* %A) #0 { entry: ; CHECK-LABEL: foo: ; CHECK: push {r7, lr} -; CHECK: ldm [[REG0:r[0-9]]]!, -; CHECK-NEXT: subs [[REG0]] +; CHECK: ldr +; CHECK-NEXT: ldr ; CHECK-NEXT: bl %0 = load i32* %A, align 4 %arrayidx1 = getelementptr inbounds i32* %A, i32 1 diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll index 6c6de55347a..706a21350d0 100644 --- a/test/CodeGen/Thumb/dyn-stackalloc.ll +++ b/test/CodeGen/Thumb/dyn-stackalloc.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s -; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } diff --git a/test/CodeGen/Thumb/ldm-merge-call.ll b/test/CodeGen/Thumb/ldm-merge-call.ll new file mode 100644 index 00000000000..febc96b07ce --- /dev/null +++ b/test/CodeGen/Thumb/ldm-merge-call.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m--linux-gnueabi" + +; Function Attrs: nounwind optsize +define void @foo(i32* nocapture readonly %A) #0 { +entry: +; CHECK-LABEL: foo: +; CHECK: ldm r[[BASE:[0-9]]]!, +; CHECK-NEXT: mov r[[BASE]], + %0 = load i32* %A, align 4 + %arrayidx1 = getelementptr inbounds i32* %A, i32 1 + %1 = load i32* %arrayidx1, align 4 + %call = tail call i32 @bar(i32 %0, i32 %1, i32 %0, i32 %1) #2 + %call2 = tail call i32 @bar(i32 %0, i32 %1, i32 %0, i32 %1) #2 + ret void +} + +; Function Attrs: optsize +declare i32 @bar(i32, i32, i32, i32) #1 + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind optsize } diff --git a/test/CodeGen/Thumb/ldm-merge-struct.ll b/test/CodeGen/Thumb/ldm-merge-struct.ll new file mode 100644 index 00000000000..2f732e00bf7 --- /dev/null +++ b/test/CodeGen/Thumb/ldm-merge-struct.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m-none--eabi" + +%struct.S = type { i32, i32 } + +@s = common global %struct.S zeroinitializer, align 4 + +define i32 @f() { +entry: +; CHECK-LABEL: f: +; CHECK: ldm r[[BASE:[0-9]]], +; CHECK-NEXT-NOT: subs r[[BASE]] + %0 = load i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 0), align 4 + %1 = load i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 1), align 4 + %cmp = icmp sgt i32 %0, %1 + %2 = sub i32 0, %1 + %cond.p = select i1 %cmp, i32 %1, i32 %2 + %cond = add i32 %cond.p, %0 + ret i32 %cond +} diff --git a/test/CodeGen/Thumb/stm-merge.ll b/test/CodeGen/Thumb/stm-merge.ll new file mode 100644 index 00000000000..76e71f4da65 --- /dev/null +++ b/test/CodeGen/Thumb/stm-merge.ll @@ -0,0 +1,40 @@ +; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv6m--linux-gnueabi" + +@d = internal unnamed_addr global i32 0, align 4 +@c = internal global i32* null, align 4 +@e = internal unnamed_addr global i32* null, align 4 + +; Function Attrs: nounwind optsize +define void @fn1() #0 { +entry: +; CHECK-LABEL: fn1: +; CHECK: stm r[[BASE:[0-9]]]!, {{.*}} +; CHECK-NOT: {{.*}} r[[BASE]] +; CHECK: ldr r[[BASE]], {{.*}} + %g = alloca i32, align 4 + %h = alloca i32, align 4 + store i32 1, i32* %g, align 4 + store i32 0, i32* %h, align 4 + %.pr = load i32* @d, align 4 + %cmp11 = icmp slt i32 %.pr, 1 + br i1 %cmp11, label %for.inc.lr.ph, label %for.body5 + +for.inc.lr.ph: ; preds = %entry + store i32 1, i32* @d, align 4 + br label %for.body5 + +for.body5: ; preds = %entry, %for.inc.lr.ph, %for.body5 + %f.010 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.inc.lr.ph ], [ 0, %entry ] + store volatile i32* %g, i32** @c, align 4 + %inc7 = add nsw i32 %f.010, 1 + %exitcond = icmp eq i32 %inc7, 2 + br i1 %exitcond, label %for.end8, label %for.body5 + +for.end8: ; preds = %for.body5 + store i32* %h, i32** @e, align 4 + ret void +} + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Thumb/thumb-ldm.ll b/test/CodeGen/Thumb/thumb-ldm.ll index 95f3edc9c4c..dd98e6f300e 100644 --- a/test/CodeGen/Thumb/thumb-ldm.ll +++ b/test/CodeGen/Thumb/thumb-ldm.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s -; XFAIL: * @X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] diff --git a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll index dedc82bf68c..e1e38739f1d 100644 --- a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll +++ b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll @@ -1,5 +1,4 @@ ; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -; XFAIL: * @d = external global [64 x i32] @s = external global [64 x i32] @@ -7,29 +6,19 @@ ; Function Attrs: nounwind define void @t1() #0 { entry: -; CHECK: ldr [[REG0:r[0-9]]], -; CHECK: ldm [[REG0]]!, -; CHECK: ldr [[REG1:r[0-9]]], -; CHECK: stm [[REG1]]!, -; CHECK: subs [[REG0]], #32 -; CHECK-NEXT: ldrb -; CHECK: subs [[REG1]], #32 -; CHECK-NEXT: strb - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 33, i32 4, i1 false) +; CHECK-LABEL: t1 +; CHECK-NOT: ldm +; CHECK-NOT: stm + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false) ret void } ; Function Attrs: nounwind define void @t2() #0 { entry: -; CHECK: ldr [[REG0:r[0-9]]], -; CHECK: ldm [[REG0]]!, -; CHECK: ldr [[REG1:r[0-9]]], -; CHECK: stm [[REG1]]!, -; CHECK: ldrh -; CHECK: ldrb -; CHECK: strb -; CHECK: strh +; CHECK-LABEL: t2: +; CHECK-NOT: ldm +; CHECK-NOT: stm tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false) ret void }