diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a250a88c994..1f9b4156b9c 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2362,7 +2362,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // TODO: For now, just add the min and max offset, because it usually isn't // worthwhile looking at everything inbetween. - SmallVector Worklist; + SmallVector Worklist; Worklist.push_back(LU.MinOffset); if (LU.MaxOffset != LU.MinOffset) Worklist.push_back(LU.MaxOffset); @@ -2376,7 +2376,14 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, LU.AccessTy, TLI)) { - F.BaseRegs[i] = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); + // Add the offset to the base register. + const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); + // If it cancelled out, drop the base register, otherwise update it. 
+ if (NewG->isZero()) { + std::swap(F.BaseRegs[i], F.BaseRegs.back()); + F.BaseRegs.pop_back(); + } else + F.BaseRegs[i] = NewG; (void)InsertFormula(LU, LUIdx, F); } diff --git a/test/CodeGen/X86/lsr-i386.ll b/test/CodeGen/X86/lsr-i386.ll new file mode 100644 index 00000000000..02baf2072e3 --- /dev/null +++ b/test/CodeGen/X86/lsr-i386.ll @@ -0,0 +1,44 @@ +; RUN: llc -march=x86 < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" +; PR7651 + +; CHECK: align +; CHECK: align +; CHECK: align +; CHECK: movl $0, (%e +; CHECK-NEXT: addl $4, %e +; CHECK-NEXT: decl %e +; CHECK-NEXT: jne + +%struct.anon = type { [72 x i32], i32 } + +@mp2grad_ = external global %struct.anon + +define void @chomp2g_setup_(i32 %n, i32 %m) nounwind { +entry: + br label %bb1 + +bb1: ; preds = %bb6, %entry + %indvar11 = phi i32 [ %indvar.next12, %bb6 ], [ 0, %entry ] ; [#uses=2] + %tmp21 = add i32 %indvar11, 1 ; [#uses=1] + %t = load i32* getelementptr inbounds (%struct.anon* @mp2grad_, i32 0, i32 1) + %tmp15 = mul i32 %n, %t ; [#uses=1] + %tmp16 = add i32 %tmp21, %tmp15 ; [#uses=1] + %tmp17 = shl i32 %tmp16, 3 ; [#uses=1] + %tmp18 = add i32 %tmp17, -8 ; [#uses=1] + br label %bb2 + +bb2: ; preds = %bb2, %bb1 + %indvar = phi i32 [ 0, %bb1 ], [ %indvar.next, %bb2 ] ; [#uses=2] + %tmp19 = add i32 %tmp18, %indvar ; [#uses=1] + %scevgep = getelementptr %struct.anon* @mp2grad_, i32 0, i32 0, i32 %tmp19 ; [#uses=1] + store i32 0, i32* %scevgep + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + %c = icmp ne i32 %indvar.next, %m + br i1 %c, label %bb2, label %bb6 + +bb6: ; preds = %bb2 + %indvar.next12 = add i32 %indvar11, 1 ; [#uses=1] + br label %bb1 +}