diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 65acc1d9257..a0d07726fe8 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -143,11 +143,11 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // FIXME: Use common api to estimate size. for (BasicBlock::const_iterator OI = OrigHeader->begin(), OE = OrigHeader->end(); OI != OE; ++OI) { - if (isa(OI)) - continue; // PHI nodes don't count. - if (isa(OI)) - continue; // Debug intrinsics don't count as size. - ++Size; + if (isa(OI)) + continue; // PHI nodes don't count. + if (isa(OI)) + continue; // Debug intrinsics don't count as size. + ++Size; } if (Size > MAX_HEADER_SIZE) @@ -187,13 +187,30 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { for (; PHINode *PN = dyn_cast(I); ++I) ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader)); - // For the rest of the instructions, create a clone in the OldPreHeader. + // For the rest of the instructions, either hoist to the OrigPreheader if + // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator(); - for (; I != E; ++I) { - Instruction *C = I->clone(); - C->setName(I->getName()); + while (I != E) { + Instruction *Inst = I++; + + // If the instruction's operands are invariant and it doesn't read or write + // memory, then it is safe to hoist. Doing this doesn't change the order of + // execution in the preheader, but does prevent the instruction from + // executing in each iteration of the loop. This means it is safe to hoist + // something that might trap, but isn't safe to hoist something that reads + // memory (without proving that the loop doesn't write). + if (L->hasLoopInvariantOperands(Inst) && + !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() && + !isa(Inst)) { + Inst->moveBefore(LoopEntryBranch); + continue; + } + + // Otherwise, create a duplicate of the instruction. + Instruction *C = Inst->clone(); + C->setName(Inst->getName()); C->insertBefore(LoopEntryBranch); - ValueMap[I] = C; + ValueMap[Inst] = C; } // Along with all the other instructions, we just cloned OrigHeader's diff --git a/test/Transforms/LoopRotate/basic.ll b/test/Transforms/LoopRotate/basic.ll new file mode 100644 index 00000000000..b7bcb21d56f --- /dev/null +++ b/test/Transforms/LoopRotate/basic.ll @@ -0,0 +1,35 @@ +; RUN: opt -S -loop-rotate %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +; PR5319 - The "arrayidx" gep should be hoisted, not duplicated. We should +; end up with one phi node. +define void @test1() nounwind ssp { +; CHECK: @test1 +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +; CHECK: for.body: +; CHECK-NEXT: phi i32 [ 0 +; CHECK-NEXT: store i32 0 + +for.body: ; preds = %for.cond + store i32 0, i32* %arrayidx, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} + +declare void @g(i32*) +