From aa7426070da3b74d60186763bb7c53af3e095427 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Fri, 9 Jul 2010 18:49:45 +0000
Subject: [PATCH] Fix MachineLICM to actually visit inner loops.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108001 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineLICM.cpp     | 14 ++++--
 test/CodeGen/X86/licm-nested.ll | 89 +++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+), 3 deletions(-)
 create mode 100644 test/CodeGen/X86/licm-nested.ll

diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 9b24a9a2391..709b2d1587a 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -199,9 +199,14 @@ FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
 /// loop that has a unique predecessor.
 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+  // Check whether this loop even has a unique predecessor.
+  if (!CurLoop->getLoopPredecessor())
+    return false;
+  // Ok, now check to see if any of its outer loops do.
   for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
     if (L->getLoopPredecessor())
       return false;
+  // None of them did, so this is the outermost with a unique predecessor.
   return true;
 }
 
@@ -224,14 +229,17 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
   DT = &getAnalysis<MachineDominatorTree>();
   AA = &getAnalysis<AliasAnalysis>();
 
-  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){
-    CurLoop = *I;
+  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+  while (!Worklist.empty()) {
+    CurLoop = Worklist.pop_back_val();
     CurPreheader = 0;
 
     // If this is done before regalloc, only visit outer-most preheader-sporting
     // loops.
-    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop))
+    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+      Worklist.append(CurLoop->begin(), CurLoop->end());
       continue;
+    }
 
     if (!PreRegAlloc)
       HoistRegionPostRA();
diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll
new file mode 100644
index 00000000000..332fe748f81
--- /dev/null
+++ b/test/CodeGen/X86/licm-nested.ll
@@ -0,0 +1,89 @@
+; RUN: llc -march=x86-64 < %s -stats -info-output-file - | grep machine-licm | grep 2
+
+; MachineLICM should be able to hoist the symbolic addresses out of
+; the inner loops.
+
+@main.flags = internal global [8193 x i8] zeroinitializer, align 16 ; <[8193 x i8]*> [#uses=3]
+@.str = private constant [11 x i8] c"Count: %d\0A\00" ; <[11 x i8]*> [#uses=1]
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+  %cmp = icmp eq i32 %argc, 2 ; <i1> [#uses=1]
+  br i1 %cmp, label %while.cond.preheader, label %bb.nph53
+
+while.cond.preheader: ; preds = %entry
+  %arrayidx = getelementptr inbounds i8** %argv, i64 1 ; <i8**> [#uses=1]
+  %tmp2 = load i8** %arrayidx ; <i8*> [#uses=1]
+  %call = tail call i32 @atoi(i8* %tmp2) nounwind ; <i32> [#uses=2]
+  %tobool51 = icmp eq i32 %call, 0 ; <i1> [#uses=1]
+  br i1 %tobool51, label %while.end, label %bb.nph53
+
+while.cond.loopexit: ; preds = %for.inc35
+  %indvar.next77 = add i32 %indvar76, 1 ; <i32> [#uses=2]
+  %exitcond78 = icmp eq i32 %indvar.next77, %NUM.0.ph80 ; <i1> [#uses=1]
+  br i1 %exitcond78, label %while.end, label %bb.nph
+
+bb.nph53: ; preds = %entry, %while.cond.preheader
+  %NUM.0.ph80 = phi i32 [ %call, %while.cond.preheader ], [ 17000, %entry ] ; <i32> [#uses=1]
+  br label %bb.nph
+
+bb.nph: ; preds = %while.cond.loopexit, %bb.nph53
+  %indvar76 = phi i32 [ 0, %bb.nph53 ], [ %indvar.next77, %while.cond.loopexit ] ; <i32> [#uses=1]
+  br label %for.body
+
+for.body: ; preds = %for.body, %bb.nph
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; <i64> [#uses=2]
+  %tmp = add i64 %indvar, 2 ; <i64> [#uses=1]
+  %arrayidx10 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp ; <i8*> [#uses=1]
+  store i8 1, i8* %arrayidx10
+  %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
+  %exitcond = icmp eq i64 %indvar.next, 8191 ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.body15, label %for.body
+
+for.body15: ; preds = %for.body, %for.inc35
+  %indvar57 = phi i64 [ %indvar.next58, %for.inc35 ], [ 0, %for.body ] ; <i64> [#uses=4]
+  %count.248 = phi i32 [ %count.1, %for.inc35 ], [ 0, %for.body ] ; <i32> [#uses=2]
+  %tmp68 = add i64 %indvar57, 2 ; <i64> [#uses=2]
+  %tmp70 = mul i64 %indvar57, 3 ; <i64> [#uses=1]
+  %tmp71 = add i64 %tmp70, 6 ; <i64> [#uses=1]
+  %tmp73 = shl i64 %indvar57, 1 ; <i64> [#uses=1]
+  %add = add i64 %tmp73, 4 ; <i64> [#uses=2]
+  %arrayidx17 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp68 ; <i8*> [#uses=1]
+  %tmp18 = load i8* %arrayidx17 ; <i8> [#uses=1]
+  %tobool19 = icmp eq i8 %tmp18, 0 ; <i1> [#uses=1]
+  br i1 %tobool19, label %for.inc35, label %if.then
+
+if.then: ; preds = %for.body15
+  %cmp2443 = icmp slt i64 %add, 8193 ; <i1> [#uses=1]
+  br i1 %cmp2443, label %for.body25, label %for.end32
+
+for.body25: ; preds = %if.then, %for.body25
+  %indvar55 = phi i64 [ %indvar.next56, %for.body25 ], [ 0, %if.then ] ; <i64> [#uses=2]
+  %tmp60 = mul i64 %tmp68, %indvar55 ; <i64> [#uses=2]
+  %tmp75 = add i64 %add, %tmp60 ; <i64> [#uses=1]
+  %arrayidx27 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp75 ; <i8*> [#uses=1]
+  store i8 0, i8* %arrayidx27
+  %add31 = add i64 %tmp71, %tmp60 ; <i64> [#uses=1]
+  %cmp24 = icmp slt i64 %add31, 8193 ; <i1> [#uses=1]
+  %indvar.next56 = add i64 %indvar55, 1 ; <i64> [#uses=1]
+  br i1 %cmp24, label %for.body25, label %for.end32
+
+for.end32: ; preds = %for.body25, %if.then
+  %inc34 = add nsw i32 %count.248, 1 ; <i32> [#uses=1]
+  br label %for.inc35
+
+for.inc35: ; preds = %for.body15, %for.end32
+  %count.1 = phi i32 [ %inc34, %for.end32 ], [ %count.248, %for.body15 ] ; <i32> [#uses=2]
+  %indvar.next58 = add i64 %indvar57, 1 ; <i64> [#uses=2]
+  %exitcond67 = icmp eq i64 %indvar.next58, 8191 ; <i1> [#uses=1]
+  br i1 %exitcond67, label %while.cond.loopexit, label %for.body15
+
+while.end: ; preds = %while.cond.loopexit, %while.cond.preheader
+  %count.0.lcssa = phi i32 [ 0, %while.cond.preheader ], [ %count.1, %while.cond.loopexit ] ; <i32> [#uses=1]
+  %call40 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0]
+  ret i32 0
+}
+
+declare i32 @atoi(i8* nocapture) nounwind readonly
+
+declare i32 @printf(i8* nocapture, ...) nounwind