StructurizeCFG: Use a reverse post-order traversal

We were previously doing a post-order traversal and operating on the
list in reverse, however this would occasionaly cause backedges for
loops to be visited before some of the other blocks in the loop.

We know use a reverse post-order traversal, which avoids this issue.

The reverse post-order traversal is not completely ideal, so we need
to manually fixup the list to ensure that inner loop backedges are
visited before outer loop backedges.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228186 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2015-02-04 20:49:44 +00:00
parent 1d75b286e6
commit 7c038bc15f
4 changed files with 253 additions and 13 deletions

View File

@ -10,12 +10,14 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@ -281,11 +283,65 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
/// \brief Build up the general order of nodes
void StructurizeCFG::orderNodes() {
scc_iterator<Region *> I = scc_begin(ParentRegion);
for (Order.clear(); !I.isAtEnd(); ++I) {
const std::vector<RegionNode *> &Nodes = *I;
Order.append(Nodes.begin(), Nodes.end());
RNVector TempOrder;
ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
TempOrder.append(RPOT.begin(), RPOT.end());
std::map<Loop*, unsigned> LoopBlocks;
// The reverse post-order traversal of the list gives us an ordering close
// to what we want. The only problem with it is that sometimes backedges
// for outer loops will be visited before backedges for inner loops.
for (RegionNode *RN : TempOrder) {
BasicBlock *BB = RN->getEntry();
Loop *Loop = LI->getLoopFor(BB);
if (!LoopBlocks.count(Loop)) {
LoopBlocks[Loop] = 1;
continue;
}
LoopBlocks[Loop]++;
}
unsigned CurrentLoopDepth = 0;
Loop *CurrentLoop = nullptr;
BBSet TempVisited;
for (RNVector::iterator I = TempOrder.begin(), E = TempOrder.end(); I != E; ++I) {
BasicBlock *BB = (*I)->getEntry();
unsigned LoopDepth = LI->getLoopDepth(BB);
if (std::find(Order.begin(), Order.end(), *I) != Order.end())
continue;
if (LoopDepth < CurrentLoopDepth) {
// Make sure we have visited all blocks in this loop before moving back to
// the outer loop.
RNVector::iterator LoopI = I;
while(LoopBlocks[CurrentLoop]) {
LoopI++;
BasicBlock *LoopBB = (*LoopI)->getEntry();
if (LI->getLoopFor(LoopBB) == CurrentLoop) {
LoopBlocks[CurrentLoop]--;
Order.push_back(*LoopI);
}
}
}
CurrentLoop = LI->getLoopFor(BB);
if (CurrentLoop) {
LoopBlocks[CurrentLoop]--;
}
CurrentLoopDepth = LoopDepth;
Order.push_back(*I);
}
// This pass originally used a post-order traversal and then operated on
// the list in reverse. Now that we are using a reverse post-order traversal
// rather than re-working the whole pass to operate on the list in order,
// we just reverse the list and continue to operate on it in reverse.
std::reverse(Order.begin(), Order.end());
}
/// \brief Determine the end of the loops
@ -441,6 +497,10 @@ void StructurizeCFG::collectInfos() {
for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
OI != OE; ++OI) {
DEBUG(dbgs() << "Visiting: " <<
((*OI)->isSubRegion() ? "SubRegion with entry: " : "") <<
(*OI)->getEntry()->getName() << " Loop Depth: " << LI->getLoopDepth((*OI)->getEntry()) << "\n");
// Analyze all the conditions leading to a node
gatherPredicates(*OI);

View File

@ -0,0 +1,79 @@
; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
define void @main(float addrspace(1)* %out) {
; CHECK: main_body:
; CHECK: br label %LOOP.outer
main_body:
br label %LOOP.outer
; CHECK: LOOP.outer:
; CHECK: br label %LOOP
LOOP.outer: ; preds = %ENDIF28, %main_body
%temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ]
%temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ]
br label %LOOP
; CHECK: LOOP:
; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow
LOOP: ; preds = %IF29, %LOOP.outer
%temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ]
%tmp20 = add i32 %temp4.0, 1
%tmp22 = icmp sgt i32 %tmp20, 3
br i1 %tmp22, label %ENDLOOP, label %ENDIF
; CHECK: Flow3
; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer
; CHECK: ENDLOOP:
; CHECK: ret void
ENDLOOP: ; preds = %ENDIF28, %IF29, %LOOP
%temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ]
%tmp23 = icmp eq i32 %tmp20, 3
%.45 = select i1 %tmp23, float 0.000000e+00, float 1.000000e+00
store float %.45, float addrspace(1)* %out
ret void
; CHECK: ENDIF:
; CHECK: br i1 %tmp31, label %IF29, label %Flow1
ENDIF: ; preds = %LOOP
%tmp31 = icmp sgt i32 %tmp20, 1
br i1 %tmp31, label %IF29, label %ENDIF28
; CHECK: Flow:
; CHECK br i1 %{{[0-9]+}}, label %Flow, label %LOOP
; CHECK: IF29:
; CHECK: br label %Flow1
IF29: ; preds = %ENDIF
%tmp32 = icmp sgt i32 %tmp20, 2
br i1 %tmp32, label %ENDLOOP, label %LOOP
; CHECK: Flow1:
; CHECK: br label %Flow
; CHECK: Flow2:
; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3
; CHECK: ENDIF28:
; CHECK: br label %Flow3
ENDIF28: ; preds = %ENDIF
%tmp35 = fadd float %temp8.0.ph, 1.0
%tmp36 = icmp sgt i32 %tmp20, 2
br i1 %tmp36, label %ENDLOOP, label %LOOP.outer
}
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", null}

View File

@ -11,28 +11,29 @@ bb:
bb3: ; preds = %bb7, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
%tmp4 = fcmp ult float %arg1, 3.500000e+00
; CHECK: br i1 %tmp4, label %bb7, label %Flow
; CHECK: %0 = xor i1 %tmp4, true
; CHECK: br i1 %0, label %bb5, label %Flow
br i1 %tmp4, label %bb7, label %bb5
; CHECK: Flow:
; CHECK: br i1 %2, label %Flow1, label %bb3
; CHECK: Flow1:
; CHECK: br i1 %3, label %bb5, label %bb10
; CHECK: bb5:
bb5: ; preds = %bb3
%tmp6 = fcmp olt float 0.000000e+00, %arg2
; CHECK: br label %bb10
; CHECK: br label %Flow
br i1 %tmp6, label %bb10, label %bb7
; CHECK: Flow:
; CHECK: br i1 %3, label %bb7, label %Flow1
; CHECK: bb7
bb7: ; preds = %bb5, %bb3
%tmp8 = add nuw nsw i64 %tmp, 1
%tmp9 = icmp slt i64 %tmp8, 5
; CHECK: br label %Flow
; CHECK: br label %Flow1
br i1 %tmp9, label %bb3, label %bb10
; CHECK: Flow1:
; CHECK: br i1 %7, label %bb10, label %bb3
; CHECK: bb10
bb10: ; preds = %bb7, %bb5
%tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ]

View File

@ -0,0 +1,100 @@
; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
; The structurize cfg pass used to do a post-order traversal to generate a list
; of ; basic blocks and then operate on the list in reverse. This led to bugs,
; because sometimes successors would be visited before their predecessors.
; The fix for this was to do a reverse post-order traversal which is what the
; algorithm requires.
; Function Attrs: nounwind
define void @test(float* nocapture %out, i32 %K1, float* nocapture readonly %nr) {
; CHECK: entry:
; CHECK: br label %for.body
entry:
br label %for.body
; CHECK: for.body:
; CHECK: br i1 %{{[0-9]+}}, label %lor.lhs.false, label %Flow
for.body: ; preds = %for.body.backedge, %entry
%indvars.iv = phi i64 [ %indvars.iv.be, %for.body.backedge ], [ 1, %entry ]
%best_val.027 = phi float [ %best_val.027.be, %for.body.backedge ], [ 5.000000e+01, %entry ]
%prev_start.026 = phi i32 [ %tmp26, %for.body.backedge ], [ 0, %entry ]
%best_count.025 = phi i32 [ %best_count.025.be, %for.body.backedge ], [ 0, %entry ]
%tmp0 = trunc i64 %indvars.iv to i32
%cmp1 = icmp eq i32 %tmp0, %K1
br i1 %cmp1, label %if.then, label %lor.lhs.false
; CHECK: lor.lhs.false:
; CHECK: br label %Flow
lor.lhs.false: ; preds = %for.body
%arrayidx = getelementptr inbounds float* %nr, i64 %indvars.iv
%tmp1 = load float* %arrayidx, align 4
%tmp2 = add nsw i64 %indvars.iv, -1
%arrayidx2 = getelementptr inbounds float* %nr, i64 %tmp2
%tmp3 = load float* %arrayidx2, align 4
%cmp3 = fcmp une float %tmp1, %tmp3
br i1 %cmp3, label %if.then, label %for.body.1
; CHECK: Flow:
; CHECK: br i1 %{{[0-9]+}}, label %if.then, label %Flow1
; CHECK: if.then:
; CHECK: br label %Flow1
if.then: ; preds = %lor.lhs.false, %for.body
%sub4 = sub nsw i32 %tmp0, %prev_start.026
%tmp4 = add nsw i64 %indvars.iv, -1
%arrayidx8 = getelementptr inbounds float* %nr, i64 %tmp4
%tmp5 = load float* %arrayidx8, align 4
br i1 %cmp1, label %for.end, label %for.body.1
; CHECK: for.end:
; CHECK: ret void
for.end: ; preds = %for.body.1, %if.then
%best_val.0.lcssa = phi float [ %best_val.233, %for.body.1 ], [ %tmp5, %if.then ]
store float %best_val.0.lcssa, float* %out, align 4
ret void
; CHECK: Flow1
; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2
; CHECK: for.body.1:
; CHECK: br i1 %{{[0-9]+}}, label %for.body.6, label %Flow3
for.body.1: ; preds = %if.then, %lor.lhs.false
%best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ]
%best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]
%indvars.iv.next.454 = add nsw i64 %indvars.iv, 5
%tmp22 = trunc i64 %indvars.iv.next.454 to i32
%cmp1.5 = icmp eq i32 %tmp22, %K1
br i1 %cmp1.5, label %for.end, label %for.body.6
; CHECK: Flow2:
; CHECK: br i1 %{{[0-9]+}}, label %for.end, label %for.body
; CHECK: for.body.6:
; CHECK: br i1 %cmp5.6, label %if.then6.6, label %for.body.backedge
for.body.6: ; preds = %for.body.1
%indvars.iv.next.559 = add nsw i64 %indvars.iv, 6
%tmp26 = trunc i64 %indvars.iv.next.559 to i32
%sub4.6 = sub nsw i32 %tmp26, %tmp22
%cmp5.6 = icmp slt i32 %best_count.231, %sub4.6
br i1 %cmp5.6, label %if.then6.6, label %for.body.backedge
; CHECK: if.then6.6
; CHECK: br label %for.body.backedge
if.then6.6: ; preds = %for.body.6
%arrayidx8.6 = getelementptr inbounds float* %nr, i64 %indvars.iv.next.454
%tmp29 = load float* %arrayidx8.6, align 4
br label %for.body.backedge
; CHECK: Flow3:
; CHECK: br label %Flow2
; CHECK: for.body.backedge:
; CHECK: br label %Flow3
for.body.backedge: ; preds = %if.then6.6, %for.body.6
%best_val.027.be = phi float [ %tmp29, %if.then6.6 ], [ %best_val.233, %for.body.6 ]
%best_count.025.be = phi i32 [ %sub4.6, %if.then6.6 ], [ %best_count.231, %for.body.6 ]
%indvars.iv.be = add nsw i64 %indvars.iv, 7
br label %for.body
}