mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-03-17 05:31:32 +00:00
StructurizeCFG: Use a reverse post-order traversal
We were previously doing a post-order traversal and operating on the list in reverse; however, this would occasionally cause backedges for loops to be visited before some of the other blocks in the loop. We now use a reverse post-order traversal, which avoids this issue. The reverse post-order traversal is not completely ideal, so we need to manually fix up the list to ensure that inner loop backedges are visited before outer loop backedges. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228186 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1d75b286e6
commit
7c038bc15f
@ -10,12 +10,14 @@
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/ADT/SCCIterator.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/RegionInfo.h"
|
||||
#include "llvm/Analysis/RegionIterator.h"
|
||||
#include "llvm/Analysis/RegionPass.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Transforms/Utils/SSAUpdater.h"
|
||||
|
||||
using namespace llvm;
|
||||
@ -281,11 +283,65 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
|
||||
|
||||
/// \brief Build up the general order of nodes
|
||||
void StructurizeCFG::orderNodes() {
|
||||
scc_iterator<Region *> I = scc_begin(ParentRegion);
|
||||
for (Order.clear(); !I.isAtEnd(); ++I) {
|
||||
const std::vector<RegionNode *> &Nodes = *I;
|
||||
Order.append(Nodes.begin(), Nodes.end());
|
||||
RNVector TempOrder;
|
||||
ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
|
||||
TempOrder.append(RPOT.begin(), RPOT.end());
|
||||
|
||||
std::map<Loop*, unsigned> LoopBlocks;
|
||||
|
||||
|
||||
// The reverse post-order traversal of the list gives us an ordering close
|
||||
// to what we want. The only problem with it is that sometimes backedges
|
||||
// for outer loops will be visited before backedges for inner loops.
|
||||
for (RegionNode *RN : TempOrder) {
|
||||
BasicBlock *BB = RN->getEntry();
|
||||
Loop *Loop = LI->getLoopFor(BB);
|
||||
if (!LoopBlocks.count(Loop)) {
|
||||
LoopBlocks[Loop] = 1;
|
||||
continue;
|
||||
}
|
||||
LoopBlocks[Loop]++;
|
||||
}
|
||||
|
||||
unsigned CurrentLoopDepth = 0;
|
||||
Loop *CurrentLoop = nullptr;
|
||||
BBSet TempVisited;
|
||||
for (RNVector::iterator I = TempOrder.begin(), E = TempOrder.end(); I != E; ++I) {
|
||||
BasicBlock *BB = (*I)->getEntry();
|
||||
unsigned LoopDepth = LI->getLoopDepth(BB);
|
||||
|
||||
if (std::find(Order.begin(), Order.end(), *I) != Order.end())
|
||||
continue;
|
||||
|
||||
if (LoopDepth < CurrentLoopDepth) {
|
||||
// Make sure we have visited all blocks in this loop before moving back to
|
||||
// the outer loop.
|
||||
|
||||
RNVector::iterator LoopI = I;
|
||||
while(LoopBlocks[CurrentLoop]) {
|
||||
LoopI++;
|
||||
BasicBlock *LoopBB = (*LoopI)->getEntry();
|
||||
if (LI->getLoopFor(LoopBB) == CurrentLoop) {
|
||||
LoopBlocks[CurrentLoop]--;
|
||||
Order.push_back(*LoopI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CurrentLoop = LI->getLoopFor(BB);
|
||||
if (CurrentLoop) {
|
||||
LoopBlocks[CurrentLoop]--;
|
||||
}
|
||||
|
||||
CurrentLoopDepth = LoopDepth;
|
||||
Order.push_back(*I);
|
||||
}
|
||||
|
||||
// This pass originally used a post-order traversal and then operated on
|
||||
// the list in reverse. Now that we are using a reverse post-order traversal,
|
||||
// rather than re-working the whole pass to operate on the list in order,
|
||||
// we just reverse the list and continue to operate on it in reverse.
|
||||
std::reverse(Order.begin(), Order.end());
|
||||
}
|
||||
|
||||
/// \brief Determine the end of the loops
|
||||
@ -441,6 +497,10 @@ void StructurizeCFG::collectInfos() {
|
||||
for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
|
||||
OI != OE; ++OI) {
|
||||
|
||||
DEBUG(dbgs() << "Visiting: " <<
|
||||
((*OI)->isSubRegion() ? "SubRegion with entry: " : "") <<
|
||||
(*OI)->getEntry()->getName() << " Loop Depth: " << LI->getLoopDepth((*OI)->getEntry()) << "\n");
|
||||
|
||||
// Analyze all the conditions leading to a node
|
||||
gatherPredicates(*OI);
|
||||
|
||||
|
79
test/Transforms/StructurizeCFG/nested-loop-order.ll
Normal file
79
test/Transforms/StructurizeCFG/nested-loop-order.ll
Normal file
@ -0,0 +1,79 @@
|
||||
; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
|
||||
|
||||
define void @main(float addrspace(1)* %out) {
|
||||
|
||||
; CHECK: main_body:
|
||||
; CHECK: br label %LOOP.outer
|
||||
main_body:
|
||||
br label %LOOP.outer
|
||||
|
||||
; CHECK: LOOP.outer:
|
||||
; CHECK: br label %LOOP
|
||||
LOOP.outer: ; preds = %ENDIF28, %main_body
|
||||
%temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ]
|
||||
%temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ]
|
||||
br label %LOOP
|
||||
|
||||
; CHECK: LOOP:
|
||||
; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow
|
||||
LOOP: ; preds = %IF29, %LOOP.outer
|
||||
%temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ]
|
||||
%tmp20 = add i32 %temp4.0, 1
|
||||
%tmp22 = icmp sgt i32 %tmp20, 3
|
||||
br i1 %tmp22, label %ENDLOOP, label %ENDIF
|
||||
|
||||
; CHECK: Flow3
|
||||
; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer
|
||||
|
||||
; CHECK: ENDLOOP:
|
||||
; CHECK: ret void
|
||||
ENDLOOP: ; preds = %ENDIF28, %IF29, %LOOP
|
||||
%temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ]
|
||||
%tmp23 = icmp eq i32 %tmp20, 3
|
||||
%.45 = select i1 %tmp23, float 0.000000e+00, float 1.000000e+00
|
||||
store float %.45, float addrspace(1)* %out
|
||||
ret void
|
||||
|
||||
; CHECK: ENDIF:
|
||||
; CHECK: br i1 %tmp31, label %IF29, label %Flow1
|
||||
ENDIF: ; preds = %LOOP
|
||||
%tmp31 = icmp sgt i32 %tmp20, 1
|
||||
br i1 %tmp31, label %IF29, label %ENDIF28
|
||||
|
||||
; CHECK: Flow:
|
||||
; CHECK br i1 %{{[0-9]+}}, label %Flow, label %LOOP
|
||||
|
||||
; CHECK: IF29:
|
||||
; CHECK: br label %Flow1
|
||||
IF29: ; preds = %ENDIF
|
||||
%tmp32 = icmp sgt i32 %tmp20, 2
|
||||
br i1 %tmp32, label %ENDLOOP, label %LOOP
|
||||
|
||||
; CHECK: Flow1:
|
||||
; CHECK: br label %Flow
|
||||
|
||||
; CHECK: Flow2:
|
||||
; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3
|
||||
|
||||
; CHECK: ENDIF28:
|
||||
; CHECK: br label %Flow3
|
||||
ENDIF28: ; preds = %ENDIF
|
||||
%tmp35 = fadd float %temp8.0.ph, 1.0
|
||||
%tmp36 = icmp sgt i32 %tmp20, 2
|
||||
br i1 %tmp36, label %ENDLOOP, label %LOOP.outer
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.AMDIL.clamp.(float, float, float) #2
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { readnone }
|
||||
|
||||
!0 = !{!1, !1, i64 0, i32 1}
|
||||
!1 = !{!"const", null}
|
@ -11,28 +11,29 @@ bb:
|
||||
bb3: ; preds = %bb7, %bb
|
||||
%tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
|
||||
%tmp4 = fcmp ult float %arg1, 3.500000e+00
|
||||
; CHECK: br i1 %tmp4, label %bb7, label %Flow
|
||||
; CHECK: %0 = xor i1 %tmp4, true
|
||||
; CHECK: br i1 %0, label %bb5, label %Flow
|
||||
br i1 %tmp4, label %bb7, label %bb5
|
||||
|
||||
; CHECK: Flow:
|
||||
; CHECK: br i1 %2, label %Flow1, label %bb3
|
||||
|
||||
; CHECK: Flow1:
|
||||
; CHECK: br i1 %3, label %bb5, label %bb10
|
||||
|
||||
; CHECK: bb5:
|
||||
bb5: ; preds = %bb3
|
||||
%tmp6 = fcmp olt float 0.000000e+00, %arg2
|
||||
; CHECK: br label %bb10
|
||||
; CHECK: br label %Flow
|
||||
br i1 %tmp6, label %bb10, label %bb7
|
||||
|
||||
; CHECK: Flow:
|
||||
; CHECK: br i1 %3, label %bb7, label %Flow1
|
||||
|
||||
; CHECK: bb7
|
||||
bb7: ; preds = %bb5, %bb3
|
||||
%tmp8 = add nuw nsw i64 %tmp, 1
|
||||
%tmp9 = icmp slt i64 %tmp8, 5
|
||||
; CHECK: br label %Flow
|
||||
; CHECK: br label %Flow1
|
||||
br i1 %tmp9, label %bb3, label %bb10
|
||||
|
||||
; CHECK: Flow1:
|
||||
; CHECK: br i1 %7, label %bb10, label %bb3
|
||||
|
||||
; CHECK: bb10
|
||||
bb10: ; preds = %bb7, %bb5
|
||||
%tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ]
|
||||
|
100
test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
Normal file
100
test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
Normal file
@ -0,0 +1,100 @@
|
||||
; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
|
||||
|
||||
; The structurize cfg pass used to do a post-order traversal to generate a list
|
||||
; of basic blocks and then operate on the list in reverse. This led to bugs,
|
||||
; because sometimes successors would be visited before their predecessors.
|
||||
; The fix for this was to do a reverse post-order traversal which is what the
|
||||
; algorithm requires.
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @test(float* nocapture %out, i32 %K1, float* nocapture readonly %nr) {
|
||||
|
||||
; CHECK: entry:
|
||||
; CHECK: br label %for.body
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
; CHECK: for.body:
|
||||
; CHECK: br i1 %{{[0-9]+}}, label %lor.lhs.false, label %Flow
|
||||
for.body: ; preds = %for.body.backedge, %entry
|
||||
%indvars.iv = phi i64 [ %indvars.iv.be, %for.body.backedge ], [ 1, %entry ]
|
||||
%best_val.027 = phi float [ %best_val.027.be, %for.body.backedge ], [ 5.000000e+01, %entry ]
|
||||
%prev_start.026 = phi i32 [ %tmp26, %for.body.backedge ], [ 0, %entry ]
|
||||
%best_count.025 = phi i32 [ %best_count.025.be, %for.body.backedge ], [ 0, %entry ]
|
||||
%tmp0 = trunc i64 %indvars.iv to i32
|
||||
%cmp1 = icmp eq i32 %tmp0, %K1
|
||||
br i1 %cmp1, label %if.then, label %lor.lhs.false
|
||||
|
||||
; CHECK: lor.lhs.false:
|
||||
; CHECK: br label %Flow
|
||||
lor.lhs.false: ; preds = %for.body
|
||||
%arrayidx = getelementptr inbounds float* %nr, i64 %indvars.iv
|
||||
%tmp1 = load float* %arrayidx, align 4
|
||||
%tmp2 = add nsw i64 %indvars.iv, -1
|
||||
%arrayidx2 = getelementptr inbounds float* %nr, i64 %tmp2
|
||||
%tmp3 = load float* %arrayidx2, align 4
|
||||
%cmp3 = fcmp une float %tmp1, %tmp3
|
||||
br i1 %cmp3, label %if.then, label %for.body.1
|
||||
|
||||
; CHECK: Flow:
|
||||
; CHECK: br i1 %{{[0-9]+}}, label %if.then, label %Flow1
|
||||
|
||||
; CHECK: if.then:
|
||||
; CHECK: br label %Flow1
|
||||
if.then: ; preds = %lor.lhs.false, %for.body
|
||||
%sub4 = sub nsw i32 %tmp0, %prev_start.026
|
||||
%tmp4 = add nsw i64 %indvars.iv, -1
|
||||
%arrayidx8 = getelementptr inbounds float* %nr, i64 %tmp4
|
||||
%tmp5 = load float* %arrayidx8, align 4
|
||||
br i1 %cmp1, label %for.end, label %for.body.1
|
||||
|
||||
; CHECK: for.end:
|
||||
; CHECK: ret void
|
||||
for.end: ; preds = %for.body.1, %if.then
|
||||
%best_val.0.lcssa = phi float [ %best_val.233, %for.body.1 ], [ %tmp5, %if.then ]
|
||||
store float %best_val.0.lcssa, float* %out, align 4
|
||||
ret void
|
||||
|
||||
; CHECK: Flow1
|
||||
; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2
|
||||
|
||||
; CHECK: for.body.1:
|
||||
; CHECK: br i1 %{{[0-9]+}}, label %for.body.6, label %Flow3
|
||||
for.body.1: ; preds = %if.then, %lor.lhs.false
|
||||
%best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ]
|
||||
%best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]
|
||||
%indvars.iv.next.454 = add nsw i64 %indvars.iv, 5
|
||||
%tmp22 = trunc i64 %indvars.iv.next.454 to i32
|
||||
%cmp1.5 = icmp eq i32 %tmp22, %K1
|
||||
br i1 %cmp1.5, label %for.end, label %for.body.6
|
||||
|
||||
; CHECK: Flow2:
|
||||
; CHECK: br i1 %{{[0-9]+}}, label %for.end, label %for.body
|
||||
|
||||
; CHECK: for.body.6:
|
||||
; CHECK: br i1 %cmp5.6, label %if.then6.6, label %for.body.backedge
|
||||
for.body.6: ; preds = %for.body.1
|
||||
%indvars.iv.next.559 = add nsw i64 %indvars.iv, 6
|
||||
%tmp26 = trunc i64 %indvars.iv.next.559 to i32
|
||||
%sub4.6 = sub nsw i32 %tmp26, %tmp22
|
||||
%cmp5.6 = icmp slt i32 %best_count.231, %sub4.6
|
||||
br i1 %cmp5.6, label %if.then6.6, label %for.body.backedge
|
||||
|
||||
; CHECK: if.then6.6
|
||||
; CHECK: br label %for.body.backedge
|
||||
if.then6.6: ; preds = %for.body.6
|
||||
%arrayidx8.6 = getelementptr inbounds float* %nr, i64 %indvars.iv.next.454
|
||||
%tmp29 = load float* %arrayidx8.6, align 4
|
||||
br label %for.body.backedge
|
||||
|
||||
; CHECK: Flow3:
|
||||
; CHECK: br label %Flow2
|
||||
|
||||
; CHECK: for.body.backedge:
|
||||
; CHECK: br label %Flow3
|
||||
for.body.backedge: ; preds = %if.then6.6, %for.body.6
|
||||
%best_val.027.be = phi float [ %tmp29, %if.then6.6 ], [ %best_val.233, %for.body.6 ]
|
||||
%best_count.025.be = phi i32 [ %sub4.6, %if.then6.6 ], [ %best_count.231, %for.body.6 ]
|
||||
%indvars.iv.be = add nsw i64 %indvars.iv, 7
|
||||
br label %for.body
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user