diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index 91eb60beb6b..79f6532f1d7 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -14,6 +14,7 @@ #include "AMDGPU.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" @@ -66,6 +67,8 @@ class SIAnnotateControlFlow : public FunctionPass { DominatorTree *DT; StackVector Stack; + LoopInfo *LI; + bool isTopOfStack(BasicBlock *BB); Value *popSaved(); @@ -99,6 +102,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); @@ -277,10 +281,26 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { Term->setCondition(CallInst::Create(Loop, Arg, "", Term)); push(Term->getSuccessor(0), Arg); -} - -/// \brief Close the last opened control flow +}/// \brief Close the last opened control flow void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { + llvm::Loop *L = LI->getLoopFor(BB); + + if (L && L->getHeader() == BB) { + // We can't insert an EndCF call into a loop header, because it will + // get executed on every iteration of the loop, when it should be + // executed only once before the loop. + SmallVector Latches; + L->getLoopLatches(Latches); + + std::vector Preds; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end()) + Preds.push_back(*PI); + } + BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT, + LI, false); + } + CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt()); } @@ -288,6 +308,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { /// recognize if/then/else and loops. bool SIAnnotateControlFlow::runOnFunction(Function &F) { DT = &getAnalysis().getDomTree(); + LI = &getAnalysis().getLoopInfo(); for (df_iterator I = df_begin(&F.getEntryBlock()), E = df_end(&F.getEntryBlock()); I != E; ++I) { diff --git a/test/CodeGen/R600/endcf-loop-header.ll b/test/CodeGen/R600/endcf-loop-header.ll new file mode 100644 index 00000000000..e3c5b3c1c36 --- /dev/null +++ b/test/CodeGen/R600/endcf-loop-header.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s + +; This tests that the llvm.SI.end.cf intrinsic is not inserted into the +; loop block. This intrinsic will be lowered to s_or_b64 by the code +; generator. + +; CHECK-LABEL: {{^}}test: + +; This is was lowered from the llvm.SI.end.cf intrinsic: +; CHECK: s_or_b64 exec, exec + +; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}} +; CHECK-NOT: s_or_b64 exec, exec +; CHECK: s_cbranch_execnz [[LOOP_LABEL]] +define void @test(i32 addrspace(1)* %out, i32 %cond) { +entry: + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %if, label %loop + +if: + store i32 0, i32 addrspace(1)* %out + br label %loop + +loop: + %tmp1 = phi i32 [0, %entry], [0, %if], [%inc, %loop] + %inc = add i32 %tmp1, %cond + %tmp2 = icmp ugt i32 %inc, 10 + br i1 %tmp2, label %done, label %loop + +done: + %tmp3 = getelementptr i32 addrspace(1)* %out, i64 1 + store i32 %inc, i32 addrspace(1)* %tmp3 + ret void +}