From 7043c7a35ecdbe1bb9ac9dc590233c3057cad674 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 2 Dec 2013 17:29:37 +0000 Subject: [PATCH] R600: Workaround for cayman loop bug git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196121 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/R600ControlFlowFinalizer.cpp | 10 ++++++ lib/Target/R600/R600Instructions.td | 4 +++ test/CodeGen/R600/cayman-loop-bug.ll | 32 ++++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 test/CodeGen/R600/cayman-loop-bug.ll diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index ac3d8f63d57..ec39e097ba9 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -332,6 +332,7 @@ public: unsigned MaxStack = 0; unsigned CurrentStack = 0; + unsigned CurrentLoopDepth = 0; bool HasPush = false; for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; ++MB) { @@ -370,6 +371,13 @@ public: CurrentStack++; MaxStack = std::max(MaxStack, CurrentStack); HasPush = true; + if (ST.hasCaymanISA() && CurrentLoopDepth > 1) { + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM)) + .addImm(CfCount + 1) + .addImm(1); + MI->setDesc(TII->get(AMDGPU::CF_ALU)); + CfCount++; + } case AMDGPU::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); @@ -378,6 +386,7 @@ public: break; case AMDGPU::WHILELOOP: { CurrentStack+=4; + CurrentLoopDepth++; MaxStack = std::max(MaxStack, CurrentStack); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_WHILE_LOOP)) @@ -392,6 +401,7 @@ public: } case AMDGPU::ENDLOOP: { CurrentStack-=4; + CurrentLoopDepth--; std::pair<unsigned, std::set<MachineInstr *> > Pair = LoopStack.back(); LoopStack.pop_back(); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 0346e24ab77..2249ceec3c0 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ 
-1867,6 +1867,10 @@ def : Pat < let COUNT = 0; } + def CF_PUSH_CM : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "PUSH @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> : diff --git a/test/CodeGen/R600/cayman-loop-bug.ll b/test/CodeGen/R600/cayman-loop-bug.ll new file mode 100644 index 00000000000..a87352895eb --- /dev/null +++ b/test/CodeGen/R600/cayman-loop-bug.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s + +; CHECK-LABEL: @main +; CHECK: LOOP_START_DX10 +; CHECK: ALU_PUSH_BEFORE +; CHECK: LOOP_START_DX10 +; CHECK: PUSH +; CHECK-NOT: ALU_PUSH_BEFORE +; CHECK: END_LOOP +; CHECK: END_LOOP +define void @main (<4 x float> inreg %reg0) #0 { +entry: + br label %outer_loop +outer_loop: + %cnt = phi i32 [0, %entry], [%cnt_incr, %inner_loop] + %cond = icmp eq i32 %cnt, 16 + br i1 %cond, label %outer_loop_body, label %exit +outer_loop_body: + %cnt_incr = add i32 %cnt, 1 + br label %inner_loop +inner_loop: + %cnt2 = phi i32 [0, %outer_loop_body], [%cnt2_incr, %inner_loop_body] + %cond2 = icmp eq i32 %cnt2, 16 + br i1 %cond, label %inner_loop_body, label %outer_loop +inner_loop_body: + %cnt2_incr = add i32 %cnt2, 1 + br label %inner_loop +exit: + ret void +} + +attributes #0 = { "ShaderType"="0" } \ No newline at end of file