R600: Workaround for cayman loop bug

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196121 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vincent Lejeune 2013-12-02 17:29:37 +00:00
parent cce5873de3
commit 7043c7a35e
3 changed files with 46 additions and 0 deletions

View File

@ -332,6 +332,7 @@ public:
unsigned MaxStack = 0;
unsigned CurrentStack = 0;
unsigned CurrentLoopDepth = 0;
bool HasPush = false;
for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
++MB) {
@ -370,6 +371,13 @@ public:
CurrentStack++;
MaxStack = std::max(MaxStack, CurrentStack);
HasPush = true;
if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
.addImm(CfCount + 1)
.addImm(1);
MI->setDesc(TII->get(AMDGPU::CF_ALU));
CfCount++;
}
case AMDGPU::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
@ -378,6 +386,7 @@ public:
break;
case AMDGPU::WHILELOOP: {
CurrentStack+=4;
CurrentLoopDepth++;
MaxStack = std::max(MaxStack, CurrentStack);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_WHILE_LOOP))
@ -392,6 +401,7 @@ public:
}
case AMDGPU::ENDLOOP: {
CurrentStack-=4;
CurrentLoopDepth--;
std::pair<unsigned, std::set<MachineInstr *> > Pair =
LoopStack.back();
LoopStack.pop_back();

View File

@ -1867,6 +1867,10 @@ def : Pat <
let COUNT = 0;
}
// CF_PUSH_CM: control-flow "PUSH" instruction taking two i32 immediates,
// $ADDR and $POP_COUNT, and printed as "PUSH @$ADDR POP:$POP_COUNT".
// COUNT is fixed to 0 for this definition.
// NOTE(review): the leading 11 is presumably the CF instruction encoding
// expected by CF_CLAUSE_EG -- confirm against the class definition.
def CF_PUSH_CM : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "PUSH @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :

View File

@ -0,0 +1,32 @@
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
; CHECK-LABEL: @main
; CHECK: LOOP_START_DX10
; CHECK: ALU_PUSH_BEFORE
; CHECK: LOOP_START_DX10
; CHECK: PUSH
; CHECK-NOT: ALU_PUSH_BEFORE
; CHECK: END_LOOP
; CHECK: END_LOOP
; Test input: two nested loops. %outer_loop is driven by the counter %cnt and
; %inner_loop by the counter %cnt2; the inner loop's exit edge is the outer
; loop's back edge. The argument %reg0 is not used by the body.
define void @main (<4 x float> inreg %reg0) #0 {
entry:
br label %outer_loop
outer_loop:
; Outer loop header: %cnt is 0 on entry and %cnt_incr when re-entered from
; %inner_loop. The body is taken when %cnt == 16, otherwise control goes to
; %exit.
%cnt = phi i32 [0, %entry], [%cnt_incr, %inner_loop]
%cond = icmp eq i32 %cnt, 16
br i1 %cond, label %outer_loop_body, label %exit
outer_loop_body:
%cnt_incr = add i32 %cnt, 1
br label %inner_loop
inner_loop:
; Inner loop header: %cnt2 is 0 when entered from %outer_loop_body and
; %cnt2_incr when re-entered from %inner_loop_body.
%cnt2 = phi i32 [0, %outer_loop_body], [%cnt2_incr, %inner_loop_body]
%cond2 = icmp eq i32 %cnt2, 16
; NOTE(review): %cond2 is computed above but never used; this branch tests
; the outer condition %cond. Possibly %cond2 was intended -- confirm before
; changing, since the expected output of this test depends on the IR as
; written.
br i1 %cond, label %inner_loop_body, label %outer_loop
inner_loop_body:
%cnt2_incr = add i32 %cnt2, 1
br label %inner_loop
exit:
ret void
}
attributes #0 = { "ShaderType"="0" }