R600: Don't emit empty then clause and use alu_pop_after

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186725 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vincent Lejeune
2013-07-19 21:45:15 +00:00
parent 12140450fa
commit 272458bd06
6 changed files with 175 additions and 17 deletions

View File

@@ -1039,8 +1039,11 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
} else if (FalseMBB->succ_size() == 1
&& *FalseMBB->succ_begin() == TrueMBB) {
// Triangle pattern, true is empty
LandBlk = TrueMBB;
TrueMBB = NULL;
// We reverse the predicate to make a triangle, empty false pattern;
std::swap(TrueMBB, FalseMBB);
reversePredicateSetter(MBB->end());
LandBlk = FalseMBB;
FalseMBB = NULL;
} else if (FalseMBB->succ_size() == 1
&& isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
LandBlk = *FalseMBB->succ_begin();
@@ -1456,6 +1459,7 @@ void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
assert (TrueMBB);
DEBUG(
dbgs() << "ifPattern BB" << MBB->getNumber();
dbgs() << "{ ";

View File

@@ -347,6 +347,9 @@ public:
MaxStack = 1;
}
std::vector<ClauseFile> FetchClauses, AluClauses;
std::vector<MachineInstr *> LastAlu(1);
std::vector<MachineInstr *> ToPopAfter;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
@@ -357,6 +360,10 @@ public:
}
MachineBasicBlock::iterator MI = I;
if (MI->getOpcode() != AMDGPU::ENDIF)
LastAlu.back() = 0;
if (MI->getOpcode() == AMDGPU::CF_ALU)
LastAlu.back() = MI;
I++;
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
@@ -403,6 +410,7 @@ public:
break;
}
case AMDGPU::IF_PREDICATE_SET: {
LastAlu.push_back(0);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_JUMP))
.addImm(0)
@@ -420,7 +428,7 @@ public:
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_ELSE))
.addImm(0)
.addImm(1);
.addImm(0);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
IfThenElseStack.push_back(MIb);
MI->eraseFromParent();
@@ -429,17 +437,24 @@ public:
}
case AMDGPU::ENDIF: {
CurrentStack--;
if (LastAlu.back()) {
ToPopAfter.push_back(LastAlu.back());
} else {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_POP))
.addImm(CfCount + 1)
.addImm(1);
(void)MIb;
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
CfCount++;
}
MachineInstr *IfOrElseInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
CounterPropagateAddr(IfOrElseInst, CfCount + 1);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_POP))
.addImm(CfCount + 1)
.addImm(1);
(void)MIb;
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
CounterPropagateAddr(IfOrElseInst, CfCount);
IfOrElseInst->getOperand(1).setImm(1);
LastAlu.pop_back();
MI->eraseFromParent();
CfCount++;
break;
}
case AMDGPU::PREDICATED_BREAK: {
@@ -484,6 +499,21 @@ public:
break;
}
}
for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
MachineInstr *Alu = ToPopAfter[i];
BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
TII->get(AMDGPU::CF_ALU_POP_AFTER))
.addImm(Alu->getOperand(0).getImm())
.addImm(Alu->getOperand(1).getImm())
.addImm(Alu->getOperand(2).getImm())
.addImm(Alu->getOperand(3).getImm())
.addImm(Alu->getOperand(4).getImm())
.addImm(Alu->getOperand(5).getImm())
.addImm(Alu->getOperand(6).getImm())
.addImm(Alu->getOperand(7).getImm())
.addImm(Alu->getOperand(8).getImm());
Alu->eraseFromParent();
}
MFI->StackSize = getHWStackSize(MaxStack, HasPush);
}

View File

@@ -624,6 +624,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
def FETCH_CLAUSE : AMDGPUInst <(outs),
(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {

View File

@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; CHECK: JUMP @7
; CHECK: JUMP @5
; CHECK: EXPORT
; CHECK-NOT: EXPORT

View File

@@ -2,12 +2,11 @@
;CHECK: TEX
;CHECK: ALU_PUSH
;CHECK: JUMP @4
;CHECK: ELSE @16
;CHECK: JUMP @15
;CHECK: TEX
;CHECK: LOOP_START_DX10 @15
;CHECK: LOOP_BREAK @14
;CHECK: POP @16
;CHECK: LOOP_START_DX10 @14
;CHECK: LOOP_BREAK @13
;CHECK: POP @15
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
target triple = "r600--"

View File

@@ -0,0 +1,124 @@
;RUN: llc < %s -march=r600 -mcpu=redwood
;REQUIRES: asserts
; Regression input for the R600 backend: a shader-style function whose control
; flow combines
;   * an outer loop (LOOP) with an early-return arm (IF41),
;   * an if/else diamond (IF44 / ELSE45 joining at ENDIF43), and
;   * an inner loop (LOOP47 / ENDIF48) whose exit edge branches straight back
;     to the outer loop header.
; Values are carried as float but are bitcast to i32 for every comparison and
; arithmetic operation, so the float type is only a container for integer bits.
; NOTE(review): presumably written to exercise the CFG structurizer's
; if-pattern matching on this shape -- confirm against the commit that added it.
define void @main() #0 {
main_body:
; Read four inputs (slots 4..7); only %0 is used below.
%0 = call float @llvm.R600.load.input(i32 4)
%1 = call float @llvm.R600.load.input(i32 5)
%2 = call float @llvm.R600.load.input(i32 6)
%3 = call float @llvm.R600.load.input(i32 7)
; %9 is true exactly when the bits of %0 are zero: the compare result is
; sign-extended, round-tripped through float, and tested against 0 again.
%4 = bitcast float %0 to i32
%5 = icmp eq i32 %4, 0
%6 = sext i1 %5 to i32
%7 = bitcast i32 %6 to float
%8 = bitcast float %7 to i32
%9 = icmp ne i32 %8, 0
; 0x36A0000000000000 is 2^-149, i.e. the float whose bit pattern is integer 1.
%. = select i1 %9, float 0x36A0000000000000, float %0
br label %LOOP
; Outer loop header. Re-entered from LOOP47 when the inner loop finishes.
LOOP: ; preds = %LOOP47, %main_body
%temp12.0 = phi float [ 0x36A0000000000000, %main_body ], [ %temp12.1, %LOOP47 ]
%temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %38, %LOOP47 ]
%temp4.1 = phi float [ %., %main_body ], [ %52, %LOOP47 ]
; Leave the loop (via IF41) once the integer bits of %temp4.1 equal 1.
%10 = bitcast float %temp4.1 to i32
%11 = icmp eq i32 %10, 1
%12 = sext i1 %11 to i32
%13 = bitcast i32 %12 to float
%14 = bitcast float %13 to i32
%15 = icmp ne i32 %14, 0
br i1 %15, label %IF41, label %ENDIF40
; Exit arm: emits the same <%0, %temp8.0, %temp12.0, 0.0> vector through three
; stream-output calls and one swizzle store (plus an all-zero swizzle store),
; then returns. This is the only return in the function.
IF41: ; preds = %LOOP
%16 = insertelement <4 x float> undef, float %0, i32 0
%17 = insertelement <4 x float> %16, float %temp8.0, i32 1
%18 = insertelement <4 x float> %17, float %temp12.0, i32 2
%19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3
call void @llvm.R600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)
%20 = insertelement <4 x float> undef, float %0, i32 0
%21 = insertelement <4 x float> %20, float %temp8.0, i32 1
%22 = insertelement <4 x float> %21, float %temp12.0, i32 2
%23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3
call void @llvm.R600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
%24 = insertelement <4 x float> undef, float %0, i32 0
%25 = insertelement <4 x float> %24, float %temp8.0, i32 1
%26 = insertelement <4 x float> %25, float %temp12.0, i32 2
%27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3
call void @llvm.R600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
%28 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
%29 = insertelement <4 x float> %28, float 0.000000e+00, i32 1
%30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2
%31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %31, i32 60, i32 1)
%32 = insertelement <4 x float> undef, float %0, i32 0
%33 = insertelement <4 x float> %32, float %temp8.0, i32 1
%34 = insertelement <4 x float> %33, float %temp12.0, i32 2
%35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %35, i32 0, i32 2)
ret void
; Loop body: bump the iteration counter (%temp8.0 bits + 1 -> %38), then branch
; on whether the integer in %temp4.1 is even (%47).
ENDIF40: ; preds = %LOOP
%36 = bitcast float %temp8.0 to i32
%37 = add i32 %36, 1
%38 = bitcast i32 %37 to float
%39 = bitcast float %temp4.1 to i32
%40 = urem i32 %39, 2
%41 = bitcast i32 %40 to float
%42 = bitcast float %41 to i32
%43 = icmp eq i32 %42, 0
%44 = sext i1 %43 to i32
%45 = bitcast i32 %44 to float
%46 = bitcast float %45 to i32
%47 = icmp ne i32 %46, 0
%48 = bitcast float %temp4.1 to i32
br i1 %47, label %IF44, label %ELSE45
; Even case: x / 2.
IF44: ; preds = %ENDIF40
%49 = udiv i32 %48, 2
br label %ENDIF43
; Odd case: 3 * x + 1.
ELSE45: ; preds = %ENDIF40
%50 = mul i32 3, %48
%51 = add i32 %50, 1
br label %ENDIF43
; Join of the diamond. %52 becomes the next %temp4.1; %55 (element 0 of a
; vector loaded from address-space 8, null pointer) is the inner loop bound.
ENDIF43: ; preds = %ELSE45, %IF44
%.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
%52 = bitcast i32 %.sink to float
%53 = load <4 x float> addrspace(8)* null
%54 = extractelement <4 x float> %53, i32 0
%55 = bitcast float %54 to i32
br label %LOOP47
; Inner loop header: counter %temp28.0 starts at 0; when it is >= %55
; (unsigned) control goes directly back to the outer loop header.
LOOP47: ; preds = %ENDIF48, %ENDIF43
%temp12.1 = phi float [ %temp12.0, %ENDIF43 ], [ %67, %ENDIF48 ]
%temp28.0 = phi float [ 0.000000e+00, %ENDIF43 ], [ %70, %ENDIF48 ]
%56 = bitcast float %temp28.0 to i32
%57 = icmp uge i32 %56, %55
%58 = sext i1 %57 to i32
%59 = bitcast i32 %58 to float
%60 = bitcast float %59 to i32
%61 = icmp ne i32 %60, 0
br i1 %61, label %LOOP, label %ENDIF48
; Inner loop body: %temp12.1 <- (%temp12.1 * 2) urem 2147483647, counter += 1.
ENDIF48: ; preds = %LOOP47
%62 = bitcast float %temp12.1 to i32
%63 = mul i32 %62, 2
%64 = bitcast i32 %63 to float
%65 = bitcast float %64 to i32
%66 = urem i32 %65, 2147483647
%67 = bitcast i32 %66 to float
%68 = bitcast float %temp28.0 to i32
%69 = add i32 %68, 1
%70 = bitcast i32 %69 to float
br label %LOOP47
}
; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1
declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }