mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
R600: Don't emit empty then clause and use alu_pop_after
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186725 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
12140450fa
commit
272458bd06
@ -1039,8 +1039,11 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
|
||||
} else if (FalseMBB->succ_size() == 1
|
||||
&& *FalseMBB->succ_begin() == TrueMBB) {
|
||||
// Triangle pattern, true is empty
|
||||
LandBlk = TrueMBB;
|
||||
TrueMBB = NULL;
|
||||
// Reverse the predicate so this becomes a triangle pattern with an empty false branch.
|
||||
std::swap(TrueMBB, FalseMBB);
|
||||
reversePredicateSetter(MBB->end());
|
||||
LandBlk = FalseMBB;
|
||||
FalseMBB = NULL;
|
||||
} else if (FalseMBB->succ_size() == 1
|
||||
&& isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
|
||||
LandBlk = *FalseMBB->succ_begin();
|
||||
@ -1456,6 +1459,7 @@ void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
|
||||
void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
|
||||
MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
|
||||
MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
|
||||
assert (TrueMBB);
|
||||
DEBUG(
|
||||
dbgs() << "ifPattern BB" << MBB->getNumber();
|
||||
dbgs() << "{ ";
|
||||
|
@ -347,6 +347,9 @@ public:
|
||||
MaxStack = 1;
|
||||
}
|
||||
std::vector<ClauseFile> FetchClauses, AluClauses;
|
||||
std::vector<MachineInstr *> LastAlu(1);
|
||||
std::vector<MachineInstr *> ToPopAfter;
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E;) {
|
||||
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
|
||||
@ -357,6 +360,10 @@ public:
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator MI = I;
|
||||
if (MI->getOpcode() != AMDGPU::ENDIF)
|
||||
LastAlu.back() = 0;
|
||||
if (MI->getOpcode() == AMDGPU::CF_ALU)
|
||||
LastAlu.back() = MI;
|
||||
I++;
|
||||
switch (MI->getOpcode()) {
|
||||
case AMDGPU::CF_ALU_PUSH_BEFORE:
|
||||
@ -403,6 +410,7 @@ public:
|
||||
break;
|
||||
}
|
||||
case AMDGPU::IF_PREDICATE_SET: {
|
||||
LastAlu.push_back(0);
|
||||
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
|
||||
getHWInstrDesc(CF_JUMP))
|
||||
.addImm(0)
|
||||
@ -420,7 +428,7 @@ public:
|
||||
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
|
||||
getHWInstrDesc(CF_ELSE))
|
||||
.addImm(0)
|
||||
.addImm(1);
|
||||
.addImm(0);
|
||||
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
|
||||
IfThenElseStack.push_back(MIb);
|
||||
MI->eraseFromParent();
|
||||
@ -429,17 +437,24 @@ public:
|
||||
}
|
||||
case AMDGPU::ENDIF: {
|
||||
CurrentStack--;
|
||||
if (LastAlu.back()) {
|
||||
ToPopAfter.push_back(LastAlu.back());
|
||||
} else {
|
||||
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
|
||||
getHWInstrDesc(CF_POP))
|
||||
.addImm(CfCount + 1)
|
||||
.addImm(1);
|
||||
(void)MIb;
|
||||
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
|
||||
CfCount++;
|
||||
}
|
||||
|
||||
MachineInstr *IfOrElseInst = IfThenElseStack.back();
|
||||
IfThenElseStack.pop_back();
|
||||
CounterPropagateAddr(IfOrElseInst, CfCount + 1);
|
||||
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
|
||||
getHWInstrDesc(CF_POP))
|
||||
.addImm(CfCount + 1)
|
||||
.addImm(1);
|
||||
(void)MIb;
|
||||
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
|
||||
CounterPropagateAddr(IfOrElseInst, CfCount);
|
||||
IfOrElseInst->getOperand(1).setImm(1);
|
||||
LastAlu.pop_back();
|
||||
MI->eraseFromParent();
|
||||
CfCount++;
|
||||
break;
|
||||
}
|
||||
case AMDGPU::PREDICATED_BREAK: {
|
||||
@ -484,6 +499,21 @@ public:
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
|
||||
MachineInstr *Alu = ToPopAfter[i];
|
||||
BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
|
||||
TII->get(AMDGPU::CF_ALU_POP_AFTER))
|
||||
.addImm(Alu->getOperand(0).getImm())
|
||||
.addImm(Alu->getOperand(1).getImm())
|
||||
.addImm(Alu->getOperand(2).getImm())
|
||||
.addImm(Alu->getOperand(3).getImm())
|
||||
.addImm(Alu->getOperand(4).getImm())
|
||||
.addImm(Alu->getOperand(5).getImm())
|
||||
.addImm(Alu->getOperand(6).getImm())
|
||||
.addImm(Alu->getOperand(7).getImm())
|
||||
.addImm(Alu->getOperand(8).getImm());
|
||||
Alu->eraseFromParent();
|
||||
}
|
||||
MFI->StackSize = getHWStackSize(MaxStack, HasPush);
|
||||
}
|
||||
|
||||
|
@ -624,6 +624,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
|
||||
|
||||
// ALU clause control-flow instructions. Each def instantiates ALU_CLAUSE with
// a distinct numeric id and a name string (presumably the CF instruction
// opcode and its printed mnemonic -- confirm against the ALU_CLAUSE class).
def CF_ALU : ALU_CLAUSE<8, "ALU">;
|
||||
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
|
||||
def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
|
||||
|
||||
def FETCH_CLAUSE : AMDGPUInst <(outs),
|
||||
(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
|
||||
|
@ -1,6 +1,6 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
; CHECK: JUMP @7
|
||||
; CHECK: JUMP @5
|
||||
; CHECK: EXPORT
|
||||
; CHECK-NOT: EXPORT
|
||||
|
||||
|
@ -2,12 +2,11 @@
|
||||
|
||||
;CHECK: TEX
|
||||
;CHECK: ALU_PUSH
|
||||
;CHECK: JUMP @4
|
||||
;CHECK: ELSE @16
|
||||
;CHECK: JUMP @15
|
||||
;CHECK: TEX
|
||||
;CHECK: LOOP_START_DX10 @15
|
||||
;CHECK: LOOP_BREAK @14
|
||||
;CHECK: POP @16
|
||||
;CHECK: LOOP_START_DX10 @14
|
||||
;CHECK: LOOP_BREAK @13
|
||||
;CHECK: POP @15
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
|
||||
target triple = "r600--"
|
||||
|
124
test/CodeGen/R600/r600cfg.ll
Normal file
124
test/CodeGen/R600/r600cfg.ll
Normal file
@ -0,0 +1,124 @@
|
||||
; Compile-only test: the llc invocation below is not piped into FileCheck, so
; the test passes as long as llc finishes successfully. It is restricted to
; builds that have assertions enabled.
;RUN: llc < %s -march=r600 -mcpu=redwood
|
||||
;REQUIRES: asserts
|
||||
|
||||
; Control-flow test input: an outer loop (LOOP) whose exit branch (IF41)
; returns directly, an if/else diamond (IF44 / ELSE45 joining in ENDIF43), and
; a nested inner loop (LOOP47 / ENDIF48) that branches back to the outer loop
; header when it finishes.
define void @main() #0 {
|
||||
; Entry: read inputs 4-7 and choose the initial value of %temp4.1 -- a float
; constant when input 4 has an all-zero bit pattern, otherwise input 4 itself.
; Inputs 5-7 (%1-%3) are loaded but not used again in this function.
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
%4 = bitcast float %0 to i32
|
||||
%5 = icmp eq i32 %4, 0
|
||||
%6 = sext i1 %5 to i32
|
||||
%7 = bitcast i32 %6 to float
|
||||
%8 = bitcast float %7 to i32
|
||||
%9 = icmp ne i32 %8, 0
|
||||
%. = select i1 %9, float 0x36A0000000000000, float %0
|
||||
br label %LOOP
|
||||
|
||||
; Outer loop header: leave through IF41 once the bit pattern of %temp4.1
; equals 1, otherwise continue with ENDIF40.
LOOP: ; preds = %LOOP47, %main_body
|
||||
%temp12.0 = phi float [ 0x36A0000000000000, %main_body ], [ %temp12.1, %LOOP47 ]
|
||||
%temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %38, %LOOP47 ]
|
||||
%temp4.1 = phi float [ %., %main_body ], [ %52, %LOOP47 ]
|
||||
%10 = bitcast float %temp4.1 to i32
|
||||
%11 = icmp eq i32 %10, 1
|
||||
%12 = sext i1 %11 to i32
|
||||
%13 = bitcast i32 %12 to float
|
||||
%14 = bitcast float %13 to i32
|
||||
%15 = icmp ne i32 %14, 0
|
||||
br i1 %15, label %IF41, label %ENDIF40
|
||||
|
||||
; Exit path: three stream-output stores and two swizzle stores, then return.
IF41: ; preds = %LOOP
|
||||
%16 = insertelement <4 x float> undef, float %0, i32 0
|
||||
%17 = insertelement <4 x float> %16, float %temp8.0, i32 1
|
||||
%18 = insertelement <4 x float> %17, float %temp12.0, i32 2
|
||||
%19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3
|
||||
call void @llvm.R600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)
|
||||
%20 = insertelement <4 x float> undef, float %0, i32 0
|
||||
%21 = insertelement <4 x float> %20, float %temp8.0, i32 1
|
||||
%22 = insertelement <4 x float> %21, float %temp12.0, i32 2
|
||||
%23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3
|
||||
call void @llvm.R600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
|
||||
%24 = insertelement <4 x float> undef, float %0, i32 0
|
||||
%25 = insertelement <4 x float> %24, float %temp8.0, i32 1
|
||||
%26 = insertelement <4 x float> %25, float %temp12.0, i32 2
|
||||
%27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3
|
||||
call void @llvm.R600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
|
||||
%28 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
|
||||
%29 = insertelement <4 x float> %28, float 0.000000e+00, i32 1
|
||||
%30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2
|
||||
%31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %31, i32 60, i32 1)
|
||||
%32 = insertelement <4 x float> undef, float %0, i32 0
|
||||
%33 = insertelement <4 x float> %32, float %temp8.0, i32 1
|
||||
%34 = insertelement <4 x float> %33, float %temp12.0, i32 2
|
||||
%35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %35, i32 0, i32 2)
|
||||
ret void
|
||||
|
||||
; Add one to the bit pattern of %temp8.0 (result %38 feeds the LOOP phi) and
; test whether %temp4.1, viewed as an integer, is even.
ENDIF40: ; preds = %LOOP
|
||||
%36 = bitcast float %temp8.0 to i32
|
||||
%37 = add i32 %36, 1
|
||||
%38 = bitcast i32 %37 to float
|
||||
%39 = bitcast float %temp4.1 to i32
|
||||
%40 = urem i32 %39, 2
|
||||
%41 = bitcast i32 %40 to float
|
||||
%42 = bitcast float %41 to i32
|
||||
%43 = icmp eq i32 %42, 0
|
||||
%44 = sext i1 %43 to i32
|
||||
%45 = bitcast i32 %44 to float
|
||||
%46 = bitcast float %45 to i32
|
||||
%47 = icmp ne i32 %46, 0
|
||||
%48 = bitcast float %temp4.1 to i32
|
||||
br i1 %47, label %IF44, label %ELSE45
|
||||
|
||||
; Even case: halve the value.
IF44: ; preds = %ENDIF40
|
||||
%49 = udiv i32 %48, 2
|
||||
br label %ENDIF43
|
||||
|
||||
; Odd case: compute 3 * value + 1.
ELSE45: ; preds = %ENDIF40
|
||||
%50 = mul i32 3, %48
|
||||
%51 = add i32 %50, 1
|
||||
br label %ENDIF43
|
||||
|
||||
; Join of the diamond. Also loads a vector through a null address-space-8
; pointer and takes element 0 as the inner loop bound %55.
ENDIF43: ; preds = %ELSE45, %IF44
|
||||
%.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
|
||||
%52 = bitcast i32 %.sink to float
|
||||
%53 = load <4 x float> addrspace(8)* null
|
||||
%54 = extractelement <4 x float> %53, i32 0
|
||||
%55 = bitcast float %54 to i32
|
||||
br label %LOOP47
|
||||
|
||||
; Inner loop header: branch back to the outer LOOP once the counter
; %temp28.0 is unsigned >= %55, otherwise run the body in ENDIF48.
LOOP47: ; preds = %ENDIF48, %ENDIF43
|
||||
%temp12.1 = phi float [ %temp12.0, %ENDIF43 ], [ %67, %ENDIF48 ]
|
||||
%temp28.0 = phi float [ 0.000000e+00, %ENDIF43 ], [ %70, %ENDIF48 ]
|
||||
%56 = bitcast float %temp28.0 to i32
|
||||
%57 = icmp uge i32 %56, %55
|
||||
%58 = sext i1 %57 to i32
|
||||
%59 = bitcast i32 %58 to float
|
||||
%60 = bitcast float %59 to i32
|
||||
%61 = icmp ne i32 %60, 0
|
||||
br i1 %61, label %LOOP, label %ENDIF48
|
||||
|
||||
; Inner loop body: %temp12.1 becomes (%temp12.1 * 2) urem 2147483647 and the
; counter is incremented by one (both on integer bit patterns), then the loop
; returns to LOOP47.
ENDIF48: ; preds = %LOOP47
|
||||
%62 = bitcast float %temp12.1 to i32
|
||||
%63 = mul i32 %62, 2
|
||||
%64 = bitcast i32 %63 to float
|
||||
%65 = bitcast float %64 to i32
|
||||
%66 = urem i32 %65, 2147483647
|
||||
%67 = bitcast i32 %66 to float
|
||||
%68 = bitcast float %temp28.0 to i32
|
||||
%69 = add i32 %68, 1
|
||||
%70 = bitcast i32 %69 to float
|
||||
br label %LOOP47
|
||||
|
||||
}
|
||||
|
||||
; Declarations of the three R600 intrinsics called by @main, followed by the
; attribute groups: #0 is attached to @main and #1 to the load.input intrinsic.
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
|
||||
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" }
|
||||
attributes #1 = { readnone }
|
Loading…
Reference in New Issue
Block a user