R600: Control Flow support for pre EG gen

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179020 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vincent Lejeune 2013-04-08 13:05:49 +00:00
parent 58852ecc1f
commit bd7c634ab9
3 changed files with 242 additions and 74 deletions

View File

@ -266,17 +266,27 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit(Inst, OS);
break;
}
case AMDGPU::CF_TC:
case AMDGPU::CF_VC:
case AMDGPU::CF_CALL_FS:
case AMDGPU::CF_TC_EG:
case AMDGPU::CF_VC_EG:
case AMDGPU::CF_CALL_FS_EG:
case AMDGPU::CF_TC_R600:
case AMDGPU::CF_VC_R600:
case AMDGPU::CF_CALL_FS_R600:
return;
case AMDGPU::WHILE_LOOP:
case AMDGPU::END_LOOP:
case AMDGPU::LOOP_BREAK:
case AMDGPU::CF_CONTINUE:
case AMDGPU::CF_JUMP:
case AMDGPU::CF_ELSE:
case AMDGPU::POP: {
case AMDGPU::WHILE_LOOP_EG:
case AMDGPU::END_LOOP_EG:
case AMDGPU::LOOP_BREAK_EG:
case AMDGPU::CF_CONTINUE_EG:
case AMDGPU::CF_JUMP_EG:
case AMDGPU::CF_ELSE_EG:
case AMDGPU::POP_EG:
case AMDGPU::WHILE_LOOP_R600:
case AMDGPU::END_LOOP_R600:
case AMDGPU::LOOP_BREAK_R600:
case AMDGPU::CF_CONTINUE_R600:
case AMDGPU::CF_JUMP_R600:
case AMDGPU::CF_ELSE_R600:
case AMDGPU::POP_R600: {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
EmitByte(INSTR_NATIVE, OS);
Emit(Inst, OS);

View File

@ -30,9 +30,22 @@ namespace llvm {
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
/// Generation-independent names for the hardware control-flow instructions
/// this pass emits. getHWInstrDesc() resolves each enumerator to either the
/// *_R600 or the *_EG opcode, depending on the subtarget generation.
enum ControlFlowInstruction {
CF_TC,            // AMDGPU::CF_TC_R600 / AMDGPU::CF_TC_EG
CF_CALL_FS,       // AMDGPU::CF_CALL_FS_R600 / AMDGPU::CF_CALL_FS_EG
CF_WHILE_LOOP,    // AMDGPU::WHILE_LOOP_R600 / AMDGPU::WHILE_LOOP_EG
CF_END_LOOP,      // AMDGPU::END_LOOP_R600 / AMDGPU::END_LOOP_EG
CF_LOOP_BREAK,    // AMDGPU::LOOP_BREAK_R600 / AMDGPU::LOOP_BREAK_EG
CF_LOOP_CONTINUE, // AMDGPU::CF_CONTINUE_R600 / AMDGPU::CF_CONTINUE_EG
CF_JUMP,          // AMDGPU::CF_JUMP_R600 / AMDGPU::CF_JUMP_EG
CF_ELSE,          // AMDGPU::CF_ELSE_R600 / AMDGPU::CF_ELSE_EG
CF_POP            // AMDGPU::POP_R600 / AMDGPU::POP_EG
};
static char ID;
const R600InstrInfo *TII;
unsigned MaxFetchInst;
const AMDGPUSubtarget &ST;
bool isFetch(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
@ -70,6 +83,52 @@ private:
}
}
/// Resolve a generation-independent control-flow kind to the descriptor of
/// the matching hardware CF instruction for the current subtarget.
///
/// Devices whose generation is <= AMDGPUDeviceInfo::HD4XXX use the *_R600
/// opcodes; every other generation uses the *_EG opcodes.
///
/// \param CFI  Abstract control-flow instruction to look up.
/// \returns    The descriptor TII reports for the selected opcode.
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
  const bool IsPreEG =
      ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX;

  // Every enumerator assigns Opcode below. It keeps the value 0 only when CFI
  // holds a value outside the enumeration; the function then still returns a
  // descriptor instead of running off the end of a non-void function.
  unsigned Opcode = 0;

  // No default label on purpose: the compiler can then warn when a new
  // ControlFlowInstruction enumerator is added without a mapping here.
  switch (CFI) {
  case CF_TC:
    Opcode = IsPreEG ? AMDGPU::CF_TC_R600 : AMDGPU::CF_TC_EG;
    break;
  case CF_CALL_FS:
    Opcode = IsPreEG ? AMDGPU::CF_CALL_FS_R600 : AMDGPU::CF_CALL_FS_EG;
    break;
  case CF_WHILE_LOOP:
    Opcode = IsPreEG ? AMDGPU::WHILE_LOOP_R600 : AMDGPU::WHILE_LOOP_EG;
    break;
  case CF_END_LOOP:
    Opcode = IsPreEG ? AMDGPU::END_LOOP_R600 : AMDGPU::END_LOOP_EG;
    break;
  case CF_LOOP_BREAK:
    Opcode = IsPreEG ? AMDGPU::LOOP_BREAK_R600 : AMDGPU::LOOP_BREAK_EG;
    break;
  case CF_LOOP_CONTINUE:
    Opcode = IsPreEG ? AMDGPU::CF_CONTINUE_R600 : AMDGPU::CF_CONTINUE_EG;
    break;
  case CF_JUMP:
    Opcode = IsPreEG ? AMDGPU::CF_JUMP_R600 : AMDGPU::CF_JUMP_EG;
    break;
  case CF_ELSE:
    Opcode = IsPreEG ? AMDGPU::CF_ELSE_R600 : AMDGPU::CF_ELSE_EG;
    break;
  case CF_POP:
    Opcode = IsPreEG ? AMDGPU::POP_R600 : AMDGPU::POP_EG;
    break;
  }
  return TII->get(Opcode);
}
MachineBasicBlock::iterator
MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned CfAddress) const {
@ -85,7 +144,7 @@ private:
break;
}
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
TII->get(AMDGPU::CF_TC))
getHWInstrDesc(CF_TC))
.addImm(CfAddress) // ADDR
.addImm(AluInstCount); // COUNT
return I;
@ -104,7 +163,8 @@ private:
public:
R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
ST(tm.getSubtarget<AMDGPUSubtarget>()) {
const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
MaxFetchInst = 8;
@ -124,7 +184,7 @@ public:
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
if (MFI->ShaderType == 1) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
TII->get(AMDGPU::CF_CALL_FS));
getHWInstrDesc(CF_CALL_FS));
CfCount++;
}
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
@ -154,7 +214,7 @@ public:
CurrentStack++;
MaxStack = std::max(MaxStack, CurrentStack);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::WHILE_LOOP))
getHWInstrDesc(CF_WHILE_LOOP))
.addImm(2);
std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
std::set<MachineInstr *>());
@ -170,7 +230,7 @@ public:
LoopStack.back();
LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount);
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
.addImm(Pair.first + 1);
MI->eraseFromParent();
CfCount++;
@ -178,7 +238,7 @@ public:
}
case AMDGPU::IF_PREDICATE_SET: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::CF_JUMP))
getHWInstrDesc(CF_JUMP))
.addImm(0)
.addImm(0);
IfThenElseStack.push_back(MIb);
@ -192,7 +252,7 @@ public:
IfThenElseStack.pop_back();
CounterPropagateAddr(JumpInst, CfCount);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::CF_ELSE))
getHWInstrDesc(CF_ELSE))
.addImm(0)
.addImm(1);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
@ -207,7 +267,7 @@ public:
IfThenElseStack.pop_back();
CounterPropagateAddr(IfOrElseInst, CfCount + 1);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::POP))
getHWInstrDesc(CF_POP))
.addImm(CfCount + 1)
.addImm(1);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
@ -218,13 +278,13 @@ public:
case AMDGPU::PREDICATED_BREAK: {
CurrentStack--;
CfCount += 3;
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
.addImm(CfCount)
.addImm(1);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::LOOP_BREAK))
getHWInstrDesc(CF_LOOP_BREAK))
.addImm(0);
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
.addImm(CfCount)
.addImm(1);
LoopStack.back().second.insert(MIb);
@ -233,7 +293,7 @@ public:
}
case AMDGPU::CONTINUE: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::CF_CONTINUE))
getHWInstrDesc(CF_LOOP_CONTINUE))
.addImm(0);
LoopStack.back().second.insert(MIb);
MI->eraseFromParent();

View File

@ -823,7 +823,61 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
let Inst{63-32} = Word1;
}
class CF_WORD0 {
// First 32-bit word of the R600 CF encoding (CF_CLAUSE_R600 places it in
// Inst{31-0}). The whole word is the 32-bit ADDR field.
class CF_WORD0_R600 {
field bits<32> Word0;
bits<32> ADDR;
let Word0 = ADDR;
}
// Second 32-bit word of the R600 CF encoding (CF_CLAUSE_R600 places it in
// Inst{63-32}). Declares the individual fields and packs them into Word1.
class CF_WORD1_R600 {
field bits<32> Word1;
// Field declarations; widths match the bit ranges assigned below.
bits<3> POP_COUNT;
bits<5> CF_CONST;
bits<2> COND;
bits<3> COUNT;
bits<6> CALL_COUNT;
bits<1> COUNT_3;
bits<1> END_OF_PROGRAM;
bits<1> VALID_PIXEL_MODE;
bits<7> CF_INST;
bits<1> WHOLE_QUAD_MODE;
bits<1> BARRIER;
// Bit layout, from least to most significant bit.
let Word1{2-0} = POP_COUNT;
let Word1{7-3} = CF_CONST;
let Word1{9-8} = COND;
let Word1{12-10} = COUNT;
let Word1{18-13} = CALL_COUNT;
let Word1{19} = COUNT_3;
// NOTE: Word1{20} is not assigned by this class.
let Word1{21} = END_OF_PROGRAM;
let Word1{22} = VALID_PIXEL_MODE;
let Word1{29-23} = CF_INST;
let Word1{30} = WHOLE_QUAD_MODE;
let Word1{31} = BARRIER;
}
// Base class for R600 CF instructions with a 64-bit encoding.
//   inst     - 7-bit value stored in the CF_INST field.
//   ins      - input operand dag (the instruction has no outputs).
//   AsmPrint - assembly string passed through to AMDGPUInst.
// ADDR, COUNT and POP_COUNT are not set here; each def either fixes them with
// a `let` or declares an operand of the same name.
class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
field bits<64> Inst;
let CF_INST = inst;
// Defaults shared by every def deriving from this class.
let BARRIER = 1;
let CF_CONST = 0;
let VALID_PIXEL_MODE = 0;
let COND = 0;
let CALL_COUNT = 0;
let COUNT_3 = 0;
let END_OF_PROGRAM = 0;
let WHOLE_QUAD_MODE = 0;
// Word0 forms the low half of the encoding, Word1 the high half.
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
}
class CF_WORD0_EG {
field bits<32> Word0;
bits<24> ADDR;
@ -833,7 +887,7 @@ class CF_WORD0 {
let Word0{26-24} = JUMPTABLE_SEL;
}
class CF_WORD1 {
class CF_WORD1_EG {
field bits<32> Word1;
bits<3> POP_COUNT;
@ -853,8 +907,8 @@ class CF_WORD1 {
let Word1{31} = BARRIER;
}
class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
field bits<64> Inst;
let CF_INST = inst;
@ -868,54 +922,6 @@ ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
let Inst{63-32} = Word1;
}
def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
"TEX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
"VTX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
let ADDR = 0;
let COUNT = 0;
let POP_COUNT = 0;
}
def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
@ -1433,6 +1439,52 @@ let Predicates = [isR600] in {
let Word1{31} = 1; // BARRIER
}
defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
// R600 control-flow instruction definitions built on CF_CLAUSE_R600.
// The first template argument is stored in the CF_INST field. Each def pins
// the encoding fields (POP_COUNT, COUNT, ADDR) that it does not take as
// operands.

// CF_INST 1, printed as "TEX": ADDR and COUNT are operands.
def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
"TEX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
// CF_INST 2, printed as "VTX": ADDR and COUNT are operands.
def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
"VTX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
// CF_INST 6, printed as "LOOP_START_DX10": ADDR is the only operand.
def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
"LOOP_START_DX10 @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// CF_INST 5, printed as "END_LOOP": ADDR is the only operand.
def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// CF_INST 9, printed as "LOOP_BREAK": ADDR is the only operand.
def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
"LOOP_BREAK @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// CF_INST 8, printed as "CONTINUE": ADDR is the only operand.
def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
"CONTINUE @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// CF_INST 10, printed as "JUMP": ADDR and POP_COUNT are operands.
def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"JUMP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
// CF_INST 13, printed as "ELSE": ADDR and POP_COUNT are operands.
def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"ELSE @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
// CF_INST 19, printed as "CALL_FS": no operands, every field is fixed.
def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
let ADDR = 0;
let COUNT = 0;
let POP_COUNT = 0;
}
// CF_INST 14, printed as "POP": ADDR and POP_COUNT are operands.
def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"POP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
}
// Helper pattern for normalizing inputs to trigonometric instructions for R700+
@ -1589,6 +1641,52 @@ let hasSideEffects = 1 in {
}
defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
// Evergreen control-flow instruction definitions built on CF_CLAUSE_EG.
// The first template argument is stored in the CF_INST field. Each def pins
// the encoding fields (POP_COUNT, COUNT, ADDR) that it does not take as
// operands. The CF_INST values and assembly strings are the same as in the
// corresponding *_R600 defs; only the base class differs.

// CF_INST 1, printed as "TEX": ADDR and COUNT are operands.
def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
"TEX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
// CF_INST 2, printed as "VTX": ADDR and COUNT are operands.
def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
"VTX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
// CF_INST 6, printed as "LOOP_START_DX10": ADDR is the only operand.
def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
"LOOP_START_DX10 @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// CF_INST 5, printed as "END_LOOP": ADDR is the only operand.
def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// CF_INST 9, printed as "LOOP_BREAK": ADDR is the only operand.
def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
"LOOP_BREAK @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// CF_INST 8, printed as "CONTINUE": ADDR is the only operand.
def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
"CONTINUE @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// CF_INST 10, printed as "JUMP": ADDR and POP_COUNT are operands.
def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"JUMP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
// CF_INST 13, printed as "ELSE": ADDR and POP_COUNT are operands.
def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"ELSE @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
// CF_INST 19, printed as "CALL_FS": no operands, every field is fixed.
def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
let ADDR = 0;
let COUNT = 0;
let POP_COUNT = 0;
}
// CF_INST 14, printed as "POP": ADDR and POP_COUNT are operands.
def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"POP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//