R600: Control flow support for pre-Evergreen (EG) generations

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179020 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vincent Lejeune
2013-04-08 13:05:49 +00:00
parent 58852ecc1f
commit bd7c634ab9
3 changed files with 242 additions and 74 deletions

View File

@@ -266,17 +266,27 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit(Inst, OS); Emit(Inst, OS);
break; break;
} }
case AMDGPU::CF_TC: case AMDGPU::CF_TC_EG:
case AMDGPU::CF_VC: case AMDGPU::CF_VC_EG:
case AMDGPU::CF_CALL_FS: case AMDGPU::CF_CALL_FS_EG:
case AMDGPU::CF_TC_R600:
case AMDGPU::CF_VC_R600:
case AMDGPU::CF_CALL_FS_R600:
return; return;
case AMDGPU::WHILE_LOOP: case AMDGPU::WHILE_LOOP_EG:
case AMDGPU::END_LOOP: case AMDGPU::END_LOOP_EG:
case AMDGPU::LOOP_BREAK: case AMDGPU::LOOP_BREAK_EG:
case AMDGPU::CF_CONTINUE: case AMDGPU::CF_CONTINUE_EG:
case AMDGPU::CF_JUMP: case AMDGPU::CF_JUMP_EG:
case AMDGPU::CF_ELSE: case AMDGPU::CF_ELSE_EG:
case AMDGPU::POP: { case AMDGPU::POP_EG:
case AMDGPU::WHILE_LOOP_R600:
case AMDGPU::END_LOOP_R600:
case AMDGPU::LOOP_BREAK_R600:
case AMDGPU::CF_CONTINUE_R600:
case AMDGPU::CF_JUMP_R600:
case AMDGPU::CF_ELSE_R600:
case AMDGPU::POP_R600: {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
EmitByte(INSTR_NATIVE, OS); EmitByte(INSTR_NATIVE, OS);
Emit(Inst, OS); Emit(Inst, OS);

View File

@@ -30,9 +30,22 @@ namespace llvm {
class R600ControlFlowFinalizer : public MachineFunctionPass { class R600ControlFlowFinalizer : public MachineFunctionPass {
private: private:
// Generation-independent names for the control-flow instructions this pass
// emits. getHWInstrDesc() translates each enumerator into the matching
// *_R600 or *_EG hardware opcode. Note there is no entry for the CF_VC
// opcodes in this enum.
enum ControlFlowInstruction {
// Maps to CF_TC_R600 / CF_TC_EG.
CF_TC,
// Maps to CF_CALL_FS_R600 / CF_CALL_FS_EG.
CF_CALL_FS,
// Maps to WHILE_LOOP_R600 / WHILE_LOOP_EG.
CF_WHILE_LOOP,
// Maps to END_LOOP_R600 / END_LOOP_EG.
CF_END_LOOP,
// Maps to LOOP_BREAK_R600 / LOOP_BREAK_EG.
CF_LOOP_BREAK,
// Maps to CF_CONTINUE_R600 / CF_CONTINUE_EG.
CF_LOOP_CONTINUE,
// Maps to CF_JUMP_R600 / CF_JUMP_EG.
CF_JUMP,
// Maps to CF_ELSE_R600 / CF_ELSE_EG.
CF_ELSE,
// Maps to POP_R600 / POP_EG.
CF_POP
};
static char ID; static char ID;
const R600InstrInfo *TII; const R600InstrInfo *TII;
unsigned MaxFetchInst; unsigned MaxFetchInst;
const AMDGPUSubtarget &ST;
bool isFetch(const MachineInstr *MI) const { bool isFetch(const MachineInstr *MI) const {
switch (MI->getOpcode()) { switch (MI->getOpcode()) {
@@ -70,6 +83,52 @@ private:
} }
} }
/// Translate a generation-independent control-flow instruction into the
/// descriptor of the hardware opcode for the current subtarget.
///
/// Subtargets whose device generation is <= HD4XXX receive the *_R600
/// opcodes; every other generation receives the *_EG opcodes.
///
/// \param CFI the abstract control-flow instruction to translate.
/// \returns the descriptor TII reports for the selected opcode.
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
  const bool IsPreEG =
      ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX;

  // Start from the one-past-the-last opcode so that an out-of-range CFI hands
  // an invalid opcode to the lookup below instead of letting control run off
  // the end of a reference-returning function with no return statement.
  // NOTE(review): TII->get() is expected to range-check its argument in
  // asserts builds -- confirm against MCInstrInfo::get.
  unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;

  // No default label on purpose: the compiler can then warn if a new
  // ControlFlowInstruction enumerator is added without a case here.
  switch (CFI) {
  case CF_TC:
    Opcode = IsPreEG ? AMDGPU::CF_TC_R600 : AMDGPU::CF_TC_EG;
    break;
  case CF_CALL_FS:
    Opcode = IsPreEG ? AMDGPU::CF_CALL_FS_R600 : AMDGPU::CF_CALL_FS_EG;
    break;
  case CF_WHILE_LOOP:
    Opcode = IsPreEG ? AMDGPU::WHILE_LOOP_R600 : AMDGPU::WHILE_LOOP_EG;
    break;
  case CF_END_LOOP:
    Opcode = IsPreEG ? AMDGPU::END_LOOP_R600 : AMDGPU::END_LOOP_EG;
    break;
  case CF_LOOP_BREAK:
    Opcode = IsPreEG ? AMDGPU::LOOP_BREAK_R600 : AMDGPU::LOOP_BREAK_EG;
    break;
  case CF_LOOP_CONTINUE:
    Opcode = IsPreEG ? AMDGPU::CF_CONTINUE_R600 : AMDGPU::CF_CONTINUE_EG;
    break;
  case CF_JUMP:
    Opcode = IsPreEG ? AMDGPU::CF_JUMP_R600 : AMDGPU::CF_JUMP_EG;
    break;
  case CF_ELSE:
    Opcode = IsPreEG ? AMDGPU::CF_ELSE_R600 : AMDGPU::CF_ELSE_EG;
    break;
  case CF_POP:
    Opcode = IsPreEG ? AMDGPU::POP_R600 : AMDGPU::POP_EG;
    break;
  }

  return TII->get(Opcode);
}
MachineBasicBlock::iterator MachineBasicBlock::iterator
MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned CfAddress) const { unsigned CfAddress) const {
@@ -85,7 +144,7 @@ private:
break; break;
} }
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
TII->get(AMDGPU::CF_TC)) getHWInstrDesc(CF_TC))
.addImm(CfAddress) // ADDR .addImm(CfAddress) // ADDR
.addImm(AluInstCount); // COUNT .addImm(AluInstCount); // COUNT
return I; return I;
@@ -104,7 +163,8 @@ private:
public: public:
R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
ST(tm.getSubtarget<AMDGPUSubtarget>()) {
const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
MaxFetchInst = 8; MaxFetchInst = 8;
@@ -124,7 +184,7 @@ public:
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
if (MFI->ShaderType == 1) { if (MFI->ShaderType == 1) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
TII->get(AMDGPU::CF_CALL_FS)); getHWInstrDesc(CF_CALL_FS));
CfCount++; CfCount++;
} }
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
@@ -154,7 +214,7 @@ public:
CurrentStack++; CurrentStack++;
MaxStack = std::max(MaxStack, CurrentStack); MaxStack = std::max(MaxStack, CurrentStack);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::WHILE_LOOP)) getHWInstrDesc(CF_WHILE_LOOP))
.addImm(2); .addImm(2);
std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount, std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
std::set<MachineInstr *>()); std::set<MachineInstr *>());
@@ -170,7 +230,7 @@ public:
LoopStack.back(); LoopStack.back();
LoopStack.pop_back(); LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount); CounterPropagateAddr(Pair.second, CfCount);
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP)) BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
.addImm(Pair.first + 1); .addImm(Pair.first + 1);
MI->eraseFromParent(); MI->eraseFromParent();
CfCount++; CfCount++;
@@ -178,7 +238,7 @@ public:
} }
case AMDGPU::IF_PREDICATE_SET: { case AMDGPU::IF_PREDICATE_SET: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::CF_JUMP)) getHWInstrDesc(CF_JUMP))
.addImm(0) .addImm(0)
.addImm(0); .addImm(0);
IfThenElseStack.push_back(MIb); IfThenElseStack.push_back(MIb);
@@ -192,7 +252,7 @@ public:
IfThenElseStack.pop_back(); IfThenElseStack.pop_back();
CounterPropagateAddr(JumpInst, CfCount); CounterPropagateAddr(JumpInst, CfCount);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::CF_ELSE)) getHWInstrDesc(CF_ELSE))
.addImm(0) .addImm(0)
.addImm(1); .addImm(1);
DEBUG(dbgs() << CfCount << ":"; MIb->dump();); DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
@@ -207,7 +267,7 @@ public:
IfThenElseStack.pop_back(); IfThenElseStack.pop_back();
CounterPropagateAddr(IfOrElseInst, CfCount + 1); CounterPropagateAddr(IfOrElseInst, CfCount + 1);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::POP)) getHWInstrDesc(CF_POP))
.addImm(CfCount + 1) .addImm(CfCount + 1)
.addImm(1); .addImm(1);
DEBUG(dbgs() << CfCount << ":"; MIb->dump();); DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
@@ -218,13 +278,13 @@ public:
case AMDGPU::PREDICATED_BREAK: { case AMDGPU::PREDICATED_BREAK: {
CurrentStack--; CurrentStack--;
CfCount += 3; CfCount += 3;
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP)) BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
.addImm(CfCount) .addImm(CfCount)
.addImm(1); .addImm(1);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::LOOP_BREAK)) getHWInstrDesc(CF_LOOP_BREAK))
.addImm(0); .addImm(0);
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP)) BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
.addImm(CfCount) .addImm(CfCount)
.addImm(1); .addImm(1);
LoopStack.back().second.insert(MIb); LoopStack.back().second.insert(MIb);
@@ -233,7 +293,7 @@ public:
} }
case AMDGPU::CONTINUE: { case AMDGPU::CONTINUE: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
TII->get(AMDGPU::CF_CONTINUE)) getHWInstrDesc(CF_LOOP_CONTINUE))
.addImm(0); .addImm(0);
LoopStack.back().second.insert(MIb); LoopStack.back().second.insert(MIb);
MI->eraseFromParent(); MI->eraseFromParent();

View File

@@ -823,7 +823,61 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
let Inst{63-32} = Word1; let Inst{63-32} = Word1;
} }
class CF_WORD0 { class CF_WORD0_R600 {
field bits<32> Word0;
bits<32> ADDR;
let Word0 = ADDR;
}
// Field layout of the 32-bit Word1 used by the R600-encoded CF instructions.
// The first group declares each field with its width; the second group
// places each field at its bit position inside Word1.
class CF_WORD1_R600 {
field bits<32> Word1;
bits<3> POP_COUNT;
bits<5> CF_CONST;
bits<2> COND;
bits<3> COUNT;
bits<6> CALL_COUNT;
bits<1> COUNT_3;
bits<1> END_OF_PROGRAM;
bits<1> VALID_PIXEL_MODE;
bits<7> CF_INST;
bits<1> WHOLE_QUAD_MODE;
bits<1> BARRIER;
let Word1{2-0} = POP_COUNT;
let Word1{7-3} = CF_CONST;
let Word1{9-8} = COND;
let Word1{12-10} = COUNT;
let Word1{18-13} = CALL_COUNT;
let Word1{19} = COUNT_3;
// Bit 20 is not assigned by this class.
let Word1{21} = END_OF_PROGRAM;
let Word1{22} = VALID_PIXEL_MODE;
let Word1{29-23} = CF_INST;
let Word1{30} = WHOLE_QUAD_MODE;
let Word1{31} = BARRIER;
}
// Base class for the R600-encoded CF instructions. It has no outputs, takes
// its input operands and assembly string from the template arguments, and
// builds the 64-bit encoding from CF_WORD0_R600 (low half) and CF_WORD1_R600
// (high half). CF_INST comes from the `inst` argument, BARRIER is forced to 1
// and the other fields listed below are forced to 0. ADDR, COUNT and
// POP_COUNT are not set here; each def supplies them via an operand or `let`.
class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
field bits<64> Inst;
let CF_INST = inst;
let BARRIER = 1;
let CF_CONST = 0;
let VALID_PIXEL_MODE = 0;
let COND = 0;
let CALL_COUNT = 0;
let COUNT_3 = 0;
let END_OF_PROGRAM = 0;
let WHOLE_QUAD_MODE = 0;
// Word0 occupies the low 32 bits of the encoding, Word1 the high 32 bits.
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
}
class CF_WORD0_EG {
field bits<32> Word0; field bits<32> Word0;
bits<24> ADDR; bits<24> ADDR;
@@ -833,7 +887,7 @@ class CF_WORD0 {
let Word0{26-24} = JUMPTABLE_SEL; let Word0{26-24} = JUMPTABLE_SEL;
} }
class CF_WORD1 { class CF_WORD1_EG {
field bits<32> Word1; field bits<32> Word1;
bits<3> POP_COUNT; bits<3> POP_COUNT;
@@ -853,8 +907,8 @@ class CF_WORD1 {
let Word1{31} = BARRIER; let Word1{31} = BARRIER;
} }
class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 { ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
field bits<64> Inst; field bits<64> Inst;
let CF_INST = inst; let CF_INST = inst;
@@ -868,54 +922,6 @@ ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
let Inst{63-32} = Word1; let Inst{63-32} = Word1;
} }
def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
"TEX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
"VTX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
let ADDR = 0;
let COUNT = 0;
let POP_COUNT = 0;
}
def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
def CF_ALU : ALU_CLAUSE<8, "ALU">; def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">; def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
@@ -1433,6 +1439,52 @@ let Predicates = [isR600] in {
let Word1{31} = 1; // BARRIER let Word1{31} = 1; // BARRIER
} }
defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>; defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
// Control-flow instructions using the R600 CF encoding (CF_CLAUSE_R600).
// The first template argument of each def is the CF_INST value. Fields that
// are not instruction operands are pinned to 0 with `let`.

// "TEX" clause: takes ADDR and COUNT operands.
def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
"TEX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
// "VTX" clause: takes ADDR and COUNT operands.
def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
"VTX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
// Loop start: takes only ADDR.
def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
"LOOP_START_DX10 @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// Loop end: takes only ADDR.
def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// Loop break: takes only ADDR.
def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
"LOOP_BREAK @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// Loop continue: takes only ADDR.
def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
"CONTINUE @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// Jump: takes ADDR and POP_COUNT operands.
def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"JUMP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
// Else: takes ADDR and POP_COUNT operands.
def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"ELSE @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
// CALL_FS: no operands; ADDR, COUNT and POP_COUNT are all fixed to 0.
def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
let ADDR = 0;
let COUNT = 0;
let POP_COUNT = 0;
}
// Pop: takes ADDR and POP_COUNT operands.
def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"POP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
} }
// Helper pattern for normalizing inputs to trigonometric instructions for R700+ // Helper pattern for normalizing inputs to trigonometric instructions for R700+
@@ -1589,6 +1641,52 @@ let hasSideEffects = 1 in {
} }
defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>; defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
// Control-flow instructions using the EG CF encoding (CF_CLAUSE_EG).
// The first template argument of each def is the CF_INST value. Fields that
// are not instruction operands are pinned to 0 with `let`.

// "TEX" clause: takes ADDR and COUNT operands.
def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
"TEX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
// "VTX" clause: takes ADDR and COUNT operands.
def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
"VTX $COUNT @$ADDR"> {
let POP_COUNT = 0;
}
// Loop start: takes only ADDR.
def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
"LOOP_START_DX10 @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// Loop end: takes only ADDR.
def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// Loop break: takes only ADDR.
def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
"LOOP_BREAK @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// Loop continue: takes only ADDR.
def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
"CONTINUE @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
}
// Jump: takes ADDR and POP_COUNT operands.
def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"JUMP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
// Else: takes ADDR and POP_COUNT operands.
def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"ELSE @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
// CALL_FS: no operands; ADDR, COUNT and POP_COUNT are all fixed to 0.
def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
let ADDR = 0;
let COUNT = 0;
let POP_COUNT = 0;
}
// Pop: takes ADDR and POP_COUNT operands.
def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"POP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Memory read/write instructions // Memory read/write instructions
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//