R600: Turn TEX/VTX into native instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180756 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vincent Lejeune
2013-04-30 00:13:53 +00:00
parent 631591e6f3
commit b6379de427
3 changed files with 51 additions and 16 deletions

View File

@@ -142,6 +142,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (isFCOp(MI.getOpcode())){ if (isFCOp(MI.getOpcode())){
EmitFCInstr(MI, OS); EmitFCInstr(MI, OS);
} else if (MI.getOpcode() == AMDGPU::RETURN || } else if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
MI.getOpcode() == AMDGPU::BUNDLE || MI.getOpcode() == AMDGPU::BUNDLE ||
MI.getOpcode() == AMDGPU::KILL) { MI.getOpcode() == AMDGPU::KILL) {
return; return;
@@ -166,10 +167,13 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case AMDGPU::TEX_VTX_TEXBUF : { case AMDGPU::TEX_VTX_TEXBUF : {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
InstWord2 |= 1 << 19;
EmitByte(INSTR_VTX, OS); EmitByte(INSTR_NATIVE, OS);
Emit(InstWord01, OS); Emit(InstWord01, OS);
EmitByte(INSTR_NATIVE, OS);
Emit(InstWord2, OS); Emit(InstWord2, OS);
Emit((u_int32_t) 0, OS);
break; break;
} }
case AMDGPU::TEX_LD: case AMDGPU::TEX_LD:
@@ -241,9 +245,11 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
Offsets[2] << 10; Offsets[2] << 10;
EmitByte(INSTR_TEX, OS); EmitByte(INSTR_NATIVE, OS);
Emit(Word01, OS); Emit(Word01, OS);
EmitByte(INSTR_NATIVE, OS);
Emit(Word2, OS); Emit(Word2, OS);
Emit((u_int32_t) 0, OS);
break; break;
} }
case AMDGPU::CF_ALU: case AMDGPU::CF_ALU:
@@ -253,13 +259,13 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit(Inst, OS); Emit(Inst, OS);
break; break;
} }
case AMDGPU::CF_TC_EG:
case AMDGPU::CF_VC_EG:
case AMDGPU::CF_CALL_FS_EG: case AMDGPU::CF_CALL_FS_EG:
case AMDGPU::CF_TC_R600:
case AMDGPU::CF_VC_R600:
case AMDGPU::CF_CALL_FS_R600: case AMDGPU::CF_CALL_FS_R600:
return; return;
case AMDGPU::CF_TC_EG:
case AMDGPU::CF_VC_EG:
case AMDGPU::CF_TC_R600:
case AMDGPU::CF_VC_R600:
case AMDGPU::WHILE_LOOP_EG: case AMDGPU::WHILE_LOOP_EG:
case AMDGPU::END_LOOP_EG: case AMDGPU::END_LOOP_EG:
case AMDGPU::LOOP_BREAK_EG: case AMDGPU::LOOP_BREAK_EG:

View File

@@ -30,6 +30,8 @@ namespace llvm {
class R600ControlFlowFinalizer : public MachineFunctionPass { class R600ControlFlowFinalizer : public MachineFunctionPass {
private: private:
typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
enum ControlFlowInstruction { enum ControlFlowInstruction {
CF_TC, CF_TC,
CF_VC, CF_VC,
@@ -105,28 +107,44 @@ private:
return TII->get(Opcode); return TII->get(Opcode);
} }
MachineBasicBlock::iterator ClauseFile
MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
unsigned CfAddress) const { const {
MachineBasicBlock::iterator ClauseHead = I; MachineBasicBlock::iterator ClauseHead = I;
std::vector<MachineInstr *> ClauseContent;
unsigned AluInstCount = 0; unsigned AluInstCount = 0;
bool IsTex = TII->usesTextureCache(ClauseHead); bool IsTex = TII->usesTextureCache(ClauseHead);
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
if (IsTrivialInst(I)) if (IsTrivialInst(I))
continue; continue;
if (AluInstCount > MaxFetchInst)
break;
if ((IsTex && !TII->usesTextureCache(I)) || if ((IsTex && !TII->usesTextureCache(I)) ||
(!IsTex && !TII->usesVertexCache(I))) (!IsTex && !TII->usesVertexCache(I)))
break; break;
AluInstCount ++; AluInstCount ++;
if (AluInstCount > MaxFetchInst) ClauseContent.push_back(I);
break;
} }
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
getHWInstrDesc(IsTex?CF_TC:CF_VC)) getHWInstrDesc(IsTex?CF_TC:CF_VC))
.addImm(CfAddress) // ADDR .addImm(0) // ADDR
.addImm(AluInstCount); // COUNT .addImm(AluInstCount - 1); // COUNT
return I; return ClauseFile(MIb, ClauseContent);
} }
/// Emit one previously collected fetch clause at \p InsertPos.
///
/// The clause's control-flow instruction (Clause.first) gets \p CfCount added
/// to its operand 0 via CounterPropagateAddr. A FETCH_CLAUSE marker carrying
/// the same address is then appended to the parent block, and every
/// instruction recorded in Clause.second is spliced in front of \p InsertPos.
///
/// \param InsertPos  Position in the clause's parent block before which the
///                   clause instructions are moved. May be the block's end().
/// \param Clause     Pair of (control-flow instruction, clause instructions).
/// \param CfCount    In/out running address counter; advanced by two units
///                   per clause instruction.
void
EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
                unsigned &CfCount) {
  CounterPropagateAddr(Clause.first, CfCount);
  MachineBasicBlock *BB = Clause.first->getParent();
  // Ask the block for the debug location instead of dereferencing InsertPos:
  // the caller may hand us BB->end(), which must not be dereferenced. This
  // also matches how the rest of this pass obtains locations for BuildMI.
  BuildMI(BB, BB->findDebugLoc(InsertPos), TII->get(AMDGPU::FETCH_CLAUSE))
      .addImm(CfCount);
  for (unsigned i = 0, e = Clause.second.size(); i < e; ++i)
    BB->splice(InsertPos, BB, Clause.second[i]);
  // NOTE(review): the factor 2 presumably reflects each fetch instruction
  // occupying two address units in the emitted stream; confirm against the
  // code emitter.
  CfCount += 2 * Clause.second.size();
}
void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const { void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm()); MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
} }
@@ -182,11 +200,12 @@ public:
getHWInstrDesc(CF_CALL_FS)); getHWInstrDesc(CF_CALL_FS));
CfCount++; CfCount++;
} }
std::vector<ClauseFile> FetchClauses;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) { I != E;) {
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) { if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
DEBUG(dbgs() << CfCount << ":"; I->dump();); DEBUG(dbgs() << CfCount << ":"; I->dump(););
I = MakeFetchClause(MBB, I, 0); FetchClauses.push_back(MakeFetchClause(MBB, I));
CfCount++; CfCount++;
continue; continue;
} }
@@ -307,6 +326,8 @@ public:
BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD)); BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
CfCount++; CfCount++;
} }
for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
EmitFetchClause(I, FetchClauses[i], CfCount);
} }
default: default:
break; break;

View File

@@ -477,6 +477,7 @@ class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
let FETCH_WHOLE_QUAD = 0; let FETCH_WHOLE_QUAD = 0;
let ALT_CONST = 0; let ALT_CONST = 0;
let SAMPLER_INDEX_MODE = 0; let SAMPLER_INDEX_MODE = 0;
let RESOURCE_INDEX_MODE = 0;
let COORD_TYPE_X = 0; let COORD_TYPE_X = 0;
let COORD_TYPE_Y = 0; let COORD_TYPE_Y = 0;
@@ -928,6 +929,13 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
def CF_ALU : ALU_CLAUSE<8, "ALU">; def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">; def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
// Marker instruction for the start of a fetch clause. It has no outputs and a
// single i32 immediate operand ($addr), which is printed in the asm string.
// NOTE(review): the code emitter appears to skip this opcode entirely, so the
// Inst field below is presumably a placeholder; confirm before relying on it.
def FETCH_CLAUSE : AMDGPUInst <(outs),
(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
// 8-bit encoding field, wired to the local `num` field declared below.
field bits<8> Inst;
// NOTE(review): `num` is not one of the declared operands (only $addr is);
// verify whether it is ever given a value.
bits<8> num;
let Inst = num;
}
def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > { def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
field bits<64> Inst; field bits<64> Inst;
} }