# R600: emit texture/vertex fetch instructions as native words and move them
# into explicit fetch clauses.
#
# What the hunks below change:
#  * R600MCCodeEmitter.cpp, EncodeInstruction():
#    - FETCH_CLAUSE joins RETURN/BUNDLE/KILL in the branch that returns
#      without emitting anything.
#    - The TEX_VTX_TEXBUF case sets bit 19 of InstWord2 and, instead of one
#      INSTR_VTX marker byte, emits INSTR_NATIVE + InstWord01 followed by
#      INSTR_NATIVE + InstWord2 + a zero of type uint32_t.
#    - The case that emits Word01/Word2 gets the same treatment in place of
#      its INSTR_TEX marker byte.
#    - CF_TC_* / CF_VC_* leave the case group that returns immediately and
#      join the group starting at WHILE_LOOP_EG (its body is outside this
#      patch).
#  * R600ControlFlowFinalizer.cpp:
#    - ClauseFile pairs a clause-head instruction with the instructions that
#      belong to the clause.
#    - MakeFetchClause() takes the iterator by reference, records every
#      counted instruction, builds the CF_TC/CF_VC head with ADDR 0 and
#      COUNT = AluInstCount - 1, and returns the ClauseFile.
#    - EmitFetchClause() (new) adds CfCount to the head's operand 0 through
#      CounterPropagateAddr(), builds a FETCH_CLAUSE instruction carrying
#      CfCount, splices the recorded instructions at InsertPos and advances
#      CfCount by two per instruction.
#    - The per-block loop collects clauses in FetchClauses and emits them
#      after the PAD handling.
#  * R600Instructions.td: R600_TEX sets RESOURCE_INDEX_MODE = 0; FETCH_CLAUSE
#    is defined with a single i32imm $addr operand.
#
# Changes relative to the text this patch was recovered from:
#  * Line structure restored; blank context lines were placed so that every
#    hunk matches its @@ line counts. Indentation inside hunks is
#    reconstructed, so applying may need --ignore-whitespace.
#  * Template arguments that had been stripped from three added lines
#    (ClauseFile typedef, ClauseContent, FetchClauses) are restored from how
#    the values are used in this patch.
#  * The added Emit() calls use uint32_t instead of the non-standard
#    u_int32_t; the post-image blob id on the first "index" line therefore
#    no longer describes the result exactly.
#
# NOTE(review): in MakeFetchClause() the `AluInstCount > MaxFetchInst` guard
# now runs before the increment, so a clause can reach MaxFetchInst + 1
# instructions. MaxFetchInst is defined outside this patch -- confirm whether
# `>=` is intended.
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 0f811b1af42..36f2c1585ee 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -142,6 +142,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   if (isFCOp(MI.getOpcode())){
     EmitFCInstr(MI, OS);
   } else if (MI.getOpcode() == AMDGPU::RETURN ||
+    MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
     MI.getOpcode() == AMDGPU::BUNDLE ||
     MI.getOpcode() == AMDGPU::KILL) {
     return;
@@ -166,10 +167,13 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     case AMDGPU::TEX_VTX_TEXBUF : {
       uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
       uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
+      InstWord2 |= 1 << 19;
 
-      EmitByte(INSTR_VTX, OS);
+      EmitByte(INSTR_NATIVE, OS);
       Emit(InstWord01, OS);
+      EmitByte(INSTR_NATIVE, OS);
       Emit(InstWord2, OS);
+      Emit((uint32_t) 0, OS);
       break;
     }
     case AMDGPU::TEX_LD:
@@ -241,9 +245,11 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
           SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
           Offsets[2] << 10;
 
-      EmitByte(INSTR_TEX, OS);
+      EmitByte(INSTR_NATIVE, OS);
       Emit(Word01, OS);
+      EmitByte(INSTR_NATIVE, OS);
       Emit(Word2, OS);
+      Emit((uint32_t) 0, OS);
       break;
     }
     case AMDGPU::CF_ALU:
@@ -253,13 +259,13 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
       Emit(Inst, OS);
       break;
     }
-    case AMDGPU::CF_TC_EG:
-    case AMDGPU::CF_VC_EG:
     case AMDGPU::CF_CALL_FS_EG:
-    case AMDGPU::CF_TC_R600:
-    case AMDGPU::CF_VC_R600:
     case AMDGPU::CF_CALL_FS_R600:
       return;
+    case AMDGPU::CF_TC_EG:
+    case AMDGPU::CF_VC_EG:
+    case AMDGPU::CF_TC_R600:
+    case AMDGPU::CF_VC_R600:
     case AMDGPU::WHILE_LOOP_EG:
     case AMDGPU::END_LOOP_EG:
     case AMDGPU::LOOP_BREAK_EG:
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index f9786121fff..611d61ace02 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -30,6 +30,8 @@ namespace llvm {
 class R600ControlFlowFinalizer : public MachineFunctionPass {
 
 private:
+  typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
+
   enum ControlFlowInstruction {
     CF_TC,
     CF_VC,
@@ -105,28 +107,44 @@ private:
     return TII->get(Opcode);
   }
 
-  MachineBasicBlock::iterator
-  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-      unsigned CfAddress) const {
+  ClauseFile
+  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
+      const {
     MachineBasicBlock::iterator ClauseHead = I;
+    std::vector<MachineInstr *> ClauseContent;
     unsigned AluInstCount = 0;
     bool IsTex = TII->usesTextureCache(ClauseHead);
     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
       if (IsTrivialInst(I))
         continue;
+      if (AluInstCount > MaxFetchInst)
+        break;
       if ((IsTex && !TII->usesTextureCache(I)) ||
           (!IsTex && !TII->usesVertexCache(I)))
         break;
       AluInstCount ++;
-      if (AluInstCount > MaxFetchInst)
-        break;
+      ClauseContent.push_back(I);
     }
-    BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
         getHWInstrDesc(IsTex?CF_TC:CF_VC))
-        .addImm(CfAddress) // ADDR
-        .addImm(AluInstCount); // COUNT
-    return I;
+        .addImm(0) // ADDR
+        .addImm(AluInstCount - 1); // COUNT
+    return ClauseFile(MIb, ClauseContent);
   }
+
+  void
+  EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
+      unsigned &CfCount) {
+    CounterPropagateAddr(Clause.first, CfCount);
+    MachineBasicBlock *BB = Clause.first->getParent();
+    BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
+        .addImm(CfCount);
+    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
+      BB->splice(InsertPos, BB, Clause.second[i]);
+    }
+    CfCount += 2 * Clause.second.size();
+  }
+
   void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
     MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
   }
@@ -182,11 +200,12 @@ public:
             getHWInstrDesc(CF_CALL_FS));
         CfCount++;
       }
+      std::vector<ClauseFile> FetchClauses;
       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
         if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
           DEBUG(dbgs() << CfCount << ":"; I->dump(););
-          I = MakeFetchClause(MBB, I, 0);
+          FetchClauses.push_back(MakeFetchClause(MBB, I));
           CfCount++;
           continue;
         }
@@ -307,6 +326,8 @@ public:
             BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
             CfCount++;
           }
+          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
+            EmitFetchClause(I, FetchClauses[i], CfCount);
         }
         default:
           break;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 18760cb3e6e..09728f80e5d 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -477,6 +477,7 @@ class R600_TEX inst, string opName, list pattern,
   let FETCH_WHOLE_QUAD = 0;
   let ALT_CONST = 0;
   let SAMPLER_INDEX_MODE = 0;
+  let RESOURCE_INDEX_MODE = 0;
 
   let COORD_TYPE_X = 0;
   let COORD_TYPE_Y = 0;
@@ -928,6 +929,13 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
 def CF_ALU : ALU_CLAUSE<8, "ALU">;
 def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
 
+def FETCH_CLAUSE : AMDGPUInst <(outs),
+(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
+  field bits<8> Inst;
+  bits<8> num;
+  let Inst = num;
+}
+
 def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
   field bits<64> Inst;
 }