diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index fcf9eca80e9..c6600550126 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -77,7 +77,11 @@ extern Target TheGCNTarget; namespace AMDGPU { enum TargetIndex { - TI_CONSTDATA_START + TI_CONSTDATA_START, + TI_SCRATCH_RSRC_DWORD0, + TI_SCRATCH_RSRC_DWORD1, + TI_SCRATCH_RSRC_DWORD2, + TI_SCRATCH_RSRC_DWORD3 }; } diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 28b4183d277..e0e81680cc0 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -962,16 +962,27 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, const SITargetLowering& Lowering = *static_cast(getTargetLowering()); - unsigned ScratchPtrReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR); unsigned ScratchOffsetReg = TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET); Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass, ScratchOffsetReg, MVT::i32); + SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32); + SDValue ScratchRsrcDword0 = + SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0); - SDValue ScratchPtr = - CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, - MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64); + SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32); + SDValue ScratchRsrcDword1 = + SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0); + + const SDValue RsrcOps[] = { + CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), + ScratchRsrcDword0, + CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), + ScratchRsrcDword1, + CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32), + }; + SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, + MVT::v2i32, RsrcOps), 0); Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0); SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32); diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index 5d870d5e661..03aa32d05ef 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -80,6 +80,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::CreateExpr(Expr); break; } + case MachineOperand::MO_ExternalSymbol: { + MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName())); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + MCOp = MCOperand::CreateExpr(Expr); + break; + } } OutMI.addOperand(MCOp); } diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 73a9c73d8e7..1c74dda5362 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -163,4 +163,5 @@ namespace SIOutMods { #define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860 #define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12) + #endif diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 8c2f324005a..ccf90ddfae0 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -482,7 +482,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // Place-holder registers, these will be filled in by // SIPrepareScratchRegs. - .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef) + .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) .addReg(AMDGPU::SGPR0, RegState::Undef); } else { LLVMContext &Ctx = MF->getFunction()->getContext(); @@ -528,7 +528,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // Place-holder registers, these will be filled in by // SIPrepareScratchRegs. - .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef) + .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) .addReg(AMDGPU::SGPR0, RegState::Undef); } else { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index c09bed7840f..bd680233e65 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1951,14 +1951,14 @@ multiclass SI_SPILL_SGPR { let UseNamedOperandTable = 1 in { def _SAVE : InstSI < (outs), - (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr, + (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; def _RESTORE : InstSI < (outs sgpr_class:$dst), - (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset), + (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; } // End UseNamedOperandTable = 1 @@ -1974,14 +1974,14 @@ multiclass SI_SPILL_VGPR { let UseNamedOperandTable = 1 in { def _SAVE : InstSI < (outs), - (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr, + (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; def _RESTORE : InstSI < (outs vgpr_class:$dst), - (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset), + (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; } // End UseNamedOperandTable = 1 diff --git a/lib/Target/R600/SIPrepareScratchRegs.cpp b/lib/Target/R600/SIPrepareScratchRegs.cpp index f0e7edec6b4..b7934bd9e91 100644 --- a/lib/Target/R600/SIPrepareScratchRegs.cpp +++ b/lib/Target/R600/SIPrepareScratchRegs.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" using namespace llvm; namespace { @@ -84,28 +85,10 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { if (!Entry->isLiveIn(ScratchOffsetPreloadReg)) Entry->addLiveIn(ScratchOffsetPreloadReg); - // Load the scratch pointer - unsigned ScratchPtrReg = - TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass); - int ScratchPtrFI = -1; - - if (ScratchPtrReg != AMDGPU::NoRegister) { - // Found an SGPR to use. - MRI.setPhysRegUsed(ScratchPtrReg); - BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg) - .addReg(ScratchPtrPreloadReg); - } else { - // No SGPR is available, we must spill. - ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4); - BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE)) - .addReg(ScratchPtrPreloadReg) - .addFrameIndex(ScratchPtrFI); - } - // Load the scratch offset. unsigned ScratchOffsetReg = TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass); - int ScratchOffsetFI = ~0; + int ScratchOffsetFI = -1; if (ScratchOffsetReg != AMDGPU::NoRegister) { // Found an SGPR to use @@ -125,22 +108,26 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { // add them to all the SI_SPILL_V* instructions. RegScavenger RS; - bool UseRegScavenger = - (ScratchPtrReg == AMDGPU::NoRegister || - ScratchOffsetReg == AMDGPU::NoRegister); + unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4); + RS.addScavengingFrameIndex(ScratchRsrcFI); + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; - if (UseRegScavenger) - RS.enterBasicBlock(&MBB); + // Add the scratch offset reg as a live-in so that the register scavenger + // doesn't re-use it. + if (!MBB.isLiveIn(ScratchOffsetReg)) + MBB.addLiveIn(ScratchOffsetReg); + RS.enterBasicBlock(&MBB); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; + RS.forward(I); DebugLoc DL = MI.getDebugLoc(); switch(MI.getOpcode()) { - default: break;; + default: break; case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V256_SAVE: case AMDGPU::SI_SPILL_V128_SAVE: @@ -153,18 +140,35 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE: - // Scratch Pointer - if (ScratchPtrReg == AMDGPU::NoRegister) { - ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0); - BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE), - ScratchPtrReg) - .addFrameIndex(ScratchPtrFI) - .addReg(AMDGPU::NoRegister) - .addReg(AMDGPU::NoRegister); - } else if (!MBB.isLiveIn(ScratchPtrReg)) { - MBB.addLiveIn(ScratchPtrReg); - } + // Scratch resource + unsigned ScratchRsrcReg = + RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); + uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | + 0xffffffff; // Size + + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); + unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) + .addImm(Rsrc & 0xffffffff) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) + .addImm(Rsrc >> 32) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + // Scratch Offset if (ScratchOffsetReg == AMDGPU::NoRegister) { ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), @@ -176,20 +180,26 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { MBB.addLiveIn(ScratchOffsetReg); } - if (ScratchPtrReg == AMDGPU::NoRegister || + if (ScratchRsrcReg == AMDGPU::NoRegister || ScratchOffsetReg == AMDGPU::NoRegister) { LLVMContext &Ctx = MF.getFunction()->getContext(); Ctx.emitError("ran out of SGPRs for spilling VGPRs"); - ScratchPtrReg = AMDGPU::SGPR0; + ScratchRsrcReg = AMDGPU::SGPR0; ScratchOffsetReg = AMDGPU::SGPR0; } - MI.getOperand(2).setReg(ScratchPtrReg); + MI.getOperand(2).setReg(ScratchRsrcReg); + MI.getOperand(2).setIsKill(true); + MI.getOperand(2).setIsUndef(false); MI.getOperand(3).setReg(ScratchOffsetReg); + MI.getOperand(3).setIsUndef(false); + MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true)); + MI.dump(); break; } - if (UseRegScavenger) - RS.forward(); } } return true; diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 59ff8a27d05..321d25fce73 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -98,7 +98,7 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) { void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, unsigned Value, - unsigned ScratchPtr, + unsigned ScratchRsrcReg, unsigned ScratchOffset, int64_t Offset, RegScavenger *RS) const { @@ -113,34 +113,11 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, bool RanOutOfSGPRs = false; unsigned SOffset = ScratchOffset; - unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0); - if (RsrcReg == AMDGPU::NoRegister) { - RanOutOfSGPRs = true; - RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; - } - unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); unsigned Size = NumSubRegs * 4; - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64), - getSubReg(RsrcReg, AMDGPU::sub0_sub1)) - .addReg(ScratchPtr) - .addReg(RsrcReg, RegState::ImplicitDefine); - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), - getSubReg(RsrcReg, AMDGPU::sub2)) - .addImm(Rsrc & 0xffffffff) - .addReg(RsrcReg, RegState::ImplicitDefine); - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), - getSubReg(RsrcReg, AMDGPU::sub3)) - .addImm(Rsrc >> 32) - .addReg(RsrcReg, RegState::ImplicitDefine); - if (!isUInt<12>(Offset + Size)) { + dbgs() << "Offset scavenge\n"; SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0); if (SOffset == AMDGPU::NoRegister) { RanOutOfSGPRs = true; @@ -163,7 +140,7 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) .addReg(SubReg, getDefRegState(IsLoad)) - .addReg(RsrcReg, getKillRegState(IsKill)) + .addReg(ScratchRsrcReg, getKillRegState(IsKill)) .addImm(Offset) .addReg(SOffset, getKillRegState(IsKill)) .addImm(0) // glc @@ -236,6 +213,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } if (isM0) { + dbgs() << "Scavenge M0\n"; SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0); } @@ -262,7 +240,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V32_SAVE: buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(), - TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(), + TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(), FrameInfo->getObjectOffset(Index), RS); MI->eraseFromParent(); @@ -274,7 +252,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V512_RESTORE: { buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET, TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(), - TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(), + TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(), FrameInfo->getObjectOffset(Index), RS); MI->eraseFromParent(); diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index d14212c2b10..8aa02c30a43 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -111,7 +111,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { private: void buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, unsigned Value, - unsigned ScratchPtr, unsigned ScratchOffset, + unsigned ScratchRsrcReg, unsigned ScratchOffset, int64_t Offset, RegScavenger *RS) const; };