Mirror of https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-08 03:30:22 +00:00
R600/SI: Spill VGPRs to scratch space for compute shaders
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225988 91177308-0d34-0410-b5e6-96231b3b80d8
parent 735aa71398
commit 33040cf56e
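In outline, the change works like this: the VGPR spill save/restore pseudo-instructions gain two extra SGPR operands for the scratch buffer pointer and the per-wave scratch offset. SIInstrInfo::storeRegToStackSlot / loadRegFromStackSlot emit the pseudos with undef place-holder registers, the new SIPrepareScratchRegs pass fills in real registers, and SIRegisterInfo::eliminateFrameIndex lowers the pseudos to MUBUF scratch accesses. A minimal standalone sketch (toy enum and helper, not LLVM's types) of how the save opcode is keyed to the register-class width:

```cpp
// Sketch of the opcode choice in storeRegToStackSlot below; the enum and
// helper are illustrative stand-ins, not LLVM's actual definitions.
#include <cstdio>

enum VGPRSaveOpcode {
  SI_SPILL_V32_SAVE, SI_SPILL_V64_SAVE, SI_SPILL_V96_SAVE,
  SI_SPILL_V128_SAVE, SI_SPILL_V256_SAVE, SI_SPILL_V512_SAVE,
  NoOpcode = -1
};

// Mirrors the `switch(RC->getSize() * 8)` in the diff: the spill pseudo is
// selected from the register class width in bits.
VGPRSaveOpcode saveOpcodeForWidth(unsigned Bits) {
  switch (Bits) {
  case 32:  return SI_SPILL_V32_SAVE;
  case 64:  return SI_SPILL_V64_SAVE;
  case 96:  return SI_SPILL_V96_SAVE;
  case 128: return SI_SPILL_V128_SAVE;
  case 256: return SI_SPILL_V256_SAVE;
  case 512: return SI_SPILL_V512_SAVE;
  default:  return NoOpcode;
  }
}

int main() {
  // A 4-dword (128-bit) VGPR tuple spills with SI_SPILL_V128_SAVE.
  std::printf("%d\n", saveOpcodeForWidth(128));
  return 0;
}
```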
lib/Target/R600/AMDGPU.h
@@ -47,6 +47,7 @@ FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
 FunctionPass *createSIFixSGPRLiveRangesPass();
 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
 FunctionPass *createSIInsertWaits(TargetMachine &tm);
+FunctionPass *createSIPrepareScratchRegs();
 
 void initializeSIFoldOperandsPass(PassRegistry &);
 extern char &SIFoldOperandsID;
lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -189,6 +189,7 @@ void AMDGPUPassConfig::addPostRegAlloc() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
 
   if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+    addPass(createSIPrepareScratchRegs(), false);
     addPass(createSIShrinkInstructionsPass(), false);
   }
 }
lib/Target/R600/CMakeLists.txt
@@ -51,6 +51,7 @@ add_llvm_target(R600CodeGen
   SILowerControlFlow.cpp
   SILowerI1Copies.cpp
   SIMachineFunctionInfo.cpp
+  SIPrepareScratchRegs.cpp
   SIRegisterInfo.cpp
   SIShrinkInstructions.cpp
   SITypeRewriter.cpp
lib/Target/R600/SIInstrInfo.cpp
@@ -433,13 +433,9 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
 static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
 
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  const TargetMachine &TM = MF->getTarget();
 
-  // FIXME: Even though it can cause problems, we need to enable
-  // spilling at -O0, since the fast register allocator always
-  // spills registers that are live at the end of blocks.
-  return MFI->getShaderType() == ShaderType::COMPUTE &&
-         TM.getOptLevel() == CodeGenOpt::None;
+  // FIXME: Implement spilling for other shader types.
+  return MFI->getShaderType() == ShaderType::COMPUTE;
 }
 
@@ -450,6 +446,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
   MachineFunction *MF = MBB.getParent();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo *FrameInfo = MF->getFrameInfo();
   DebugLoc DL = MBB.findDebugLoc(MI);
   int Opcode = -1;
@@ -466,6 +463,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
     }
   } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+    MFI->setHasSpilledVGPRs();
+
     switch(RC->getSize() * 8) {
     case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
     case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
@@ -480,7 +479,11 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     FrameInfo->setObjectAlignment(FrameIndex, 4);
     BuildMI(MBB, MI, DL, get(Opcode))
             .addReg(SrcReg)
-            .addFrameIndex(FrameIndex);
+            .addFrameIndex(FrameIndex)
+            // Place-holder registers, these will be filled in by
+            // SIPrepareScratchRegs.
+            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+            .addReg(AMDGPU::SGPR0, RegState::Undef);
   } else {
     LLVMContext &Ctx = MF->getFunction()->getContext();
     Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
@@ -522,7 +525,12 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   if (Opcode != -1) {
     FrameInfo->setObjectAlignment(FrameIndex, 4);
     BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-            .addFrameIndex(FrameIndex);
+            .addFrameIndex(FrameIndex)
+            // Place-holder registers, these will be filled in by
+            // SIPrepareScratchRegs.
+            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+            .addReg(AMDGPU::SGPR0, RegState::Undef);
+
   } else {
     LLVMContext &Ctx = MF->getFunction()->getContext();
     Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
@@ -553,7 +561,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
   MachineBasicBlock::iterator Insert = Entry.front();
   DebugLoc DL = Insert->getDebugLoc();
 
-  TIDReg = RI.findUnusedVGPR(MF->getRegInfo());
+  TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
   if (TIDReg == AMDGPU::NoRegister)
     return TIDReg;
lib/Target/R600/SIInstrInfo.td
@@ -1763,6 +1763,7 @@ multiclass MUBUF_Load_Helper_vi <bits<7> op, string asm, RegisterClass regClass,
 multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
                                ValueType store_vt, SDPatternOperator st> {
 
+  let mayLoad = 0, mayStore = 1 in {
   let addr64 = 0 in {
 
   def "" : MUBUF_si <
@@ -1820,6 +1821,7 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
     let tfe = 0;
     let soffset = 128; // ZERO
   }
+  } // End mayLoad = 0, mayStore = 1
 }
 
 class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
@@ -1940,18 +1940,20 @@ def V_SUB_F64 : InstSI <
 
 multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
-  def _SAVE : InstSI <
-    (outs),
-    (ins sgpr_class:$src, i32imm:$frame_idx),
-    "", []
-  >;
-
-  def _RESTORE : InstSI <
-    (outs sgpr_class:$dst),
-    (ins i32imm:$frame_idx),
-    "", []
-  >;
+  let UseNamedOperandTable = 1 in {
+    def _SAVE : InstSI <
+      (outs),
+      (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+           SReg_32:$scratch_offset),
+      "", []
+    >;
+
+    def _RESTORE : InstSI <
+      (outs sgpr_class:$dst),
+      (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+      "", []
+    >;
+  } // End UseNamedOperandTable = 1
 }
 
 defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
@@ -1961,17 +1963,20 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 
 multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
-  def _SAVE : InstSI <
-    (outs),
-    (ins vgpr_class:$src, i32imm:$frame_idx),
-    "", []
-  >;
-
-  def _RESTORE : InstSI <
-    (outs vgpr_class:$dst),
-    (ins i32imm:$frame_idx),
-    "", []
-  >;
+  let UseNamedOperandTable = 1 in {
+    def _SAVE : InstSI <
+      (outs),
+      (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+           SReg_32:$scratch_offset),
+      "", []
+    >;
+
+    def _RESTORE : InstSI <
+      (outs vgpr_class:$dst),
+      (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+      "", []
+    >;
+  } // End UseNamedOperandTable = 1
 }
 
 defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
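Wrapping the spill pseudos in `let UseNamedOperandTable = 1` is what makes the later `TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)` lookups in SIRegisterInfo.cpp work: operands are found by name rather than by hard-coded position. A rough standalone model of that lookup (toy types, not TableGen's generated tables):

```cpp
// Sketch of named-operand lookup with toy types; the real table is
// generated by TableGen when UseNamedOperandTable is set.
#include <cassert>
#include <map>
#include <string>
#include <vector>

struct Operand { unsigned Reg; };

struct Instr {
  std::vector<Operand> Ops;
  std::map<std::string, unsigned> NameToIdx; // what the named-operand table provides
  Operand *getNamedOperand(const std::string &Name) {
    auto It = NameToIdx.find(Name);
    return It == NameToIdx.end() ? nullptr : &Ops[It->second];
  }
};

int main() {
  // Operand layout of SI_SPILL_V32_SAVE after this commit:
  //   $src, $frame_idx, $scratch_ptr, $scratch_offset
  Instr MI;
  MI.Ops = {{/*src*/ 300}, {/*frame_idx*/ 0}, {/*scratch_ptr*/ 4}, {/*scratch_offset*/ 8}};
  MI.NameToIdx = {{"src", 0}, {"frame_idx", 1}, {"scratch_ptr", 2}, {"scratch_offset", 3}};
  assert(MI.getNamedOperand("scratch_ptr")->Reg == 4);
  return 0;
}
```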
lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -29,6 +29,7 @@ void SIMachineFunctionInfo::anchor() {}
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
     TIDReg(AMDGPU::NoRegister),
+    HasSpilledVGPRs(false),
     PSInputAddr(0),
     NumUserSGPRs(0),
     LDSWaveSpillSize(0) { }
@@ -50,7 +51,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
   struct SpilledReg Spill;
 
   if (!LaneVGPRs.count(LaneVGPRIdx)) {
-    unsigned LaneVGPR = TRI->findUnusedVGPR(MRI);
+    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
     LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
     MRI.setPhysRegUsed(LaneVGPR);

lib/Target/R600/SIMachineFunctionInfo.h
@@ -29,6 +29,7 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
   void anchor() override;
 
   unsigned TIDReg;
+  bool HasSpilledVGPRs;
 
 public:
@@ -52,6 +53,8 @@ public:
   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
   unsigned getTIDReg() const { return TIDReg; };
   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+  bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
+  void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
 
   unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
 };
lib/Target/R600/SIPrepareScratchRegs.cpp (new file, 196 lines)
@@ -0,0 +1,196 @@
+//===-- SIPrepareScratchRegs.cpp - Prepare scratch registers -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This pass loads scratch pointer and scratch offset into a register or a
+/// frame index which can be used anywhere in the program.  These values will
+/// be used for spilling VGPRs.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+
+namespace {
+
+class SIPrepareScratchRegs : public MachineFunctionPass {
+
+private:
+  static char ID;
+
+public:
+  SIPrepareScratchRegs() : MachineFunctionPass(ID) { }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override {
+    return "SI prepare scratch registers";
+  }
+
+};
+
+} // End anonymous namespace
+
+char SIPrepareScratchRegs::ID = 0;
+
+FunctionPass *llvm::createSIPrepareScratchRegs() {
+  return new SIPrepareScratchRegs();
+}
+
+bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+  MachineBasicBlock *Entry = MF.begin();
+  MachineBasicBlock::iterator I = Entry->begin();
+  DebugLoc DL = I->getDebugLoc();
+
+  // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to
+  // run this pass.
+  if (!MFI->hasSpilledVGPRs())
+    return false;
+
+  unsigned ScratchPtrPreloadReg =
+      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
+  unsigned ScratchOffsetPreloadReg =
+      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+
+  if (!Entry->isLiveIn(ScratchPtrPreloadReg))
+    Entry->addLiveIn(ScratchPtrPreloadReg);
+
+  if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
+    Entry->addLiveIn(ScratchOffsetPreloadReg);
+
+  // Load the scratch pointer.
+  unsigned ScratchPtrReg =
+      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
+  int ScratchPtrFI = -1;
+
+  if (ScratchPtrReg != AMDGPU::NoRegister) {
+    // Found an SGPR to use.
+    MRI.setPhysRegUsed(ScratchPtrReg);
+    BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
+            .addReg(ScratchPtrPreloadReg);
+  } else {
+    // No SGPR is available, we must spill.
+    ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
+    BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
+            .addReg(ScratchPtrPreloadReg)
+            .addFrameIndex(ScratchPtrFI);
+  }
+
+  // Load the scratch offset.
+  unsigned ScratchOffsetReg =
+      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
+  int ScratchOffsetFI = ~0;
+
+  if (ScratchOffsetReg != AMDGPU::NoRegister) {
+    // Found an SGPR to use.
+    MRI.setPhysRegUsed(ScratchOffsetReg);
+    BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
+            .addReg(ScratchOffsetPreloadReg);
+  } else {
+    // No SGPR is available, we must spill.
+    ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4, 4);
+    BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
+            .addReg(ScratchOffsetPreloadReg)
+            .addFrameIndex(ScratchOffsetFI);
+  }
+
+  // Now that we have the scratch pointer and offset values, we need to
+  // add them to all the SI_SPILL_V* instructions.
+
+  RegScavenger RS;
+  bool UseRegScavenger =
+      (ScratchPtrReg == AMDGPU::NoRegister ||
+       ScratchOffsetReg == AMDGPU::NoRegister);
+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+       BI != BE; ++BI) {
+
+    MachineBasicBlock &MBB = *BI;
+    if (UseRegScavenger)
+      RS.enterBasicBlock(&MBB);
+
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+      MachineInstr &MI = *I;
+      DebugLoc DL = MI.getDebugLoc();
+      switch(MI.getOpcode()) {
+      default: break;
+      case AMDGPU::SI_SPILL_V512_SAVE:
+      case AMDGPU::SI_SPILL_V256_SAVE:
+      case AMDGPU::SI_SPILL_V128_SAVE:
+      case AMDGPU::SI_SPILL_V96_SAVE:
+      case AMDGPU::SI_SPILL_V64_SAVE:
+      case AMDGPU::SI_SPILL_V32_SAVE:
+      case AMDGPU::SI_SPILL_V32_RESTORE:
+      case AMDGPU::SI_SPILL_V64_RESTORE:
+      case AMDGPU::SI_SPILL_V128_RESTORE:
+      case AMDGPU::SI_SPILL_V256_RESTORE:
+      case AMDGPU::SI_SPILL_V512_RESTORE:
+
+        // Scratch Pointer
+        if (ScratchPtrReg == AMDGPU::NoRegister) {
+          ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
+          BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
+                  ScratchPtrReg)
+                  .addFrameIndex(ScratchPtrFI)
+                  .addReg(AMDGPU::NoRegister)
+                  .addReg(AMDGPU::NoRegister);
+        } else if (!MBB.isLiveIn(ScratchPtrReg)) {
+          MBB.addLiveIn(ScratchPtrReg);
+        }
+
+        if (ScratchOffsetReg == AMDGPU::NoRegister) {
+          ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+          BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
+                  ScratchOffsetReg)
+                  .addFrameIndex(ScratchOffsetFI)
+                  .addReg(AMDGPU::NoRegister)
+                  .addReg(AMDGPU::NoRegister);
+        } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
+          MBB.addLiveIn(ScratchOffsetReg);
+        }
+
+        if (ScratchPtrReg == AMDGPU::NoRegister ||
+            ScratchOffsetReg == AMDGPU::NoRegister) {
+          LLVMContext &Ctx = MF.getFunction()->getContext();
+          Ctx.emitError("ran out of SGPRs for spilling VGPRs");
+          ScratchPtrReg = AMDGPU::SGPR0;
+          ScratchOffsetReg = AMDGPU::SGPR0;
+        }
+        MI.getOperand(2).setReg(ScratchPtrReg);
+        MI.getOperand(3).setReg(ScratchOffsetReg);
+
+        break;
+      }
+      if (UseRegScavenger)
+        RS.forward();
+    }
+  }
+  return true;
+}
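The heart of the pass, reduced to its shape: keep each preloaded value (scratch pointer, scratch offset) in an unused SGPR when one exists; otherwise spill it to a fresh stack object and restore it through the register scavenger at each spill site. A standalone sketch of that decision (toy types, not the pass's real interface):

```cpp
// Sketch of SIPrepareScratchRegs' fallback strategy; Reg, ScratchValue and
// the helper are illustrative stand-ins, not LLVM's API.
#include <cassert>

using Reg = int;
constexpr Reg NoRegister = -1;

struct ScratchValue {
  Reg R = NoRegister; // unused SGPR holding the value, if one was found
  int FI = -1;        // otherwise, the spill slot it lives in
  bool inRegister() const { return R != NoRegister; }
};

// Models the pass: copy the preloaded value into a free SGPR if possible,
// else create a spill stack object for it.
ScratchValue placeScratchValue(Reg Preloaded, Reg FreeSGPR, int &NextFI) {
  ScratchValue SV;
  if (FreeSGPR != NoRegister) {
    SV.R = FreeSGPR; // S_MOV from the preloaded register
  } else {
    SV.FI = NextFI++; // SI_SPILL_S*_SAVE to a fresh frame index; each later
                      // use must scavenge an SGPR and restore from the slot
  }
  (void)Preloaded;
  return SV;
}

int main() {
  int NextFI = 0;
  assert(placeScratchValue(/*Preloaded=*/2, /*FreeSGPR=*/10, NextFI).inRegister());
  assert(!placeScratchValue(/*Preloaded=*/2, /*FreeSGPR=*/NoRegister, NextFI).inRegister());
  return 0;
}
```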
lib/Target/R600/SIRegisterInfo.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 
+#include "llvm/Support/Debug.h"
 using namespace llvm;
 
 SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
@@ -94,6 +95,84 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   }
 }
 
+void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
+                                           unsigned LoadStoreOp,
+                                           unsigned Value,
+                                           unsigned ScratchPtr,
+                                           unsigned ScratchOffset,
+                                           int64_t Offset,
+                                           RegScavenger *RS) const {
+
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+  MachineBasicBlock *MBB = MI->getParent();
+  const MachineFunction *MF = MI->getParent()->getParent();
+  LLVMContext &Ctx = MF->getFunction()->getContext();
+  DebugLoc DL = MI->getDebugLoc();
+  bool IsLoad = TII->get(LoadStoreOp).mayLoad();
+
+  bool RanOutOfSGPRs = false;
+  unsigned SOffset = ScratchOffset;
+
+  unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
+  if (RsrcReg == AMDGPU::NoRegister) {
+    RanOutOfSGPRs = true;
+    RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
+  }
+
+  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+  unsigned Size = NumSubRegs * 4;
+
+  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+                  0xffffffff; // Size
+
+  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
+          getSubReg(RsrcReg, AMDGPU::sub0_sub1))
+          .addReg(ScratchPtr)
+          .addReg(RsrcReg, RegState::ImplicitDefine);
+
+  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
+          getSubReg(RsrcReg, AMDGPU::sub2))
+          .addImm(Rsrc & 0xffffffff)
+          .addReg(RsrcReg, RegState::ImplicitDefine);
+
+  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
+          getSubReg(RsrcReg, AMDGPU::sub3))
+          .addImm(Rsrc >> 32)
+          .addReg(RsrcReg, RegState::ImplicitDefine);
+
+  if (!isUInt<12>(Offset + Size)) {
+    SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
+    if (SOffset == AMDGPU::NoRegister) {
+      RanOutOfSGPRs = true;
+      SOffset = AMDGPU::SGPR0;
+    }
+    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
+            .addReg(ScratchOffset)
+            .addImm(Offset);
+    Offset = 0;
+  }
+
+  if (RanOutOfSGPRs)
+    Ctx.emitError("Ran out of SGPRs for spilling VGPRs");
+
+  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
+    unsigned SubReg = NumSubRegs > 1 ?
+        getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
+        Value;
+    bool IsKill = (i == e - 1);
+
+    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+            .addReg(SubReg, getDefRegState(IsLoad))
+            .addReg(RsrcReg, getKillRegState(IsKill))
+            .addImm(Offset)
+            .addReg(SOffset, getKillRegState(IsKill))
+            .addImm(0) // glc
+            .addImm(0) // slc
+            .addImm(0) // tfe
+            .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
+  }
+}
+
 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                          int SPAdj, unsigned FIOperandNum,
                                          RegScavenger *RS) const {
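Two arithmetic details in buildScratchLoadStore can be checked in isolation: the 64-bit resource-descriptor constant is split into two 32-bit S_MOV_B32 immediates, and the immediate path is only legal while Offset + Size fits the MUBUF encoding's unsigned 12-bit offset field; otherwise the offset is folded into the scavenged SOffset register. A standalone sketch (the RSRC_* values below are placeholders, not necessarily those in SIDefines.h):

```cpp
// Checks the descriptor split and the isUInt<12> offset test used above.
// RSRC_* constants are placeholders standing in for SIDefines.h.
#include <cassert>
#include <cstdint>

constexpr uint64_t RSRC_DATA_FORMAT = 0xf00000000000ULL; // placeholder
constexpr uint64_t RSRC_TID_ENABLE  = 1ULL << 55;        // placeholder

// Equivalent of llvm::isUInt<12>: value representable in 12 unsigned bits.
bool isUInt12(int64_t V) { return V >= 0 && V < (1 << 12); }

int main() {
  // High 64 bits of the buffer resource; the low 32 bits carry the size.
  uint64_t Rsrc = RSRC_DATA_FORMAT | RSRC_TID_ENABLE | 0xffffffff;
  uint32_t Sub2 = uint32_t(Rsrc);       // S_MOV_B32 into sub2
  uint32_t Sub3 = uint32_t(Rsrc >> 32); // S_MOV_B32 into sub3
  assert(((uint64_t(Sub3) << 32) | Sub2) == Rsrc); // split loses nothing

  // If the last dword of the spill range would overflow the 12-bit immediate,
  // the offset must go through S_ADD_U32 into SOffset and be zeroed.
  int64_t Offset = 4090, Size = 16; // 4 dwords
  assert(!isUInt12(Offset + Size)); // 4106 >= 4096: take the SOffset path
  return 0;
}
```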
@@ -162,7 +241,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
               .addReg(Spill.VGPR)
-              .addImm(Spill.Lane);
+              .addImm(Spill.Lane)
+              .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
       if (isM0) {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
                 .addReg(SubReg);
@@ -179,71 +259,24 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V128_SAVE:
     case AMDGPU::SI_SPILL_V96_SAVE:
     case AMDGPU::SI_SPILL_V64_SAVE:
-    case AMDGPU::SI_SPILL_V32_SAVE: {
-      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-      unsigned SrcReg = MI->getOperand(0).getReg();
-      int64_t Offset = FrameInfo->getObjectOffset(Index);
-      unsigned Size = NumSubRegs * 4;
-      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
-      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
-        unsigned SubReg = NumSubRegs > 1 ?
-            getPhysRegSubReg(SrcReg, &AMDGPU::VGPR_32RegClass, i) :
-            SrcReg;
-        Offset += (i * 4);
-        MFI->LDSWaveSpillSize = std::max((unsigned)Offset + 4, (unsigned)MFI->LDSWaveSpillSize);
-
-        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
-                                                         Offset, Size);
-
-        if (AddrReg == AMDGPU::NoRegister) {
-          LLVMContext &Ctx = MF->getFunction()->getContext();
-          Ctx.emitError("Ran out of VGPRs for spilling VGPRS");
-          AddrReg = AMDGPU::VGPR0;
-        }
-
-        // Store the value in LDS
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_WRITE_B32))
-                .addImm(0) // gds
-                .addReg(AddrReg, RegState::Kill) // addr
-                .addReg(SubReg) // data0
-                .addImm(0); // offset
-      }
-
+    case AMDGPU::SI_SPILL_V32_SAVE:
+      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+            TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+            FrameInfo->getObjectOffset(Index), RS);
       MI->eraseFromParent();
       break;
-    }
     case AMDGPU::SI_SPILL_V32_RESTORE:
     case AMDGPU::SI_SPILL_V64_RESTORE:
     case AMDGPU::SI_SPILL_V128_RESTORE:
     case AMDGPU::SI_SPILL_V256_RESTORE:
     case AMDGPU::SI_SPILL_V512_RESTORE: {
-      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-      unsigned DstReg = MI->getOperand(0).getReg();
-      int64_t Offset = FrameInfo->getObjectOffset(Index);
-      unsigned Size = NumSubRegs * 4;
-      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
-      // FIXME: We could use DS_READ_B64 here to optimize for larger registers.
-      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
-        unsigned SubReg = NumSubRegs > 1 ?
-            getPhysRegSubReg(DstReg, &AMDGPU::VGPR_32RegClass, i) :
-            DstReg;
-
-        Offset += (i * 4);
-        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
-                                                         Offset, Size);
-        if (AddrReg == AMDGPU::NoRegister) {
-          LLVMContext &Ctx = MF->getFunction()->getContext();
-          Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
-          AddrReg = AMDGPU::VGPR0;
-        }
-
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_READ_B32), SubReg)
-                .addImm(0) // gds
-                .addReg(AddrReg, RegState::Kill) // addr
-                .addImm(0); // offset
-      }
+      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+            TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+            FrameInfo->getObjectOffset(Index), RS);
       MI->eraseFromParent();
       break;
     }
@@ -431,9 +464,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
 /// \brief Returns a register that is not used at any point in the function.
 ///        If all registers are used, then this function will return
 //         AMDGPU::NoRegister.
-unsigned SIRegisterInfo::findUnusedVGPR(const MachineRegisterInfo &MRI) const {
-
-  const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass;
+unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
+                                   const TargetRegisterClass *RC) const {
 
   for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
        I != E; ++I) {
lib/Target/R600/SIRegisterInfo.h
@@ -105,7 +105,14 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
   unsigned getPreloadedValue(const MachineFunction &MF,
                              enum PreloadedValue Value) const;
 
-  unsigned findUnusedVGPR(const MachineRegisterInfo &MRI) const;
+  unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
+                              const TargetRegisterClass *RC) const;
+
+private:
+  void buildScratchLoadStore(MachineBasicBlock::iterator MI,
+                             unsigned LoadStoreOp, unsigned Value,
+                             unsigned ScratchPtr, unsigned ScratchOffset,
+                             int64_t Offset, RegScavenger *RS) const;
 };
 
 } // End namespace llvm