R600/SI: Set the ATC bit on all resource descriptors for the HSA runtime

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223125 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2014-12-02 17:05:41 +00:00
parent fcf4242b9b
commit 15e1919a76
7 changed files with 45 additions and 9 deletions

View File

@ -1012,6 +1012,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &GLC, SDValue &SLC,
SDValue &TFE) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@ -1019,7 +1021,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
!cast<ConstantSDNode>(Idxen)->getSExtValue() &&
!cast<ConstantSDNode>(Addr64)->getSExtValue()) {
uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
APInt::getAllOnesValue(32).getZExtValue(); // Size
SDLoc DL(Addr);

View File

@ -84,7 +84,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
InstrItins(getInstrItineraryForCPU(GPU)) {
InstrItins(getInstrItineraryForCPU(GPU)),
TargetTriple(TT) {
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
InstrInfo.reset(new R600InstrInfo(*this));
TLInfo.reset(new R600TargetLowering(TM));

View File

@ -68,6 +68,7 @@ private:
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
InstrItineraryData InstrItins;
Triple TargetTriple;
public:
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
@ -217,6 +218,9 @@ public:
bool r600ALUEncoding() const {
return R600ALUInst;
}
bool isAmdHsaOS() const {
return TargetTriple.getOS() == Triple::AMDHSA;
}
};
} // End namespace llvm

View File

@ -2030,6 +2030,8 @@ static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const {
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
getTargetMachine().getSubtargetImpl()->getInstrInfo());
#if 1
// XXX - Workaround for moveToVALU not handling different register class
// inserts for REG_SEQUENCE.
@ -2039,7 +2041,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, MVT::i32),
buildSMovImm32(DAG, DL, 0),
DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
buildSMovImm32(DAG, DL, AMDGPU::RSRC_DATA_FORMAT >> 32),
buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
DAG.getTargetConstant(AMDGPU::sub1, MVT::i32)
};
@ -2063,7 +2065,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
buildSMovImm32(DAG, DL, 0),
DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
buildSMovImm32(DAG, DL, AMDGPU::RSRC_DATA_FORMAT >> 32),
buildSMovImm32(DAG, DL, TII->getDefaultRsrcFormat() >> 32),
DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
};
@ -2110,7 +2112,9 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const {
uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
getTargetMachine().getSubtargetImpl()->getInstrInfo());
uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size
return buildRSRC(DAG, DL, Ptr, 0, Rsrc);

View File

@ -1580,6 +1580,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
// Zero64 = 0
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
@ -1589,12 +1590,12 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
// SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
SRsrcFormatLo)
.addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
.addImm(RsrcDataFormat & 0xFFFFFFFF);
// SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
SRsrcFormatHi)
.addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
.addImm(RsrcDataFormat >> 32);
// NewSRsrc = {Zero64, SRsrcFormat}
BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
@ -1798,13 +1799,14 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
.addImm(0);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
.addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
.addImm(RsrcDataFormat & 0xFFFFFFFF);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
.addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
.addImm(RsrcDataFormat >> 32);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
.addReg(DWord0)
.addImm(AMDGPU::sub0)
@ -2413,3 +2415,11 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
return &MI.getOperand(Idx);
}
uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
if (ST.isAmdHsaOS())
RsrcDataFormat |= (1ULL << 56);
return RsrcDataFormat;
}

View File

@ -308,6 +308,9 @@ public:
unsigned OpName) const {
return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
}
uint64_t getDefaultRsrcDataFormat() const;
};
namespace AMDGPU {

12
test/CodeGen/R600/hsa.ll Normal file
View File

@ -0,0 +1,12 @@
; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
; HSA: {{^}}simple:
; Make sure we are setting the ATC bit:
; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
define void @simple(i32 addrspace(1)* %out) {
entry:
store i32 0, i32 addrspace(1)* %out
ret void
}