Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2024-12-13 04:30:23 +00:00)
R600/SI: Add a ComplexPattern for selecting MUBUF _OFFSET variant

This saves us from having to copy a 64-bit 0 value into VGPRs for BUFFER_* instructions, which only have a 12-bit immediate offset.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215399 91177308-0d34-0410-b5e6-96231b3b80d8
Parent: f56c55d003
Commit: 13f4476c55
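For context, a ComplexPattern binds a TableGen pattern operand to a C++ selection callback (here SelectMUBUFOffset) that decomposes an address computation into the operand set a particular encoding expects. The following is a minimal, self-contained C++ sketch of the addressing decision this commit adds, not the in-tree implementation; Addr, MUBUFOffsetOperands, and selectMUBUFOffset are illustrative names invented for this example.

#include <cstdint>
#include <iostream>
#include <optional>

// Illustrative stand-ins for the SDValue operands involved (not LLVM types).
struct Addr {
  uint64_t Base;   // base pointer of the access
  uint64_t Offset; // constant byte offset folded into the address
};

struct MUBUFOffsetOperands {
  uint64_t SRsrcBase; // pointer placed in the 128-bit resource descriptor
  uint16_t ImmOffset; // the 12-bit immediate offset field of the encoding
};

// Models the choice SelectMUBUFOffset makes: if the constant offset fits
// the 12-bit unsigned immediate (the isUInt<12> check in the patch), the
// _OFFSET form can be used and no 64-bit address has to be materialized
// in VGPRs; otherwise the selector falls back to the _ADDR64 form.
std::optional<MUBUFOffsetOperands> selectMUBUFOffset(const Addr &A) {
  if (A.Offset >= (1u << 12))
    return std::nullopt; // does not fit: use the addr64 variant instead
  return MUBUFOffsetOperands{A.Base, static_cast<uint16_t>(A.Offset)};
}

int main() {
  for (uint64_t Off : {4u, 4095u, 4096u}) {
    if (auto Ops = selectMUBUFOffset({0x1000, Off}))
      std::cout << "offset " << Off << ": _OFFSET form, imm = "
                << Ops->ImmOffset << '\n';
    else
      std::cout << "offset " << Off << ": fall back to _ADDR64\n";
  }
  return 0;
}

The diff below implements this decision for real: a common SelectMUBUF routine classifies the address once, and thin wrappers (SelectMUBUFAddr64, SelectMUBUFOffset) accept or reject the match for their respective encodings.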
@@ -88,13 +88,16 @@ private:
                        SDValue& Offset);
   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
   bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
-  bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
-                         SDValue &ImmOffset) const;
+  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
+                   SDValue &TFE) const;
+  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                         SDValue &Offset) const;
   bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                           SDValue &SOffset, SDValue &ImmOffset) const;
-  bool SelectMUBUFAddr32(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
-                         SDValue &SOffset, SDValue &Offset, SDValue &Offen,
-                         SDValue &Idxen, SDValue &GLC, SDValue &SLC,
+  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
+                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                          SDValue &TFE) const;
   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -750,11 +753,23 @@ static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
   return isUInt<12>(Imm->getZExtValue());
 }
 
-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
-                                           SDValue &Offset,
-                                           SDValue &ImmOffset) const {
+void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
+                                     SDValue &VAddr, SDValue &SOffset,
+                                     SDValue &Offset, SDValue &Offen,
+                                     SDValue &Idxen, SDValue &Addr64,
+                                     SDValue &GLC, SDValue &SLC,
+                                     SDValue &TFE) const {
   SDLoc DL(Addr);
 
+  GLC = CurDAG->getTargetConstant(0, MVT::i1);
+  SLC = CurDAG->getTargetConstant(0, MVT::i1);
+  TFE = CurDAG->getTargetConstant(0, MVT::i1);
+
+  Idxen = CurDAG->getTargetConstant(0, MVT::i1);
+  Offen = CurDAG->getTargetConstant(0, MVT::i1);
+  Addr64 = CurDAG->getTargetConstant(0, MVT::i1);
+  SOffset = CurDAG->getTargetConstant(0, MVT::i32);
+
   if (CurDAG->isBaseWithConstantOffset(Addr)) {
     SDValue N0 = Addr.getOperand(0);
     SDValue N1 = Addr.getOperand(1);
@@ -763,37 +778,74 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
     if (isLegalMUBUFImmOffset(C1)) {
 
       if (N0.getOpcode() == ISD::ADD) {
-        // (add (add N2, N3), C1)
+        // (add (add N2, N3), C1) -> addr64
         SDValue N2 = N0.getOperand(0);
         SDValue N3 = N0.getOperand(1);
-        Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
-        Offset = N3;
-        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
-        return true;
+        Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+        Ptr = N2;
+        VAddr = N3;
+        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+        return;
       }
 
-      // (add N0, C1)
-      Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));;
-      Offset = N0;
-      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
-      return true;
+      // (add N0, C1) -> offset
+      VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+      Ptr = N0;
+      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+      return;
     }
   }
   if (Addr.getOpcode() == ISD::ADD) {
-    // (add N0, N1)
+    // (add N0, N1) -> addr64
     SDValue N0 = Addr.getOperand(0);
     SDValue N1 = Addr.getOperand(1);
-    Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
-    Offset = N1;
-    ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
-    return true;
+    Addr64 = CurDAG->getTargetConstant(1, MVT::i1);
+    Ptr = N0;
+    VAddr = N1;
+    Offset = CurDAG->getTargetConstant(0, MVT::i16);
+    return;
   }
 
-  // default case
-  Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
-  Offset = Addr;
-  ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
-  return true;
+  // default case -> offset
+  VAddr = CurDAG->getTargetConstant(0, MVT::i32);
+  Ptr = Addr;
+  Offset = CurDAG->getTargetConstant(0, MVT::i16);
+
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+                                           SDValue &VAddr,
+                                           SDValue &Offset) const {
+  SDValue Ptr, SOffset, Offen, Idxen, Addr64, GLC, SLC, TFE;
+
+  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+              GLC, SLC, TFE);
+
+  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
+  if (C->getSExtValue()) {
+    SDLoc DL(Addr);
+    SRsrc = wrapAddr64Rsrc(CurDAG, DL, Ptr);
+    return true;
+  }
+  return false;
+}
+
+static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
+                         uint32_t RsrcDword1, uint64_t RsrcDword2And3) {
+
+  SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
+  SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
+  if (RsrcDword1)
+    PtrHi = SDValue(DAG->getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
+                                    DAG->getConstant(RsrcDword1, MVT::i32)), 0);
+
+  SDValue DataLo = DAG->getTargetConstant(
+      RsrcDword2And3 & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
+  SDValue DataHi = DAG->getTargetConstant(RsrcDword2And3 >> 32, MVT::i32);
+
+  const SDValue Ops[] = { PtrLo, PtrHi, DataLo, DataHi };
+  return SDValue(DAG->getMachineNode(AMDGPU::SI_BUFFER_RSRC, DL,
+                                     MVT::v4i32, Ops), 0);
 }
 
 /// \brief Return a resource descriptor with the 'Add TID' bit enabled
@@ -803,17 +855,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
 static SDValue buildScratchRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
 
   uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
-                  0xffffffff;
+                  0xffffffff; // Size
 
-  SDValue PtrLo = DAG->getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
-  SDValue PtrHi = DAG->getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
-  SDValue DataLo = DAG->getTargetConstant(
-      Rsrc & APInt::getAllOnesValue(32).getZExtValue(), MVT::i32);
-  SDValue DataHi = DAG->getTargetConstant(Rsrc >> 32, MVT::i32);
-
-  const SDValue Ops[] = { PtrLo, PtrHi, DataLo, DataHi };
-  return SDValue(DAG->getMachineNode(AMDGPU::SI_BUFFER_RSRC, DL,
-                                     MVT::v4i32, Ops), 0);
+  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
 }
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
@@ -870,20 +914,25 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
   return true;
 }
 
-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr32(SDValue Addr, SDValue &SRsrc,
-                                           SDValue &VAddr, SDValue &SOffset,
-                                           SDValue &Offset, SDValue &Offen,
-                                           SDValue &Idxen, SDValue &GLC,
-                                           SDValue &SLC, SDValue &TFE) const {
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+                                           SDValue &SOffset, SDValue &Offset,
+                                           SDValue &GLC, SDValue &SLC,
+                                           SDValue &TFE) const {
+  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
 
-  GLC = CurDAG->getTargetConstant(0, MVT::i1);
-  SLC = CurDAG->getTargetConstant(0, MVT::i1);
-  TFE = CurDAG->getTargetConstant(0, MVT::i1);
+  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+              GLC, SLC, TFE);
 
-  Idxen = CurDAG->getTargetConstant(0, MVT::i1);
-  Offen = CurDAG->getTargetConstant(1, MVT::i1);
-
-  return SelectMUBUFScratch(Addr, SRsrc, VAddr, SOffset, Offset);
+  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
+      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
+      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
+    uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
+                    APInt::getAllOnesValue(32).getZExtValue(); // Size
+    SDLoc DL(Addr);
+    SRsrc = buildRSRC(CurDAG, DL, Ptr, 0, Rsrc);
+    return true;
+  }
+  return false;
 }
 
 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
@@ -45,6 +45,33 @@ static SDValue findChainOperand(SDNode *Load) {
   return LastOp;
 }
 
+/// \brief Returns true if both nodes have the same value for the given
+///        operand \p Op, or if both nodes do not have this operand.
+static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
+  unsigned Opc0 = N0->getMachineOpcode();
+  unsigned Opc1 = N1->getMachineOpcode();
+
+  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
+  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);
+
+  if (Op0Idx == -1 && Op1Idx == -1)
+    return true;
+
+
+  if ((Op0Idx == -1 && Op1Idx != -1) ||
+      (Op1Idx == -1 && Op0Idx != -1))
+    return false;
+
+  // getNamedOperandIdx returns the index for the MachineInstr's operands,
+  // which includes the result as the first operand. We are indexing into the
+  // MachineSDNode's operands, so we need to skip the result operand to get
+  // the real index.
+  --Op0Idx;
+  --Op1Idx;
+
+  return N0->getOperand(Op0Idx) == N0->getOperand(Op1Idx);
+}
+
 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
                                           int64_t &Offset0,
                                           int64_t &Offset1) const {
@@ -98,32 +125,35 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
 
   // MUBUF and MTBUF can access the same addresses.
   if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) {
+
-    // Skip if an SGPR offset is applied. I don't think we ever emit any of
-    // variants that use this currently.
-    int SoffsetIdx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::soffset);
-    if (SoffsetIdx != -1)
-      return false;
-
-    // getNamedOperandIdx returns the index for the MachineInstr's operands,
-    // which includes the result as the first operand. We are indexing into the
-    // MachineSDNode's operands, so we need to skip the result operand to get
-    // the real index.
-    --SoffsetIdx;
-
-    // Check chain.
-    if (findChainOperand(Load0) != findChainOperand(Load1))
-      return false;
 
     // MUBUF and MTBUF have vaddr at different indices.
-    int VaddrIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::vaddr) - 1;
-    int VaddrIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::vaddr) - 1;
-    if (Load0->getOperand(VaddrIdx0) != Load1->getOperand(VaddrIdx1))
+    if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
+        findChainOperand(Load0) != findChainOperand(Load1) ||
+        !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
+        !nodesHaveSameOperandValue(Load1, Load1, AMDGPU::OpName::srsrc))
      return false;
 
-    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset) - 1;
-    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset) - 1;
-    Offset0 = cast<ConstantSDNode>(Load0->getOperand(OffIdx0))->getZExtValue();
-    Offset1 = cast<ConstantSDNode>(Load1->getOperand(OffIdx1))->getZExtValue();
+    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
+    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
+
+    if (OffIdx0 == -1 || OffIdx1 == -1)
+      return false;
+
+    // getNamedOperandIdx returns the index for MachineInstrs. Since they
+    // inlcude the output in the operand list, but SDNodes don't, we need to
+    // subtract the index by one.
+    --OffIdx0;
+    --OffIdx1;
+
+    SDValue Off0 = Load0->getOperand(OffIdx0);
+    SDValue Off1 = Load1->getOperand(OffIdx1);
+
+    // The offset might be a FrameIndexSDNode.
+    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
+      return false;
+
+    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
+    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
     return true;
   }
 
@@ -1276,105 +1306,128 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
   // Legalize MUBUF* instructions
   // FIXME: If we start using the non-addr64 instructions for compute, we
   // may need to legalize them here.
-  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
-                                            AMDGPU::OpName::srsrc);
-  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
-                                            AMDGPU::OpName::vaddr);
-  if (SRsrcIdx != -1 && VAddrIdx != -1) {
-    const TargetRegisterClass *VAddrRC =
-        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
-
-    if(VAddrRC->getSize() == 8 &&
-       MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
-      // We have a MUBUF instruction that uses a 64-bit vaddr register and
-      // srsrc has the incorrect register class. In order to fix this, we
-      // need to extract the pointer from the resource descriptor (srsrc),
-      // add it to the value of vadd, then store the result in the vaddr
-      // operand. Then, we need to set the pointer field of the resource
-      // descriptor to zero.
-
-      MachineBasicBlock &MBB = *MI->getParent();
-      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
-      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
-      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
-      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
-      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
-
-      // SRsrcPtrLo = srsrc:sub0
-      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
-          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
-
-      // SRsrcPtrHi = srsrc:sub1
-      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
-          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
-
-      // VAddrLo = vaddr:sub0
-      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
-          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
-
-      // VAddrHi = vaddr:sub1
-      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
-          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
-
-      // NewVaddrLo = SRsrcPtrLo + VAddrLo
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
-              NewVAddrLo)
-              .addReg(SRsrcPtrLo)
-              .addReg(VAddrLo)
-              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
-
-      // NewVaddrHi = SRsrcPtrHi + VAddrHi
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
-              NewVAddrHi)
-              .addReg(SRsrcPtrHi)
-              .addReg(VAddrHi)
-              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
-              .addReg(AMDGPU::VCC, RegState::Implicit);
-
-      // NewVaddr = {NewVaddrHi, NewVaddrLo}
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
-              NewVAddr)
-              .addReg(NewVAddrLo)
-              .addImm(AMDGPU::sub0)
-              .addReg(NewVAddrHi)
-              .addImm(AMDGPU::sub1);
-
-      // Zero64 = 0
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
-              Zero64)
-              .addImm(0);
-
-      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
-              SRsrcFormatLo)
-              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
-
-      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
-              SRsrcFormatHi)
-              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
-
-      // NewSRsrc = {Zero64, SRsrcFormat}
-      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
-              NewSRsrc)
-              .addReg(Zero64)
-              .addImm(AMDGPU::sub0_sub1)
-              .addReg(SRsrcFormatLo)
-              .addImm(AMDGPU::sub2)
-              .addReg(SRsrcFormatHi)
-              .addImm(AMDGPU::sub3);
-
-      // Update the instruction to use NewVaddr
-      MI->getOperand(VAddrIdx).setReg(NewVAddr);
-      // Update the instruction to use NewSRsrc
-      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
-    }
+  int SRsrcIdx =
+      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
+  if (SRsrcIdx != -1) {
+    // We have an MUBUF instruction
+    MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx);
+    unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass;
+    if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()),
+                             RI.getRegClass(SRsrcRC))) {
+      // The operands are legal.
+      // FIXME: We may need to legalize operands besided srsrc.
+      return;
+    }
+
+    MachineBasicBlock &MBB = *MI->getParent();
+    // Extract the the ptr from the resource descriptor.
+
+    // SRsrcPtrLo = srsrc:sub0
+    unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
+        &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
+
+    // SRsrcPtrHi = srsrc:sub1
+    unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
+        &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
+
+    // Create an empty resource descriptor
+    unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+    unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+    unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+    unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+
+    // Zero64 = 0
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
+            Zero64)
+            .addImm(0);
+
+    // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+            SRsrcFormatLo)
+            .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+
+    // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+            SRsrcFormatHi)
+            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+
+    // NewSRsrc = {Zero64, SRsrcFormat}
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+            NewSRsrc)
+            .addReg(Zero64)
+            .addImm(AMDGPU::sub0_sub1)
+            .addReg(SRsrcFormatLo)
+            .addImm(AMDGPU::sub2)
+            .addReg(SRsrcFormatHi)
+            .addImm(AMDGPU::sub3);
+
+    MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
+    unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+    unsigned NewVAddrLo;
+    unsigned NewVAddrHi;
+    if (VAddr) {
+      // This is already an ADDR64 instruction so we need to add the pointer
+      // extracted from the resource descriptor to the current value of VAddr.
+      NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+      NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+
+      // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
+              NewVAddrLo)
+              .addReg(SRsrcPtrLo)
+              .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
+              .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
+
+      // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
+      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
+              NewVAddrHi)
+              .addReg(SRsrcPtrHi)
+              .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
+              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
+              .addReg(AMDGPU::VCC, RegState::Implicit);
+
+    } else {
+      // This instructions is the _OFFSET variant, so we need to convert it to
+      // ADDR64.
+      MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
+      MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
+      MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
+      assert(SOffset->isImm() && SOffset->getImm() == 0 && "Legalizing MUBUF "
+             "with non-zero soffset is not implemented");
+
+      // Create the new instruction.
+      unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
+      MachineInstr *Addr64 =
+          BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+                  .addOperand(*VData)
+                  .addOperand(*SRsrc)
+                  .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+                                              // This will be replaced later
+                                              // with the new value of vaddr.
+                  .addOperand(*Offset);
+
+      MI->removeFromParent();
+      MI = Addr64;
+
+      NewVAddrLo = SRsrcPtrLo;
+      NewVAddrHi = SRsrcPtrHi;
+      VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
+      SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
+    }
+
+    // NewVaddr = {NewVaddrHi, NewVaddrLo}
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
+            NewVAddr)
+            .addReg(NewVAddrLo)
+            .addImm(AMDGPU::sub0)
+            .addReg(NewVAddrHi)
+            .addImm(AMDGPU::sub1);
+
+
+    // Update the instruction to use NewVaddr
+    VAddr->setReg(NewVAddr);
+    // Update the instruction to use NewSRsrc
+    SRsrc->setReg(NewSRsrc);
   }
 }
 
@@ -209,6 +209,7 @@ namespace AMDGPU {
   int getCommuteRev(uint16_t Opcode);
   int getCommuteOrig(uint16_t Opcode);
   int getMCOpcode(uint16_t Opcode, unsigned Gen);
+  int getAddr64Inst(uint16_t Opcode);
 
   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
   const uint64_t RSRC_TID_ENABLE = 1LL << 55;
@@ -194,6 +194,7 @@ def tfe : Operand <i1> {
 def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
 def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
 def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
+def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
 
 def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
 def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
@@ -901,6 +902,11 @@ class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
   let mayLoad = 1;
 }
 
+class MUBUFAddr64Table <bit is_addr64> {
+
+  bit IsAddr64 = is_addr64;
+}
+
 class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
   op,
   (outs),
@@ -927,7 +933,11 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
                    (ins SReg_128:$srsrc,
                    mbuf_offset:$offset, SSrc_32:$soffset, glc:$glc,
                    slc:$slc, tfe:$tfe),
-                   asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+                   asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+                   [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
+                                             i32:$soffset, i16:$offset,
+                                             i1:$glc, i1:$slc, i1:$tfe)))]>,
+                   MUBUFAddr64Table<0>;
 }
 
 let offen = 1, idxen = 0 in {
@@ -959,7 +969,7 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
                    (ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
                    asm#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
                    [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
-                                             i64:$vaddr, i16:$offset)))]>;
+                                             i64:$vaddr, i16:$offset)))]>, MUBUFAddr64Table<1>;
   }
 }
 }
@@ -979,6 +989,18 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
     []
   >;
 
+  let offen = 0, idxen = 0, vaddr = 0 in {
+    def _OFFSET : MUBUF <
+      op, (outs),
+      (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
+           SSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+      name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+      [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+                                         i16:$offset, i1:$glc, i1:$slc,
+                                         i1:$tfe))]
+    >, MUBUFAddr64Table<0>;
+  } // offen = 0, idxen = 0, vaddr = 0
+
   let offen = 1, idxen = 0 in {
     def _OFFEN : MUBUF <
       op, (outs),
@@ -997,7 +1019,8 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
       (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
       name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
       [(st store_vt:$vdata,
-          (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]> {
+          (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]>, MUBUFAddr64Table<1>
+    {
 
       let mayLoad = 0;
       let mayStore = 1;
@@ -1216,4 +1239,12 @@ def getMCOpcode : InstrMapping {
   let ValueCols = [[!cast<string>(SISubtarget.SI)]];
 }
 
+def getAddr64Inst : InstrMapping {
+  let FilterClass = "MUBUFAddr64Table";
+  let RowFields = ["NAME"];
+  let ColFields = ["IsAddr64"];
+  let KeyCol = ["0"];
+  let ValueCols = [["1"]];
+}
+
 include "SIInstructions.td"
@@ -236,8 +236,8 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
 }
 
 ; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 {{addr64$}}
-; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} offset:0x10
+; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
+; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:0x10
 ; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
 ; SI: BUFFER_STORE_DWORD [[RESULT]],
 ; SI: S_ENDPGM
@@ -87,8 +87,8 @@ define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
 }
 
 ; FUNC-LABEL: @zextload_global_i8_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
+; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
+; SI-DAG: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
 ; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
 ; SI: BUFFER_STORE_DWORDX2
 define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
@@ -99,8 +99,8 @@ define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
 }
 
 ; FUNC-LABEL: @zextload_global_i16_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
+; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
+; SI-DAG: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
 ; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
 ; SI: BUFFER_STORE_DWORDX2
 define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
@@ -111,8 +111,8 @@ define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
 }
 
 ; FUNC-LABEL: @zextload_global_i32_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
+; SI-DAG: S_MOV_B32 [[ZERO:s[0-9]+]], 0{{$}}
+; SI-DAG: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
 ; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
 ; SI: BUFFER_STORE_DWORDX2
 define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
@@ -6,7 +6,7 @@
 
 ; MUBUF load with an immediate byte offset that fits into 12-bits
 ; CHECK-LABEL: @mubuf_load0
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x30,0xe0
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0x4 ; encoding: [0x04,0x00,0x30,0xe0
 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %in, i64 1
@@ -17,7 +17,7 @@ entry:
 
 ; MUBUF load with the largest possible immediate offset
 ; CHECK-LABEL: @mubuf_load1
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0xfff ; encoding: [0xff,0x8f,0x20,0xe0
+; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0xfff ; encoding: [0xff,0x0f,0x20,0xe0
 define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i8 addrspace(1)* %in, i64 4095
@@ -28,7 +28,7 @@ entry:
 
 ; MUBUF load with an immediate byte offset that doesn't fit into 12-bits
 ; CHECK-LABEL: @mubuf_load2
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80,0x30,0xe0
 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %in, i64 1024
@@ -40,7 +40,7 @@ entry:
 ; MUBUF load with a 12-bit immediate offset and a register offset
 ; CHECK-LABEL: @mubuf_load3
 ; CHECK-NOT: ADD
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x30,0xe0
+; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x30,0xe0
 define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %in, i64 %offset
@@ -56,7 +56,7 @@ entry:
 
 ; MUBUF store with an immediate byte offset that fits into 12-bits
 ; CHECK-LABEL: @mubuf_store0
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80,0x70,0xe0
+; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0x4 ; encoding: [0x04,0x00,0x70,0xe0
 define void @mubuf_store0(i32 addrspace(1)* %out) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %out, i64 1
@@ -66,7 +66,7 @@ entry:
 
 ; MUBUF store with the largest possible immediate offset
 ; CHECK-LABEL: @mubuf_store1
-; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0xfff ; encoding: [0xff,0x8f,0x60,0xe0
+; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:0xfff ; encoding: [0xff,0x0f,0x60,0xe0
 
 define void @mubuf_store1(i8 addrspace(1)* %out) {
 entry:
@@ -77,7 +77,7 @@ entry:
 
 ; MUBUF store with an immediate byte offset that doesn't fit into 12-bits
 ; CHECK-LABEL: @mubuf_store2
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0
+; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0
 define void @mubuf_store2(i32 addrspace(1)* %out) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %out, i64 1024
@@ -118,7 +118,7 @@ for.end:
 
 ; SI-PROMOTE-DAG: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
 ; SI-PROMOTE-DAG: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x2 ; encoding: [0x02,0x10,0x68,0xe0
-; SI_PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
+; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
 define void @short_array(i32 addrspace(1)* %out, i32 %index) {
 entry:
   %0 = alloca [2 x i16]
@@ -9,8 +9,8 @@ declare i32 @llvm.r600.read.tidig.x() #1
 ; ordering the loads so that the lower address loads come first.
 
 ; FUNC-LABEL: @cluster_global_arg_loads
-; SI: BUFFER_LOAD_DWORD [[REG0:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
-; SI: BUFFER_LOAD_DWORD [[REG1:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
+; SI-DAG: BUFFER_LOAD_DWORD [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; SI-DAG: BUFFER_LOAD_DWORD [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:0x4
 ; SI: BUFFER_STORE_DWORD [[REG0]]
 ; SI: BUFFER_STORE_DWORD [[REG1]]
 define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
@@ -22,5 +22,20 @@ define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)*
   ret void
 }
 
+; Test for a crash in SIInstrInfo::areLoadsFromSameBasePtr() when checking
+; an MUBUF load which does not have a vaddr operand.
+; FUNC-LABEL: @same_base_ptr_crash
+; SI: BUFFER_LOAD_DWORD
+; SI: BUFFER_LOAD_DWORD
+define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
+entry:
+  %out1 = getelementptr i32 addrspace(1)* %out, i32 %offset
+  %tmp0 = load i32 addrspace(1)* %out
+  %tmp1 = load i32 addrspace(1)* %out1
+  %tmp2 = add i32 %tmp0, %tmp1
+  store i32 %tmp2, i32 addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
@@ -75,9 +75,9 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a,
 }
 
 ; FUNC-LABEL: @sext_in_reg_i1_to_i64
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
 ; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: BUFFER_STORE_DWORDX2
 define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %c = add i64 %a, %b
@@ -88,9 +88,9 @@ define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 }
 
 ; FUNC-LABEL: @sext_in_reg_i8_to_i64
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
 ; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: BUFFER_STORE_DWORDX2
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
@@ -112,9 +112,9 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 }
 
 ; FUNC-LABEL: @sext_in_reg_i16_to_i64
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
 ; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
 ; SI: BUFFER_STORE_DWORDX2
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
@@ -6,7 +6,7 @@
 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
 
 ; SI-CHECK: @test
-; SI-CHECK: S_MOV_B32 [[ZERO:s[0-9]]], 0
+; SI-CHECK: S_MOV_B32 [[ZERO:s[0-9]]], 0{{$}}
 ; SI-CHECK: V_MOV_B32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
 ; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[V_ZERO]]{{\]}}
 define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {