R600/SI: Handle MUBUF instructions in SIInstrInfo::moveToVALU()
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204476 91177308-0d34-0410-b5e6-96231b3b80d8
parent 1f1c0495d0
commit a1d28f6dd7
@@ -165,6 +165,9 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
    addPass(createR600VectorRegMerger(*TM));
  } else {
    addPass(createSIFixSGPRCopiesPass(*TM));
    // SIFixSGPRCopies can generate a lot of duplicate instructions,
    // so we need to run MachineCSE afterwards.
    addPass(&MachineCSEID);
  }
  return false;
}

@@ -25,8 +25,6 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"

-const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;

using namespace llvm;

SITargetLowering::SITargetLowering(TargetMachine &TM) :
@@ -407,7 +405,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
  BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
          .addImm(0);
  BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
-         .addImm(RSRC_DATA_FORMAT >> 32);
+         .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
  BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
          .addReg(SubRegHiLo)
          .addImm(AMDGPU::sub0)

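As a sanity check on the immediates involved, here is a small standalone C++ sketch of how the 64-bit AMDGPU::RSRC_DATA_FORMAT value above splits into the two 32-bit words written by S_MOV_B32, both here and in the SIInstrInfo::legalizeOperands hunk further down:

    #include <cstdint>

    // Value of AMDGPU::RSRC_DATA_FORMAT as defined in this patch.
    constexpr uint64_t RsrcDataFormat = 0xf00000000000ULL;
    static_assert((RsrcDataFormat & 0xFFFFFFFF) == 0x0,    "low dword is zero");
    static_assert((RsrcDataFormat >> 32)        == 0xf000, "high dword is 0xf000");

This is why only the high sub-register needs a non-zero immediate in the S_MOV_B32 sequence above.
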
@@ -369,6 +369,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
  let EXP_CNT = 1;

  let neverHasSideEffects = 1;
+ let UseNamedOperandTable = 1;
}

class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :

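Setting UseNamedOperandTable is what allows the legalization code added below to look MUBUF operands up by name instead of by position. A minimal sketch of that lookup (the helper name is invented for illustration; the two getNamedOperandIdx calls mirror the ones added to SIInstrInfo::legalizeOperands):

    // Returns true if Opcode exposes both the srsrc and vaddr operands by name.
    // A result of -1 means the operand is not present for this opcode.
    static bool hasSRsrcAndVAddr(unsigned Opcode) {
      return AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc) != -1 &&
             AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vaddr) != -1;
    }
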
@@ -558,6 +558,32 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MO.ChangeToRegister(Reg, false);
}

unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  assert(SuperReg.isReg());

  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
  unsigned SubReg = MRI.createVirtualRegister(SubRC);

  // Just in case the super register is itself a sub-register, copy it to a new
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          NewSuperReg)
          .addOperand(SuperReg);

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          SubReg)
          .addReg(NewSuperReg, 0, SubIdx);
  return SubReg;
}

void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),

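For orientation, a minimal usage sketch of the new helper (the operand and register classes are illustrative; the real call sites are in the MUBUF legalization hunk that follows):

    // Extract the low 32-bit half of a 64-bit register operand. MI, MRI and Op
    // are assumed to be in scope, exactly as they are inside legalizeOperands().
    unsigned Lo = buildExtractSubReg(MI, MRI, Op,
                                     &AMDGPU::VReg_64RegClass,  // class of Op
                                     AMDGPU::sub0,              // which half
                                     &AMDGPU::VReg_32RegClass); // class of the result
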
@@ -675,6 +701,110 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
      MI->getOperand(i).setReg(DstReg);
    }
  }

  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.

  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::srsrc);
  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr);
  if (SRsrcIdx != -1 && VAddrIdx != -1) {
    const TargetRegisterClass *VAddrRC =
        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);

    if (VAddrRC->getSize() == 8 &&
        MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
      // We have a MUBUF instruction that uses a 64-bit vaddr register and
      // srsrc has the incorrect register class. In order to fix this, we
      // need to extract the pointer from the resource descriptor (srsrc),
      // add it to the value of vaddr, then store the result in the vaddr
      // operand. Then, we need to set the pointer field of the resource
      // descriptor to zero.

      MachineBasicBlock &MBB = *MI->getParent();
      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);

      // SRsrcPtrLo = srsrc:sub0
      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // SRsrcPtrHi = srsrc:sub1
      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // VAddrLo = vaddr:sub0
      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // VAddrHi = vaddr:sub1
      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // NewVaddrLo = SRsrcPtrLo + VAddrLo
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
              NewVAddrLo)
              .addReg(SRsrcPtrLo)
              .addReg(VAddrLo)
              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);

      // NewVaddrHi = SRsrcPtrHi + VAddrHi
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
              NewVAddrHi)
              .addReg(SRsrcPtrHi)
              .addReg(VAddrHi)
              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
              .addReg(AMDGPU::VCC, RegState::Implicit);

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewVAddr)
              .addReg(NewVAddrLo)
              .addImm(AMDGPU::sub0)
              .addReg(NewVAddrHi)
              .addImm(AMDGPU::sub1);

      // Zero64 = 0
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
              Zero64)
              .addImm(0);

      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatLo)
              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);

      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatHi)
              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);

      // NewSRsrc = {Zero64, SRsrcFormat}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewSRsrc)
              .addReg(Zero64)
              .addImm(AMDGPU::sub0_sub1)
              .addReg(SRsrcFormatLo)
              .addImm(AMDGPU::sub2)
              .addReg(SRsrcFormatHi)
              .addImm(AMDGPU::sub3);

      // Update the instruction to use NewVaddr
      MI->getOperand(VAddrIdx).setReg(NewVAddr);
      // Update the instruction to use NewSRsrc
      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
    }
  }
}

void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {

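Conceptually, the block above folds the base pointer held in the first two dwords of the resource descriptor into the 64-bit vaddr, and then re-points the instruction at a fresh descriptor whose pointer field is zero. A minimal standalone sketch of that model in plain C++ (the struct layout and helper are illustrative only, not the compiler's data structures):

    #include <cstdint>

    // Illustrative view of a 128-bit buffer resource descriptor: the first two
    // dwords hold the base pointer (srsrc:sub0/sub1), the last two hold the
    // data-format words.
    struct Rsrc128 {
      uint64_t Ptr;
      uint64_t Fmt;
    };

    // Mirrors the rewrite: the pointer is added into vaddr (done above as a
    // V_ADD_I32 / V_ADDC_U32 pair), and the instruction then uses a fresh
    // descriptor whose pointer field is zero and whose format words come from
    // AMDGPU::RSRC_DATA_FORMAT.
    static Rsrc128 foldPointerIntoVAddr(const Rsrc128 &Old, uint64_t &VAddr) {
      VAddr += Old.Ptr;
      return Rsrc128{0, 0xf00000000000ULL /* AMDGPU::RSRC_DATA_FORMAT */};
    }
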
@@ -731,8 +861,12 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
    }

    unsigned NewOpcode = getVALUOp(*Inst);
-   if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+   if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
+     // We cannot move this instruction to the VALU, so we should try to
+     // legalize its operands instead.
+     legalizeOperands(Inst);
      continue;
+   }

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);

@@ -25,6 +25,13 @@ class SIInstrInfo : public AMDGPUInstrInfo {
private:
  const SIRegisterInfo RI;

  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;

public:
  explicit SIInstrInfo(AMDGPUTargetMachine &tm);

@@ -142,6 +149,9 @@ namespace AMDGPU {
  int getCommuteRev(uint16_t Opcode);
  int getCommuteOrig(uint16_t Opcode);

  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;

} // End namespace AMDGPU

} // End namespace llvm

@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s

; In this test both the pointer and the offset operands to the
; BUFFER_LOAD instructions end up being stored in vgprs. This
@@ -8,8 +8,14 @@
; (low 64-bits of srsrc).

; CHECK-LABEL: @mubuf

; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v

; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
; instructions
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.x() #1