R600/SI: Teach moveToVALU how to handle some SMRD instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207660 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2014-04-30 15:31:29 +00:00
parent fa2e88da1c
commit 1d8e31fc7a
3 changed files with 77 additions and 1 deletions

View File

@ -548,6 +548,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
}
}
@ -910,6 +913,44 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
}
}
void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
MachineBasicBlock *MBB = MI->getParent();
switch (MI->getOpcode()) {
case AMDGPU::S_LOAD_DWORD_SGPR:
case AMDGPU::S_LOAD_DWORDX2_SGPR:
case AMDGPU::S_LOAD_DWORDX4_SGPR:
unsigned NewOpcode = getVALUOp(*MI);
unsigned Offset = MI->getOperand(2).getReg();
unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
unsigned DWord0 = Offset;
unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
.addImm(0);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
.addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
.addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
.addReg(DWord0)
.addImm(AMDGPU::sub0)
.addReg(DWord1)
.addImm(AMDGPU::sub1)
.addReg(DWord2)
.addImm(AMDGPU::sub2)
.addReg(DWord3)
.addImm(AMDGPU::sub3);
MI->setDesc(get(NewOpcode));
MI->getOperand(2).setReg(MI->getOperand(1).getReg());
MI->getOperand(1).setReg(SRsrc);
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
}
}
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
SmallVector<MachineInstr *, 128> Worklist;
Worklist.push_back(&TopInst);
@ -920,9 +961,15 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
unsigned Opcode = Inst->getOpcode();
unsigned NewOpcode = getVALUOp(*Inst);
// Handle some special cases
switch (Opcode) {
default:
if (isSMRD(Inst->getOpcode())) {
moveSMRDToVALU(Inst, MRI);
}
break;
case AMDGPU::S_MOV_B64: {
DebugLoc DL = Inst->getDebugLoc();
@ -973,7 +1020,6 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
llvm_unreachable("Moving this op to VALU not implemented");
}
unsigned NewOpcode = getVALUOp(*Inst);
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
// legalize its operands instead.

View File

@ -137,6 +137,8 @@ public:
/// create new instruction and insert them before \p MI.
void legalizeOperands(MachineInstr *MI) const;
void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;
/// \brief Replace this instruction's opcode with the equivalent VALU
/// opcode. This function will also move the users of \p MI to the
/// VALU if necessary.

View File

@ -46,3 +46,31 @@ declare i32 @llvm.r600.read.tidig.x() #1
declare i32 @llvm.r600.read.tidig.y() #1
attributes #1 = { nounwind readnone }
; Test moving an SMRD instruction to the VALU
; CHECK-LABEL: @smrd_valu
; CHECK: BUFFER_LOAD_DWORD [[OUT:v[0-9]+]]
; CHECK: BUFFER_STORE_DWORD [[OUT]]
define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) {
entry:
%0 = icmp ne i32 %a, 0
br i1 %0, label %if, label %else
if:
%1 = load i32 addrspace(2)* addrspace(1)* %in
br label %endif
else:
%2 = getelementptr i32 addrspace(2)* addrspace(1)* %in
%3 = load i32 addrspace(2)* addrspace(1)* %2
br label %endif
endif:
%4 = phi i32 addrspace(2)* [%1, %if], [%3, %else]
%5 = getelementptr i32 addrspace(2)* %4, i32 3000
%6 = load i32 addrspace(2)* %5
store i32 %6, i32 addrspace(1)* %out
ret void
}