R600/SI: Handle MUBUF instructions in SIInstrInfo::moveToVALU()
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204476 91177308-0d34-0410-b5e6-96231b3b80d8
parent 1f1c0495d0
commit a1d28f6dd7
@@ -165,6 +165,9 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
    addPass(createR600VectorRegMerger(*TM));
  } else {
    addPass(createSIFixSGPRCopiesPass(*TM));
    // SIFixSGPRCopies can generate a lot of duplicate instructions,
    // so we need to run MachineCSE afterwards.
    addPass(&MachineCSEID);
  }
  return false;
}

@@ -25,8 +25,6 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"

-const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;

using namespace llvm;

SITargetLowering::SITargetLowering(TargetMachine &TM) :
@@ -407,7 +405,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
  BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
          .addImm(0);
  BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
-         .addImm(RSRC_DATA_FORMAT >> 32);
+         .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
  BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
          .addReg(SubRegHiLo)
          .addImm(AMDGPU::sub0)

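As a sanity check on the immediates involved, here is a small standalone C++ sketch of how the 64-bit AMDGPU::RSRC_DATA_FORMAT value above splits into the two 32-bit words written by S_MOV_B32, both here and in the SIInstrInfo::legalizeOperands hunk further down:

    #include <cstdint>

    // Value of AMDGPU::RSRC_DATA_FORMAT as defined in this patch.
    constexpr uint64_t RsrcDataFormat = 0xf00000000000ULL;
    static_assert((RsrcDataFormat & 0xFFFFFFFF) == 0x0,    "low dword is zero");
    static_assert((RsrcDataFormat >> 32)        == 0xf000, "high dword is 0xf000");

This is why only the high sub-register needs a non-zero immediate in the S_MOV_B32 sequence above.
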
@@ -369,6 +369,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
  let EXP_CNT = 1;

  let neverHasSideEffects = 1;
+ let UseNamedOperandTable = 1;
}

class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :

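Setting UseNamedOperandTable is what allows the legalization code added below to look MUBUF operands up by name instead of by position. A minimal sketch of that lookup (the helper name is invented for illustration; the two getNamedOperandIdx calls mirror the ones added to SIInstrInfo::legalizeOperands):

    // Returns true if Opcode exposes both the srsrc and vaddr operands by name.
    // A result of -1 means the operand is not present for this opcode.
    static bool hasSRsrcAndVAddr(unsigned Opcode) {
      return AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc) != -1 &&
             AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vaddr) != -1;
    }
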
@@ -558,6 +558,32 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MO.ChangeToRegister(Reg, false);
}

unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  assert(SuperReg.isReg());

  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
  unsigned SubReg = MRI.createVirtualRegister(SubRC);

  // Just in case the super register is itself a sub-register, copy it to a new
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          NewSuperReg)
          .addOperand(SuperReg);

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          SubReg)
          .addReg(NewSuperReg, 0, SubIdx);
  return SubReg;
}

void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),

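For orientation, a minimal usage sketch of the new helper (the operand and register classes are illustrative; the real call sites are in the MUBUF legalization hunk that follows):

    // Extract the low 32-bit half of a 64-bit register operand. MI, MRI and Op
    // are assumed to be in scope, exactly as they are inside legalizeOperands().
    unsigned Lo = buildExtractSubReg(MI, MRI, Op,
                                     &AMDGPU::VReg_64RegClass,  // class of Op
                                     AMDGPU::sub0,              // which half
                                     &AMDGPU::VReg_32RegClass); // class of the result
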
@@ -675,6 +701,110 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
      MI->getOperand(i).setReg(DstReg);
    }
  }

  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.

  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::srsrc);
  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr);
  if (SRsrcIdx != -1 && VAddrIdx != -1) {
    const TargetRegisterClass *VAddrRC =
        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);

    if (VAddrRC->getSize() == 8 &&
        MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
      // We have a MUBUF instruction that uses a 64-bit vaddr register and
      // srsrc has the incorrect register class. In order to fix this, we
      // need to extract the pointer from the resource descriptor (srsrc),
      // add it to the value of vaddr, then store the result in the vaddr
      // operand. Then, we need to set the pointer field of the resource
      // descriptor to zero.

      MachineBasicBlock &MBB = *MI->getParent();
      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);

      // SRsrcPtrLo = srsrc:sub0
      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // SRsrcPtrHi = srsrc:sub1
      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // VAddrLo = vaddr:sub0
      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // VAddrHi = vaddr:sub1
      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // NewVaddrLo = SRsrcPtrLo + VAddrLo
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
              NewVAddrLo)
              .addReg(SRsrcPtrLo)
              .addReg(VAddrLo)
              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);

      // NewVaddrHi = SRsrcPtrHi + VAddrHi
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
              NewVAddrHi)
              .addReg(SRsrcPtrHi)
              .addReg(VAddrHi)
              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
              .addReg(AMDGPU::VCC, RegState::Implicit);

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewVAddr)
              .addReg(NewVAddrLo)
              .addImm(AMDGPU::sub0)
              .addReg(NewVAddrHi)
              .addImm(AMDGPU::sub1);

      // Zero64 = 0
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
              Zero64)
              .addImm(0);

      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatLo)
              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);

      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatHi)
              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);

      // NewSRsrc = {Zero64, SRsrcFormat}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewSRsrc)
              .addReg(Zero64)
              .addImm(AMDGPU::sub0_sub1)
              .addReg(SRsrcFormatLo)
              .addImm(AMDGPU::sub2)
              .addReg(SRsrcFormatHi)
              .addImm(AMDGPU::sub3);

      // Update the instruction to use NewVaddr
      MI->getOperand(VAddrIdx).setReg(NewVAddr);
      // Update the instruction to use NewSRsrc
      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
    }
  }
}

void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {

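Conceptually, the block above folds the base pointer held in the first two dwords of the resource descriptor into the 64-bit vaddr, and then re-points the instruction at a fresh descriptor whose pointer field is zero. A minimal standalone sketch of that model in plain C++ (the struct layout and helper are illustrative only, not the compiler's data structures):

    #include <cstdint>

    // Illustrative view of a 128-bit buffer resource descriptor: the first two
    // dwords hold the base pointer (srsrc:sub0/sub1), the last two hold the
    // data-format words.
    struct Rsrc128 {
      uint64_t Ptr;
      uint64_t Fmt;
    };

    // Mirrors the rewrite: the pointer is added into vaddr (done above as a
    // V_ADD_I32 / V_ADDC_U32 pair), and the instruction then uses a fresh
    // descriptor whose pointer field is zero and whose format words come from
    // AMDGPU::RSRC_DATA_FORMAT.
    static Rsrc128 foldPointerIntoVAddr(const Rsrc128 &Old, uint64_t &VAddr) {
      VAddr += Old.Ptr;
      return Rsrc128{0, 0xf00000000000ULL /* AMDGPU::RSRC_DATA_FORMAT */};
    }
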
@@ -731,8 +861,12 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
    }

    unsigned NewOpcode = getVALUOp(*Inst);
-   if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+   if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
+     // We cannot move this instruction to the VALU, so we should try to
+     // legalize its operands instead.
+     legalizeOperands(Inst);
      continue;
+   }

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);

@@ -25,6 +25,13 @@ class SIInstrInfo : public AMDGPUInstrInfo {
private:
  const SIRegisterInfo RI;

  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;

public:
  explicit SIInstrInfo(AMDGPUTargetMachine &tm);

@@ -142,6 +149,9 @@ namespace AMDGPU {
  int getCommuteRev(uint16_t Opcode);
  int getCommuteOrig(uint16_t Opcode);

  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;

} // End namespace AMDGPU

} // End namespace llvm

@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s

; In this test both the pointer and the offset operands to the
; BUFFER_LOAD instructions end up being stored in vgprs. This
@@ -8,8 +8,14 @@
; (low 64-bits of srsrc).

; CHECK-LABEL: @mubuf

; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v

; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
; instructions
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.x() #1