R600/SI: Implement VGPR register spilling for compute at -O0 v3
VGPRs are spilled to LDS.  This still needs more testing, but
we need to at least enable it at -O0, because the fast register
allocator spills all registers that are live at the end of blocks
and without this some future commits will break the
flat-address-space.ll test.
v2: Only calculate thread id once
v3: Move insertion of spill instructions to
    SIRegisterInfo::eliminateFrameIndex()
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218348 91177308-0d34-0410-b5e6-96231b3b80d8
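For orientation before the diff: the patch gives every thread its own copy of each spilled 32-bit slot in LDS, and grows the LDS allocation reported in the program info to cover those copies. The following is only a rough, standalone C++ sketch of that arithmetic, not code from the patch; the helper names (ldsSpillAddress, ldsBlocks) are invented here, and the formulas are read off calculateLDSSpillAddress() and the EmitProgramInfoSI() hunk below.

#include <cstdint>

// Per-thread LDS address of one 32-bit spill slot, mirroring
// calculateLDSSpillAddress(): base LDS usage, plus the slot's frame offset
// scaled by the work-group size, plus 4 bytes per thread id.
uint32_t ldsSpillAddress(uint32_t LDSSize, uint32_t FrameOffset,
                         uint32_t WorkGroupSize, uint32_t ThreadId) {
  return LDSSize + FrameOffset * WorkGroupSize + ThreadId * 4;
}

// LDS blocks requested in the program info, mirroring the
// EmitProgramInfoSI() change: declared LDS plus one spill area per thread,
// rounded up at the 1 << LDSAlignShift (512-byte) granularity.
uint32_t ldsBlocks(uint32_t LDSSize, uint32_t LDSWaveSpillSize,
                   uint32_t MaxWorkGroupSize) {
  const uint32_t LDSAlignShift = 9;
  uint32_t Bytes = LDSSize + LDSWaveSpillSize * MaxWorkGroupSize;
  return (Bytes + (1u << LDSAlignShift) - 1) >> LDSAlignShift;
}

In other words, byte offset FrameOffset of a frame index occupies WorkGroupSize consecutive dwords in LDS, one per thread of the work group.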
			
			
@@ -377,8 +377,12 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
     LDSAlignShift = 9;
   }
 
+  unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
+                          MFI->getMaximumWorkGroupSize(MF);
+
   unsigned LDSBlocks =
-    RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+     RoundUpToAlignment(MFI->LDSSize + LDSSpillSize,
+                        1 << LDSAlignShift) >> LDSAlignShift;
 
   // Scratch is allocated in 256 dword blocks.
   unsigned ScratchAlignShift = 10;
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Function.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/MC/MCInstrDesc.h"
 
 using namespace llvm;
@@ -433,6 +434,19 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
   return Opcode;
 }
 
+static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
+
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+  const TargetMachine &TM = MF->getTarget();
+
+  // FIXME: Even though it can cause problems, we need to enable
+  // spilling at -O0, since the fast register allocator always
+  // spills registers that are live at the end of blocks.
+  return MFI->getShaderType() == ShaderType::COMPUTE &&
+         TM.getOptLevel() == CodeGenOpt::None;
+
+}
+
 void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned SrcReg, bool isKill,
@@ -442,32 +456,41 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   MachineFunction *MF = MBB.getParent();
   MachineFrameInfo *FrameInfo = MF->getFrameInfo();
   DebugLoc DL = MBB.findDebugLoc(MI);
+  int Opcode = -1;
 
-  if (RI.hasVGPRs(RC)) {
-    LLVMContext &Ctx = MF->getFunction()->getContext();
-    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
-    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
-            .addReg(SrcReg);
-  } else if (RI.isSGPRClass(RC)) {
+  if (RI.isSGPRClass(RC)) {
     // We are only allowed to create one new instruction when spilling
     // registers, so we need to use pseudo instruction for spilling
     // SGPRs.
-    unsigned Opcode;
     switch (RC->getSize() * 8) {
-    case 32:  Opcode = AMDGPU::SI_SPILL_S32_SAVE;  break;
-    case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
-    case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
-    case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
-    case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
-    default: llvm_unreachable("Cannot spill register class");
+      case 32:  Opcode = AMDGPU::SI_SPILL_S32_SAVE;  break;
+      case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
+      case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
+      case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
+      case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
     }
+  } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+    switch(RC->getSize() * 8) {
+      case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
+      case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
+      case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break;
+      case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
+      case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
+      case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
+    }
+  }
 
+  if (Opcode != -1) {
     FrameInfo->setObjectAlignment(FrameIndex, 4);
     BuildMI(MBB, MI, DL, get(Opcode))
             .addReg(SrcReg)
             .addFrameIndex(FrameIndex);
   } else {
-    llvm_unreachable("VGPR spilling not supported");
+    LLVMContext &Ctx = MF->getFunction()->getContext();
+    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
+                  " spill register");
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
+            .addReg(SrcReg);
   }
 }
 
@@ -479,31 +502,138 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   MachineFunction *MF = MBB.getParent();
   MachineFrameInfo *FrameInfo = MF->getFrameInfo();
   DebugLoc DL = MBB.findDebugLoc(MI);
+  int Opcode = -1;
 
-  if (RI.hasVGPRs(RC)) {
-    LLVMContext &Ctx = MF->getFunction()->getContext();
-    Ctx.emitError("SIInstrInfo::loadRegToStackSlot - Can't retrieve spilled VGPR!");
-    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
-            .addImm(0);
-  } else if (RI.isSGPRClass(RC)){
-    unsigned Opcode;
+  if (RI.isSGPRClass(RC)){
     switch(RC->getSize() * 8) {
-    case 32:  Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
-    case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
-    case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
-    case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
-    case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
-    default: llvm_unreachable("Cannot spill register class");
+      case 32:  Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
+      case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
+      case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
+      case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
+      case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
     }
+  } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+    switch(RC->getSize() * 8) {
+      case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
+      case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
+      case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break;
+      case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
+      case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
+      case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
+    }
+  }
 
+  if (Opcode != -1) {
     FrameInfo->setObjectAlignment(FrameIndex, 4);
     BuildMI(MBB, MI, DL, get(Opcode), DestReg)
             .addFrameIndex(FrameIndex);
   } else {
-    llvm_unreachable("VGPR spilling not supported");
+    LLVMContext &Ctx = MF->getFunction()->getContext();
+    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
+                  " restore register");
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+            .addReg(AMDGPU::VGPR0);
   }
 }
 
+/// \param @Offset Offset in bytes of the FrameIndex being spilled
+unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
+                                               MachineBasicBlock::iterator MI,
+                                               RegScavenger *RS, unsigned TmpReg,
+                                               unsigned FrameOffset,
+                                               unsigned Size) const {
+  MachineFunction *MF = MBB.getParent();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+  const AMDGPUSubtarget &ST = MF->getTarget().getSubtarget<AMDGPUSubtarget>();
+  const SIRegisterInfo *TRI =
+      static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
+  DebugLoc DL = MBB.findDebugLoc(MI);
+  unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
+  unsigned WavefrontSize = ST.getWavefrontSize();
+
+  unsigned TIDReg = MFI->getTIDReg();
+  if (!MFI->hasCalculatedTID()) {
+    MachineBasicBlock &Entry = MBB.getParent()->front();
+    MachineBasicBlock::iterator Insert = Entry.front();
+    DebugLoc DL = Insert->getDebugLoc();
+
+    TIDReg = RI.findUnusedVGPR(MF->getRegInfo());
+    if (TIDReg == AMDGPU::NoRegister)
+      return TIDReg;
+
+
+    if (MFI->getShaderType() == ShaderType::COMPUTE &&
+        WorkGroupSize > WavefrontSize) {
+
+      unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
+      unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
+      unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
+      unsigned InputPtrReg =
+          TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
+      static const unsigned TIDIGRegs[3] = {
+        TIDIGXReg, TIDIGYReg, TIDIGZReg
+      };
+      for (unsigned Reg : TIDIGRegs) {
+        if (!Entry.isLiveIn(Reg))
+          Entry.addLiveIn(Reg);
+      }
+
+      RS->enterBasicBlock(&Entry);
+      unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+      unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
+              .addReg(InputPtrReg)
+              .addImm(SI::KernelInputOffsets::NGROUPS_Z);
+      BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
+              .addReg(InputPtrReg)
+              .addImm(SI::KernelInputOffsets::NGROUPS_Y);
+
+      // NGROUPS.X * NGROUPS.Y
+      BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
+              .addReg(STmp1)
+              .addReg(STmp0);
+      // (NGROUPS.X * NGROUPS.Y) * TIDIG.X
+      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
+              .addReg(STmp1)
+              .addReg(TIDIGXReg);
+      // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)
+      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
+              .addReg(STmp0)
+              .addReg(TIDIGYReg)
+              .addReg(TIDReg);
+      // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z
+      BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg)
+              .addReg(TIDReg)
+              .addReg(TIDIGZReg);
+    } else {
+      // Get the wave id
+      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
+              TIDReg)
+              .addImm(-1)
+              .addImm(0);
+
+      BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e32),
+              TIDReg)
+              .addImm(-1)
+              .addReg(TIDReg);
+    }
+
+    BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
+            TIDReg)
+            .addImm(2)
+            .addReg(TIDReg);
+    MFI->setTIDReg(TIDReg);
+  }
+
+  // Add FrameIndex to LDS offset
+  unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
+  BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
+          .addImm(LDSOffset)
+          .addReg(TIDReg);
+
+  return TmpReg;
+}
+
 void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
                              int Count) const {
   while (Count > 0) {
@@ -79,6 +79,13 @@ public:
                   unsigned DestReg, unsigned SrcReg,
                   bool KillSrc) const override;
 
+  unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    RegScavenger *RS,
+                                    unsigned TmpReg,
+                                    unsigned Offset,
+                                    unsigned Size) const;
+
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            unsigned SrcReg, bool isKill, int FrameIndex,
@@ -1826,6 +1826,27 @@ defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 
+multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
+  def _SAVE : InstSI <
+    (outs),
+    (ins vgpr_class:$src, i32imm:$frame_idx),
+    "", []
+  >;
+
+  def _RESTORE : InstSI <
+    (outs vgpr_class:$dst),
+    (ins i32imm:$frame_idx),
+    "", []
+  >;
+}
+
+defm SI_SPILL_V32  : SI_SPILL_VGPR <VReg_32>;
+defm SI_SPILL_V64  : SI_SPILL_VGPR <VReg_64>;
+defm SI_SPILL_V96  : SI_SPILL_VGPR <VReg_96>;
+defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
+defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
+defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
+
 let Defs = [SCC] in {
 
 def SI_CONSTDATA_PTR : InstSI <
@@ -10,8 +10,9 @@
 
 
 #include "SIMachineFunctionInfo.h"
+#include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Function.h"
@@ -27,29 +28,18 @@ void SIMachineFunctionInfo::anchor() {}
 
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
+    TIDReg(AMDGPU::NoRegister),
     PSInputAddr(0),
-    NumUserSGPRs(0) { }
-
-/// \brief Returns a register that is not used at any point in the function.
-///        If all registers are used, then this function will return
-//         AMDGPU::NoRegister.
-static unsigned findUnusedVGPR(const MachineRegisterInfo &MRI) {
-
-  const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass;
-
-  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
-       I != E; ++I) {
-    if (!MRI.isPhysRegUsed(*I))
-      return *I;
-  }
-  return AMDGPU::NoRegister;
-}
+    NumUserSGPRs(0),
+    LDSWaveSpillSize(0) { }
 
 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
                                                        MachineFunction *MF,
                                                        unsigned FrameIndex,
                                                        unsigned SubIdx) {
   const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo*>(
+      MF->getTarget().getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
   MachineRegisterInfo &MRI = MF->getRegInfo();
   int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
   Offset += SubIdx * 4;
@@ -60,7 +50,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
   struct SpilledReg Spill;
 
   if (!LaneVGPRs.count(LaneVGPRIdx)) {
-    unsigned LaneVGPR = findUnusedVGPR(MRI);
+    unsigned LaneVGPR = TRI->findUnusedVGPR(MRI);
     LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
     MRI.setPhysRegUsed(LaneVGPR);
 
@@ -76,3 +66,11 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
   Spill.Lane = Lane;
   return Spill;
 }
+
+unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
+                                              const MachineFunction &MF) const {
+  const AMDGPUSubtarget &ST = MF.getTarget().getSubtarget<AMDGPUSubtarget>();
+  // FIXME: We should get this information from kernel attributes if it
+  // is available.
+  return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
+}
@@ -16,6 +16,7 @@
 #define LLVM_LIB_TARGET_R600_SIMACHINEFUNCTIONINFO_H
 
 #include "AMDGPUMachineFunction.h"
+#include "SIRegisterInfo.h"
 #include <map>
 
 namespace llvm {
@@ -26,6 +27,9 @@ class MachineRegisterInfo;
 /// tells the hardware which interpolation parameters to load.
 class SIMachineFunctionInfo : public AMDGPUMachineFunction {
   void anchor() override;
+
+  unsigned TIDReg;
+
 public:
 
   struct SpilledReg {
@@ -44,6 +48,12 @@ public:
   unsigned PSInputAddr;
   unsigned NumUserSGPRs;
   std::map<unsigned, unsigned> LaneVGPRs;
+  unsigned LDSWaveSpillSize;
+  bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
+  unsigned getTIDReg() const { return TIDReg; };
+  void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+
+  unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
 };
 
 } // End namespace llvm
@@ -34,6 +34,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(AMDGPU::EXEC);
   Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
   Reserved.set(AMDGPU::FLAT_SCR);
+
+  // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
+  Reserved.set(AMDGPU::VGPR255);
+  Reserved.set(AMDGPU::VGPR254);
+
   return Reserved;
 }
 
@@ -51,18 +56,31 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   switch (Op) {
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
+  case AMDGPU::SI_SPILL_V512_SAVE:
+  case AMDGPU::SI_SPILL_V512_RESTORE:
     return 16;
   case AMDGPU::SI_SPILL_S256_SAVE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_V256_SAVE:
+  case AMDGPU::SI_SPILL_V256_RESTORE:
     return 8;
   case AMDGPU::SI_SPILL_S128_SAVE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
+  case AMDGPU::SI_SPILL_V128_SAVE:
+  case AMDGPU::SI_SPILL_V128_RESTORE:
     return 4;
+  case AMDGPU::SI_SPILL_V96_SAVE:
+  case AMDGPU::SI_SPILL_V96_RESTORE:
+    return 3;
   case AMDGPU::SI_SPILL_S64_SAVE:
   case AMDGPU::SI_SPILL_S64_RESTORE:
+  case AMDGPU::SI_SPILL_V64_SAVE:
+  case AMDGPU::SI_SPILL_V64_RESTORE:
     return 2;
   case AMDGPU::SI_SPILL_S32_SAVE:
   case AMDGPU::SI_SPILL_S32_RESTORE:
+  case AMDGPU::SI_SPILL_V32_SAVE:
+  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
   default: llvm_unreachable("Invalid spill opcode");
   }
@@ -139,6 +157,81 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       break;
     }
 
+    // VGPR register spill
+    case AMDGPU::SI_SPILL_V512_SAVE:
+    case AMDGPU::SI_SPILL_V256_SAVE:
+    case AMDGPU::SI_SPILL_V128_SAVE:
+    case AMDGPU::SI_SPILL_V96_SAVE:
+    case AMDGPU::SI_SPILL_V64_SAVE:
+    case AMDGPU::SI_SPILL_V32_SAVE: {
+      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+      unsigned SrcReg = MI->getOperand(0).getReg();
+      int64_t Offset = FrameInfo->getObjectOffset(Index);
+      unsigned Size = NumSubRegs * 4;
+      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+
+      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
+        unsigned SubReg = NumSubRegs > 1 ?
+            getPhysRegSubReg(SrcReg, &AMDGPU::VGPR_32RegClass, i) :
+            SrcReg;
+        Offset += (i * 4);
+        MFI->LDSWaveSpillSize = std::max((unsigned)Offset + 4, (unsigned)MFI->LDSWaveSpillSize);
+
+        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
+                                                         Offset, Size);
+
+        if (AddrReg == AMDGPU::NoRegister) {
+           LLVMContext &Ctx = MF->getFunction()->getContext();
+           Ctx.emitError("Ran out of VGPRs for spilling VGPRS");
+           AddrReg = AMDGPU::VGPR0;
+        }
+
+        // Store the value in LDS
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_WRITE_B32))
+                .addImm(0) // gds
+                .addReg(AddrReg, RegState::Kill) // addr
+                .addReg(SubReg) // data0
+                .addImm(0); // offset
+      }
+
+      MI->eraseFromParent();
+      break;
+    }
+    case AMDGPU::SI_SPILL_V32_RESTORE:
+    case AMDGPU::SI_SPILL_V64_RESTORE:
+    case AMDGPU::SI_SPILL_V128_RESTORE:
+    case AMDGPU::SI_SPILL_V256_RESTORE:
+    case AMDGPU::SI_SPILL_V512_RESTORE: {
+      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+      unsigned DstReg = MI->getOperand(0).getReg();
+      int64_t Offset = FrameInfo->getObjectOffset(Index);
+      unsigned Size = NumSubRegs * 4;
+      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+
+      // FIXME: We could use DS_READ_B64 here to optimize for larger registers.
+      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
+        unsigned SubReg = NumSubRegs > 1 ?
+            getPhysRegSubReg(DstReg, &AMDGPU::VGPR_32RegClass, i) :
+            DstReg;
+
+        Offset += (i * 4);
+        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
+                                                          Offset, Size);
+        if (AddrReg == AMDGPU::NoRegister) {
+           LLVMContext &Ctx = MF->getFunction()->getContext();
+           Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
+           AddrReg = AMDGPU::VGPR0;
+        }
+
+        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_READ_B32), SubReg)
+                .addImm(0) // gds
+                .addReg(AddrReg, RegState::Kill) // addr
+                .addImm(0); //offset
+      }
+      MI->eraseFromParent();
+      break;
+    }
+
     default: {
       int64_t Offset = FrameInfo->getObjectOffset(Index);
       FIOp.ChangeToImmediate(Offset);
@@ -173,8 +266,12 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
     &AMDGPU::SReg_32RegClass,
     &AMDGPU::VReg_64RegClass,
     &AMDGPU::SReg_64RegClass,
+    &AMDGPU::VReg_96RegClass,
+    &AMDGPU::VReg_128RegClass,
     &AMDGPU::SReg_128RegClass,
-    &AMDGPU::SReg_256RegClass
+    &AMDGPU::VReg_256RegClass,
+    &AMDGPU::SReg_256RegClass,
+    &AMDGPU::VReg_512RegClass
   };
 
   for (const TargetRegisterClass *BaseClass : BaseClasses) {
@@ -335,3 +432,19 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
   }
   llvm_unreachable("unexpected preloaded value type");
 }
+
+/// \brief Returns a register that is not used at any point in the function.
+///        If all registers are used, then this function will return
+//         AMDGPU::NoRegister.
+unsigned SIRegisterInfo::findUnusedVGPR(const MachineRegisterInfo &MRI) const {
+
+  const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass;
+
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+       I != E; ++I) {
+    if (!MRI.isPhysRegUsed(*I))
+      return *I;
+  }
+  return AMDGPU::NoRegister;
+}
+
@@ -100,6 +100,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
   unsigned getPreloadedValue(const MachineFunction &MF,
                              enum PreloadedValue Value) const;
 
+  unsigned findUnusedVGPR(const MachineRegisterInfo &MRI) const;
 };
 
 } // End namespace llvm
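As a closing note on the SIRegisterInfo::eliminateFrameIndex() changes above: a SI_SPILL_V*_SAVE pseudo covering N 32-bit sub-registers is lowered into N DS_WRITE_B32 stores, one per sub-register, each to its own per-thread LDS address, and the matching SI_SPILL_V*_RESTORE walks the same addresses with DS_READ_B32. The sketch below is a hypothetical, self-contained C++ model of that expansion, not LLVM code; it reuses the same address arithmetic as the sketch near the top of this page and ignores register scavenging and the patch's exact offset bookkeeping inside the loop. All names here are illustrative.

#include <cstdint>
#include <vector>

// One modeled DS_WRITE_B32: sub-register `SubReg` of the spilled tuple is
// stored at LDS byte address `Addr` for the current thread.
struct ModeledStore {
  unsigned SubReg;
  uint32_t Addr;
};

// Expand one SI_SPILL_V*_SAVE covering NumSubRegs 32-bit sub-registers.
// FrameOffset is the frame index's byte offset, LDSSize the LDS already
// used by the kernel; WorkGroupSize and ThreadId are as in the earlier
// sketch.
std::vector<ModeledStore> expandVGPRSave(unsigned NumSubRegs,
                                         uint32_t FrameOffset,
                                         uint32_t LDSSize,
                                         uint32_t WorkGroupSize,
                                         uint32_t ThreadId) {
  std::vector<ModeledStore> Stores;
  for (unsigned i = 0; i != NumSubRegs; ++i) {
    uint32_t SlotOffset = FrameOffset + i * 4;  // one dword per sub-register
    uint32_t Addr = LDSSize + SlotOffset * WorkGroupSize + ThreadId * 4;
    Stores.push_back({i, Addr});
  }
  return Stores;
}

A restore would walk the same addresses and read each dword back into the corresponding destination sub-register.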