R600: Simplify handling of private address space

The AMDGPUIndirectAddressing pass was previously responsible for
lowering private loads and stores to indirect addressing instructions.
However, this pass was buggy and way too complicated.  The only
advantage it had over the new simplified code was that it saved one
instruction per direct write to private memory.  This optimization
likely has a minimal impact on performance, and we may be able
to duplicate it using some other transformation.

For the private address space, we now:
1. Lower private loads/stores to Register(Load|Store) instructions
2. Reserve part of the register file as 'private memory'
3. After regalloc lower the Register(Load|Store) instructions to
   MOV instructions that use indirect addressing.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193179 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2013-10-22 18:19:10 +00:00
parent 34adeaf8b9
commit 04c559569f
14 changed files with 134 additions and 436 deletions

View File

@ -45,7 +45,6 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
// Passes common to R600 and SI
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
FunctionPass *createAMDGPUIndirectAddressingPass(TargetMachine &tm);
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
/// \brief Creates an AMDGPU-specific Target Transformation Info pass.

View File

@ -1,345 +0,0 @@
//===-- AMDGPUIndirectAddressing.cpp - Indirect Addressing Support --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// Instructions can use indirect addressing to index the register file as if it
/// were memory. This pass lowers RegisterLoad and RegisterStore instructions
/// to either a COPY or a MOV that uses indirect addressing.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {

/// Machine function pass that lowers RegisterLoad and RegisterStore
/// instructions to either a COPY or a MOV that uses indirect addressing.
class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
public:
  /// The target machine argument is accepted but not stored; the instruction
  /// info pointer starts out null.
  AMDGPUIndirectAddressingPass(TargetMachine &tm)
      : MachineFunctionPass(ID), TII(0) {}

  virtual bool runOnMachineFunction(MachineFunction &MF);

  const char *getPassName() const { return "R600 Handle indirect addressing"; }

private:
  /// Pass identification token handed to the MachineFunctionPass base.
  static char ID;

  /// Target instruction info used by the lowering code.
  const AMDGPUInstrInfo *TII;

  /// Helper that inspects how the virtual register \p Reg is defined.
  bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
};

} // End anonymous namespace
char AMDGPUIndirectAddressingPass::ID = 0;
/// Allocates a new AMDGPUIndirectAddressingPass for \p tm and hands it back
/// through the generic FunctionPass interface.
FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
  FunctionPass *NewPass = new AMDGPUIndirectAddressingPass(tm);
  return NewPass;
}
/// Lowers every RegisterStore and RegisterLoad instruction in \p MF.
///
/// The work is done in two sweeps over all basic blocks:
///  1. RegisterStore instructions are replaced by a COPY (direct access) or
///     by the result of buildIndirectWrite() (indirect access), and the
///     virtual registers created for them are recorded per indirect address
///     and added as live-ins of the successor blocks.
///  2. Live-ins that map to the same indirect address are merged with PHI
///     instructions, and RegisterLoad instructions are replaced by a COPY
///     (direct access) or by REG_SEQUENCE + buildIndirectRead() (indirect).
///
/// \returns false on every path.
/// NOTE(review): the function erases and inserts instructions yet still
/// returns false; confirm whether callers rely on the "modified" result.
bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
TII = static_cast<const AMDGPUInstrInfo*>(MF.getTarget().getInstrInfo());
int IndirectBegin = TII->getIndirectIndexBegin(MF);
int IndirectEnd = TII->getIndirectIndexEnd(MF);
if (IndirectBegin == -1) {
// No indirect addressing, we can skip this pass
assert(IndirectEnd == -1);
return false;
}
// The map keeps track of the indirect address that is represented by
// each virtual register. The key is the register and the value is the
// indirect address it uses.
std::map<unsigned, unsigned> RegisterAddressMap;
// First pass - Lower all of the RegisterStore instructions and track which
// registers are live.
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
// This map keeps track of the current live indirect registers.
// The key is the address and the value is the register
std::map<unsigned, unsigned> LiveAddressRegisterMap;
MachineBasicBlock &MBB = *BB;
// Next is captured before the body runs because the current instruction
// may be erased below.
// NOTE(review): llvm::next(I) is evaluated in the init clause before the
// I != MBB.end() test; confirm that empty blocks cannot reach this loop.
for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
I != MBB.end(); I = Next) {
Next = llvm::next(I);
MachineInstr &MI = *I;
if (!TII->isRegisterStore(MI)) {
continue;
}
// Lower RegisterStore
// Operand 0 is the stored value, operand 1 the offset register, and
// operands 2 and 3 are the immediate register index and channel.
unsigned RegIndex = MI.getOperand(2).getImm();
unsigned Channel = MI.getOperand(3).getImm();
unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
const TargetRegisterClass *IndirectStoreRegClass =
TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
// Direct register access.
unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
.addOperand(MI.getOperand(0));
RegisterAddressMap[DstReg] = Address;
LiveAddressRegisterMap[Address] = DstReg;
} else {
// Indirect register access.
MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
MI.getOperand(0).getReg(), // Value
Address,
MI.getOperand(1).getReg()); // Offset
// Attach one fresh implicit def per index in
// [IndirectBegin, IndirectEnd] and make it the live register for the
// corresponding address.
for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
unsigned Addr = TII->calculateIndirectAddress(i, Channel);
unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
RegisterAddressMap[DstReg] = Addr;
LiveAddressRegisterMap[Addr] = DstReg;
}
}
MI.eraseFromParent();
}
// Update the live-ins of the successor blocks
for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
SuccEnd = MBB.succ_end();
SuccEnd != Succ; ++Succ) {
std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
for (Key = LiveAddressRegisterMap.begin(),
KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
(*Succ)->addLiveIn(Key->second);
}
}
}
// Second pass - Lower the RegisterLoad instructions
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
// Key is the address and the value is the register
std::map<unsigned, unsigned> LiveAddressRegisterMap;
MachineBasicBlock &MBB = *BB;
// Consume the live-ins added by the first pass. Each handled live-in is
// removed from the block and the scan restarts from livein_begin().
MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
while (LI != MBB.livein_end()) {
std::vector<unsigned> PhiRegisters;
// Make sure this live in is used for indirect addressing
if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
++LI;
continue;
}
unsigned Address = RegisterAddressMap[*LI];
LiveAddressRegisterMap[Address] = *LI;
PhiRegisters.push_back(*LI);
// Check if there are other live in registers which map to the same
// indirect address.
for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
LE = MBB.livein_end();
LJ != LE; ++LJ) {
unsigned Reg = *LJ;
if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
continue;
}
if (RegisterAddressMap[Reg] == Address) {
PhiRegisters.push_back(Reg);
}
}
if (PhiRegisters.size() == 1) {
// We don't need to insert a Phi instruction, so we can just add the
// registers to the live list for the block.
LiveAddressRegisterMap[Address] = *LI;
MBB.removeLiveIn(*LI);
} else {
// We need to insert a PHI, because we have the same address being
// written in multiple predecessor blocks.
const TargetRegisterClass *PhiDstClass =
TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
MBB.findDebugLoc(MBB.begin()),
TII->get(AMDGPU::PHI), PhiDstReg);
// Each incoming value is paired with the block that contains its
// defining instruction.
for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
RE = PhiRegisters.end();
RI != RE; ++RI) {
unsigned Reg = *RI;
MachineInstr *DefInst = MRI.getVRegDef(Reg);
assert(DefInst);
MachineBasicBlock *RegBlock = DefInst->getParent();
Phi.addReg(Reg);
Phi.addMBB(RegBlock);
MBB.removeLiveIn(Reg);
}
RegisterAddressMap[PhiDstReg] = Address;
LiveAddressRegisterMap[Address] = PhiDstReg;
}
LI = MBB.livein_begin();
}
// Same erase-safe iteration pattern as in the first pass.
// NOTE(review): llvm::next(I) is again evaluated before the end test;
// confirm that MBB is never empty here.
for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
I != MBB.end(); I = Next) {
Next = llvm::next(I);
MachineInstr &MI = *I;
if (!TII->isRegisterLoad(MI)) {
if (MI.getOpcode() == AMDGPU::PHI) {
continue;
}
// Check for indirect register defs
for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
OpIdx < NumOperands; ++OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
if (MO.isReg() && MO.isDef() &&
RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
unsigned Reg = MO.getReg();
unsigned LiveAddress = RegisterAddressMap[Reg];
// Chain the live-ins
if (LiveAddressRegisterMap.find(LiveAddress) !=
LiveAddressRegisterMap.end()) {
MI.addOperand(MachineOperand::CreateReg(
LiveAddressRegisterMap[LiveAddress],
false, // isDef
true, // isImp
true)); // isKill
}
LiveAddressRegisterMap[LiveAddress] = Reg;
}
}
continue;
}
// Lower RegisterLoad. Operand 0 is the destination, operand 1 the offset
// register, and operands 2 and 3 are the immediate register index and
// channel.
const TargetRegisterClass *SuperIndirectRegClass =
TII->getSuperIndirectRegClass();
const TargetRegisterClass *IndirectLoadRegClass =
TII->getIndirectAddrLoadRegClass();
unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
unsigned RegIndex = MI.getOperand(2).getImm();
unsigned Channel = MI.getOperand(3).getImm();
unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
// Direct register access
// NOTE(review): std::map::operator[] inserts a zero entry when Address has
// no live register yet; confirm that case cannot occur here.
unsigned Reg = LiveAddressRegisterMap[Address];
unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
if (regHasExplicitDef(MRI, Reg)) {
// If the register we are reading from has an explicit def, then that
// means it was written via a direct register access (i.e. COPY
// or other instruction that doesn't use indirect addressing). In
// this case we know where the value has been stored, so we can just
// issue a copy.
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
MI.getOperand(0).getReg())
.addReg(Reg);
} else {
// If the register we are reading has an implicit def, then that
// means it was written by an indirect register access (i.e. an
// instruction that uses indirect addressing).
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
MI.getOperand(0).getReg())
.addReg(AddrReg)
.addReg(Reg, RegState::Implicit);
}
} else {
// Indirect register access
// Note on REG_SEQUENCE instructions: You can't actually use the register
// it defines unless you have an instruction that takes the defined
// register class as an operand.
MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
TII->get(AMDGPU::REG_SEQUENCE),
IndirectReg);
for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
unsigned Addr = TII->calculateIndirectAddress(i, Channel);
if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
continue;
}
unsigned Reg = LiveAddressRegisterMap[Addr];
// We only need to use REG_SEQUENCE for explicit defs, since the
// register coalescer won't do anything with the implicit defs.
if (!regHasExplicitDef(MRI, Reg)) {
continue;
}
// Insert a REG_SEQUENCE instruction to force the register allocator
// to allocate the virtual register to the correct physical register.
Sequence.addReg(LiveAddressRegisterMap[Addr]);
Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
}
MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
MI.getOperand(0).getReg(), // Value
Address,
MI.getOperand(1).getReg()); // Offset
// The read implicitly uses (and kills) the REG_SEQUENCE result and also
// implicitly uses the register currently mapped to Address.
Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
}
MI.eraseFromParent();
}
}
return false;
}
/// Reports whether the virtual register \p Reg is written by an explicit
/// definition.
///
/// A register without a defining instruction is never explicit. For a PHI
/// definition the answer is true as soon as one of its incoming (non-def)
/// register operands is itself explicitly defined; the incoming operands are
/// examined in operand order. For any other defining instruction the register
/// is explicit when operand 0 of that instruction is exactly \p Reg.
bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
                                                     unsigned Reg) const {
  MachineInstr *Def = MRI.getVRegDef(Reg);
  if (!Def)
    return false;

  if (Def->getOpcode() != AMDGPU::PHI) {
    const MachineOperand &FirstOp = Def->getOperand(0);
    return FirstOp.isReg() && FirstOp.getReg() == Reg;
  }

  // PHI: look through the incoming values, stopping at the first explicit one.
  for (MachineInstr::const_mop_iterator OpI = Def->operands_begin(),
                                        OpE = Def->operands_end();
       OpI != OpE; ++OpI) {
    if (OpI->isReg() && !OpI->isDef() && regHasExplicitDef(MRI, OpI->getReg()))
      return true;
  }
  return false;
}

View File

@ -118,6 +118,46 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
assert(!"Not Implemented");
}
/// Expands RegisterLoad / RegisterStore pseudo instructions after register
/// allocation.
///
/// Both pseudos share one operand layout: operand 0 is the value register
/// (destination of a load, source of a store), operand 1 is the offset
/// register, and operands 2 and 3 are the immediate register index and
/// channel from which the indirect address is computed.
///
/// When the offset register is AMDGPU::INDIRECT_BASE_ADDR the access is
/// direct and becomes a plain MOV to or from the register that
/// getIndirectAddrRegClass() provides for the address; otherwise it becomes
/// an indirect read or write. The pseudo is erased afterwards.
///
/// \returns true if \p MI was a register load/store and has been replaced,
/// false if the instruction is not handled here.
bool AMDGPUInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  const bool IsLoad = isRegisterLoad(*MI);
  if (!IsLoad && !isRegisterStore(*MI))
    return false;

  MachineBasicBlock *MBB = MI->getParent();

  // Decode the shared operand layout once for both loads and stores.
  unsigned ValueReg = MI->getOperand(0).getReg();
  unsigned OffsetReg = MI->getOperand(1).getReg();
  unsigned RegIndex = MI->getOperand(2).getImm();
  unsigned Channel = MI->getOperand(3).getImm();
  unsigned Address = calculateIndirectAddress(RegIndex, Channel);

  if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
    // Direct access: the address names a concrete register, so a MOV is
    // sufficient.
    unsigned PrivateReg = getIndirectAddrRegClass()->getRegister(Address);
    if (IsLoad)
      buildMovInstr(MBB, MI, ValueReg, PrivateReg);
    else
      buildMovInstr(MBB, MI, PrivateReg, ValueReg);
  } else if (IsLoad) {
    buildIndirectRead(MBB, MI, ValueReg, Address, OffsetReg);
  } else {
    // Reuse the address computed above instead of recomputing it.
    buildIndirectWrite(MBB, MI, ValueReg, Address, OffsetReg);
  }

  // The replacement was inserted before MI; drop the pseudo itself.
  MBB->erase(MI);
  return true;
}
MachineInstr *
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,

View File

@ -87,6 +87,8 @@ public:
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
protected:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
@ -160,14 +162,9 @@ public:
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const = 0;
/// \returns The register class to be used for storing values to an
/// "Indirect Address" .
virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
unsigned SourceReg) const = 0;
/// \returns The register class to be used for loading values from
/// an "Indirect Address" .
virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
/// \returns The register class to be used for loading and storing values
/// from an "Indirect Address" .
virtual const TargetRegisterClass *getIndirectAddrRegClass() const = 0;
/// \brief Build instruction(s) for an indirect register write.
///
@ -185,19 +182,21 @@ public:
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const = 0;
/// \returns the register class whose sub registers are the set of all
/// possible registers that can be used for indirect addressing.
virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
/// \brief Convert the AMDIL MachineInstr to a supported ISA
/// MachineInstr
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const;
/// \brief Build a MOV instruction.
virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned DstReg, unsigned SrcReg) const = 0;
/// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
/// equivalent opcode that writes \p Channels Channels.
int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
};
namespace AMDGPU {

View File

@ -139,12 +139,6 @@ AMDGPUPassConfig::addPreISel() {
bool AMDGPUPassConfig::addInstSelector() {
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
// This callbacks this pass uses are not implemented yet on SI.
addPass(createAMDGPUIndirectAddressingPass(*TM));
}
return false;
}

View File

@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen
AMDILISelLowering.cpp
AMDGPUAsmPrinter.cpp
AMDGPUFrameLowering.cpp
AMDGPUIndirectAddressing.cpp
AMDGPUISelDAGToDAG.cpp
AMDGPUMCInstLower.cpp
AMDGPUMachineFunction.cpp

View File

@ -210,6 +210,14 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
}
}
/// \returns true if \p MI has a use operand for AMDGPU::AR_X, i.e. the use
/// operand lookup yields something other than -1.
bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
  const int UseIdx = MI->findRegisterUseOperandIdx(AMDGPU::AR_X);
  return UseIdx != -1;
}
/// \returns true if \p MI has a def operand for AMDGPU::AR_X, i.e. the def
/// operand lookup yields something other than -1.
bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
  const int DefIdx = MI->findRegisterDefOperandIdx(AMDGPU::AR_X);
  return DefIdx != -1;
}
bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
if (!isALUInstr(MI->getOpcode())) {
return false;
@ -1086,13 +1094,8 @@ unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
return RegIndex;
}
const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
unsigned SourceReg) const {
return &AMDGPU::R600_TReg32RegClass;
}
const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
return &AMDGPU::TRegMemRegClass;
const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
return &AMDGPU::R600_TReg32_XRegClass;
}
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
@ -1131,10 +1134,6 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
return Mov;
}
const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
return &AMDGPU::IndirectRegRegClass;
}
unsigned R600InstrInfo::getMaxAlusPerClause() const {
return 115;
}
@ -1272,6 +1271,12 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
return MovImm;
}
/// Inserts an AMDGPU::MOV from \p SrcReg to \p DstReg before \p I in \p MBB,
/// built through buildDefaultInstruction(), and returns the new instruction.
MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                           MachineBasicBlock::iterator I,
                                           unsigned DstReg,
                                           unsigned SrcReg) const {
  MachineInstr *Mov =
      buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
  return Mov;
}
int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
return getOperandIdx(MI.getOpcode(), Op);
}

View File

@ -82,6 +82,8 @@ namespace llvm {
bool usesTextureCache(const MachineInstr *MI) const;
bool mustBeLastInClause(unsigned Opcode) const;
bool usesAddressRegister(MachineInstr *MI) const;
bool definesAddressRegister(MachineInstr *MI) const;
bool readsLDSSrcReg(const MachineInstr *MI) const;
/// \returns The operand index for the given source number. Legal values
@ -203,10 +205,7 @@ namespace llvm {
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const;
virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
unsigned SourceReg) const;
virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
@ -218,8 +217,6 @@ namespace llvm {
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const;
virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
unsigned getMaxAlusPerClause() const;
///buildDefaultInstruction - This function returns a MachineInstr with
@ -246,6 +243,10 @@ namespace llvm {
unsigned DstReg,
uint64_t Imm) const;
MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned DstReg, unsigned SrcReg) const;
/// \brief Get the index of Op in the MachineInstr.
///
/// \returns -1 if the Instruction does not contain the specified \p Op.

View File

@ -206,6 +206,14 @@ public:
return false;
}
}
bool ARDef = TII->definesAddressRegister(MII) ||
TII->definesAddressRegister(MIJ);
bool ARUse = TII->usesAddressRegister(MII) ||
TII->usesAddressRegister(MIJ);
if (ARDef && ARUse)
return false;
return true;
}

View File

@ -41,18 +41,13 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::PRED_SEL_OFF);
Reserved.set(AMDGPU::PRED_SEL_ZERO);
Reserved.set(AMDGPU::PRED_SEL_ONE);
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
Reserved.set(*I);
}
for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(),
E = AMDGPU::TRegMemRegClass.end();
I != E; ++I) {
Reserved.set(*I);
}
const R600InstrInfo *RII =
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);

View File

@ -39,8 +39,6 @@ foreach Index = 0-127 in {
// Indirect addressing offset registers
def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
Index, Chan>;
def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
Chan>;
}
// 128-bit Temporary Registers
def T#Index#_XYZW : R600Reg_128 <"T"#Index#"",
@ -210,33 +208,3 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
(add (sequence "T%u_XY", 0, 63))>;
//===----------------------------------------------------------------------===//
// Register classes for indirect addressing
//===----------------------------------------------------------------------===//
// Super register for all the Indirect Registers. This register class is used
// by the REG_SEQUENCE instruction to specify the registers to use for direct
// reads / writes which may be written / read by an indirect address.
class IndirectSuper<string n, list<Register> subregs> :
RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices =
[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
}
def IndirectSuperReg : IndirectSuper<"Indirect",
[TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
>;
def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
// This register class defines the registers that are the storage units for
// the "Indirect Addressing" pseudo memory space.
// XXX: Only use the X channel, until we support wider stack widths
def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "TRegMem%u_X", 0, 16))
>;

View File

@ -197,6 +197,13 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
return MI;
}
/// Placeholder for building a register-to-register MOV on SI; not
/// implemented yet.
///
/// Uses llvm_unreachable, like the other unimplemented SI hooks in this file,
/// rather than assert: with assertions compiled out, an assert-only body would
/// let this non-void function fall off its end without returning a value.
MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         unsigned SrcReg) const {
  llvm_unreachable("Not Implemented");
}
bool SIInstrInfo::isMov(unsigned Opcode) const {
switch(Opcode) {
default: return false;
@ -346,12 +353,7 @@ int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
llvm_unreachable("Unimplemented");
}
const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
unsigned SourceReg) const {
llvm_unreachable("Unimplemented");
}
const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
llvm_unreachable("Unimplemented");
}
@ -370,7 +372,3 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead(
unsigned Address, unsigned OffsetReg) const {
llvm_unreachable("Unimplemented");
}
const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
llvm_unreachable("Unimplemented");
}

View File

@ -41,6 +41,9 @@ public:
bool NewMI=false) const;
virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned DstReg, unsigned SrcReg) const;
virtual bool isMov(unsigned Opcode) const;
virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
@ -62,10 +65,7 @@ public:
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const;
virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
unsigned SourceReg) const;
virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
@ -78,8 +78,6 @@ public:
unsigned ValueReg,
unsigned Address,
unsigned OffsetReg) const;
virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
};
namespace AMDGPU {

View File

@ -63,3 +63,42 @@ entry:
store i32 %0, i32 addrspace(1)* %out
ret void
}
; Test direct access of a private array inside a loop. The private array
; loads and stores should be lowered to copies, so there shouldn't be any
; MOVA instructions.
; CHECK: @direct_loop
; CHECK-NOT: MOVA_INT
; Kernel with two private (alloca) arrays that are only ever indexed with
; constant offsets: one is filled before the loop and read inside it, the
; other is read and written on every loop iteration.
define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
; %prv_array_const is written only here in the entry block; %prv_array is the
; accumulator updated inside the loop.
%prv_array_const = alloca [2 x i32]
%prv_array = alloca [2 x i32]
; Load in[0] and in[1] from global memory.
%a = load i32 addrspace(1)* %in
%b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
%b = load i32 addrspace(1)* %b_src_ptr
; Store them to elements 0 and 1 of %prv_array_const.
%a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
store i32 %a, i32* %a_dst_ptr
%b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
store i32 %b, i32* %b_dst_ptr
br label %for.body
; Loop body: prv_array[0] += prv_array_const[0], repeated until the counter
; reaches 4095.
; NOTE(review): the first iteration loads %prv_array element 0 before any
; store to it; confirm that reading an uninitialized slot is intended here.
for.body:
%inc = phi i32 [0, %entry], [%count, %for.body]
%x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
%x = load i32* %x_ptr
%y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
%y = load i32* %y_ptr
%xy = add i32 %x, %y
store i32 %xy, i32* %y_ptr
%count = add i32 %inc, 1
%done = icmp eq i32 %count, 4095
br i1 %done, label %for.end, label %for.body
; Exit: write the accumulated prv_array[0] to the output buffer.
for.end:
%value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
%value = load i32* %value_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}