mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-03 18:32:50 +00:00
R600/SI: Add support for private address space load/store
Private address space is emulated using the register file with MOVRELS and MOVRELD instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194626 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b52bf6a3b3
commit
a2b4eb6d15
@ -309,6 +309,40 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
|
||||
SDLoc(N), N->getValueType(0), Ops);
|
||||
}
|
||||
case AMDGPUISD::REGISTER_LOAD: {
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
|
||||
break;
|
||||
SDValue Addr, Offset;
|
||||
|
||||
SelectADDRIndirect(N->getOperand(1), Addr, Offset);
|
||||
const SDValue Ops[] = {
|
||||
Addr,
|
||||
Offset,
|
||||
CurDAG->getTargetConstant(0, MVT::i32),
|
||||
N->getOperand(0),
|
||||
};
|
||||
return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
|
||||
CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
|
||||
Ops);
|
||||
}
|
||||
case AMDGPUISD::REGISTER_STORE: {
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
|
||||
break;
|
||||
SDValue Addr, Offset;
|
||||
SelectADDRIndirect(N->getOperand(2), Addr, Offset);
|
||||
const SDValue Ops[] = {
|
||||
N->getOperand(1),
|
||||
Addr,
|
||||
Offset,
|
||||
CurDAG->getTargetConstant(0, MVT::i32),
|
||||
N->getOperand(0),
|
||||
};
|
||||
return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
|
||||
CurDAG->getVTList(MVT::Other),
|
||||
Ops);
|
||||
}
|
||||
}
|
||||
return SelectCode(N);
|
||||
}
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
#include "AMDGPUISelLowering.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUFrameLowering.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
@ -250,8 +251,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
||||
// AMDGPU DAG lowering
|
||||
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
|
||||
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
|
||||
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
|
||||
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
||||
case ISD::STORE: return LowerSTORE(Op, DAG);
|
||||
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
|
||||
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
|
||||
}
|
||||
@ -326,6 +327,21 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
|
||||
&Args[0], Args.size());
|
||||
}
|
||||
|
||||
/// Lower an ISD::FrameIndex node to a plain constant.
///
/// The constant is the frame lowering's offset for the node's frame index,
/// multiplied by 4 and by the function's stack width, and is produced in the
/// value type of \p Op.
SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
                                              SelectionDAG &DAG) const {
  FrameIndexSDNode *FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op);
  assert(FrameIdxNode);

  MachineFunction &Func = DAG.getMachineFunction();
  const AMDGPUFrameLowering *FrameLowering =
      static_cast<const AMDGPUFrameLowering *>(
          getTargetMachine().getFrameLowering());

  // Query the slot offset first, then the stack width, as the scaling factors.
  unsigned SlotIndex = FrameIdxNode->getIndex();
  unsigned SlotOffset = FrameLowering->getFrameIndexOffset(Func, SlotIndex);
  unsigned StackWidth = FrameLowering->getStackWidth(Func);

  return DAG.getConstant(SlotOffset * 4 * StackWidth, Op.getValueType());
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
@ -563,7 +579,8 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
StoreSDNode *Store = cast<StoreSDNode>(Op);
|
||||
if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
|
||||
if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
|
||||
Store->getValue().getValueType().isVector()) {
|
||||
return SplitVectorStore(Op, DAG);
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ private:
|
||||
void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &Args,
|
||||
unsigned Start, unsigned Count) const;
|
||||
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -120,31 +120,43 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
|
||||
bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
int OffsetOpIdx =
|
||||
AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::addr);
|
||||
// addr is a custom operand with multiple MI operands, and only the
|
||||
// first MI operand is given a name.
|
||||
int RegOpIdx = OffsetOpIdx + 1;
|
||||
int ChanOpIdx =
|
||||
AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::chan);
|
||||
|
||||
if (isRegisterLoad(*MI)) {
|
||||
unsigned RegIndex = MI->getOperand(2).getImm();
|
||||
unsigned Channel = MI->getOperand(3).getImm();
|
||||
int DstOpIdx =
|
||||
AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
|
||||
unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
|
||||
unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
|
||||
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
|
||||
unsigned OffsetReg = MI->getOperand(1).getReg();
|
||||
unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
|
||||
if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
|
||||
buildMovInstr(MBB, MI, MI->getOperand(0).getReg(),
|
||||
buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
|
||||
getIndirectAddrRegClass()->getRegister(Address));
|
||||
} else {
|
||||
buildIndirectRead(MBB, MI, MI->getOperand(0).getReg(),
|
||||
buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
|
||||
Address, OffsetReg);
|
||||
}
|
||||
} else if (isRegisterStore(*MI)) {
|
||||
unsigned RegIndex = MI->getOperand(2).getImm();
|
||||
unsigned Channel = MI->getOperand(3).getImm();
|
||||
int ValOpIdx =
|
||||
AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::val);
|
||||
AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
|
||||
unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
|
||||
unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
|
||||
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
|
||||
unsigned OffsetReg = MI->getOperand(1).getReg();
|
||||
unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
|
||||
if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
|
||||
buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
|
||||
MI->getOperand(0).getReg());
|
||||
MI->getOperand(ValOpIdx).getReg());
|
||||
} else {
|
||||
buildIndirectWrite(MBB, MI, MI->getOperand(0).getReg(),
|
||||
calculateIndirectAddress(RegIndex, Channel),
|
||||
OffsetReg);
|
||||
buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(),
|
||||
calculateIndirectAddress(RegIndex, Channel),
|
||||
OffsetReg);
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
@ -260,6 +272,57 @@ bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
|
||||
return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
|
||||
}
|
||||
|
||||
int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
|
||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
int Offset = -1;
|
||||
|
||||
if (MFI->getNumObjects() == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (MRI.livein_empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
|
||||
for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
|
||||
LE = MRI.livein_end();
|
||||
LI != LE; ++LI) {
|
||||
unsigned Reg = LI->first;
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg) ||
|
||||
!IndirectRC->contains(Reg))
|
||||
continue;
|
||||
|
||||
unsigned RegIndex;
|
||||
unsigned RegEnd;
|
||||
for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
|
||||
++RegIndex) {
|
||||
if (IndirectRC->getRegister(RegIndex) == Reg)
|
||||
break;
|
||||
}
|
||||
Offset = std::max(Offset, (int)RegIndex);
|
||||
}
|
||||
|
||||
return Offset + 1;
|
||||
}
|
||||
|
||||
int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
|
||||
int Offset = 0;
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
|
||||
// Variable sized objects are not supported
|
||||
assert(!MFI->hasVarSizedObjects());
|
||||
|
||||
if (MFI->getNumObjects() == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
|
||||
|
||||
return getIndirectIndexBegin(MF) + Offset;
|
||||
}
|
||||
|
||||
|
||||
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
|
||||
DebugLoc DL) const {
|
||||
|
@ -99,6 +99,14 @@ protected:
|
||||
MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops,
|
||||
MachineInstr *LoadMI) const;
|
||||
/// \returns the smallest register index that will be accessed by an indirect
|
||||
/// read or write or -1 if indirect addressing is not used by this program.
|
||||
virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
|
||||
|
||||
/// \returns the largest register index that will be accessed by an indirect
|
||||
/// read or write or -1 if indirect addressing is not used by this program.
|
||||
virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
|
||||
|
||||
public:
|
||||
bool canFoldMemoryOperand(const MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops) const;
|
||||
@ -144,14 +152,6 @@ public:
|
||||
virtual unsigned getIEQOpcode() const = 0;
|
||||
virtual bool isMov(unsigned opcode) const = 0;
|
||||
|
||||
/// \returns the smallest register index that will be accessed by an indirect
|
||||
/// read or write or -1 if indirect addressing is not used by this program.
|
||||
virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
|
||||
|
||||
/// \returns the largest register index that will be accessed by an indirect
|
||||
/// read or write or -1 if indirect addressing is not used by this program.
|
||||
virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
|
||||
|
||||
/// \brief Calculate the "Indirect Address" for the given \p RegIndex and
|
||||
/// \p Channel
|
||||
///
|
||||
|
@ -35,6 +35,7 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
|
||||
}
|
||||
|
||||
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
|
||||
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
|
||||
|
||||
def COND_EQ : PatLeaf <
|
||||
(cond),
|
||||
@ -277,6 +278,8 @@ class FNEG <RegisterClass rc> : AMDGPUShaderInst <
|
||||
|
||||
multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
|
||||
ComplexPattern addrPat> {
|
||||
let UseNamedOperandTable = 1 in {
|
||||
|
||||
def RegisterLoad : AMDGPUShaderInst <
|
||||
(outs dstClass:$dst),
|
||||
(ins addrClass:$addr, i32imm:$chan),
|
||||
@ -295,6 +298,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
|
||||
let isRegisterStore = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // End isCodeGenOnly = 1, isPseudo = 1
|
||||
|
||||
|
@ -50,6 +50,10 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
|
||||
assert(!"Unimplemented"); return NULL;
|
||||
}
|
||||
|
||||
/// Placeholder implementation: trips an "Unimplemented" assertion and, when
/// assertions are compiled out, returns 0. Declared virtual so that a derived
/// register-info class can provide the real mapping for \p Reg.
virtual unsigned getHWRegIndex(unsigned Reg) const {
|
||||
assert(!"Unimplemented"); return 0;
|
||||
}
|
||||
|
||||
/// \returns the sub reg enum value for the given \p Channel
|
||||
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
|
||||
unsigned getSubRegFromChannel(unsigned Channel) const;
|
||||
|
@ -519,7 +519,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
||||
case ISD::STORE: return LowerSTORE(Op, DAG);
|
||||
case ISD::LOAD: return LowerLOAD(Op, DAG);
|
||||
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
|
||||
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
|
||||
case ISD::INTRINSIC_VOID: {
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
@ -843,20 +842,6 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
|
||||
false, false, false, 0);
|
||||
}
|
||||
|
||||
/// Lower an ISD::FrameIndex node to an i32 constant.
///
/// The constant is the frame lowering's offset for the node's frame index,
/// multiplied by 4 and by the function's stack width.
SDValue R600TargetLowering::LowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  FrameIndexSDNode *FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op);
  assert(FrameIdxNode);

  MachineFunction &Func = DAG.getMachineFunction();
  const AMDGPUFrameLowering *FrameLowering =
      static_cast<const AMDGPUFrameLowering *>(
          getTargetMachine().getFrameLowering());

  // Query the slot offset first, then the stack width, as the scaling factors.
  unsigned SlotIndex = FrameIdxNode->getIndex();
  unsigned SlotOffset = FrameLowering->getFrameIndexOffset(Func, SlotIndex);
  unsigned StackWidth = FrameLowering->getStackWidth(Func);

  return DAG.getConstant(SlotOffset * 4 * StackWidth, MVT::i32);
}
|
||||
|
||||
bool R600TargetLowering::isZero(SDValue Op) const {
|
||||
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
|
||||
return Cst->isNullValue();
|
||||
|
@ -59,7 +59,6 @@ private:
|
||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
|
||||
|
@ -1024,67 +1024,25 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
||||
return 2;
|
||||
}
|
||||
|
||||
int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
|
||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
int Offset = 0;
|
||||
|
||||
if (MFI->getNumObjects() == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (MRI.livein_empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
|
||||
LE = MRI.livein_end();
|
||||
LI != LE; ++LI) {
|
||||
Offset = std::max(Offset,
|
||||
GET_REG_INDEX(RI.getEncodingValue(LI->first)));
|
||||
}
|
||||
|
||||
return Offset + 1;
|
||||
}
|
||||
|
||||
int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
|
||||
int Offset = 0;
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
|
||||
// Variable sized objects are not supported
|
||||
assert(!MFI->hasVarSizedObjects());
|
||||
|
||||
if (MFI->getNumObjects() == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
|
||||
|
||||
return getIndirectIndexBegin(MF) + Offset;
|
||||
}
|
||||
|
||||
std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
|
||||
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
|
||||
const MachineFunction &MF) const {
|
||||
const AMDGPUFrameLowering *TFL =
|
||||
static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
|
||||
std::vector<unsigned> Regs;
|
||||
|
||||
unsigned StackWidth = TFL->getStackWidth(MF);
|
||||
int End = getIndirectIndexEnd(MF);
|
||||
|
||||
if (End == -1) {
|
||||
return Regs;
|
||||
}
|
||||
if (End == -1)
|
||||
return;
|
||||
|
||||
for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
|
||||
unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
|
||||
Regs.push_back(SuperReg);
|
||||
Reserved.set(SuperReg);
|
||||
for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
|
||||
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
|
||||
Regs.push_back(Reg);
|
||||
Reserved.set(Reg);
|
||||
}
|
||||
}
|
||||
return Regs;
|
||||
}
|
||||
|
||||
unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
|
||||
|
@ -193,14 +193,9 @@ namespace llvm {
|
||||
virtual int getInstrLatency(const InstrItineraryData *ItinData,
|
||||
SDNode *Node) const { return 1;}
|
||||
|
||||
/// \returns a list of all the registers that may be accessed using indirect
|
||||
/// addressing.
|
||||
std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
|
||||
|
||||
virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
|
||||
|
||||
virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
|
||||
|
||||
/// \brief Reserve the registers that may be accessed using indirect addressing.
|
||||
void reserveIndirectRegisters(BitVector &Reserved,
|
||||
const MachineFunction &MF) const;
|
||||
|
||||
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
|
||||
unsigned Channel) const;
|
||||
|
@ -75,7 +75,6 @@ def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
|
||||
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
|
||||
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
|
||||
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
|
||||
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
|
||||
|
||||
|
||||
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
|
||||
|
@ -28,6 +28,8 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm)
|
||||
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
BitVector Reserved(getNumRegs());
|
||||
|
||||
const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
|
||||
|
||||
Reserved.set(AMDGPU::ZERO);
|
||||
Reserved.set(AMDGPU::HALF);
|
||||
Reserved.set(AMDGPU::ONE);
|
||||
@ -48,14 +50,8 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
Reserved.set(*I);
|
||||
}
|
||||
|
||||
const R600InstrInfo *RII =
|
||||
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
|
||||
std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
|
||||
for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
|
||||
E = IndirectRegs.end();
|
||||
I != E; ++I) {
|
||||
Reserved.set(*I);
|
||||
}
|
||||
TII->reserveIndirectRegisters(Reserved, MF);
|
||||
|
||||
return Reserved;
|
||||
}
|
||||
|
||||
@ -73,6 +69,10 @@ unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
|
||||
return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
|
||||
}
|
||||
|
||||
unsigned R600RegisterInfo::getHWRegIndex(unsigned Reg) const {
|
||||
return GET_REG_INDEX(getEncodingValue(Reg));
|
||||
}
|
||||
|
||||
const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
|
||||
MVT VT) const {
|
||||
switch(VT.SimpleTy) {
|
||||
|
@ -39,6 +39,8 @@ struct R600RegisterInfo : public AMDGPURegisterInfo {
|
||||
/// \brief get the HW encoding for a register's channel.
|
||||
unsigned getHWRegChan(unsigned reg) const;
|
||||
|
||||
virtual unsigned getHWRegIndex(unsigned Reg) const;
|
||||
|
||||
/// \brief get the register class of the specified type to use in the
|
||||
/// CFGStructurizer
|
||||
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
|
||||
|
@ -75,6 +75,19 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
|
||||
|
||||
// We need to custom lower loads/stores from private memory
|
||||
setOperationAction(ISD::LOAD, MVT::i32, Custom);
|
||||
setOperationAction(ISD::LOAD, MVT::i64, Custom);
|
||||
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
|
||||
|
||||
setOperationAction(ISD::STORE, MVT::i32, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::i64, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::i128, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
|
||||
|
||||
|
||||
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
||||
|
||||
@ -95,6 +108,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
|
||||
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
|
||||
|
||||
@ -106,6 +120,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
|
||||
|
||||
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
||||
setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
|
||||
|
||||
setTargetDAGCombine(ISD::SELECT_CC);
|
||||
|
||||
@ -122,6 +137,8 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
|
||||
bool *IsFast) const {
|
||||
// XXX: This depends on the address space and also we may want to revisit
|
||||
// the alignment values we specify in the DataLayout.
|
||||
if (!VT.isSimple() || VT == MVT::Other)
|
||||
return false;
|
||||
return VT.bitsGT(MVT::i32);
|
||||
}
|
||||
|
||||
@ -350,6 +367,19 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
case AMDGPU::SI_RegisterStorePseudo: {
|
||||
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
|
||||
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
|
||||
Reg);
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
|
||||
MIB.addOperand(MI->getOperand(i));
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
}
|
||||
return BB;
|
||||
}
|
||||
@ -395,7 +425,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||
case ISD::LOAD: {
|
||||
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
|
||||
if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
|
||||
if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
|
||||
Op.getValueType().isVector()) {
|
||||
SDValue MergedValues[2] = {
|
||||
SplitVectorLoad(Op, DAG),
|
||||
@ -403,20 +434,13 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
};
|
||||
return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
|
||||
} else {
|
||||
return SDValue();
|
||||
return LowerLOAD(Op, DAG);
|
||||
}
|
||||
}
|
||||
case ISD::STORE: {
|
||||
StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
|
||||
if (Store->getValue().getValueType().isVector() &&
|
||||
Store->getValue().getValueType().getVectorNumElements() >= 8)
|
||||
return SplitVectorStore(Op, DAG);
|
||||
else
|
||||
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
}
|
||||
|
||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
||||
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
|
||||
case ISD::STORE: return LowerSTORE(Op, DAG);
|
||||
case ISD::ANY_EXTEND: // Fall-through
|
||||
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
|
||||
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
|
||||
@ -628,6 +652,30 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
|
||||
return Chain;
|
||||
}
|
||||
|
||||
/// Custom-lower a load from the private address space.
///
/// Loads from any other address space are left alone (an empty SDValue is
/// returned). For private loads the low 32-bit element of the base pointer is
/// shifted right by 2 and used as the address operand of an
/// AMDGPUISD::REGISTER_LOAD node. The result is merged with the chain of the
/// original load.
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  SDLoc SL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);

  const bool IsPrivate =
      LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
  if (!IsPrivate)
    return SDValue();

  // Take the low 32 bits of the pointer and shift right by 2.
  SDValue LowPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32,
                               LoadNode->getBasePtr(),
                               DAG.getConstant(0, MVT::i32));
  SDValue RegAddr = DAG.getNode(ISD::SRL, SL, MVT::i32, LowPtr,
                                DAG.getConstant(2, MVT::i32));

  SDValue Loaded = DAG.getNode(AMDGPUISD::REGISTER_LOAD, SL,
                               Op.getValueType(), LoadNode->getChain(),
                               RegAddr, DAG.getTargetConstant(0, MVT::i32),
                               Op.getOperand(2));

  // Value result first, then the incoming chain of the original load.
  SDValue Results[2] = { Loaded, LoadNode->getChain() };
  return DAG.getMergeValues(Results, 2, SL);
}
|
||||
|
||||
SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
|
||||
@ -685,6 +733,56 @@ SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op,
|
||||
return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi);
|
||||
}
|
||||
|
||||
/// Custom-lower a store.
///
/// The generic AMDGPUTargetLowering::LowerSTORE gets the first chance. If it
/// produces nothing, vector stores with 8 or more elements are split, and
/// stores outside the private address space are left alone (an empty SDValue
/// is returned). A private store is turned into a chain of
/// AMDGPUISD::REGISTER_STORE nodes: an i64 value is stored as two i32 parts,
/// an i128 value as four, and anything else as a single value. Part N is
/// stored at (pointer >> 2) + N.
SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc SL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  EVT MemVT = StoreNode->getMemoryVT();

  SDValue Generic = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Generic.getNode())
    return Generic;

  if (MemVT.isVector() && MemVT.getVectorNumElements() >= 8)
    return SplitVectorStore(Op, DAG);

  const bool IsPrivate =
      StoreNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
  if (!IsPrivate)
    return SDValue();

  // Narrow the pointer to 32 bits and shift right by 2.
  SDValue NarrowPtr = DAG.getZExtOrTrunc(StoreNode->getBasePtr(), SL, MVT::i32);
  SDValue RegAddr = DAG.getNode(ISD::SRL, SL, MVT::i32, NarrowPtr,
                                DAG.getConstant(2, MVT::i32));
  SDValue Chain = StoreNode->getChain();
  SmallVector<SDValue, 8> Parts;

  if (MemVT == MVT::i128) {
    // Four i32 parts: i64 element (Part / 2), then i32 element (Part % 2).
    for (unsigned Part = 0; Part != 4; ++Part) {
      SDValue Half = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i64,
                                 StoreNode->getValue(),
                                 DAG.getConstant(Part / 2, MVT::i32));
      Parts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Half,
                                  DAG.getConstant(Part % 2, MVT::i32)));
    }
  } else if (MemVT == MVT::i64) {
    for (unsigned Part = 0; Part != 2; ++Part) {
      Parts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32,
                                  StoreNode->getValue(),
                                  DAG.getConstant(Part, MVT::i32)));
    }
  } else {
    Parts.push_back(StoreNode->getValue());
  }

  // Emit one REGISTER_STORE per part, threading the chain through each.
  for (unsigned Idx = 0; Idx < Parts.size(); ++Idx) {
    SDValue PartAddr = DAG.getNode(ISD::ADD, SL, MVT::i32, RegAddr,
                                   DAG.getConstant(Idx, MVT::i32));
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, SL, MVT::Other, Chain,
                        Parts[Idx], PartAddr,
                        DAG.getTargetConstant(0, MVT::i32));
  }
  return Chain;
}
|
||||
|
||||
|
||||
SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
|
@ -25,8 +25,10 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
||||
SDValue Chain, unsigned Offset) const;
|
||||
SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
|
@ -186,7 +186,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
|
||||
|
||||
RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
|
||||
|
||||
if (!Op.isReg())
|
||||
if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
|
||||
return std::make_pair(0, 0);
|
||||
|
||||
unsigned Reg = Op.getReg();
|
||||
|
@ -230,7 +230,8 @@ MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned DstReg,
|
||||
unsigned SrcReg) const {
|
||||
llvm_unreachable("Not Implemented");
|
||||
return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
|
||||
DstReg) .addReg(SrcReg);
|
||||
}
|
||||
|
||||
bool SIInstrInfo::isMov(unsigned Opcode) const {
|
||||
@ -603,17 +604,8 @@ unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
|
||||
return RegIndex;
|
||||
}
|
||||
|
||||
|
||||
/// Stub: unconditionally reaches llvm_unreachable("Unimplemented").
// NOTE(review): buildIndirectWrite(), buildIndirectRead() and
// reserveIndirectRegisters() in SIInstrInfo call getIndirectIndexBegin();
// confirm this override is meant to be dropped in favour of the
// AMDGPUInstrInfo implementation.
int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
|
||||
llvm_unreachable("Unimplemented");
|
||||
}
|
||||
|
||||
/// Stub: unconditionally reaches llvm_unreachable("Unimplemented").
// NOTE(review): SIInstrInfo::reserveIndirectRegisters() calls
// getIndirectIndexEnd(); confirm this override is meant to be dropped in
// favour of the AMDGPUInstrInfo implementation.
int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
|
||||
llvm_unreachable("Unimplemented");
|
||||
}
|
||||
|
||||
/// \returns the register class used for indirect addressing on SI, which is
/// VReg_32.
///
/// The stale llvm_unreachable("Unimplemented") that preceded the return has
/// been removed: it made the return dead code and aborted every caller, such
/// as AMDGPUInstrInfo::getIndirectIndexBegin().
const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::VReg_32RegClass;
}
|
||||
|
||||
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
|
||||
@ -621,7 +613,17 @@ MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned ValueReg,
|
||||
unsigned Address, unsigned OffsetReg) const {
|
||||
llvm_unreachable("Unimplemented");
|
||||
const DebugLoc &DL = MBB->findDebugLoc(I);
|
||||
unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
|
||||
getIndirectIndexBegin(*MBB->getParent()));
|
||||
|
||||
return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
|
||||
.addReg(IndirectBaseReg, RegState::Define)
|
||||
.addOperand(I->getOperand(0))
|
||||
.addReg(IndirectBaseReg)
|
||||
.addReg(OffsetReg)
|
||||
.addImm(0)
|
||||
.addReg(ValueReg);
|
||||
}
|
||||
|
||||
MachineInstrBuilder SIInstrInfo::buildIndirectRead(
|
||||
@ -629,5 +631,43 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead(
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned ValueReg,
|
||||
unsigned Address, unsigned OffsetReg) const {
|
||||
llvm_unreachable("Unimplemented");
|
||||
const DebugLoc &DL = MBB->findDebugLoc(I);
|
||||
unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
|
||||
getIndirectIndexBegin(*MBB->getParent()));
|
||||
|
||||
return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
|
||||
.addOperand(I->getOperand(0))
|
||||
.addOperand(I->getOperand(1))
|
||||
.addReg(IndirectBaseReg)
|
||||
.addReg(OffsetReg)
|
||||
.addImm(0);
|
||||
|
||||
}
|
||||
|
||||
/// Reserve the registers that may be accessed using indirect addressing.
///
/// The indirect window is the inclusive index range
/// [getIndirectIndexBegin(MF), getIndirectIndexEnd(MF)]. Nothing is reserved
/// when getIndirectIndexEnd() returns -1. Otherwise the bit of every register
/// in that index range is set in \p Reserved for VReg_32, and for each wider
/// VGPR class (64/96/128/256/512 bits) starting 1/2/3/7/15 indices below
/// Begin, clamped at 0.
///
/// \param Reserved bit vector of reserved registers, updated in place.
/// \param MF       function whose frame and live-ins determine the window.
void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                           const MachineFunction &MF) const {
  int End = getIndirectIndexEnd(MF);
  int Begin = getIndirectIndexBegin(MF);

  if (End == -1)
    return;

  for (int Index = Begin; Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));

  // The wider classes start below Begin, presumably because a tuple that
  // starts up to (width - 1) registers earlier still overlaps the window --
  // TODO confirm against the register class definitions.
  // Each loop must seed its start from Begin: the previous code read the
  // freshly declared (uninitialised) Index inside its own initialiser.
  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
|
||||
|
@ -25,6 +25,14 @@ class SIInstrInfo : public AMDGPUInstrInfo {
|
||||
private:
|
||||
const SIRegisterInfo RI;
|
||||
|
||||
MachineInstrBuilder buildIndirectIndexLoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned OffsetVGPR,
|
||||
unsigned MovRelOp,
|
||||
unsigned Dst,
|
||||
unsigned Src0) const;
|
||||
// If you add or remove instructions from this function, you will
|
||||
|
||||
public:
|
||||
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
|
||||
|
||||
@ -58,9 +66,6 @@ public:
|
||||
|
||||
virtual bool verifyInstruction(const MachineInstr *MI,
|
||||
StringRef &ErrInfo) const;
|
||||
virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
|
||||
|
||||
virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
|
||||
|
||||
bool isSALUInstr(const MachineInstr &MI) const;
|
||||
unsigned getVALUOp(const MachineInstr &MI) const;
|
||||
@ -114,7 +119,12 @@ public:
|
||||
unsigned ValueReg,
|
||||
unsigned Address,
|
||||
unsigned OffsetReg) const;
|
||||
};
|
||||
void reserveIndirectRegisters(BitVector &Reserved,
|
||||
const MachineFunction &MF) const;
|
||||
|
||||
void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
|
||||
unsigned SavReg, unsigned IndexReg) const;
|
||||
};
|
||||
|
||||
namespace AMDGPU {
|
||||
|
||||
|
@ -121,6 +121,10 @@ class SGPRImm <dag frag> : PatLeaf<frag, [{
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
// Pointer-typed (iPTR) operand made of two machine sub-operands: an SReg_32
// register ($ptr) and an i32 immediate ($index).
def FRAMEri64 : Operand<iPTR> {
|
||||
let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SI assembler operands
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1293,6 +1293,36 @@ def SI_KILL : InstSI <
|
||||
|
||||
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
|
||||
|
||||
//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri64, ADDRIndirect>;
|
||||
|
||||
let UseNamedOperandTable = 1 in {
|
||||
|
||||
// Register-file load instruction. It takes a FRAMEri64 address ($addr) and an
// immediate channel ($chan), and defines a VReg_32 result ($dst) together
// with an SReg_64 output ($temp). It has no assembly string and no selection
// pattern, and is flagged isRegisterLoad / mayLoad.
def SI_RegisterLoad : AMDGPUShaderInst <
|
||||
(outs VReg_32:$dst, SReg_64:$temp),
|
||||
(ins FRAMEri64:$addr, i32imm:$chan),
|
||||
"", []
|
||||
> {
|
||||
let isRegisterLoad = 1;
|
||||
let mayLoad = 1;
|
||||
}
|
||||
|
||||
// Common base for the register-file store instructions. The output list is
// supplied by the instantiating def; the inputs are always the VReg_32 value
// to store ($val), a FRAMEri64 address ($addr) and an immediate channel
// ($chan). It has no assembly string and no selection pattern, and is flagged
// isRegisterStore / mayStore.
class SIRegStore<dag outs> : AMDGPUShaderInst <
|
||||
outs,
|
||||
(ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan),
|
||||
"", []
|
||||
> {
|
||||
let isRegisterStore = 1;
|
||||
let mayStore = 1;
|
||||
}
|
||||
|
||||
// Two instantiations of SIRegStore. SI_RegisterStorePseudo has no outputs and
// is marked usesCustomInserter; SI_RegisterStore additionally defines an
// SReg_64 output ($temp).
let usesCustomInserter = 1 in {
|
||||
def SI_RegisterStorePseudo : SIRegStore<(outs)>;
|
||||
} // End usesCustomInserter = 1
|
||||
def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
|
||||
|
||||
|
||||
} // End UseNamedOperandTable = 1
|
||||
|
||||
def SI_INDIRECT_SRC : InstSI <
|
||||
(outs VReg_32:$dst, SReg_64:$temp),
|
||||
(ins unknown:$src, VSrc_32:$idx, i32imm:$off),
|
||||
@ -1309,6 +1339,7 @@ class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
|
||||
let Constraints = "$src = $dst";
|
||||
}
|
||||
|
||||
def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VReg_32>;
|
||||
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
|
||||
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
|
||||
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
|
||||
@ -1988,7 +2019,7 @@ def : Pat<
|
||||
(V_CMP_U_F32_e64 $src0, $src1)
|
||||
>;
|
||||
|
||||
//============================================================================//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Miscellaneous Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -1999,6 +2030,11 @@ def : Pat <
|
||||
(i32 (EXTRACT_SUBREG $x, sub1)), sub1)
|
||||
>;
|
||||
|
||||
// Select an i64 -> i32 truncate as an extraction of the sub0 subregister of
// the source; no instruction is emitted beyond EXTRACT_SUBREG.
def : Pat <
|
||||
(i32 (trunc i64:$a)),
|
||||
(EXTRACT_SUBREG $a, sub0)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(or i64:$a, i64:$b),
|
||||
(INSERT_SUBREG
|
||||
|
@ -377,10 +377,13 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
|
||||
unsigned Dst = MI.getOperand(0).getReg();
|
||||
unsigned Vec = MI.getOperand(2).getReg();
|
||||
unsigned Off = MI.getOperand(4).getImm();
|
||||
unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
|
||||
if (!SubReg)
|
||||
SubReg = Vec;
|
||||
|
||||
MachineInstr *MovRel =
|
||||
MachineInstr *MovRel =
|
||||
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
|
||||
.addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
|
||||
.addReg(SubReg + Off)
|
||||
.addReg(AMDGPU::M0, RegState::Implicit)
|
||||
.addReg(Vec, RegState::Implicit);
|
||||
|
||||
@ -395,10 +398,13 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
|
||||
unsigned Dst = MI.getOperand(0).getReg();
|
||||
unsigned Off = MI.getOperand(4).getImm();
|
||||
unsigned Val = MI.getOperand(5).getReg();
|
||||
unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
|
||||
if (!SubReg)
|
||||
SubReg = Dst;
|
||||
|
||||
MachineInstr *MovRel =
|
||||
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
|
||||
.addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
|
||||
.addReg(SubReg + Off, RegState::Define)
|
||||
.addReg(Val)
|
||||
.addReg(AMDGPU::M0, RegState::Implicit)
|
||||
.addReg(Dst, RegState::Implicit);
|
||||
@ -477,6 +483,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
IndirectSrc(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_INDIRECT_DST_V1:
|
||||
case AMDGPU::SI_INDIRECT_DST_V2:
|
||||
case AMDGPU::SI_INDIRECT_DST_V4:
|
||||
case AMDGPU::SI_INDIRECT_DST_V8:
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "SIInstrInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -26,6 +27,9 @@ SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm)
|
||||
/// \brief Build the reserved-register set for \p MF.
///
/// The result always contains EXEC and INDIRECT_BASE_ADDR, plus whatever
/// SIInstrInfo::reserveIndirectRegisters() adds for this function.
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  const SIInstrInfo *InstrInfo =
      static_cast<const SIInstrInfo *>(TM.getInstrInfo());

  BitVector ReservedRegs(getNumRegs());
  ReservedRegs.set(AMDGPU::EXEC);
  ReservedRegs.set(AMDGPU::INDIRECT_BASE_ADDR);

  // The instruction info contributes the registers used for indirect
  // addressing.
  InstrInfo->reserveIndirectRegisters(ReservedRegs, MF);

  return ReservedRegs;
}
|
||||
|
||||
@ -51,6 +55,10 @@ const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
|
||||
}
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
|
||||
return getEncodingValue(Reg);
|
||||
}
|
||||
|
||||
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
|
||||
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
|
||||
|
||||
|
@ -42,6 +42,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
|
||||
/// CFGStructurizer
|
||||
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
|
||||
|
||||
virtual unsigned getHWRegIndex(unsigned Reg) const;
|
||||
|
||||
/// \brief Return the 'base' register class for this register.
|
||||
/// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc.
|
||||
const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
|
||||
|
@ -299,8 +299,6 @@ entry:
|
||||
; R600-CHECK: 31
|
||||
; SI-CHECK-LABEL: @load_i64_sext
|
||||
; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]:[0-9]\]]]
|
||||
; SI-CHECK: V_LSHL_B64 [[LSHL:v\[[0-9]:[0-9]\]]], [[VAL]], 32
|
||||
; SI-CHECK: V_ASHR_I64 v{{\[[0-9]:[0-9]\]}}, [[LSHL]], 32
|
||||
|
||||
define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
entry:
|
||||
|
@ -1,16 +1,24 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
|
||||
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
|
||||
|
||||
; This test checks that uses and defs of the AR register happen in the same
|
||||
; instruction clause.
|
||||
|
||||
; CHECK: @mova_same_clause
|
||||
; CHECK: MOVA_INT
|
||||
; CHECK-NOT: ALU clause
|
||||
; CHECK: 0 + AR.x
|
||||
; CHECK: MOVA_INT
|
||||
; CHECK-NOT: ALU clause
|
||||
; CHECK: 0 + AR.x
|
||||
; R600-CHECK-LABEL: @mova_same_clause
|
||||
; R600-CHECK: MOVA_INT
|
||||
; R600-CHECK-NOT: ALU clause
|
||||
; R600-CHECK: 0 + AR.x
|
||||
; R600-CHECK: MOVA_INT
|
||||
; R600-CHECK-NOT: ALU clause
|
||||
; R600-CHECK: 0 + AR.x
|
||||
|
||||
; SI-CHECK-LABEL: @mova_same_clause
|
||||
; SI-CHECK: V_READFIRSTLANE
|
||||
; SI-CHECK: V_MOVRELD
|
||||
; SI-CHECK: S_CBRANCH
|
||||
; SI-CHECK: V_READFIRSTLANE
|
||||
; SI-CHECK: V_MOVRELD
|
||||
; SI-CHECK: S_CBRANCH
|
||||
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
|
||||
entry:
|
||||
%stack = alloca [5 x i32], align 4
|
||||
@ -38,9 +46,10 @@ entry:
|
||||
; XXX: This generated code has unnecessary MOVs, we should be able to optimize
|
||||
; this.
|
||||
|
||||
; CHECK: @multiple_structs
|
||||
; CHECK-NOT: MOVA_INT
|
||||
|
||||
; R600-CHECK-LABEL: @multiple_structs
|
||||
; R600-CHECK-NOT: MOVA_INT
|
||||
; SI-CHECK-LABEL: @multiple_structs
|
||||
; SI-CHECK-NOT: V_MOVREL
|
||||
%struct.point = type { i32, i32 }
|
||||
|
||||
define void @multiple_structs(i32 addrspace(1)* %out) {
|
||||
@ -68,8 +77,10 @@ entry:
|
||||
; loads and stores should be lowered to copies, so there shouldn't be any
|
||||
; MOVA instructions.
|
||||
|
||||
; CHECK: @direct_loop
|
||||
; CHECK-NOT: MOVA_INT
|
||||
; R600-CHECK-LABEL: @direct_loop
|
||||
; R600-CHECK-NOT: MOVA_INT
|
||||
; SI-CHECK-LABEL: @direct_loop
|
||||
; SI-CHECK-NOT: V_MOVREL
|
||||
|
||||
define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
entry:
|
@ -43,7 +43,7 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
|
||||
;EG-CHECK: ASHR
|
||||
|
||||
;SI-CHECK-LABEL: @ashr_i64
|
||||
;SI-CHECK: V_ASHR_I64
|
||||
;SI-CHECK: S_ASHR_I64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
|
||||
define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%0 = sext i32 %in to i64
|
||||
|
@ -1,8 +1,7 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SI-LABEL: @unaligned_load_store_i32:
|
||||
; SI: V_ADD_I32_e64 [[REG:v[0-9]+]]
|
||||
; DS_READ_U8 {{v[0-9]+}}, 0, [[REG]]
|
||||
; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
|
||||
define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
|
||||
%v = load i32 addrspace(3)* %p, align 1
|
||||
store i32 %v, i32 addrspace(3)* %r, align 1
|
||||
@ -10,8 +9,7 @@ define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r
|
||||
}
|
||||
|
||||
; SI-LABEL: @unaligned_load_store_v4i32:
|
||||
; SI: V_ADD_I32_e64 [[REG:v[0-9]+]]
|
||||
; DS_READ_U8 {{v[0-9]+}}, 0, [[REG]]
|
||||
; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
|
||||
define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
|
||||
%v = load <4 x i32> addrspace(3)* %p, align 1
|
||||
store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user