Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2025-08-10 02:25:47 +00:00)
R600/SI: Store constant initializer data in constant memory
This implements a solution for constant initializers suggested by Vadim Girlin: the data is stored after the shader code, and the S_GETPC instruction is then used to compute its address. This saves us the trouble of creating a new buffer for constant data and then having to pass the pointer to the kernel via user SGPRs or the input buffer.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213530 91177308-0d34-0410-b5e6-96231b3b80d8
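As a rough illustration of the scheme, the SI_CONSTDATA_PTR pseudo added by this patch expands (see the SIInstrInfo::expandPostRAPseudo hunk below) into a sequence along the lines of the following sketch. The SGPR pair is chosen arbitrarily here, and the EndOfTextLabel operand stands for the new PC-relative fixup_si_end_of_text fixup, which resolves to the start of the constant data placed right after the last instruction in .text:

    S_GETPC_B64 s[2:3]                  ; s[2:3] = address of the instruction after S_GETPC_B64 (register pair is illustrative)
    S_ADD_I32   s2, s2, EndOfTextLabel  ; add the 32-bit offset to the start of the constant data
    S_ADDC_U32  s3, s3, 0               ; propagate the carry into the high half (clobbers SCC)

The offset of an individual global inside that data is applied separately, through the fixup_si_rodata fixup on the operand that carries the global's symbol.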
@@ -63,6 +63,14 @@ extern char &SIFixSGPRLiveRangesID;
 
 extern Target TheAMDGPUTarget;
 
+namespace AMDGPU {
+enum TargetIndex {
+  TI_CONSTDATA_START
+};
+}
+
+#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel"
+
 } // End namespace llvm
 
 namespace ShaderType {
@@ -85,6 +85,16 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
   DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
 }
 
+void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
+
+  // This label is used to mark the end of the .text section.
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+  OutStreamer.SwitchSection(TLOF.getTextSection());
+  MCSymbol *EndOfTextLabel =
+      OutContext.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+  OutStreamer.EmitLabel(EndOfTextLabel);
+}
+
 bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   SetupMachineFunction(MF);
 
@@ -70,6 +70,8 @@ public:
   /// Implemented in AMDGPUMCInstLower.cpp
   void EmitInstruction(const MachineInstr *MI) override;
 
+  void EmitEndOfAsmFile(Module &M) override;
+
 protected:
   bool DisasmEnabled;
   std::vector<std::string> DisasmLines, HexLines;
@@ -21,7 +21,6 @@
 #include "AMDGPUSubtarget.h"
 #include "R600MachineFunctionInfo.h"
 #include "SIMachineFunctionInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -1177,21 +1176,6 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getMergeValues(Ops, DL);
   }
 
-  // Lower loads constant address space global variable loads
-  if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
-      isa<GlobalVariable>(
-          GetUnderlyingObject(Load->getMemOperand()->getValue()))) {
-
-
-    SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL,
-        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
-    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
-        DAG.getConstant(2, MVT::i32));
-    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
-        Load->getChain(), Ptr,
-        DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
-  }
-
   if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
       ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
     return SDValue();
@@ -2222,6 +2206,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(CVT_F32_UBYTE2)
   NODE_NAME_CASE(CVT_F32_UBYTE3)
   NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
+  NODE_NAME_CASE(CONST_DATA_PTR)
   NODE_NAME_CASE(STORE_MSKOR)
   NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
   }
@@ -78,8 +78,8 @@ protected:
   virtual SDValue CreateLiveInRegister(SelectionDAG &DAG,
                                        const TargetRegisterClass *RC,
                                        unsigned Reg, EVT VT) const;
-  SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
+  virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                              SelectionDAG &DAG) const;
   /// \brief Split a vector load into multiple scalar loads.
   SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
   SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -233,6 +233,8 @@ enum {
   /// T2|v.z| | | |
   /// T3|v.w| | | |
   BUILD_VERTICAL_VECTOR,
+  /// Pointer to the start of the shader's constant data.
+  CONST_DATA_PTR,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
   STORE_MSKOR,
   LOAD_CONSTANT,
@@ -22,7 +22,9 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
 #include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCObjectStreamer.h"
@@ -77,6 +79,20 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
     case MachineOperand::MO_MachineBasicBlock:
       MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
                                    MO.getMBB()->getSymbol(), Ctx));
+      break;
+    case MachineOperand::MO_GlobalAddress: {
+      const GlobalValue *GV = MO.getGlobal();
+      MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(GV->getName()));
+      MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(Sym, Ctx));
+      break;
+    }
+    case MachineOperand::MO_TargetIndex: {
+      assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
+      MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+      const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+      MCOp = MCOperand::CreateExpr(Expr);
+      break;
+    }
     }
     OutMI.addOperand(MCOp);
   }
@@ -45,7 +45,7 @@ public:
   AMDGPUAsmBackend(const Target &T)
     : MCAsmBackend() {}
 
-  unsigned getNumFixupKinds() const override { return 0; };
+  unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; };
   void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
                   uint64_t Value, bool IsPCRel) const override;
   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
@@ -77,16 +77,37 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                                   unsigned DataSize, uint64_t Value,
                                   bool IsPCRel) const {
 
-  uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
-  assert(Fixup.getKind() == FK_PCRel_4);
-  *Dst = (Value - 4) / 4;
+  switch ((unsigned)Fixup.getKind()) {
+  default: llvm_unreachable("Unknown fixup kind");
+  case AMDGPU::fixup_si_sopp_br: {
+    uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
+    *Dst = (Value - 4) / 4;
+    break;
+  }
+
+  case AMDGPU::fixup_si_rodata: {
+    uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
+    *Dst = Value;
+    break;
+  }
+
+  case AMDGPU::fixup_si_end_of_text: {
+    uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
+    // The value points to the last instruction in the text section, so we
+    // need to add 4 bytes to get to the start of the constants.
+    *Dst = Value + 4;
+    break;
+  }
+  }
 }
 
 const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
                                                        MCFixupKind Kind) const {
   const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
     // name                   offset bits  flags
-    { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }
+    { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+    { "fixup_si_rodata", 0, 32, 0 },
+    { "fixup_si_end_of_text", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
   };
 
   if (Kind < FirstTargetFixupKind)
@@ -10,6 +10,7 @@
 
 #include "AMDGPUMCTargetDesc.h"
 #include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
 
 using namespace llvm;
 
@@ -21,7 +22,7 @@ public:
 protected:
   unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                         bool IsPCRel) const override {
-    llvm_unreachable("Not implemented");
+    return Fixup.getKind();
   }
 
 };
@@ -18,6 +18,12 @@ enum Fixups {
   /// 16-bit PC relative fixup for SOPP branch instructions.
   fixup_si_sopp_br = FirstTargetFixupKind,
 
+  /// fixup for global addresses with constant initializers
+  fixup_si_rodata,
+
+  /// fixup for offset from instruction to end of text section
+  fixup_si_end_of_text,
+
   // Marker
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
@@ -13,6 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AMDGPU.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
 #include "MCTargetDesc/AMDGPUFixupKinds.h"
@@ -40,6 +41,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
   void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
   const MCInstrInfo &MCII;
   const MCRegisterInfo &MRI;
+  MCContext &Ctx;
 
   /// \brief Can this operand also contain immediate values?
   bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
@@ -50,7 +52,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
 public:
   SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                   MCContext &ctx)
-    : MCII(mcii), MRI(mri) { }
+    : MCII(mcii), MRI(mri), Ctx(ctx) { }
 
   ~SIMCCodeEmitter() { }
 
@@ -97,6 +99,8 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
     Imm.I = MO.getImm();
   else if (MO.isFPImm())
     Imm.F = MO.getFPImm();
+  else if (MO.isExpr())
+    return 255;
   else
     return ~0;
 
@@ -164,8 +168,13 @@ void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     IntFloatUnion Imm;
     if (Op.isImm())
       Imm.I = Op.getImm();
-    else
+    else if (Op.isFPImm())
       Imm.F = Op.getFPImm();
+    else {
+      assert(Op.isExpr());
+      // This will be replaced with a fixup value.
+      Imm.I = 0;
+    }
 
     for (unsigned j = 0; j < 4; j++) {
       OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff));
@@ -198,6 +207,22 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
   if (MO.isReg())
     return MRI.getEncodingValue(MO.getReg());
 
+  if (MO.isExpr()) {
+    const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
+    MCFixupKind Kind;
+    const MCSymbol *Sym =
+        Ctx.GetOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+
+    if (&Expr->getSymbol() == Sym) {
+      // Add the offset to the beginning of the constant values.
+      Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
+    } else {
+      // This is used for constant data stored in .rodata.
+      Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
+    }
+    Fixups.push_back(MCFixup::Create(4, Expr, Kind, MI.getLoc()));
+  }
+
   // Figure out the operand number, needed for isSrcOperand check
   unsigned OpNo = 0;
   for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
@@ -19,6 +19,7 @@
 #include "R600Defines.h"
 #include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -1526,6 +1527,19 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
     return DAG.getMergeValues(Ops, DL);
   }
 
+  // Lower loads constant address space global variable loads
+  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+      isa<GlobalVariable>(
+          GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {
+
+    SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
+        getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
+    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
+        DAG.getConstant(2, MVT::i32));
+    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
+        LoadNode->getChain(), Ptr,
+        DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
+  }
 
   if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
     SDValue MergedValues[2] = {
@@ -860,6 +860,34 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
   return Chain;
 }
 
+SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
+                                             SDValue Op,
+                                             SelectionDAG &DAG) const {
+  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
+
+  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
+
+  SDLoc DL(GSD);
+  const GlobalValue *GV = GSD->getGlobal();
+  MVT PtrVT = getPointerTy(GSD->getAddressSpace());
+
+  SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT);
+  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+
+  SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
+                              DAG.getConstant(0, MVT::i32));
+  SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
+                              DAG.getConstant(1, MVT::i32));
+
+  SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue),
+                           PtrLo, GA);
+  SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue),
+                           PtrHi, DAG.getConstant(0, MVT::i32),
+                           SDValue(Lo.getNode(), 1));
+  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+}
+
 SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -25,6 +25,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
                                SDValue Chain, unsigned Offset, bool Signed) const;
   SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
                                SelectionDAG &DAG) const;
+  SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
+                             SelectionDAG &DAG) const override;
   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const;
@@ -361,6 +361,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
     MI->eraseFromParent();
     break;
   }
+  case AMDGPU::SI_CONSTDATA_PTR: {
+    unsigned Reg = MI->getOperand(0).getReg();
+    unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
+    unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
+
+    BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
+
+    // Add 32-bit offset from this instruction to the start of the constant data.
+    BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_I32), RegLo)
+            .addReg(RegLo)
+            .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
+            .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
+    BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
+            .addReg(RegHi)
+            .addImm(0)
+            .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
+            .addReg(AMDGPU::SCC, RegState::Implicit);
+    MI->eraseFromParent();
+    break;
+  }
   }
   return true;
 }
@@ -57,6 +57,10 @@ def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
 def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
 def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
 
+def SIconstdata_ptr : SDNode<
+  "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
+>;
+
 // Transformation function, extract the lower 32bit of a 64bit immediate
 def LO32 : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
@@ -139,7 +139,11 @@ def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16",
 ////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
 ////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
 ////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
-def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
+def S_GETPC_B64 : SOP1 <
+  0x0000001f, (outs SReg_64:$dst), (ins), "S_GETPC_B64 $dst", []
+> {
+  let SSRC0 = 0;
+}
 def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
 def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
 def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
@@ -1694,6 +1698,16 @@ defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 
+let Defs = [SCC] in {
+
+def SI_CONSTDATA_PTR : InstSI <
+  (outs SReg_64:$dst),
+  (ins),
+  "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))]
+>;
+
+} // End Defs = [SCC]
+
 } // end IsCodeGenOnly, isPseudo
 
 } // end SubtargetPredicate = SI
@@ -4,11 +4,11 @@
 
 @b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
 
-; XXX: Test on SI once 64-bit adds are supportes.
-
 @float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
 
 ; FUNC-LABEL: @float
+; FIXME: We should be using S_LOAD_DWORD here.
+; SI: BUFFER_LOAD_DWORD
 
 ; EG-DAG: MOV {{\** *}}T2.X
 ; EG-DAG: MOV {{\** *}}T3.X
@@ -29,6 +29,9 @@ entry:
 
 ; FUNC-LABEL: @i32
 
+; FIXME: We should be using S_LOAD_DWORD here.
+; SI: BUFFER_LOAD_DWORD
+
 ; EG-DAG: MOV {{\** *}}T2.X
 ; EG-DAG: MOV {{\** *}}T3.X
 ; EG-DAG: MOV {{\** *}}T4.X
@@ -50,6 +53,7 @@ entry:
 @struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
 
 ; FUNC-LABEL: @struct_foo_gv_load
+; SI: S_LOAD_DWORD
 
 define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
@@ -64,6 +68,8 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
                                   <1 x i32> <i32 4> ]
 
 ; FUNC-LABEL: @array_v1_gv_load
+; FIXME: We should be using S_LOAD_DWORD here.
+; SI: BUFFER_LOAD_DWORD
 define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
   %load = load <1 x i32> addrspace(2)* %gep, align 4
@@ -1,6 +1,5 @@
-; XFAIL: *
-; REQUIRES: asserts
 ; RUN: llc -march=r600 -mcpu=SI < %s
+; CHECK: S_ENDPGM
 
 @gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4
 