mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-12 13:30:51 +00:00
AMDGPU/SI: Use AssertZext node to mask high bit for scratch offsets
Summary: We can safely assume that the high bit of scratch offsets will never be set, because this would require at least 128 GB of GPU memory. Reviewers: arsenm Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D11225 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242433 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0219a272ec
commit
cac05d9b58
@ -123,6 +123,11 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
|
|||||||
"true",
|
"true",
|
||||||
"VI SGPR initilization bug requiring a fixed SGPR allocation size">;
|
"VI SGPR initilization bug requiring a fixed SGPR allocation size">;
|
||||||
|
|
||||||
|
def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer",
|
||||||
|
"EnableHugeScratchBuffer",
|
||||||
|
"true",
|
||||||
|
"Enable scratch buffer sizes greater than 128 GB">;
|
||||||
|
|
||||||
class SubtargetFeatureFetchLimit <string Value> :
|
class SubtargetFeatureFetchLimit <string Value> :
|
||||||
SubtargetFeature <"fetch"#Value,
|
SubtargetFeature <"fetch"#Value,
|
||||||
"TexVTXClauseSize",
|
"TexVTXClauseSize",
|
||||||
|
@ -73,7 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||||||
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
|
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
|
||||||
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
|
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
|
||||||
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
|
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
|
||||||
IsaVersion(ISAVersion0_0_0),
|
IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
|
||||||
FrameLowering(TargetFrameLowering::StackGrowsUp,
|
FrameLowering(TargetFrameLowering::StackGrowsUp,
|
||||||
64 * 16, // Maximum stack alignment (long16)
|
64 * 16, // Maximum stack alignment (long16)
|
||||||
0),
|
0),
|
||||||
|
@ -89,6 +89,7 @@ private:
|
|||||||
bool FeatureDisable;
|
bool FeatureDisable;
|
||||||
int LDSBankCount;
|
int LDSBankCount;
|
||||||
unsigned IsaVersion;
|
unsigned IsaVersion;
|
||||||
|
bool EnableHugeScratchBuffer;
|
||||||
|
|
||||||
AMDGPUFrameLowering FrameLowering;
|
AMDGPUFrameLowering FrameLowering;
|
||||||
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
|
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
|
||||||
@ -271,6 +272,10 @@ public:
|
|||||||
return DevName;
|
return DevName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool enableHugeScratchBuffer() const {
|
||||||
|
return EnableHugeScratchBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
bool dumpCode() const {
|
bool dumpCode() const {
|
||||||
return DumpCode;
|
return DumpCode;
|
||||||
}
|
}
|
||||||
|
@ -812,10 +812,29 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
|
|||||||
|
|
||||||
SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
|
SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
|
||||||
|
|
||||||
|
SDLoc SL(Op);
|
||||||
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
|
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
|
||||||
unsigned FrameIndex = FINode->getIndex();
|
unsigned FrameIndex = FINode->getIndex();
|
||||||
|
|
||||||
return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
|
// A FrameIndex node represents a 32-bit offset into scratch memory. If
|
||||||
|
// the high bit of a frame index offset were to be set, this would mean
|
||||||
|
// that it represented an offset of ~2GB * 64 = ~128GB from the start of the
|
||||||
|
// scratch buffer, with 64 being the number of threads per wave.
|
||||||
|
//
|
||||||
|
// If we know the machine uses less than 128GB of scratch, then we can
|
||||||
|
// mark the high bit of the FrameIndex node as known zero,
|
||||||
|
// which is important, because it means in most situations we can
|
||||||
|
// prove that values derived from FrameIndex nodes are non-negative.
|
||||||
|
// This enables us to take advantage of more addressing modes when
|
||||||
|
// accessing scratch buffers, since for scratch reads/writes, the register
|
||||||
|
// offset must always be non-negative.
|
||||||
|
|
||||||
|
SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
|
||||||
|
if (Subtarget->enableHugeScratchBuffer())
|
||||||
|
return TFI;
|
||||||
|
|
||||||
|
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI,
|
||||||
|
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 31)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This transforms the control flow intrinsics to get the branch destination as
|
/// This transforms the control flow intrinsics to get the branch destination as
|
||||||
@ -2034,6 +2053,13 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool isFrameIndexOp(SDValue Op) {
|
||||||
|
if (Op.getOpcode() == ISD::AssertZext)
|
||||||
|
Op = Op.getOperand(0);
|
||||||
|
|
||||||
|
return isa<FrameIndexSDNode>(Op);
|
||||||
|
}
|
||||||
|
|
||||||
/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
|
/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
|
||||||
/// with frame index operands.
|
/// with frame index operands.
|
||||||
/// LLVM assumes that inputs to these instructions are registers.
|
/// LLVM assumes that inputs to these instructions are registers.
|
||||||
@ -2042,7 +2068,7 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
|
|||||||
|
|
||||||
SmallVector<SDValue, 8> Ops;
|
SmallVector<SDValue, 8> Ops;
|
||||||
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
|
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
|
||||||
if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
|
if (!isFrameIndexOp(Node->getOperand(i))) {
|
||||||
Ops.push_back(Node->getOperand(i));
|
Ops.push_back(Node->getOperand(i));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user