mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-31 08:16:47 +00:00 
			
		
		
		
	AMDGPU/SI: Use AssertZext node to mask high bit for scratch offsets
Summary: We can safely assume that the high bit of scratch offsets will never be set, because this would require at least 128 GB of GPU memory. Reviewers: arsenm Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D11225 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242433 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -123,6 +123,11 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", | |||||||
|         "true", |         "true", | ||||||
|         "VI SGPR initilization bug requiring a fixed SGPR allocation size">; |         "VI SGPR initilization bug requiring a fixed SGPR allocation size">; | ||||||
|  |  | ||||||
|  | def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer", | ||||||
|  |         "EnableHugeScratchBuffer", | ||||||
|  |         "true", | ||||||
|  |         "Enable scratch buffer sizes greater than 128 GB">; | ||||||
|  |  | ||||||
| class SubtargetFeatureFetchLimit <string Value> : | class SubtargetFeatureFetchLimit <string Value> : | ||||||
|                           SubtargetFeature <"fetch"#Value, |                           SubtargetFeature <"fetch"#Value, | ||||||
|         "TexVTXClauseSize", |         "TexVTXClauseSize", | ||||||
|   | |||||||
| @@ -73,7 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, | |||||||
|       WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), |       WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), | ||||||
|       EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), |       EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), | ||||||
|       GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), |       GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), | ||||||
|       IsaVersion(ISAVersion0_0_0), |       IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false), | ||||||
|       FrameLowering(TargetFrameLowering::StackGrowsUp, |       FrameLowering(TargetFrameLowering::StackGrowsUp, | ||||||
|                     64 * 16, // Maximum stack alignment (long16) |                     64 * 16, // Maximum stack alignment (long16) | ||||||
|                     0), |                     0), | ||||||
|   | |||||||
| @@ -89,6 +89,7 @@ private: | |||||||
|   bool FeatureDisable; |   bool FeatureDisable; | ||||||
|   int LDSBankCount; |   int LDSBankCount; | ||||||
|   unsigned IsaVersion;  |   unsigned IsaVersion;  | ||||||
|  |   bool EnableHugeScratchBuffer; | ||||||
|  |  | ||||||
|   AMDGPUFrameLowering FrameLowering; |   AMDGPUFrameLowering FrameLowering; | ||||||
|   std::unique_ptr<AMDGPUTargetLowering> TLInfo; |   std::unique_ptr<AMDGPUTargetLowering> TLInfo; | ||||||
| @@ -271,6 +272,10 @@ public: | |||||||
|     return DevName; |     return DevName; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   bool enableHugeScratchBuffer() const { | ||||||
|  |     return EnableHugeScratchBuffer; | ||||||
|  |   } | ||||||
|  |  | ||||||
|   bool dumpCode() const { |   bool dumpCode() const { | ||||||
|     return DumpCode; |     return DumpCode; | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -812,10 +812,29 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) { | |||||||
|  |  | ||||||
| SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { | SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { | ||||||
|  |  | ||||||
|  |   SDLoc SL(Op); | ||||||
|   FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op); |   FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op); | ||||||
|   unsigned FrameIndex = FINode->getIndex(); |   unsigned FrameIndex = FINode->getIndex(); | ||||||
|  |  | ||||||
|   return DAG.getTargetFrameIndex(FrameIndex, MVT::i32); |   // A FrameIndex node represents a 32-bit offset into scratch memory.  If | ||||||
|  |   // the high bit of a frame index offset were to be set, this would mean | ||||||
|  |   // that it represented an offset of ~2GB * 64 = ~128GB from the start of the | ||||||
|  |   // scratch buffer, with 64 being the number of threads per wave. | ||||||
|  |   // | ||||||
|  |   // If we know the machine uses less than 128GB of scratch, then we can | ||||||
|  |   // mark the high bit of the FrameIndex node as known zero, | ||||||
|  |   // which is important, because it means in most situations we can | ||||||
|  |   // prove that values derived from FrameIndex nodes are non-negative. | ||||||
|  |   // This enables us to take advantage of more addressing modes when | ||||||
|  |   // accessing scratch buffers, since for scratch reads/writes, the register | ||||||
|  |   // offset must always be positive. | ||||||
|  |  | ||||||
|  |   SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32); | ||||||
|  |   if (Subtarget->enableHugeScratchBuffer()) | ||||||
|  |     return TFI; | ||||||
|  |  | ||||||
|  |   return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI, | ||||||
|  |                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 31))); | ||||||
| } | } | ||||||
|  |  | ||||||
| /// This transforms the control flow intrinsics to get the branch destination as | /// This transforms the control flow intrinsics to get the branch destination as | ||||||
| @@ -2034,6 +2053,13 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static bool isFrameIndexOp(SDValue Op) { | ||||||
|  |   if (Op.getOpcode() == ISD::AssertZext) | ||||||
|  |     Op = Op.getOperand(0); | ||||||
|  |  | ||||||
|  |   return isa<FrameIndexSDNode>(Op); | ||||||
|  | } | ||||||
|  |  | ||||||
| /// \brief Legalize target independent instructions (e.g. INSERT_SUBREG) | /// \brief Legalize target independent instructions (e.g. INSERT_SUBREG) | ||||||
| /// with frame index operands. | /// with frame index operands. | ||||||
| /// LLVM assumes that inputs to these instructions are registers. | /// LLVM assumes that inputs to these instructions are registers. | ||||||
| @@ -2042,7 +2068,7 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, | |||||||
|  |  | ||||||
|   SmallVector<SDValue, 8> Ops; |   SmallVector<SDValue, 8> Ops; | ||||||
|   for (unsigned i = 0; i < Node->getNumOperands(); ++i) { |   for (unsigned i = 0; i < Node->getNumOperands(); ++i) { | ||||||
|     if (!isa<FrameIndexSDNode>(Node->getOperand(i))) { |     if (!isFrameIndexOp(Node->getOperand(i))) { | ||||||
|       Ops.push_back(Node->getOperand(i)); |       Ops.push_back(Node->getOperand(i)); | ||||||
|       continue; |       continue; | ||||||
|     } |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user