diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 569ad3844b2..ef8ef626854 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -98,6 +98,16 @@ def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt", "true", "Enable SI load/store optimizer pass">; +// Performance debugging feature. Allow using DS instruction immediate +// offsets even if the base pointer can't be proven to be non-negative. On SI, +// base pointer values that won't give the same result as a 16-bit add +// are not safe to fold, but this will override the conservative test +// for the base pointer. +def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding", + "EnableUnsafeDSOffsetFolding", + "true", + "Force using DS instruction immediate offsets on SI">; + def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "FlatAddressSpace", "true", diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index df4461eac4d..240afb75197 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -859,7 +859,8 @@ bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, (OffsetBits == 8 && !isUInt<8>(Offset))) return false; - if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || + Subtarget->unsafeDSOffsetFoldingEnabled()) return true; // On Southern Islands instruction with a negative base value and an offset diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 0779d1d786b..bd5abc4f546 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -69,6 +69,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FP64Denormals(false), FP32Denormals(false), FastFMAF32(false), CaymanISA(false), FlatAddressSpace(false), 
EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), + EnableUnsafeDSOffsetFolding(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 30f50eb1d2f..90831bfb445 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -76,6 +76,7 @@ private: bool EnablePromoteAlloca; bool EnableIfCvt; bool EnableLoadStoreOpt; + bool EnableUnsafeDSOffsetFolding; unsigned WavefrontSize; bool CFALUBug; int LocalMemorySize; @@ -222,6 +223,10 @@ public: return EnableLoadStoreOpt; } + bool unsafeDSOffsetFoldingEnabled() const { + return EnableUnsafeDSOffsetFolding; + } + unsigned getWavefrontSize() const { return WavefrontSize; } diff --git a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll index e7e13d6178c..5e4654abd91 100644 --- a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck 
-check-prefix=CI --check-prefix=CHECK %s declare i32 @llvm.r600.read.tidig.x() #0 declare void @llvm.AMDGPU.barrier.local() #1