AMDGPU/SI: Add debugging subtarget feature for DS offsets

We don't have a good way to detect most situations where
DS offsets are usable on SI, so add an option to force using
them even if unsafe for debugging performance problems.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241462 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2015-07-06 16:01:58 +00:00
parent 067b86fcab
commit 6fe7acaaf8
5 changed files with 21 additions and 3 deletions

View File

@ -98,6 +98,16 @@ def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
"true",
"Enable SI load/store optimizer pass">;
// Performance debugging feature. Allow using DS instruction immediate
// offsets even if the base pointer can't be proven to be base. On SI,
// base pointer values that won't give the same result as a 16-bit add
// are not safe to fold, but this will override the conservative test
// for the base pointer.
def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding",
"EnableUnsafeDSOffsetFolding",
"true",
"Force using DS instruction immediate offsets on SI">;
def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"FlatAddressSpace",
"true",

View File

@ -859,7 +859,8 @@ bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
(OffsetBits == 8 && !isUInt<8>(Offset)))
return false;
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
Subtarget->unsafeDSOffsetFoldingEnabled())
return true;
// On Southern Islands instruction with a negative base value and an offset

View File

@ -69,6 +69,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),

View File

@ -76,6 +76,7 @@ private:
bool EnablePromoteAlloca;
bool EnableIfCvt;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
@ -222,6 +223,10 @@ public:
return EnableLoadStoreOpt;
}
bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding;
}
unsigned getWavefrontSize() const {
return WavefrontSize;
}

View File

@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
declare i32 @llvm.r600.read.tidig.x() #0
declare void @llvm.AMDGPU.barrier.local() #1