R600/SI: Fix bug with v_interp_p1_f32 instructions on 16 bank lds chips

The src and dst register cannot be the same on chips with 16 lds banks.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238147 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2015-05-25 16:15:54 +00:00
parent cbb915183e
commit 38aad1c16a
6 changed files with 91 additions and 22 deletions

View File

@ -132,6 +132,15 @@ def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
"ldsbankcount"#Value,
"LDSBankCount",
!cast<string>(Value),
"The number of LDS banks per compute unit.">;
def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
@ -189,7 +198,8 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding]>;
FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
FeatureLDSBankCount32]>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
@ -199,7 +209,7 @@ def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts]>;
FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
//===----------------------------------------------------------------------===//

View File

@ -72,6 +72,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false),
IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false),
LDSBankCount(0),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),

View File

@ -76,6 +76,7 @@ private:
bool GCN3Encoding;
bool CIInsts;
bool FeatureDisable;
int LDSBankCount;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@ -228,6 +229,10 @@ public:
return SGPRInitBug;
}
int getLDSBankCount() const {
return LDSBankCount;
}
unsigned getAmdKernelCodeChipID() const;
bool enableMachineScheduler() const override {

View File

@ -103,17 +103,24 @@ def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
// Sea Islands
//===----------------------------------------------------------------------===//
def : ProcessorModel<"bonaire", SIQuarterSpeedModel, [FeatureSeaIslands]>;
def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>;
def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>;
def : ProcessorModel<"hawaii", SIFullSpeedModel,
[FeatureSeaIslands, FeatureFastFMAF32]
def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
[FeatureSeaIslands, FeatureLDSBankCount32]
>;
def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
def : ProcessorModel<"kabini", SIQuarterSpeedModel,
[FeatureSeaIslands, FeatureLDSBankCount16]
>;
def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
[FeatureSeaIslands, FeatureLDSBankCount32]
>;
def : ProcessorModel<"hawaii", SIFullSpeedModel,
[FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32]
>;
def : ProcessorModel<"mullins", SIQuarterSpeedModel,
[FeatureSeaIslands, FeatureLDSBankCount16]>;
//===----------------------------------------------------------------------===//
// Volcanic Islands

View File

@ -34,6 +34,9 @@ def isSI : Predicate<"Subtarget->getGeneration() "
def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
def SWaitMatchClass : AsmOperandClass {
let Name = "SWaitCnt";
let RenderMethod = "addImmOperands";
@ -1436,13 +1439,27 @@ defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
let Uses = [M0] in {
// FIXME: Specify SchedRW for VINTRP insturctions.
defm V_INTERP_P1_F32 : VINTRP_m <
0x00000000,
multiclass V_INTERP_P1_F32_m : VINTRP_m <
0x00000000,
(outs VGPR_32:$dst),
(ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
"v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
[(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
(i32 imm:$attr)))]>;
(i32 imm:$attr)))]
>;
let OtherPredicates = [has32BankLDS] in {
defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;
} // End OtherPredicates = [has32BankLDS]
let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst" in {
defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;
} // End OtherPredicates = [has32BankLDS], Constraints = "@earlyclobber $dst"
defm V_INTERP_P2_F32 : VINTRP_m <
0x00000001,

View File

@ -1,11 +1,13 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
;RUN: llc < %s -march=amdgcn -mcpu=kabini -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=16BANK %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
;CHECK-NOT: s_wqm
;CHECK: s_mov_b32
;CHECK-NEXT: v_interp_mov_f32
;CHECK: v_interp_p1_f32
;CHECK: v_interp_p2_f32
;GCN-LABEL: {{^}}main:
;GCN-NOT: s_wqm
;GCN: s_mov_b32
;GCN-NEXT: v_interp_mov_f32
;GCN: v_interp_p1_f32
;GCN: v_interp_p2_f32
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
main_body:
@ -16,7 +18,33 @@ main_body:
ret void
}
declare void @llvm.AMDGPU.shader.type(i32)
; Thest that v_interp_p1 uses different source and destination registers
; on 16 bank LDS chips.
; 16BANK-LABEL: {{^}}v_interp_p1_bank16_bug:
; 16BANK-NOT: v_interp_p1_f32 [[DST:v[0-9]+]], [[DST]]
define void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%25 = call float @fabs(float %22)
%26 = call float @fabs(float %23)
%27 = call float @fabs(float %24)
%28 = call i32 @llvm.SI.packf16(float %25, float %26)
%29 = bitcast i32 %28 to float
%30 = call i32 @llvm.SI.packf16(float %27, float 1.000000e+00)
%31 = bitcast i32 %30 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31)
ret void
}
; Function Attrs: readnone
declare float @fabs(float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1
@ -28,3 +56,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }