mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-06 04:18:00 +00:00
R600/SI: Add subtarget feature for whether f32 fma is fast
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227483 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -48,6 +48,12 @@ def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
|
|||||||
"Enable double precision denormal handling",
|
"Enable double precision denormal handling",
|
||||||
[FeatureFP64]>;
|
[FeatureFP64]>;
|
||||||
|
|
||||||
|
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
|
||||||
|
"FastFMAF32",
|
||||||
|
"true",
|
||||||
|
"Assuming f32 fma is at least as fast as mul + add",
|
||||||
|
[]>;
|
||||||
|
|
||||||
// Some instructions do not support denormals despite this flag. Using
|
// Some instructions do not support denormals despite this flag. Using
|
||||||
// fp32 denormals also causes instructions to run at the double
|
// fp32 denormals also causes instructions to run at the double
|
||||||
// precision rate for the device.
|
// precision rate for the device.
|
||||||
|
|||||||
@@ -65,7 +65,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
|
|||||||
: AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
|
: AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
|
||||||
DumpCode(false), R600ALUInst(false), HasVertexCache(false),
|
DumpCode(false), R600ALUInst(false), HasVertexCache(false),
|
||||||
TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
|
TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
|
||||||
FP64Denormals(false), FP32Denormals(false), CaymanISA(false),
|
FP64Denormals(false), FP32Denormals(false),
|
||||||
|
FastFMAF32(false), CaymanISA(false),
|
||||||
FlatAddressSpace(false), EnableIRStructurizer(true),
|
FlatAddressSpace(false), EnableIRStructurizer(true),
|
||||||
EnablePromoteAlloca(false), EnableIfCvt(true),
|
EnablePromoteAlloca(false), EnableIfCvt(true),
|
||||||
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
|
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ private:
|
|||||||
bool FP64;
|
bool FP64;
|
||||||
bool FP64Denormals;
|
bool FP64Denormals;
|
||||||
bool FP32Denormals;
|
bool FP32Denormals;
|
||||||
|
bool FastFMAF32;
|
||||||
bool CaymanISA;
|
bool CaymanISA;
|
||||||
bool FlatAddressSpace;
|
bool FlatAddressSpace;
|
||||||
bool EnableIRStructurizer;
|
bool EnableIRStructurizer;
|
||||||
@@ -127,6 +128,10 @@ public:
|
|||||||
return FP64Denormals;
|
return FP64Denormals;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool hasFastFMAF32() const {
|
||||||
|
return FastFMAF32;
|
||||||
|
}
|
||||||
|
|
||||||
bool hasFlatAddressSpace() const {
|
bool hasFlatAddressSpace() const {
|
||||||
return FlatAddressSpace;
|
return FlatAddressSpace;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -83,9 +83,13 @@ def : Proc<"cayman", R600_VLIW4_Itin,
|
|||||||
// Southern Islands
|
// Southern Islands
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def : ProcessorModel<"SI", SIFullSpeedModel, [FeatureSouthernIslands]>;
|
def : ProcessorModel<"SI", SIFullSpeedModel,
|
||||||
|
[FeatureSouthernIslands, FeatureFastFMAF32]
|
||||||
|
>;
|
||||||
|
|
||||||
def : ProcessorModel<"tahiti", SIFullSpeedModel, [FeatureSouthernIslands]>;
|
def : ProcessorModel<"tahiti", SIFullSpeedModel,
|
||||||
|
[FeatureSouthernIslands, FeatureFastFMAF32]
|
||||||
|
>;
|
||||||
|
|
||||||
def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
|
def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
|
||||||
|
|
||||||
@@ -105,7 +109,9 @@ def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>;
|
|||||||
|
|
||||||
def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>;
|
def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>;
|
||||||
|
|
||||||
def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>;
|
def : ProcessorModel<"hawaii", SIFullSpeedModel,
|
||||||
|
[FeatureSeaIslands, FeatureFastFMAF32]
|
||||||
|
>;
|
||||||
|
|
||||||
def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
|
def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
|
||||||
|
|
||||||
|
|||||||
@@ -655,7 +655,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
|||||||
|
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
switch (VT.getSimpleVT().SimpleTy) {
|
||||||
case MVT::f32:
|
case MVT::f32:
|
||||||
return false; /* There is V_MAD_F32 for f32 */
|
return Subtarget->hasFastFMAF32();
|
||||||
case MVT::f64:
|
case MVT::f64:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
|
|||||||
Reference in New Issue
Block a user