mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-11-04 05:17:07 +00:00 
			
		
		
		
	R600/SI: Add subtarget feature indicating whether f32 fma is fast
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227483 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		@@ -48,6 +48,12 @@ def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
 | 
				
			|||||||
        "Enable double precision denormal handling",
 | 
					        "Enable double precision denormal handling",
 | 
				
			||||||
        [FeatureFP64]>;
 | 
					        [FeatureFP64]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
 | 
				
			||||||
 | 
					        "FastFMAF32",
 | 
				
			||||||
 | 
					        "true",
 | 
				
			||||||
 | 
					        "Assuming f32 fma is at least as fast as mul + add",
 | 
				
			||||||
 | 
					        []>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Some instructions do not support denormals despite this flag. Using
 | 
					// Some instructions do not support denormals despite this flag. Using
 | 
				
			||||||
// fp32 denormals also causes instructions to run at the double
 | 
					// fp32 denormals also causes instructions to run at the double
 | 
				
			||||||
// precision rate for the device.
 | 
					// precision rate for the device.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -65,7 +65,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
 | 
				
			|||||||
    : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
 | 
					    : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false),
 | 
				
			||||||
      DumpCode(false), R600ALUInst(false), HasVertexCache(false),
 | 
					      DumpCode(false), R600ALUInst(false), HasVertexCache(false),
 | 
				
			||||||
      TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
 | 
					      TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
 | 
				
			||||||
      FP64Denormals(false), FP32Denormals(false), CaymanISA(false),
 | 
					      FP64Denormals(false), FP32Denormals(false),
 | 
				
			||||||
 | 
					      FastFMAF32(false), CaymanISA(false),
 | 
				
			||||||
      FlatAddressSpace(false), EnableIRStructurizer(true),
 | 
					      FlatAddressSpace(false), EnableIRStructurizer(true),
 | 
				
			||||||
      EnablePromoteAlloca(false), EnableIfCvt(true),
 | 
					      EnablePromoteAlloca(false), EnableIfCvt(true),
 | 
				
			||||||
      EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
 | 
					      EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -55,6 +55,7 @@ private:
 | 
				
			|||||||
  bool FP64;
 | 
					  bool FP64;
 | 
				
			||||||
  bool FP64Denormals;
 | 
					  bool FP64Denormals;
 | 
				
			||||||
  bool FP32Denormals;
 | 
					  bool FP32Denormals;
 | 
				
			||||||
 | 
					  bool FastFMAF32;
 | 
				
			||||||
  bool CaymanISA;
 | 
					  bool CaymanISA;
 | 
				
			||||||
  bool FlatAddressSpace;
 | 
					  bool FlatAddressSpace;
 | 
				
			||||||
  bool EnableIRStructurizer;
 | 
					  bool EnableIRStructurizer;
 | 
				
			||||||
@@ -127,6 +128,10 @@ public:
 | 
				
			|||||||
    return FP64Denormals;
 | 
					    return FP64Denormals;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  bool hasFastFMAF32() const {
 | 
				
			||||||
 | 
					    return FastFMAF32;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  bool hasFlatAddressSpace() const {
 | 
					  bool hasFlatAddressSpace() const {
 | 
				
			||||||
    return FlatAddressSpace;
 | 
					    return FlatAddressSpace;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -83,9 +83,13 @@ def : Proc<"cayman",     R600_VLIW4_Itin,
 | 
				
			|||||||
// Southern Islands
 | 
					// Southern Islands
 | 
				
			||||||
//===----------------------------------------------------------------------===//
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : ProcessorModel<"SI",       SIFullSpeedModel, [FeatureSouthernIslands]>;
 | 
					def : ProcessorModel<"SI", SIFullSpeedModel,
 | 
				
			||||||
 | 
					  [FeatureSouthernIslands, FeatureFastFMAF32]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : ProcessorModel<"tahiti",   SIFullSpeedModel, [FeatureSouthernIslands]>;
 | 
					def : ProcessorModel<"tahiti",   SIFullSpeedModel,
 | 
				
			||||||
 | 
					  [FeatureSouthernIslands, FeatureFastFMAF32]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
 | 
					def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -105,7 +109,9 @@ def : ProcessorModel<"kabini",     SIQuarterSpeedModel, [FeatureSeaIslands]>;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def : ProcessorModel<"kaveri",     SIQuarterSpeedModel, [FeatureSeaIslands]>;
 | 
					def : ProcessorModel<"kaveri",     SIQuarterSpeedModel, [FeatureSeaIslands]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : ProcessorModel<"hawaii",     SIFullSpeedModel, [FeatureSeaIslands]>;
 | 
					def : ProcessorModel<"hawaii", SIFullSpeedModel,
 | 
				
			||||||
 | 
					  [FeatureSeaIslands, FeatureFastFMAF32]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : ProcessorModel<"mullins",    SIQuarterSpeedModel, [FeatureSeaIslands]>;
 | 
					def : ProcessorModel<"mullins",    SIQuarterSpeedModel, [FeatureSeaIslands]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -655,7 +655,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  switch (VT.getSimpleVT().SimpleTy) {
 | 
					  switch (VT.getSimpleVT().SimpleTy) {
 | 
				
			||||||
  case MVT::f32:
 | 
					  case MVT::f32:
 | 
				
			||||||
    return false; /* There is V_MAD_F32 for f32 */
 | 
					    return Subtarget->hasFastFMAF32();
 | 
				
			||||||
  case MVT::f64:
 | 
					  case MVT::f64:
 | 
				
			||||||
    return true;
 | 
					    return true;
 | 
				
			||||||
  default:
 | 
					  default:
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user