mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-10 02:36:06 +00:00
R600/SI: Add FP mode bits to binary.
The default rounding mode to initialize the mode register needs to be reported to the runtime. Fill in other bits a kernel may be interested in setting for future use. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211791 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2711c0a68b
commit
3cd8cf6bbd
@ -36,6 +36,24 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// TODO: This should get the default rounding mode from the kernel. We just set
|
||||
// the default here, but this could change if the OpenCL rounding mode pragmas
|
||||
// are used.
|
||||
//
|
||||
// The denormal mode here should match what is reported by the OpenCL runtime
|
||||
// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
|
||||
// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
|
||||
//
|
||||
// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
|
||||
// precision, and leaves single precision to flush all and does not report
|
||||
// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
|
||||
// CL_FP_DENORM for both.
|
||||
static uint32_t getFPMode(MachineFunction &) {
  // Rounding: round-to-nearest for both single and double precision.
  const uint32_t RoundBits = FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
                             FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST);

  // Denormals: no flushing for either single or double precision.
  const uint32_t DenormBits = FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) |
                              FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);

  // The function argument is currently unused; the same default is returned
  // for every function.
  return RoundBits | DenormBits;
}
|
||||
|
||||
static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
|
||||
MCStreamer &Streamer) {
|
||||
@ -93,6 +111,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
false);
|
||||
OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
|
||||
false);
|
||||
OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
|
||||
false);
|
||||
OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
|
||||
false);
|
||||
} else {
|
||||
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
|
||||
OutStreamer.emitRawComment(
|
||||
@ -280,16 +302,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
if (VCCUsed)
|
||||
MaxSGPR += 2;
|
||||
|
||||
ProgInfo.CodeLen = CodeSize;
|
||||
ProgInfo.NumSGPR = MaxSGPR;
|
||||
ProgInfo.NumVGPR = MaxVGPR;
|
||||
ProgInfo.NumSGPR = MaxSGPR;
|
||||
|
||||
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
|
||||
// register.
|
||||
ProgInfo.FloatMode = getFPMode(MF);
|
||||
|
||||
// XXX: Not quite sure what this does, but sc seems to unset this.
|
||||
ProgInfo.IEEEMode = 0;
|
||||
|
||||
// Do not clamp NAN to 0.
|
||||
ProgInfo.DX10Clamp = 0;
|
||||
|
||||
ProgInfo.CodeLen = CodeSize;
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
|
||||
const SIProgramInfo &KernelInfo) {
|
||||
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
unsigned RsrcReg;
|
||||
switch (MFI->ShaderType) {
|
||||
default: // Fall through
|
||||
@ -299,25 +332,41 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
|
||||
case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
|
||||
}
|
||||
|
||||
OutStreamer.EmitIntValue(RsrcReg, 4);
|
||||
OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
|
||||
S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
|
||||
|
||||
unsigned LDSAlignShift;
|
||||
if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
|
||||
// LDS is allocated in 64 dword blocks
|
||||
// LDS is allocated in 64 dword blocks.
|
||||
LDSAlignShift = 8;
|
||||
} else {
|
||||
// LDS is allocated in 128 dword blocks
|
||||
// LDS is allocated in 128 dword blocks.
|
||||
LDSAlignShift = 9;
|
||||
}
|
||||
|
||||
unsigned LDSBlocks =
|
||||
RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
|
||||
RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
|
||||
|
||||
if (MFI->ShaderType == ShaderType::COMPUTE) {
|
||||
OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
|
||||
|
||||
const uint32_t ComputePGMRSrc1 =
|
||||
S_00B848_VGPRS(KernelInfo.NumVGPR / 4) |
|
||||
S_00B848_SGPRS(KernelInfo.NumSGPR / 8) |
|
||||
S_00B848_PRIORITY(KernelInfo.Priority) |
|
||||
S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
|
||||
S_00B848_PRIV(KernelInfo.Priv) |
|
||||
S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
|
||||
S_00B848_IEEE_MODE(KernelInfo.DebugMode) |
|
||||
S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
|
||||
|
||||
OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
|
||||
|
||||
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
|
||||
OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
|
||||
} else {
|
||||
OutStreamer.EmitIntValue(RsrcReg, 4);
|
||||
OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
|
||||
S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
|
||||
}
|
||||
|
||||
if (MFI->ShaderType == ShaderType::PIXEL) {
|
||||
OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
|
||||
OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
|
||||
|
@ -25,13 +25,28 @@ class AMDGPUAsmPrinter : public AsmPrinter {
|
||||
private:
|
||||
struct SIProgramInfo {
|
||||
SIProgramInfo() :
|
||||
CodeLen(0),
|
||||
NumVGPR(0),
|
||||
NumSGPR(0),
|
||||
NumVGPR(0) {}
|
||||
Priority(0),
|
||||
FloatMode(0),
|
||||
Priv(0),
|
||||
DX10Clamp(0),
|
||||
DebugMode(0),
|
||||
IEEEMode(0),
|
||||
CodeLen(0) {}
|
||||
|
||||
// Fields set in PGM_RSRC1 pm4 packet.
|
||||
uint32_t NumVGPR;
|
||||
uint32_t NumSGPR;
|
||||
uint32_t Priority;
|
||||
uint32_t FloatMode;
|
||||
uint32_t Priv;
|
||||
uint32_t DX10Clamp;
|
||||
uint32_t DebugMode;
|
||||
uint32_t IEEEMode;
|
||||
|
||||
// Bonus information for debugging.
|
||||
uint64_t CodeLen;
|
||||
unsigned NumSGPR;
|
||||
unsigned NumVGPR;
|
||||
};
|
||||
|
||||
void getSIProgramInfo(SIProgramInfo &Out, MachineFunction &MF) const;
|
||||
|
@ -35,4 +35,54 @@ enum {
|
||||
#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
|
||||
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
|
||||
|
||||
|
||||
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
|
||||
#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
|
||||
#define G_00B848_VGPRS(x) (((x) >> 0) & 0x3F)
|
||||
#define C_00B848_VGPRS 0xFFFFFFC0
|
||||
#define S_00B848_SGPRS(x) (((x) & 0x0F) << 6)
|
||||
#define G_00B848_SGPRS(x) (((x) >> 6) & 0x0F)
|
||||
#define C_00B848_SGPRS 0xFFFFFC3F
|
||||
#define S_00B848_PRIORITY(x) (((x) & 0x03) << 10)
|
||||
#define G_00B848_PRIORITY(x) (((x) >> 10) & 0x03)
|
||||
#define C_00B848_PRIORITY 0xFFFFF3FF
|
||||
#define S_00B848_FLOAT_MODE(x) (((x) & 0xFF) << 12)
|
||||
#define G_00B848_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
|
||||
#define C_00B848_FLOAT_MODE 0xFFF00FFF
|
||||
#define S_00B848_PRIV(x) (((x) & 0x1) << 20)
|
||||
#define G_00B848_PRIV(x) (((x) >> 20) & 0x1)
|
||||
#define C_00B848_PRIV 0xFFEFFFFF
|
||||
#define S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21)
|
||||
#define G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1)
|
||||
#define C_00B848_DX10_CLAMP 0xFFDFFFFF
|
||||
#define S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22)
|
||||
#define G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1)
|
||||
#define C_00B848_DEBUG_MODE 0xFFBFFFFF
|
||||
#define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
|
||||
#define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
|
||||
#define C_00B848_IEEE_MODE 0xFF7FFFFF
|
||||
|
||||
|
||||
// Helpers for setting FLOAT_MODE
|
||||
#define FP_ROUND_ROUND_TO_NEAREST 0
|
||||
#define FP_ROUND_ROUND_TO_INF 1
|
||||
#define FP_ROUND_ROUND_TO_NEGINF 2
|
||||
#define FP_ROUND_ROUND_TO_ZERO 3
|
||||
|
||||
// Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double
|
||||
// precision.
|
||||
#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
|
||||
#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)
|
||||
|
||||
#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
|
||||
#define FP_DENORM_FLUSH_OUT 1
|
||||
#define FP_DENORM_FLUSH_IN 2
|
||||
#define FP_DENORM_FLUSH_NONE 3
|
||||
|
||||
|
||||
// Bits 7:4 control denormal handling. 5:4 control single precision, 7:6 double
|
||||
// precision.
|
||||
#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
|
||||
#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
|
||||
|
||||
#endif // SIDEFINES_H_
|
||||
|
10
test/CodeGen/R600/default-fp-mode.ll
Normal file
10
test/CodeGen/R600/default-fp-mode.ll
Normal file
@ -0,0 +1,10 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SI-LABEL: @test_kernel
|
||||
; SI: FloatMode: 240
|
||||
; SI: IeeeMode: 0
|
||||
define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
|
||||
store float 0.0, float addrspace(1)* %out0
|
||||
store double 0.0, double addrspace(1)* %out1
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user