mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-01 15:11:24 +00:00
R600/SI: Add pattern for AMDGPUurecip
21 more little piglits with radeonsi. Reviewed-by: Tom Stellard <thomas.stellard@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179186 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
74adad6de8
commit
b187f8cd1c
@ -94,6 +94,7 @@ class Constants {
|
||||
int TWO_PI = 0x40c90fdb;
|
||||
int PI = 0x40490fdb;
|
||||
int TWO_PI_INV = 0x3e22f983;
|
||||
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
|
||||
}
|
||||
def CONST : Constants;
|
||||
|
||||
|
@ -1923,10 +1923,11 @@ def : COS_PAT <COS_cm>;
|
||||
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
|
||||
|
||||
// RECIP_UINT emulation for Cayman
|
||||
// Multiplying the float reciprocal (in [0,1]) by 2^32 (0x4f800000 in float encoding) scales it to the unsigned 32-bit integer range
|
||||
def : Pat <
|
||||
(AMDGPUurecip R600_Reg32:$src0),
|
||||
(FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
|
||||
(MOV_IMM_I32 0x4f800000)))
|
||||
(MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
|
||||
>;
|
||||
|
||||
|
||||
|
@ -602,8 +602,8 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
|
||||
defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
|
||||
[(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
|
||||
>;
|
||||
//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
|
||||
//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
|
||||
defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
|
||||
defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
|
||||
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
|
||||
[(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
|
||||
>;
|
||||
@ -1514,6 +1514,14 @@ def : Pat <
|
||||
(BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
|
||||
>;
|
||||
|
||||
// Multiplying the float reciprocal (in [0,1]) by 2^32 (CONST.FP_UINT_MAX_PLUS_1) scales it to the unsigned 32-bit integer range
|
||||
def : Pat <
|
||||
(AMDGPUurecip i32:$src0),
|
||||
(V_CVT_U32_F32_e32
|
||||
(V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
|
||||
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
|
||||
>;
|
||||
|
||||
/********** ================== **********/
|
||||
/********** VOP3 Patterns **********/
|
||||
/********** ================== **********/
|
||||
|
12
test/CodeGen/R600/urecip.ll
Normal file
12
test/CodeGen/R600/urecip.ll
Normal file
@ -0,0 +1,12 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
|
||||
|
||||
;CHECK: V_RCP_IFLAG_F32_e32
|
||||
|
||||
define void @test(i32 %p, i32 %q) {
|
||||
%i = udiv i32 %p, %q
|
||||
%r = bitcast i32 %i to float
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
Loading…
Reference in New Issue
Block a user