mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
[NVPTX] Add @llvm.nvvm.sqrt.f() intrinsic
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182394 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b4b14070a4
commit
b9c26dcb24
@ -405,6 +405,8 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
|
|||||||
// Sqrt
|
// Sqrt
|
||||||
//
|
//
|
||||||
|
|
||||||
|
def int_nvvm_sqrt_f : GCCBuiltin<"__nvvm_sqrt_f">,
|
||||||
|
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||||
def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
|
def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
|
||||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||||
def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
|
def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
|
||||||
|
@ -42,6 +42,11 @@ static cl::opt<int> UsePrecDivF32(
|
|||||||
" IEEE Compliant F32 div.rnd if avaiable."),
|
" IEEE Compliant F32 div.rnd if avaiable."),
|
||||||
cl::init(2));
|
cl::init(2));
|
||||||
|
|
||||||
|
static cl::opt<bool>
|
||||||
|
UsePrecSqrtF32("nvptx-prec-sqrtf32",
|
||||||
|
cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
|
||||||
|
cl::init(true));
|
||||||
|
|
||||||
/// createNVPTXISelDag - This pass converts a legalized DAG into a
|
/// createNVPTXISelDag - This pass converts a legalized DAG into a
|
||||||
/// NVPTX-specific DAG, ready for instruction scheduling.
|
/// NVPTX-specific DAG, ready for instruction scheduling.
|
||||||
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
|
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
|
||||||
@ -74,6 +79,8 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
|
|||||||
|
|
||||||
// Decide how to translate f32 div
|
// Decide how to translate f32 div
|
||||||
do_DIVF32_PREC = UsePrecDivF32;
|
do_DIVF32_PREC = UsePrecDivF32;
|
||||||
|
// Decide how to translate f32 sqrt
|
||||||
|
do_SQRTF32_PREC = UsePrecSqrtF32;
|
||||||
// sm less than sm_20 does not support div.rnd. Use div.full.
|
// sm less than sm_20 does not support div.rnd. Use div.full.
|
||||||
if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
|
if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
|
||||||
do_DIVF32_PREC = 1;
|
do_DIVF32_PREC = 1;
|
||||||
|
@ -41,6 +41,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
|
|||||||
// Otherwise, use div.full
|
// Otherwise, use div.full
|
||||||
int do_DIVF32_PREC;
|
int do_DIVF32_PREC;
|
||||||
|
|
||||||
|
// If true, generate sqrt.rn, else generate sqrt.approx. If FTZ
|
||||||
|
// is true, then generate the corresponding FTZ version.
|
||||||
|
bool do_SQRTF32_PREC;
|
||||||
|
|
||||||
// If true, add .ftz to f32 instructions.
|
// If true, add .ftz to f32 instructions.
|
||||||
// This is only meaningful for sm_20 and later, as the default
|
// This is only meaningful for sm_20 and later, as the default
|
||||||
// is not ftz.
|
// is not ftz.
|
||||||
|
@ -75,6 +75,9 @@ def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
|
|||||||
def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
|
def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
|
||||||
def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
|
def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
|
||||||
|
|
||||||
|
def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">;
|
||||||
|
def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">;
|
||||||
|
|
||||||
def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
|
def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
|
||||||
|
|
||||||
def true : Predicate<"1">;
|
def true : Predicate<"1">;
|
||||||
|
@ -512,6 +512,16 @@ def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
|
|||||||
def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
|
def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
|
||||||
Float64Regs, int_nvvm_sqrt_rp_d>;
|
Float64Regs, int_nvvm_sqrt_rp_d>;
|
||||||
|
|
||||||
|
// nvvm_sqrt intrinsic
|
||||||
|
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
|
||||||
|
(INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
|
||||||
|
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
|
||||||
|
(INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
|
||||||
|
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
|
||||||
|
(INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
|
||||||
|
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
|
||||||
|
(INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Rsqrt
|
// Rsqrt
|
||||||
//
|
//
|
||||||
|
@ -15,5 +15,12 @@ define ptx_device double @test_fabs(double %d) {
|
|||||||
ret double %x
|
ret double %x
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define float @test_nvvm_sqrt(float %a) {
|
||||||
|
%val = call float @llvm.nvvm.sqrt.f(float %a)
|
||||||
|
ret float %val
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
declare float @llvm.fabs.f32(float)
|
declare float @llvm.fabs.f32(float)
|
||||||
declare double @llvm.fabs.f64(double)
|
declare double @llvm.fabs.f64(double)
|
||||||
|
declare float @llvm.nvvm.sqrt.f(float)
|
||||||
|
Loading…
Reference in New Issue
Block a user