mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-09 11:25:55 +00:00
R600/SI: Add intrinsic for ldexp
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215734 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -69,4 +69,7 @@ def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
|
|||||||
def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
|
def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
|
||||||
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
|
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
|
||||||
|
|
||||||
|
def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
|
||||||
|
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
} // End TargetPrefix = "AMDGPU"
|
} // End TargetPrefix = "AMDGPU"
|
||||||
|
@@ -853,6 +853,10 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||||||
case Intrinsic::AMDGPU_rsq_clamped:
|
case Intrinsic::AMDGPU_rsq_clamped:
|
||||||
return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
|
return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
|
||||||
|
|
||||||
|
case Intrinsic::AMDGPU_ldexp:
|
||||||
|
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
|
||||||
|
Op.getOperand(2));
|
||||||
|
|
||||||
case AMDGPUIntrinsic::AMDGPU_imax:
|
case AMDGPUIntrinsic::AMDGPU_imax:
|
||||||
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
|
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
|
||||||
Op.getOperand(2));
|
Op.getOperand(2));
|
||||||
@@ -2168,6 +2172,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
NODE_NAME_CASE(RSQ)
|
NODE_NAME_CASE(RSQ)
|
||||||
NODE_NAME_CASE(RSQ_LEGACY)
|
NODE_NAME_CASE(RSQ_LEGACY)
|
||||||
NODE_NAME_CASE(RSQ_CLAMPED)
|
NODE_NAME_CASE(RSQ_CLAMPED)
|
||||||
|
NODE_NAME_CASE(LDEXP)
|
||||||
NODE_NAME_CASE(DOT4)
|
NODE_NAME_CASE(DOT4)
|
||||||
NODE_NAME_CASE(BFE_U32)
|
NODE_NAME_CASE(BFE_U32)
|
||||||
NODE_NAME_CASE(BFE_I32)
|
NODE_NAME_CASE(BFE_I32)
|
||||||
|
@@ -203,6 +203,7 @@ enum {
|
|||||||
RSQ,
|
RSQ,
|
||||||
RSQ_LEGACY,
|
RSQ_LEGACY,
|
||||||
RSQ_CLAMPED,
|
RSQ_CLAMPED,
|
||||||
|
LDEXP,
|
||||||
DOT4,
|
DOT4,
|
||||||
BFE_U32, // Extract range of bits with zero extension to 32-bits.
|
BFE_U32, // Extract range of bits with zero extension to 32-bits.
|
||||||
BFE_I32, // Extract range of bits with sign extension to 32-bits.
|
BFE_I32, // Extract range of bits with sign extension to 32-bits.
|
||||||
|
@@ -23,6 +23,10 @@ def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
|
|||||||
[SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
|
[SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
def AMDGPULdExpOp : SDTypeProfile<1, 2,
|
||||||
|
[SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
|
||||||
|
>;
|
||||||
|
|
||||||
def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
|
def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
|
||||||
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
|
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
|
||||||
>;
|
>;
|
||||||
@@ -52,6 +56,8 @@ def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
|
|||||||
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
|
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
|
||||||
def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;
|
def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;
|
||||||
|
|
||||||
|
def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
|
||||||
|
|
||||||
// out = max(a, b) a and b are floats
|
// out = max(a, b) a and b are floats
|
||||||
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
|
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
|
||||||
[SDNPCommutative, SDNPAssociative]
|
[SDNPCommutative, SDNPAssociative]
|
||||||
|
@@ -1390,7 +1390,7 @@ defm V_SUBBREV_U32 : VOP2bInst <0x0000002a, "V_SUBBREV_U32",
|
|||||||
} // End isCommutable = 1, Defs = [VCC]
|
} // End isCommutable = 1, Defs = [VCC]
|
||||||
|
|
||||||
defm V_LDEXP_F32 : VOP2Inst <0x0000002b, "V_LDEXP_F32",
|
defm V_LDEXP_F32 : VOP2Inst <0x0000002b, "V_LDEXP_F32",
|
||||||
VOP_F32_F32_F32
|
VOP_F32_F32_I32, AMDGPUldexp
|
||||||
>;
|
>;
|
||||||
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
|
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
|
||||||
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
|
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
|
||||||
@@ -1509,7 +1509,7 @@ defm V_MAX_F64 : VOP3Inst <0x00000167, "V_MAX_F64",
|
|||||||
} // isCommutable = 1
|
} // isCommutable = 1
|
||||||
|
|
||||||
defm V_LDEXP_F64 : VOP3Inst <0x00000168, "V_LDEXP_F64",
|
defm V_LDEXP_F64 : VOP3Inst <0x00000168, "V_LDEXP_F64",
|
||||||
VOP_F32_F32_I32
|
VOP_F64_F64_I32, AMDGPUldexp
|
||||||
>;
|
>;
|
||||||
|
|
||||||
let isCommutable = 1 in {
|
let isCommutable = 1 in {
|
||||||
|
22
test/CodeGen/R600/llvm.AMDGPU.ldexp.ll
Normal file
22
test/CodeGen/R600/llvm.AMDGPU.ldexp.ll
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||||
|
|
||||||
|
declare float @llvm.AMDGPU.ldexp.f32(float, i32) nounwind readnone
|
||||||
|
declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone
|
||||||
|
|
||||||
|
; SI-LABEL: @test_ldexp_f32:
|
||||||
|
; SI: V_LDEXP_F32
|
||||||
|
; SI: S_ENDPGM
|
||||||
|
define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind {
|
||||||
|
%result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone
|
||||||
|
store float %result, float addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SI-LABEL: @test_ldexp_f64:
|
||||||
|
; SI: V_LDEXP_F64
|
||||||
|
; SI: S_ENDPGM
|
||||||
|
define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind {
|
||||||
|
%result = call double @llvm.AMDGPU.ldexp.f64(double %a, i32 %b) nounwind readnone
|
||||||
|
store double %result, double addrspace(1)* %out, align 8
|
||||||
|
ret void
|
||||||
|
}
|
Reference in New Issue
Block a user