Mirror of https://github.com/c64scene-ar/llvm-6502.git (last synced 2025-01-07 11:33:44 +00:00)
R600: Add intrinsics for mad24
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209456 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent: bd0283a5f2
commit: f49da4338a
@ -740,6 +740,14 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||||||
return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT,
|
return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT,
|
||||||
Op.getOperand(1), Op.getOperand(2));
|
Op.getOperand(1), Op.getOperand(2));
|
||||||
|
|
||||||
|
case AMDGPUIntrinsic::AMDGPU_umad24:
|
||||||
|
return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT,
|
||||||
|
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
|
||||||
|
|
||||||
|
case AMDGPUIntrinsic::AMDGPU_imad24:
|
||||||
|
return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT,
|
||||||
|
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
|
||||||
|
|
||||||
case AMDGPUIntrinsic::AMDGPU_bfe_i32:
|
case AMDGPUIntrinsic::AMDGPU_bfe_i32:
|
||||||
return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
|
return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
|
||||||
Op.getOperand(1),
|
Op.getOperand(1),
|
||||||
@ -1432,6 +1440,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
NODE_NAME_CASE(BFM)
|
NODE_NAME_CASE(BFM)
|
||||||
NODE_NAME_CASE(MUL_U24)
|
NODE_NAME_CASE(MUL_U24)
|
||||||
NODE_NAME_CASE(MUL_I24)
|
NODE_NAME_CASE(MUL_I24)
|
||||||
|
NODE_NAME_CASE(MAD_U24)
|
||||||
|
NODE_NAME_CASE(MAD_I24)
|
||||||
NODE_NAME_CASE(URECIP)
|
NODE_NAME_CASE(URECIP)
|
||||||
NODE_NAME_CASE(DOT4)
|
NODE_NAME_CASE(DOT4)
|
||||||
NODE_NAME_CASE(EXPORT)
|
NODE_NAME_CASE(EXPORT)
|
||||||
|
@ -186,6 +186,8 @@ enum {
|
|||||||
BFM, // Insert a range of bits into a 32-bit word.
|
BFM, // Insert a range of bits into a 32-bit word.
|
||||||
MUL_U24,
|
MUL_U24,
|
||||||
MUL_I24,
|
MUL_I24,
|
||||||
|
MAD_U24,
|
||||||
|
MAD_I24,
|
||||||
TEXTURE_FETCH,
|
TEXTURE_FETCH,
|
||||||
EXPORT,
|
EXPORT,
|
||||||
CONST_ADDRESS,
|
CONST_ADDRESS,
|
||||||
|
@ -100,3 +100,10 @@ def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
|
|||||||
def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
|
def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
|
||||||
[SDNPCommutative]
|
[SDNPCommutative]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
|
||||||
|
[]
|
||||||
|
>;
|
||||||
|
def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
|
||||||
|
[]
|
||||||
|
>;
|
||||||
|
@ -423,6 +423,17 @@ class UMUL24Pattern <Instruction UMUL24> : Pat <
|
|||||||
>;
|
>;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
class IMad24Pat<Instruction Inst> : Pat <
|
||||||
|
(add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
|
||||||
|
(Inst $src0, $src1, $src2)
|
||||||
|
>;
|
||||||
|
|
||||||
|
class UMad24Pat<Instruction Inst> : Pat <
|
||||||
|
(add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
|
||||||
|
(Inst $src0, $src1, $src2)
|
||||||
|
>;
|
||||||
|
|
||||||
|
|
||||||
include "R600Instructions.td"
|
include "R600Instructions.td"
|
||||||
include "R700Instructions.td"
|
include "R700Instructions.td"
|
||||||
include "EvergreenInstructions.td"
|
include "EvergreenInstructions.td"
|
||||||
|
@ -51,6 +51,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
|
|||||||
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
def int_AMDGPU_umul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
def int_AMDGPU_umul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||||
def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
@ -21,12 +21,14 @@ def isCayman : Predicate<"Subtarget.hasCaymanISA()">;
|
|||||||
let Predicates = [isCayman] in {
|
let Predicates = [isCayman] in {
|
||||||
|
|
||||||
def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24",
|
def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24",
|
||||||
[(set i32:$dst, (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2))], VecALU
|
[(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))], VecALU
|
||||||
>;
|
>;
|
||||||
def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24",
|
def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24",
|
||||||
[(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))], VecALU
|
[(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))], VecALU
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
def : IMad24Pat<MULADD_INT24_cm>;
|
||||||
|
|
||||||
let isVector = 1 in {
|
let isVector = 1 in {
|
||||||
|
|
||||||
def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
|
def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
|
||||||
|
@ -1256,13 +1256,14 @@ defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32",
|
|||||||
[(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
|
[(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
|
||||||
>;
|
>;
|
||||||
defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24",
|
defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24",
|
||||||
[(set i32:$dst, (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2))]
|
[(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))]
|
||||||
>;
|
>;
|
||||||
defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24",
|
defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24",
|
||||||
[(set i32:$dst, (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2))]
|
[(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
} // End neverHasSideEffects
|
} // End neverHasSideEffects
|
||||||
|
|
||||||
defm V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
|
defm V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
|
||||||
defm V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
|
defm V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
|
||||||
defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
|
defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
|
||||||
@ -2077,6 +2078,9 @@ def : Pat <
|
|||||||
// VOP3 Patterns
|
// VOP3 Patterns
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
def : IMad24Pat<V_MAD_I32_I24>;
|
||||||
|
def : UMad24Pat<V_MAD_U32_U24>;
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(fadd f64:$src0, f64:$src1),
|
(fadd f64:$src0, f64:$src1),
|
||||||
(V_ADD_F64 $src0, $src1, (i64 0))
|
(V_ADD_F64 $src0, $src1, (i64 0))
|
||||||
|
14
test/CodeGen/R600/llvm.AMDGPU.imad24.ll
Normal file
14
test/CodeGen/R600/llvm.AMDGPU.imad24.ll
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||||
|
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
|
||||||
|
|
||||||
|
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone
|
||||||
|
|
||||||
|
; FUNC-LABEL: @test_imad24
|
||||||
|
; SI: V_MAD_I32_I24
|
||||||
|
; CM: MULADD_INT24
|
||||||
|
define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
|
||||||
|
%mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
|
||||||
|
store i32 %mad, i32 addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
12
test/CodeGen/R600/llvm.AMDGPU.umad24.ll
Normal file
12
test/CodeGen/R600/llvm.AMDGPU.umad24.ll
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||||
|
|
||||||
|
declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone
|
||||||
|
|
||||||
|
; FUNC-LABEL: @test_umad24
|
||||||
|
; SI: V_MAD_U32_U24
|
||||||
|
define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
|
||||||
|
%mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
|
||||||
|
store i32 %mad, i32 addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user