diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index d63cd1d281c..30a9ed7ea74 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -740,6 +740,14 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, Op.getOperand(1), Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_umad24: + return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case AMDGPUIntrinsic::AMDGPU_imad24: + return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case AMDGPUIntrinsic::AMDGPU_bfe_i32: return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, Op.getOperand(1), @@ -1432,6 +1440,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BFM) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) + NODE_NAME_CASE(MAD_U24) + NODE_NAME_CASE(MAD_I24) NODE_NAME_CASE(URECIP) NODE_NAME_CASE(DOT4) NODE_NAME_CASE(EXPORT) diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index bf6916259af..b876c521d32 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -186,6 +186,8 @@ enum { BFM, // Insert a range of bits into a 32-bit word. MUL_U24, MUL_I24, + MAD_U24, + MAD_I24, TEXTURE_FETCH, EXPORT, CONST_ADDRESS, diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 258d5a60ef3..f96dbb4d8a1 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -100,3 +100,10 @@ def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp, [SDNPCommutative] >; + +def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp, + [] +>; +def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp, + [] +>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index ba7cac47619..127b74a0edb 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -423,6 +423,17 @@ class UMUL24Pattern : Pat < >; */ +class IMad24Pat : Pat < + (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2), + (Inst $src0, $src1, $src2) +>; + +class UMad24Pat : Pat < + (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2), + (Inst $src0, $src1, $src2) +>; + + include "R600Instructions.td" include "R700Instructions.td" include "EvergreenInstructions.td" diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 9f30bd8f1c9..9ad5e72d3f0 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -51,6 +51,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_umul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td index 837d6025339..6dded24c6fe 100644 --- a/lib/Target/R600/CaymanInstructions.td +++ b/lib/Target/R600/CaymanInstructions.td @@ -21,12 +21,14 @@ def isCayman : Predicate<"Subtarget.hasCaymanISA()">; let Predicates = [isCayman] in { def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24", - [(set i32:$dst, (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2))], VecALU + [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))], VecALU >; def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24", [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))], VecALU >; +def : IMad24Pat; + let isVector = 1 in { def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 43981510423..500fa7894c2 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1256,13 +1256,14 @@ defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] >; defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", - [(set i32:$dst, (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2))] + [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))] >; defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", - [(set i32:$dst, (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2))] + [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))] >; } // End neverHasSideEffects + defm V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; defm V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>; @@ -2077,6 +2078,9 @@ def : Pat < // VOP3 Patterns //===----------------------------------------------------------------------===// +def : IMad24Pat; +def : UMad24Pat; + def : Pat < (fadd f64:$src0, f64:$src1), (V_ADD_F64 $src0, $src1, (i64 0)) diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll new file mode 100644 index 00000000000..c7a3660b0b9 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone + +; FUNC-LABEL: @test_imad24 +; SI: V_MAD_I32_I24 +; CM: MULADD_INT24 +define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { + %mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone + store i32 %mad, i32 addrspace(1)* %out, align 4 + ret void +} + diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll new file mode 100644 index 00000000000..08daac54424 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone + +; FUNC-LABEL: @test_umad24 +; SI: V_MAD_U32_U24 +define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind { + %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone + store i32 %mad, i32 addrspace(1)* %out, align 4 + ret void +} +