From 496dbfe7b9fd0ad986b425e5b1543fefb1812b8e Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Wed, 27 Nov 2013 21:23:20 +0000
Subject: [PATCH] R600: Add support for ISD::FROUND

NOTE: This is a candidate for the 3.4 branch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195878 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUISelLowering.cpp |  1 +
 lib/Target/R600/AMDGPUInstrInfo.td     |  3 ++
 lib/Target/R600/R600Instructions.td    | 18 ++++++++---
 test/CodeGen/R600/llvm.round.ll        | 41 ++++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 test/CodeGen/R600/llvm.round.ll

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index fdabea51695..f2a6aab9cd7 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -58,6 +58,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::FABS,   MVT::f32, Legal);
   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
+  setOperationAction(ISD::FROUND, MVT::f32, Legal);
 
   // The hardware supports ROTR, but not ROTL
   setOperationAction(ISD::ROTL, MVT::i32, Expand);
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index c0d757e2758..fccede01ab9 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -83,3 +83,6 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
 def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
                         SDTypeProfile<0, 2, []>,
                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def AMDGPUround : SDNode<"ISD::FROUND",
+                         SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index abfde501735..0346e24ab77 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1110,6 +1110,10 @@ class COS_Common <bits<11> inst> : R600_1OP <
   let Itinerary = TransALU;
 }
 
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
 //===----------------------------------------------------------------------===//
 // Helper patterns for complex intrinsics
 //===----------------------------------------------------------------------===//
@@ -1132,6 +1136,13 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
 >;
 
 
+// FROUND pattern
+class FROUNDPat <Instruction CNDGE> : Pat <
+  (AMDGPUround f32:$x),
+  (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
+>;
+
+
 //===----------------------------------------------------------------------===//
 // R600 / R700 Instructions
 //===----------------------------------------------------------------------===//
@@ -1173,6 +1184,7 @@ let Predicates = [isR600] in {
 def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
 
 def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
+  def : FROUNDPat <CNDGE_r600>;
 
 def R600_ExportSwz : ExportSwzInst {
   let Word1{20-17} = 0; // BURST_COUNT
@@ -1726,6 +1738,8 @@ def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
 // SHA-256 Patterns
 def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
 
+  def : FROUNDPat <CNDGE_eg>;
+
 def EG_ExportSwz : ExportSwzInst {
   let Word1{19-16} = 0; // BURST_COUNT
   let Word1{20} = 0; // VALID_PIXEL_MODE
@@ -2090,10 +2104,6 @@ def TXD_SHADOW: InstR600 <
 } // End isPseudo = 1
 } // End usesCustomInserter = 1
 
-def CLAMP_R600 : CLAMP <R600_Reg32>;
-def FABS_R600 : FABS<R600_Reg32>;
-def FNEG_R600 : FNEG<R600_Reg32>;
-
 //===---------------------------------------------------------------------===//
 // Return instruction
 //===---------------------------------------------------------------------===//
diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/R600/llvm.round.ll
new file mode 100644
index 00000000000..e06d45d4a37
--- /dev/null
+++ b/test/CodeGen/R600/llvm.round.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
+
+; FUNC-LABEL: @f32
+; R600: FRACT
+; R600-DAG: ADD
+; R600-DAG: CEIL
+; R600-DAG: FLOOR
+; R600: CNDGE
+define void @f32(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = call float @llvm.round.f32(float %in)
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+; The vector tests are really difficult to verify, since it can be hard to
+; predict how the scheduler will order the instructions. We already have
+; a test for the scalar case, so the vector tests just check that the
+; compiler doesn't crash.
+
+; FUNC-LABEL: v2f32
+; R600: CF_END
+define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+  %0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in)
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: v4f32
+; R600: CF_END
+define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+  %0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in)
+  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+declare float @llvm.round.f32(float)
+declare <2 x float> @llvm.round.v2f32(<2 x float>)
+declare <4 x float> @llvm.round.v4f32(<4 x float>)
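
Note for reviewers (not part of the patch itself): FROUNDPat lowers llvm.round.f32 by comparing the fractional part of the input against 0.5 and letting CNDGE pick between CEIL and FLOOR of the original value. The C++ sketch below models that selection in scalar code; it is illustrative only, and the helper name r600_round_f32 and the use of the standard <cmath> floor/ceil are stand-ins for the hardware FRACT/CEIL/FLOOR ops, not anything introduced by this change.

    // Scalar model of the dag pattern emitted by FROUNDPat:
    //   CNDGE(ADD(FNEG(0.5), FRACT(x)), CEIL(x), FLOOR(x))
    #include <cmath>
    #include <cstdio>

    static float r600_round_f32(float x) {
      float fract = x - std::floor(x);   // FRACT $x: x - floor(x), in [0, 1)
      float cond  = -0.5f + fract;       // ADD (FNEG_R600 (f32 HALF)), (FRACT $x)
      // CNDGE selects its second operand when the first operand is >= 0.
      return cond >= 0.0f ? std::ceil(x)   // fractional part >= 0.5 -> CEIL $x
                          : std::floor(x); // otherwise              -> FLOOR $x
    }

    int main() {
      std::printf("%f %f %f\n",
                  r600_round_f32(2.3f),    // 2.000000
                  r600_round_f32(2.5f),    // 3.000000
                  r600_round_f32(-1.2f));  // -1.000000
      return 0;
    }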