From c6f13db656c7649f933c74c4f90c09ff74de52a8 Mon Sep 17 00:00:00 2001
From: Vincent Lejeune
Date: Tue, 9 Jul 2013 15:03:11 +0000
Subject: [PATCH] R600: Use DAG lowering pass to handle fcos/fsin

NOTE: This is a candidate for the stable branch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185940 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review notes (ignored by git-am, not part of the commit message):
 - LowerTrig: on hardware older than R700 the result of COS_HW/SIN_HW was
   multiplied by Pi. The in-code comment says the hardware *input* must lie
   between -Pi and Pi, so the reduced argument (which lies in [-0.5, 0.5))
   is now multiplied by 2Pi before the TRIG node instead.
 - The R700+ path builds the same DAG as before, so the redwood tests are
   unchanged. No R600-generation test is added here.
 - The blob ids on the R600ISelLowering.cpp index line belong to the original
   revision and are stale for this variant.
 - NOTE(review): angle-bracketed template/TableGen arguments and the leading
   whitespace of context lines were reconstructed for this copy; confirm them
   against the tree before applying.

 lib/Target/R600/AMDGPUISelLowering.h |  2 ++
 lib/Target/R600/R600ISelLowering.cpp | 41 +++++++++++++++++++++++++++-
 lib/Target/R600/R600ISelLowering.h   |  2 ++
 lib/Target/R600/R600Instructions.td  | 32 +++++++----------------
 test/CodeGen/R600/llvm.cos.ll        |  3 +++
 test/CodeGen/R600/llvm.sin.ll        |  3 +++
 6 files changed, 60 insertions(+), 23 deletions(-)

diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index d739a013242..7f4468c15ed 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -121,6 +121,8 @@ enum {
   // End AMDIL ISD Opcodes
   DWORDADDR,
   FRACT,
+  COS_HW,
+  SIN_HW,
   FMAX,
   SMAX,
   UMAX,
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index ce2aa9208f8..4413734b401 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -26,7 +26,8 @@
 using namespace llvm;
 
 R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
-    AMDGPUTargetLowering(TM) {
+    AMDGPUTargetLowering(TM),
+    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
   addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
   addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
   addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
@@ -38,6 +39,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
   setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
 
+  setOperationAction(ISD::FCOS, MVT::f32, Custom);
+  setOperationAction(ISD::FSIN, MVT::f32, Custom);
+
   setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
   setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
   setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
@@ -473,6 +477,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
   R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
   switch (Op.getOpcode()) {
   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+  case ISD::FCOS:
+  case ISD::FSIN: return LowerTrig(Op, DAG);
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
   case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::STORE: return LowerSTORE(Op, DAG);
@@ -723,6 +729,39 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
   }
 }
 
+SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+  // Lowers FCOS/FSIN to COS_HW/SIN_HW applied to a range-reduced argument.
+  // On hw >= R700, COS/SIN input must be between -1. and 1.
+  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  SDValue Arg = Op.getOperand(0);
+  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
+      DAG.getNode(ISD::FADD, DL, VT,
+        DAG.getNode(ISD::FMUL, DL, VT, Arg,
+          DAG.getConstantFP(0.15915494309, MVT::f32)),
+        DAG.getConstantFP(0.5, MVT::f32)));
+  SDValue TrigArg = DAG.getNode(ISD::FADD, DL, VT, FractPart,
+      DAG.getConstantFP(-0.5, MVT::f32));
+  // On R600 hw, COS/SIN input must be between -Pi and Pi, so the reduced
+  // argument (not the value produced by the TRIG node) is rescaled by 2Pi.
+  if (Gen < AMDGPUSubtarget::R700)
+    TrigArg = DAG.getNode(ISD::FMUL, DL, VT, TrigArg,
+        DAG.getConstantFP(6.28318530718, MVT::f32));
+  unsigned TrigNode;
+  switch (Op.getOpcode()) {
+  case ISD::FCOS:
+    TrigNode = AMDGPUISD::COS_HW;
+    break;
+  case ISD::FSIN:
+    TrigNode = AMDGPUISD::SIN_HW;
+    break;
+  default:
+    llvm_unreachable("Wrong trig opcode");
+  }
+  return DAG.getNode(TrigNode, DL, VT, TrigArg);
+}
+
 SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(
       ISD::SETCC,
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index d4ba4c877bb..a033fcba643 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -40,6 +40,7 @@ public:
                                       SmallVectorImpl<SDValue> &InVals) const;
   virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const;
 private:
+  unsigned Gen;
   /// Each OpenCL kernel has nine implicit parameters that are stored in the
   /// first nine dwords of a Vertex Buffer.  These implicit parameters are
   /// lowered to load instructions which retreive the values from the Vertex
@@ -60,6 +61,7 @@ private:
   SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
                                           SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index cb887d13430..735dcfc0256 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -364,6 +364,14 @@ def DOT4 : SDNode<"AMDGPUISD::DOT4",
   []
 >;
 
+def COS_HW : SDNode<"AMDGPUISD::COS_HW",
+  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
+>;
+
+def SIN_HW : SDNode<"AMDGPUISD::SIN_HW",
+  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
+>;
+
 def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
 
 def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
@@ -1080,14 +1088,14 @@ class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
 }
 
 class SIN_Common <bits<11> inst> : R600_1OP <
-  inst, "SIN", []>{
+  inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
   let Trig = 1;
   let TransOnly = 1;
   let Itinerary = TransALU;
 }
 
 class COS_Common <bits<11> inst> : R600_1OP <
-  inst, "COS", []> {
+  inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
   let Trig = 1;
   let TransOnly = 1;
   let Itinerary = TransALU;
@@ -1228,18 +1236,6 @@ let Predicates = [isR600] in {
 
 }
 
-// Helper pattern for normalizing inputs to triginomic instructions for R700+
-// cards.
-class COS_PAT <InstR600 trig> : Pat<
-  (fcos f32:$src),
-  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
->;
-
-class SIN_PAT <InstR600 trig> : Pat<
-  (fsin f32:$src),
-  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
->;
-
 //===----------------------------------------------------------------------===//
 // R700 Only instructions
 //===----------------------------------------------------------------------===//
@@ -1247,10 +1243,6 @@ class SIN_PAT <InstR600 trig> : Pat<
 let Predicates = [isR700] in {
   def SIN_r700 : SIN_Common<0x6E>;
   def COS_r700 : COS_Common<0x6F>;
-
-  // R700 normalizes inputs to SIN/COS the same as EG
-  def : SIN_PAT <SIN_r700>;
-  def : COS_PAT <COS_r700>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1276,8 +1268,6 @@ def SIN_eg : SIN_Common<0x8D>;
 def COS_eg : COS_Common<0x8E>;
 
 def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
-def : SIN_PAT <SIN_eg>;
-def : COS_PAT <COS_eg>;
 def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
 
 //===----------------------------------------------------------------------===//
@@ -1726,8 +1716,6 @@ def COS_cm : COS_Common<0x8E>;
 } // End isVector = 1
 
 def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
-def : SIN_PAT <SIN_cm>;
-def : COS_PAT <COS_cm>;
 
 defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
 
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
index b444fa782be..8fb4559f89d 100644
--- a/test/CodeGen/R600/llvm.cos.ll
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -1,5 +1,8 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
+;CHECK: MULADD_IEEE *
+;CHECK: FRACT *
+;CHECK: ADD *
 ;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 
 define void @test() {
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index 09cc3d2c52d..e94c2ba56b8 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -1,5 +1,8 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
+;CHECK: MULADD_IEEE *
+;CHECK: FRACT *
+;CHECK: ADD *
 ;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 
 define void @test() {