# R600/SI: custom-lower f32 sin/cos through a FRACT range reduction.
#
# Summary of what this patch does, file by file:
#
#  * AMDGPUInstrInfo.td
#      Adds two SDNode definitions, AMDGPUcos and AMDGPUsin, bound to
#      AMDGPUISD::COS_HW and AMDGPUISD::SIN_HW with the SDTFPUnaryOp profile.
#
#  * SIISelLowering.cpp / SIISelLowering.h
#      - The SITargetLowering constructor marks ISD::FSIN and ISD::FCOS on
#        MVT::f32 as Custom.
#      - LowerOperation routes both opcodes to the new LowerTrig member.
#      - LowerTrig multiplies the operand by the constant 0.5 / M_PI, wraps the
#        product in an AMDGPUISD::FRACT node, and feeds that to COS_HW (for
#        FCOS) or SIN_HW (for FSIN). Any other opcode hits llvm_unreachable.
#
#  * SIInstructions.td
#      - V_SIN_F32 / V_COS_F32 gain selection patterns on AMDGPUsin / AMDGPUcos.
#      - The two standalone Pat<> entries that matched fsin/fcos f32 directly to
#        V_SIN/V_COS of (V_MUL_F32_e32 $src0, CONST.TWO_PI_INV) are removed.
#
#  * test/CodeGen/R600/llvm.sin.ll
#      - The SI RUN line gains the SI-SAFE prefix and a third RUN line is added
#        with -enable-unsafe-fp-math and the SI-UNSAFE prefix.
#      - The existing test now expects V_FRACT_F32 between V_MUL_F32 and
#        V_SIN_F32.
#      - New function testf multiplies the input by 3.0 before calling
#        llvm.sin.f32. SI-SAFE expects two V_MUL_F32; SI-UNSAFE expects the text
#        "4.774" and a single V_MUL_F32.
#        NOTE(review): presumably the unsafe run folds 3.0 * 1/(2*pi) ~= 0.4774
#        into one constant -- confirm against llc output.
#
# NOTE(review): the patch text was received with its line breaks collapsed.
# The records below are re-split one per line so that every hunk matches the
# counts in its @@ header; leading indentation of context lines and the position
# of blank context lines are reconstructed -- confirm against the tree before
# applying.
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 934d59d1269..820f1a80d75 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -34,6 +34,9 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
 // This argument to this node is a dword address.
 def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
 
+def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
+def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
+
 // out = a - floor(a)
 def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
 
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index b3429b9748c..86997c82ebe 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -80,6 +80,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SUBC, MVT::i32, Legal);
   setOperationAction(ISD::SUBE, MVT::i32, Legal);
 
+  setOperationAction(ISD::FSIN, MVT::f32, Custom);
+  setOperationAction(ISD::FCOS, MVT::f32, Custom);
+
   // We need to custom lower vector stores from local memory
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
@@ -637,6 +640,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
+  case ISD::FSIN:
+  case ISD::FCOS:
+    return LowerTrig(Op, DAG);
   case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::FDIV: return LowerFDIV(Op, DAG);
   case ISD::STORE: return LowerSTORE(Op, DAG);
@@ -1116,6 +1122,23 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   return Chain;
 }
 
+SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  SDValue Arg = Op.getOperand(0);
+  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
+                        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
+                          DAG.getConstantFP(0.5 / M_PI, VT)));
+
+  switch (Op.getOpcode()) {
+  case ISD::FCOS:
+    return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
+  case ISD::FSIN:
+    return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
+  default:
+    llvm_unreachable("Wrong trig opcode");
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Custom DAG optimizations
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 9e9a0b05f53..b3343ee6694 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -32,6 +32,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
   SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
 
   bool foldImm(SDValue &Operand, int32_t &Immediate,
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index a4920db3378..bd5be3248e0 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1167,8 +1167,12 @@ defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
 defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
   [(set f64:$dst, (fsqrt f64:$src0))]
 >;
-defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
-defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32",
+  [(set f32:$dst, (AMDGPUsin f32:$src0))]
+>;
+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32",
+  [(set f32:$dst, (AMDGPUcos f32:$src0))]
+>;
 defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
 defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
 defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
@@ -2342,16 +2346,6 @@ def : Pat<
   (V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0))
 >;
 
-def : Pat <
-  (fcos f32:$src0),
-  (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
->;
-
-def : Pat <
-  (fsin f32:$src0),
-  (V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
->;
-
 def : Pat <
   (int_AMDGPU_cube v4f32:$src),
   (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index 41c363cc871..53006bad5c4 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -1,5 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+;RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
+;RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
 
 ;FUNC-LABEL: test
 ;EG: MULADD_IEEE *
@@ -8,6 +9,7 @@
 ;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ;EG-NOT: SIN
 ;SI: V_MUL_F32
+;SI: V_FRACT_F32
 ;SI: V_SIN_F32
 ;SI-NOT: V_SIN_F32
 
@@ -17,6 +19,22 @@ define void @test(float addrspace(1)* %out, float %x) #1 {
   ret void
 }
 
+;FUNC-LABEL: testf
+;SI-UNSAFE: 4.774
+;SI-UNSAFE: V_MUL_F32
+;SI-SAFE: V_MUL_F32
+;SI-SAFE: V_MUL_F32
+;SI: V_FRACT_F32
+;SI: V_SIN_F32
+;SI-NOT: V_SIN_F32
+
+define void @testf(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul float 3.0, %x
+  %sin = call float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
 ;FUNC-LABEL: testv
 ;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}