mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-08 19:25:47 +00:00
R600/SI: implement range reduction for sin/cos
These instructions can only take a limited input range, and return the constant value 1 when the input is out of range. We should do range reduction to be able to process arbitrary values. Use a FRACT instruction after normalization to achieve this. Also add a test for constant folding with the lowered code with unsafe-fp-math enabled. v2: use DAG lowering instead of intrinsic, adapt test. v3: calculate constant, fold pattern into instruction definition. v4: misc style fixes, add sin-fold testcase, cosmetics. Patch by Grigori Goronzy. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213458 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -34,6 +34,9 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
|
|||||||
// This argument to this node is a dword address.
|
// This argument to this node is a dword address.
|
||||||
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
|
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
|
||||||
|
|
||||||
|
def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
|
||||||
|
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
|
||||||
|
|
||||||
// out = a - floor(a)
|
// out = a - floor(a)
|
||||||
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
|
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
|
||||||
|
|
||||||
|
@@ -80,6 +80,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
|||||||
setOperationAction(ISD::SUBC, MVT::i32, Legal);
|
setOperationAction(ISD::SUBC, MVT::i32, Legal);
|
||||||
setOperationAction(ISD::SUBE, MVT::i32, Legal);
|
setOperationAction(ISD::SUBE, MVT::i32, Legal);
|
||||||
|
|
||||||
|
setOperationAction(ISD::FSIN, MVT::f32, Custom);
|
||||||
|
setOperationAction(ISD::FCOS, MVT::f32, Custom);
|
||||||
|
|
||||||
// We need to custom lower vector stores from local memory
|
// We need to custom lower vector stores from local memory
|
||||||
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
|
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
|
||||||
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
|
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
|
||||||
@@ -637,6 +640,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case ISD::FSIN:
|
||||||
|
case ISD::FCOS:
|
||||||
|
return LowerTrig(Op, DAG);
|
||||||
case ISD::SELECT: return LowerSELECT(Op, DAG);
|
case ISD::SELECT: return LowerSELECT(Op, DAG);
|
||||||
case ISD::FDIV: return LowerFDIV(Op, DAG);
|
case ISD::FDIV: return LowerFDIV(Op, DAG);
|
||||||
case ISD::STORE: return LowerSTORE(Op, DAG);
|
case ISD::STORE: return LowerSTORE(Op, DAG);
|
||||||
@@ -1116,6 +1122,23 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return Chain;
|
return Chain;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Custom lowering for ISD::FSIN and ISD::FCOS.
///
/// The hardware sin/cos nodes only accept a limited input range, so the
/// operand is first normalized by multiplying with 1/(2*pi) and then range
/// reduced with FRACT (out = a - floor(a)). The reduced value is fed to
/// SIN_HW or COS_HW depending on the opcode of \p Op.
///
/// \param Op   The FSIN or FCOS node being lowered.
/// \param DAG  The selection DAG in which the replacement nodes are created.
/// \returns The SIN_HW / COS_HW node that replaces \p Op.
SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Input = Op.getOperand(0);

  // Normalization step: scale the input by 1/(2*pi).
  SDValue TwoPiInv = DAG.getConstantFP(0.5 / M_PI, VT);
  SDValue Scaled = DAG.getNode(ISD::FMUL, DL, VT, Input, TwoPiInv);

  // Range reduction: keep only the fractional part of the scaled input.
  SDValue Reduced = DAG.getNode(AMDGPUISD::FRACT, DL, VT, Scaled);

  // Select the hardware node that matches the generic opcode.
  unsigned HWOpcode;
  switch (Op.getOpcode()) {
  case ISD::FSIN:
    HWOpcode = AMDGPUISD::SIN_HW;
    break;
  case ISD::FCOS:
    HWOpcode = AMDGPUISD::COS_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }

  return DAG.getNode(HWOpcode, DL, VT, Reduced);
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Custom DAG optimizations
|
// Custom DAG optimizations
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@@ -32,6 +32,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
|||||||
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
bool foldImm(SDValue &Operand, int32_t &Immediate,
|
bool foldImm(SDValue &Operand, int32_t &Immediate,
|
||||||
|
@@ -1167,8 +1167,12 @@ defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
|
|||||||
defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
|
defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
|
||||||
[(set f64:$dst, (fsqrt f64:$src0))]
|
[(set f64:$dst, (fsqrt f64:$src0))]
|
||||||
>;
|
>;
|
||||||
defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
|
defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32",
|
||||||
defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
|
[(set f32:$dst, (AMDGPUsin f32:$src0))]
|
||||||
|
>;
|
||||||
|
defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32",
|
||||||
|
[(set f32:$dst, (AMDGPUcos f32:$src0))]
|
||||||
|
>;
|
||||||
defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
|
defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
|
||||||
defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
|
defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
|
||||||
defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
|
defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
|
||||||
@@ -2342,16 +2346,6 @@ def : Pat<
|
|||||||
(V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0))
|
(V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
|
||||||
(fcos f32:$src0),
|
|
||||||
(V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
|
|
||||||
>;
|
|
||||||
|
|
||||||
def : Pat <
|
|
||||||
(fsin f32:$src0),
|
|
||||||
(V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
|
|
||||||
>;
|
|
||||||
|
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_AMDGPU_cube v4f32:$src),
|
(int_AMDGPU_cube v4f32:$src),
|
||||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
|
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
|
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||||
;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
|
;RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
|
||||||
|
;RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
|
||||||
|
|
||||||
;FUNC-LABEL: test
|
;FUNC-LABEL: test
|
||||||
;EG: MULADD_IEEE *
|
;EG: MULADD_IEEE *
|
||||||
@@ -8,6 +9,7 @@
|
|||||||
;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||||
;EG-NOT: SIN
|
;EG-NOT: SIN
|
||||||
;SI: V_MUL_F32
|
;SI: V_MUL_F32
|
||||||
|
;SI: V_FRACT_F32
|
||||||
;SI: V_SIN_F32
|
;SI: V_SIN_F32
|
||||||
;SI-NOT: V_SIN_F32
|
;SI-NOT: V_SIN_F32
|
||||||
|
|
||||||
@@ -17,6 +19,22 @@ define void @test(float addrspace(1)* %out, float %x) #1 {
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;FUNC-LABEL: testf
|
||||||
|
;SI-UNSAFE: 4.774
|
||||||
|
;SI-UNSAFE: V_MUL_F32
|
||||||
|
;SI-SAFE: V_MUL_F32
|
||||||
|
;SI-SAFE: V_MUL_F32
|
||||||
|
;SI: V_FRACT_F32
|
||||||
|
;SI: V_SIN_F32
|
||||||
|
;SI-NOT: V_SIN_F32
|
||||||
|
|
||||||
|
define void @testf(float addrspace(1)* %out, float %x) #1 {
|
||||||
|
%y = fmul float 3.0, %x
|
||||||
|
%sin = call float @llvm.sin.f32(float %y)
|
||||||
|
store float %sin, float addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
;FUNC-LABEL: testv
|
;FUNC-LABEL: testv
|
||||||
;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||||
;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||||
|
Reference in New Issue
Block a user