mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-05-14 09:38:40 +00:00
R600: Use DAG lowering pass to handle fcos/fsin
NOTE: This is a candidate for the stable branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185940 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f79b9b8593
commit
c6f13db656
@ -121,6 +121,8 @@ enum {
|
|||||||
// End AMDIL ISD Opcodes
|
// End AMDIL ISD Opcodes
|
||||||
DWORDADDR,
|
DWORDADDR,
|
||||||
FRACT,
|
FRACT,
|
||||||
|
COS_HW,
|
||||||
|
SIN_HW,
|
||||||
FMAX,
|
FMAX,
|
||||||
SMAX,
|
SMAX,
|
||||||
UMAX,
|
UMAX,
|
||||||
|
@ -26,7 +26,8 @@
|
|||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
||||||
AMDGPUTargetLowering(TM) {
|
AMDGPUTargetLowering(TM),
|
||||||
|
Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
|
||||||
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
|
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
|
||||||
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
|
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
|
||||||
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
|
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
|
||||||
@ -38,6 +39,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
|||||||
setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
|
setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
|
||||||
setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
|
setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
|
||||||
|
|
||||||
|
setOperationAction(ISD::FCOS, MVT::f32, Custom);
|
||||||
|
setOperationAction(ISD::FSIN, MVT::f32, Custom);
|
||||||
|
|
||||||
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
|
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
|
||||||
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
|
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
|
||||||
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
|
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
|
||||||
@ -473,6 +477,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
|||||||
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
|
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
|
||||||
switch (Op.getOpcode()) {
|
switch (Op.getOpcode()) {
|
||||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||||
|
case ISD::FCOS:
|
||||||
|
case ISD::FSIN: return LowerTrig(Op, DAG);
|
||||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
||||||
case ISD::SELECT: return LowerSELECT(Op, DAG);
|
case ISD::SELECT: return LowerSELECT(Op, DAG);
|
||||||
case ISD::STORE: return LowerSTORE(Op, DAG);
|
case ISD::STORE: return LowerSTORE(Op, DAG);
|
||||||
@ -723,6 +729,37 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
|
||||||
|
// On hw >= R700, COS/SIN input must be between -1. and 1.
|
||||||
|
// Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
|
||||||
|
EVT VT = Op.getValueType();
|
||||||
|
SDValue Arg = Op.getOperand(0);
|
||||||
|
SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
|
||||||
|
DAG.getNode(ISD::FADD, SDLoc(Op), VT,
|
||||||
|
DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
|
||||||
|
DAG.getConstantFP(0.15915494309, MVT::f32)),
|
||||||
|
DAG.getConstantFP(0.5, MVT::f32)));
|
||||||
|
unsigned TrigNode;
|
||||||
|
switch (Op.getOpcode()) {
|
||||||
|
case ISD::FCOS:
|
||||||
|
TrigNode = AMDGPUISD::COS_HW;
|
||||||
|
break;
|
||||||
|
case ISD::FSIN:
|
||||||
|
TrigNode = AMDGPUISD::SIN_HW;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
llvm_unreachable("Wrong trig opcode");
|
||||||
|
}
|
||||||
|
SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
|
||||||
|
DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
|
||||||
|
DAG.getConstantFP(-0.5, MVT::f32)));
|
||||||
|
if (Gen >= AMDGPUSubtarget::R700)
|
||||||
|
return TrigVal;
|
||||||
|
// On R600 hw, COS/SIN input must be between -Pi and Pi.
|
||||||
|
return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
|
||||||
|
DAG.getConstantFP(3.14159265359, MVT::f32));
|
||||||
|
}
|
||||||
|
|
||||||
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
|
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
|
||||||
return DAG.getNode(
|
return DAG.getNode(
|
||||||
ISD::SETCC,
|
ISD::SETCC,
|
||||||
|
@ -40,6 +40,7 @@ public:
|
|||||||
SmallVectorImpl<SDValue> &InVals) const;
|
SmallVectorImpl<SDValue> &InVals) const;
|
||||||
virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const;
|
virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const;
|
||||||
private:
|
private:
|
||||||
|
unsigned Gen;
|
||||||
/// Each OpenCL kernel has nine implicit parameters that are stored in the
|
/// Each OpenCL kernel has nine implicit parameters that are stored in the
|
||||||
/// first nine dwords of a Vertex Buffer. These implicit parameters are
|
/// first nine dwords of a Vertex Buffer. These implicit parameters are
|
||||||
/// lowered to load instructions which retrieve the values from the Vertex
|
/// lowered to load instructions which retrieve the values from the Vertex
|
||||||
@ -60,6 +61,7 @@ private:
|
|||||||
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
|
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
|
||||||
SelectionDAG &DAG) const;
|
SelectionDAG &DAG) const;
|
||||||
|
@ -364,6 +364,14 @@ def DOT4 : SDNode<"AMDGPUISD::DOT4",
|
|||||||
[]
|
[]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
def COS_HW : SDNode<"AMDGPUISD::COS_HW",
|
||||||
|
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
|
||||||
|
>;
|
||||||
|
|
||||||
|
def SIN_HW : SDNode<"AMDGPUISD::SIN_HW",
|
||||||
|
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
|
||||||
|
>;
|
||||||
|
|
||||||
def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
|
def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
|
||||||
|
|
||||||
def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
|
def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
|
||||||
@ -1080,14 +1088,14 @@ class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
|
|||||||
}
|
}
|
||||||
|
|
||||||
class SIN_Common <bits<11> inst> : R600_1OP <
|
class SIN_Common <bits<11> inst> : R600_1OP <
|
||||||
inst, "SIN", []>{
|
inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
|
||||||
let Trig = 1;
|
let Trig = 1;
|
||||||
let TransOnly = 1;
|
let TransOnly = 1;
|
||||||
let Itinerary = TransALU;
|
let Itinerary = TransALU;
|
||||||
}
|
}
|
||||||
|
|
||||||
class COS_Common <bits<11> inst> : R600_1OP <
|
class COS_Common <bits<11> inst> : R600_1OP <
|
||||||
inst, "COS", []> {
|
inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
|
||||||
let Trig = 1;
|
let Trig = 1;
|
||||||
let TransOnly = 1;
|
let TransOnly = 1;
|
||||||
let Itinerary = TransALU;
|
let Itinerary = TransALU;
|
||||||
@ -1228,18 +1236,6 @@ let Predicates = [isR600] in {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper pattern for normalizing inputs to trigonometric instructions for R700+
|
|
||||||
// cards.
|
|
||||||
class COS_PAT <InstR600 trig> : Pat<
|
|
||||||
(fcos f32:$src),
|
|
||||||
(trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
|
|
||||||
>;
|
|
||||||
|
|
||||||
class SIN_PAT <InstR600 trig> : Pat<
|
|
||||||
(fsin f32:$src),
|
|
||||||
(trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
|
|
||||||
>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// R700 Only instructions
|
// R700 Only instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1247,10 +1243,6 @@ class SIN_PAT <InstR600 trig> : Pat<
|
|||||||
let Predicates = [isR700] in {
|
let Predicates = [isR700] in {
|
||||||
def SIN_r700 : SIN_Common<0x6E>;
|
def SIN_r700 : SIN_Common<0x6E>;
|
||||||
def COS_r700 : COS_Common<0x6F>;
|
def COS_r700 : COS_Common<0x6F>;
|
||||||
|
|
||||||
// R700 normalizes inputs to SIN/COS the same as EG
|
|
||||||
def : SIN_PAT <SIN_r700>;
|
|
||||||
def : COS_PAT <COS_r700>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1276,8 +1268,6 @@ def SIN_eg : SIN_Common<0x8D>;
|
|||||||
def COS_eg : COS_Common<0x8E>;
|
def COS_eg : COS_Common<0x8E>;
|
||||||
|
|
||||||
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
|
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
|
||||||
def : SIN_PAT <SIN_eg>;
|
|
||||||
def : COS_PAT <COS_eg>;
|
|
||||||
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
|
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1726,8 +1716,6 @@ def COS_cm : COS_Common<0x8E>;
|
|||||||
} // End isVector = 1
|
} // End isVector = 1
|
||||||
|
|
||||||
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
|
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
|
||||||
def : SIN_PAT <SIN_cm>;
|
|
||||||
def : COS_PAT <COS_cm>;
|
|
||||||
|
|
||||||
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
|
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
|
||||||
|
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||||
|
|
||||||
|
;CHECK: MULADD_IEEE *
|
||||||
|
;CHECK: FRACT *
|
||||||
|
;CHECK: ADD *
|
||||||
;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||||
|
|
||||||
define void @test() {
|
define void @test() {
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||||
|
|
||||||
|
;CHECK: MULADD_IEEE *
|
||||||
|
;CHECK: FRACT *
|
||||||
|
;CHECK: ADD *
|
||||||
;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||||
|
|
||||||
define void @test() {
|
define void @test() {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user