mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-05 13:26:55 +00:00
R600: Use function inputs to represent data stored in gpr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194425 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -42,6 +42,17 @@ def CC_SI : CallingConv<[
|
||||
|
||||
]>;
|
||||
|
||||
// Calling convention for R600.
// The only rule here covers arguments that carry the inreg attribute and have
// type v4f32 or v4i32: each such argument is assigned the first register from
// the list T0_XYZW .. T32_XYZW that has not been allocated yet. No rule in this
// definition handles arguments of any other type or without inreg.
// CC_AMDGPU delegates to this convention when the subtarget generation is
// older than SOUTHERN_ISLANDS.
|
||||
def CC_R600 : CallingConv<[
|
||||
CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
|
||||
T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
|
||||
T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
|
||||
T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
|
||||
T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
|
||||
T30_XYZW, T31_XYZW, T32_XYZW
|
||||
]>>>
|
||||
]>;
|
||||
|
||||
// Calling convention for compute kernels
|
||||
def CC_AMDGPU_Kernel : CallingConv<[
|
||||
CCCustom<"allocateStack">
|
||||
@@ -57,5 +68,7 @@ def CC_AMDGPU : CallingConv<[
|
||||
"State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->"
|
||||
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
|
||||
".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
|
||||
".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>,
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
|
||||
".getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_R600>>
|
||||
]>;
|
||||
|
@@ -554,51 +554,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
SDLoc DL(Op);
|
||||
switch(IntrinsicID) {
|
||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
case AMDGPUIntrinsic::R600_load_input: {
|
||||
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
MRI.addLiveIn(Reg);
|
||||
return DAG.getCopyFromReg(DAG.getEntryNode(),
|
||||
SDLoc(DAG.getEntryNode()), Reg, VT);
|
||||
}
|
||||
|
||||
case AMDGPUIntrinsic::R600_interp_input: {
|
||||
case AMDGPUIntrinsic::R600_interp_xy:
|
||||
case AMDGPUIntrinsic::R600_interp_zw: {
|
||||
int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
|
||||
MachineSDNode *interp;
|
||||
if (ijb < 0) {
|
||||
const MachineFunction &MF = DAG.getMachineFunction();
|
||||
const R600InstrInfo *TII =
|
||||
static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
|
||||
interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
|
||||
MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
|
||||
return DAG.getTargetExtractSubreg(
|
||||
TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
|
||||
DL, MVT::f32, SDValue(interp, 0));
|
||||
}
|
||||
SDValue RegisterINode = Op.getOperand(2);
|
||||
SDValue RegisterJNode = Op.getOperand(3);
|
||||
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
|
||||
unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
|
||||
MRI.addLiveIn(RegisterI);
|
||||
MRI.addLiveIn(RegisterJ);
|
||||
SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
|
||||
SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
|
||||
SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
|
||||
SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
|
||||
|
||||
if (slot % 4 < 2)
|
||||
if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
|
||||
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
|
||||
MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
|
||||
MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
|
||||
RegisterJNode, RegisterINode);
|
||||
else
|
||||
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
|
||||
MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
|
||||
MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
|
||||
RegisterJNode, RegisterINode);
|
||||
return SDValue(interp, slot % 2);
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
|
||||
SDValue(interp, 0), SDValue(interp, 1));
|
||||
}
|
||||
case AMDGPUIntrinsic::R600_tex:
|
||||
case AMDGPUIntrinsic::R600_texc:
|
||||
@@ -1339,6 +1311,8 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
|
||||
getTargetMachine(), ArgLocs, *DAG.getContext());
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
|
||||
|
||||
SmallVector<ISD::InputArg, 8> LocalIns;
|
||||
|
||||
@@ -1352,6 +1326,13 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||
EVT VT = Ins[i].VT;
|
||||
EVT MemVT = LocalIns[i].VT;
|
||||
|
||||
if (ShaderType != ShaderType::COMPUTE) {
|
||||
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
|
||||
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
|
||||
InVals.push_back(Register);
|
||||
continue;
|
||||
}
|
||||
|
||||
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
||||
AMDGPUAS::CONSTANT_BUFFER_0);
|
||||
|
||||
|
@@ -418,7 +418,7 @@ def INTERP_VEC_LOAD : AMDGPUShaderInst <
|
||||
(outs R600_Reg128:$dst),
|
||||
(ins i32imm:$src0),
|
||||
"INTERP_LOAD $src0 : $dst",
|
||||
[]>;
|
||||
[(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>;
|
||||
|
||||
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
|
||||
let bank_swizzle = 5;
|
||||
|
@@ -39,10 +39,14 @@ let TargetPrefix = "R600", isTarget = 1 in {
|
||||
llvm_i32_ty // coord_type_w
|
||||
], [IntrNoMem]>;
|
||||
|
||||
// Takes one i32 operand and returns a float; declared IntrNoMem.
// The R600_load_input case in R600TargetLowering::LowerOperation reads that
// operand as a constant index into R600_TReg32RegClass, marks the selected
// register live-in, and returns a copy from it.
def int_R600_load_input :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
// Takes two i32 operands and returns a float; declared IntrNoMem.
// The R600_interp_input lowering reads Op.getOperand(1) as a zero-extended
// constant `slot` and Op.getOperand(2) as a sign-extended constant `ijb`;
// a negative `ijb` takes the INTERP_VEC_LOAD path.
def int_R600_interp_input :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
// Takes one i32 operand and returns a v4f32; declared IntrNoMem.
// The INTERP_VEC_LOAD instruction pattern matches this intrinsic with an
// immediate operand: (int_R600_interp_const imm:$src0).
def int_R600_interp_const :
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
// Takes (i32, float, float) and returns a v2f32; declared IntrNoMem.
// The lowering reads Op.getOperand(1) as a constant `slot` and passes
// Op.getOperand(2) / Op.getOperand(3) as the I and J inputs of INTERP_PAIR_XY;
// the two f32 results are packed into the v2f32 with BUILD_VECTOR.
def int_R600_interp_xy :
|
||||
Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
// Takes (i32, float, float) and returns a v2f32; declared IntrNoMem.
// Lowered like int_R600_interp_xy, except that the machine node created is
// INTERP_PAIR_ZW; its two f32 results are packed into the v2f32 with
// BUILD_VECTOR.
def int_R600_interp_zw :
|
||||
Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_R600_load_texbuf :
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_R600_tex : TextureIntrinsicFloatInput;
|
||||
|
Reference in New Issue
Block a user