mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-01 15:17:25 +00:00
R600/SI: rework input interpolation v2
v2: update CMakeLists.txt as well Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176626 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
|
||||
#include "SIISelLowering.h"
|
||||
#include "AMDIL.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
@@ -81,14 +82,32 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
FunctionType *FType = MF.getFunction()->getFunctionType();
|
||||
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
assert(CallConv == CallingConv::C);
|
||||
|
||||
SmallVector<ISD::InputArg, 16> Splits;
|
||||
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
|
||||
uint32_t Skipped = 0;
|
||||
|
||||
for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
|
||||
const ISD::InputArg &Arg = Ins[i];
|
||||
|
||||
// Split vertices into their elements
|
||||
// First check if it's a PS input addr
|
||||
if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
|
||||
|
||||
assert((PSInputNum <= 15) && "Too many PS inputs!");
|
||||
|
||||
if (!Arg.Used) {
|
||||
// We can savely skip PS inputs
|
||||
Skipped |= 1 << i;
|
||||
++PSInputNum;
|
||||
continue;
|
||||
}
|
||||
|
||||
Info->PSInputAddr |= 1 << PSInputNum++;
|
||||
}
|
||||
|
||||
// Second split vertices into their elements
|
||||
if (Arg.VT.isVector()) {
|
||||
ISD::InputArg NewArg = Arg;
|
||||
NewArg.Flags.setSplit();
|
||||
@@ -114,10 +133,22 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
|
||||
getTargetMachine(), ArgLocs, *DAG.getContext());
|
||||
|
||||
// At least one interpolation mode must be enabled or else the GPU will hang.
|
||||
if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
|
||||
Info->PSInputAddr |= 1;
|
||||
CCInfo.AllocateReg(AMDGPU::VGPR0);
|
||||
CCInfo.AllocateReg(AMDGPU::VGPR1);
|
||||
}
|
||||
|
||||
AnalyzeFormalArguments(CCInfo, Splits);
|
||||
|
||||
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
|
||||
|
||||
if (Skipped & (1 << i)) {
|
||||
InVals.push_back(SDValue());
|
||||
continue;
|
||||
}
|
||||
|
||||
CCValAssign &VA = ArgLocs[ArgIdx++];
|
||||
assert(VA.isRegLoc() && "Parameter must be in a register!");
|
||||
|
||||
@@ -177,9 +208,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
default:
|
||||
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||
case AMDGPU::BRANCH: return BB;
|
||||
case AMDGPU::SI_INTERP:
|
||||
LowerSI_INTERP(MI, *BB, I, MRI);
|
||||
break;
|
||||
case AMDGPU::SI_WQM:
|
||||
LowerSI_WQM(MI, *BB, I, MRI);
|
||||
break;
|
||||
@@ -195,37 +223,6 @@ void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
|
||||
unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
|
||||
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
|
||||
MachineOperand dst = MI->getOperand(0);
|
||||
MachineOperand iReg = MI->getOperand(1);
|
||||
MachineOperand jReg = MI->getOperand(2);
|
||||
MachineOperand attr_chan = MI->getOperand(3);
|
||||
MachineOperand attr = MI->getOperand(4);
|
||||
MachineOperand params = MI->getOperand(5);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
|
||||
.addOperand(params);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
|
||||
.addOperand(iReg)
|
||||
.addOperand(attr_chan)
|
||||
.addOperand(attr)
|
||||
.addReg(M0);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
|
||||
.addOperand(dst)
|
||||
.addReg(tmp)
|
||||
.addOperand(jReg)
|
||||
.addOperand(attr_chan)
|
||||
.addOperand(attr)
|
||||
.addReg(M0);
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
EVT SITargetLowering::getSetCCResultType(EVT VT) const {
|
||||
return MVT::i1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user