mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-08 18:31:23 +00:00
77f268945e
This commit adds intrinsics and codegen support for the surface read/write and texture read instructions that take an explicit sampler parameter. Codegen operates on image handles at the PTX level, but falls back to direct replacement of handles with kernel arguments if image handles are not enabled. Note that image handles are explicitly disabled for all target architectures in this change (to be enabled later). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205907 91177308-0d34-0410-b5e6-96231b3b80d8
358 lines
12 KiB
C++
358 lines
12 KiB
C++
//===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// On Fermi, image handles are not supported. To work around this, we traverse
|
|
// the machine code and replace image handles with concrete symbols. For this
|
|
// to work reliably, inlining of all function calls must be performed.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "NVPTX.h"
|
|
#include "NVPTXMachineFunctionInfo.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/ADT/DenseSet.h"
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
class NVPTXReplaceImageHandles : public MachineFunctionPass {
|
|
private:
|
|
static char ID;
|
|
DenseSet<MachineInstr *> InstrsToRemove;
|
|
|
|
public:
|
|
NVPTXReplaceImageHandles();
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF);
|
|
private:
|
|
bool processInstr(MachineInstr &MI);
|
|
void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
|
|
};
|
|
}
|
|
|
|
// Definition of the pass's static ID member; the constructor passes it to
// the MachineFunctionPass base class.
char NVPTXReplaceImageHandles::ID = 0;
|
|
|
|
/// Constructs the pass, forwarding the static ID member to the
/// MachineFunctionPass base class.
NVPTXReplaceImageHandles::NVPTXReplaceImageHandles() : MachineFunctionPass(ID) {
}
|
|
|
|
/// Visits every instruction in \p MF and rewrites image-handle operands of
/// texture/surface instructions, then deletes the instructions that defined
/// those handles.
///
/// \returns true if at least one instruction was rewritten.
bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
  // Start from an empty work list; entries left over from a previous function
  // would refer to instructions that no longer exist.
  InstrsToRemove.clear();

  bool Modified = false;
  for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end();
       BB != BBEnd; ++BB) {
    for (MachineBasicBlock::iterator Inst = BB->begin(), InstEnd = BB->end();
         Inst != InstEnd; ++Inst) {
      if (processInstr(*Inst))
        Modified = true;
    }
  }

  // Erase the handle-access instructions gathered above. Cleanup passes do
  // not run in debug mode, yet these instructions must go away because they
  // are not valid when image handles are disabled.
  for (DenseSet<MachineInstr *>::iterator Dead = InstrsToRemove.begin(),
                                          DeadEnd = InstrsToRemove.end();
       Dead != DeadEnd; ++Dead) {
    MachineInstr *HandleDef = *Dead;
    HandleDef->eraseFromParent();
  }

  return Modified;
}
|
|
|
|
/// Rewrites the image-handle operand(s) of \p MI when it is a texture fetch,
/// surface load, surface store, or texture/surface query.
///
/// The opcode is first classified to find where the handle operand(s) sit;
/// the actual replacement is then done in one place after the switch.
///
/// \returns true if \p MI was a texture/surface instruction, false otherwise.
bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
  MachineFunction &MF = *MI.getParent()->getParent();

  // Index of the first handle operand and the number of consecutive handle
  // operands starting there.
  unsigned FirstHandleIdx = 0;
  unsigned NumHandles = 1;

  switch (MI.getOpcode()) {
  default:
    // Not a surface/texture instruction.
    return false;

  // Texture fetches: operand 4 is a texref and operand 5 is a samplerref.
  case NVPTX::TEX_1D_F32_I32:
  case NVPTX::TEX_1D_F32_F32:
  case NVPTX::TEX_1D_F32_F32_LEVEL:
  case NVPTX::TEX_1D_F32_F32_GRAD:
  case NVPTX::TEX_1D_I32_I32:
  case NVPTX::TEX_1D_I32_F32:
  case NVPTX::TEX_1D_I32_F32_LEVEL:
  case NVPTX::TEX_1D_I32_F32_GRAD:
  case NVPTX::TEX_1D_ARRAY_F32_I32:
  case NVPTX::TEX_1D_ARRAY_F32_F32:
  case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
  case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
  case NVPTX::TEX_1D_ARRAY_I32_I32:
  case NVPTX::TEX_1D_ARRAY_I32_F32:
  case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
  case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
  case NVPTX::TEX_2D_F32_I32:
  case NVPTX::TEX_2D_F32_F32:
  case NVPTX::TEX_2D_F32_F32_LEVEL:
  case NVPTX::TEX_2D_F32_F32_GRAD:
  case NVPTX::TEX_2D_I32_I32:
  case NVPTX::TEX_2D_I32_F32:
  case NVPTX::TEX_2D_I32_F32_LEVEL:
  case NVPTX::TEX_2D_I32_F32_GRAD:
  case NVPTX::TEX_2D_ARRAY_F32_I32:
  case NVPTX::TEX_2D_ARRAY_F32_F32:
  case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
  case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
  case NVPTX::TEX_2D_ARRAY_I32_I32:
  case NVPTX::TEX_2D_ARRAY_I32_F32:
  case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
  case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
  case NVPTX::TEX_3D_F32_I32:
  case NVPTX::TEX_3D_F32_F32:
  case NVPTX::TEX_3D_F32_F32_LEVEL:
  case NVPTX::TEX_3D_F32_F32_GRAD:
  case NVPTX::TEX_3D_I32_I32:
  case NVPTX::TEX_3D_I32_F32:
  case NVPTX::TEX_3D_I32_F32_LEVEL:
  case NVPTX::TEX_3D_I32_F32_GRAD:
    FirstHandleIdx = 4;
    NumHandles = 2;
    break;

  // V1 surface loads: operand 1 is a surfref.
  case NVPTX::SULD_1D_I8_TRAP:
  case NVPTX::SULD_1D_I16_TRAP:
  case NVPTX::SULD_1D_I32_TRAP:
  case NVPTX::SULD_1D_ARRAY_I8_TRAP:
  case NVPTX::SULD_1D_ARRAY_I16_TRAP:
  case NVPTX::SULD_1D_ARRAY_I32_TRAP:
  case NVPTX::SULD_2D_I8_TRAP:
  case NVPTX::SULD_2D_I16_TRAP:
  case NVPTX::SULD_2D_I32_TRAP:
  case NVPTX::SULD_2D_ARRAY_I8_TRAP:
  case NVPTX::SULD_2D_ARRAY_I16_TRAP:
  case NVPTX::SULD_2D_ARRAY_I32_TRAP:
  case NVPTX::SULD_3D_I8_TRAP:
  case NVPTX::SULD_3D_I16_TRAP:
  case NVPTX::SULD_3D_I32_TRAP:
    FirstHandleIdx = 1;
    break;

  // V2 surface loads: operand 2 is a surfref.
  case NVPTX::SULD_1D_V2I8_TRAP:
  case NVPTX::SULD_1D_V2I16_TRAP:
  case NVPTX::SULD_1D_V2I32_TRAP:
  case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
  case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
  case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
  case NVPTX::SULD_2D_V2I8_TRAP:
  case NVPTX::SULD_2D_V2I16_TRAP:
  case NVPTX::SULD_2D_V2I32_TRAP:
  case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
  case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
  case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
  case NVPTX::SULD_3D_V2I8_TRAP:
  case NVPTX::SULD_3D_V2I16_TRAP:
  case NVPTX::SULD_3D_V2I32_TRAP:
    FirstHandleIdx = 2;
    break;

  // V4 surface loads: operand 4 is a surfref.
  case NVPTX::SULD_1D_V4I8_TRAP:
  case NVPTX::SULD_1D_V4I16_TRAP:
  case NVPTX::SULD_1D_V4I32_TRAP:
  case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
  case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
  case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
  case NVPTX::SULD_2D_V4I8_TRAP:
  case NVPTX::SULD_2D_V4I16_TRAP:
  case NVPTX::SULD_2D_V4I32_TRAP:
  case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
  case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
  case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
  case NVPTX::SULD_3D_V4I8_TRAP:
  case NVPTX::SULD_3D_V4I16_TRAP:
  case NVPTX::SULD_3D_V4I32_TRAP:
    FirstHandleIdx = 4;
    break;

  // Surface stores: operand 0 is a surfref.
  case NVPTX::SUST_B_1D_B8_TRAP:
  case NVPTX::SUST_B_1D_B16_TRAP:
  case NVPTX::SUST_B_1D_B32_TRAP:
  case NVPTX::SUST_B_1D_V2B8_TRAP:
  case NVPTX::SUST_B_1D_V2B16_TRAP:
  case NVPTX::SUST_B_1D_V2B32_TRAP:
  case NVPTX::SUST_B_1D_V4B8_TRAP:
  case NVPTX::SUST_B_1D_V4B16_TRAP:
  case NVPTX::SUST_B_1D_V4B32_TRAP:
  case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
  case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
  case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
  case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
  case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
  case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
  case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
  case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
  case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
  case NVPTX::SUST_B_2D_B8_TRAP:
  case NVPTX::SUST_B_2D_B16_TRAP:
  case NVPTX::SUST_B_2D_B32_TRAP:
  case NVPTX::SUST_B_2D_V2B8_TRAP:
  case NVPTX::SUST_B_2D_V2B16_TRAP:
  case NVPTX::SUST_B_2D_V2B32_TRAP:
  case NVPTX::SUST_B_2D_V4B8_TRAP:
  case NVPTX::SUST_B_2D_V4B16_TRAP:
  case NVPTX::SUST_B_2D_V4B32_TRAP:
  case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
  case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
  case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
  case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
  case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
  case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
  case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
  case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
  case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
  case NVPTX::SUST_B_3D_B8_TRAP:
  case NVPTX::SUST_B_3D_B16_TRAP:
  case NVPTX::SUST_B_3D_B32_TRAP:
  case NVPTX::SUST_B_3D_V2B8_TRAP:
  case NVPTX::SUST_B_3D_V2B16_TRAP:
  case NVPTX::SUST_B_3D_V2B32_TRAP:
  case NVPTX::SUST_B_3D_V4B8_TRAP:
  case NVPTX::SUST_B_3D_V4B16_TRAP:
  case NVPTX::SUST_B_3D_V4B32_TRAP:
  case NVPTX::SUST_P_1D_B8_TRAP:
  case NVPTX::SUST_P_1D_B16_TRAP:
  case NVPTX::SUST_P_1D_B32_TRAP:
  case NVPTX::SUST_P_1D_V2B8_TRAP:
  case NVPTX::SUST_P_1D_V2B16_TRAP:
  case NVPTX::SUST_P_1D_V2B32_TRAP:
  case NVPTX::SUST_P_1D_V4B8_TRAP:
  case NVPTX::SUST_P_1D_V4B16_TRAP:
  case NVPTX::SUST_P_1D_V4B32_TRAP:
  case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
  case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
  case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
  case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
  case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
  case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
  case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
  case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
  case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
  case NVPTX::SUST_P_2D_B8_TRAP:
  case NVPTX::SUST_P_2D_B16_TRAP:
  case NVPTX::SUST_P_2D_B32_TRAP:
  case NVPTX::SUST_P_2D_V2B8_TRAP:
  case NVPTX::SUST_P_2D_V2B16_TRAP:
  case NVPTX::SUST_P_2D_V2B32_TRAP:
  case NVPTX::SUST_P_2D_V4B8_TRAP:
  case NVPTX::SUST_P_2D_V4B16_TRAP:
  case NVPTX::SUST_P_2D_V4B32_TRAP:
  case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
  case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
  case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
  case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
  case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
  case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
  case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
  case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
  case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
  case NVPTX::SUST_P_3D_B8_TRAP:
  case NVPTX::SUST_P_3D_B16_TRAP:
  case NVPTX::SUST_P_3D_B32_TRAP:
  case NVPTX::SUST_P_3D_V2B8_TRAP:
  case NVPTX::SUST_P_3D_V2B16_TRAP:
  case NVPTX::SUST_P_3D_V2B32_TRAP:
  case NVPTX::SUST_P_3D_V4B8_TRAP:
  case NVPTX::SUST_P_3D_V4B16_TRAP:
  case NVPTX::SUST_P_3D_V4B32_TRAP:
    FirstHandleIdx = 0;
    break;

  // Texture/surface queries: operand 1 is a surfref/texref.
  case NVPTX::TXQ_CHANNEL_ORDER:
  case NVPTX::TXQ_CHANNEL_DATA_TYPE:
  case NVPTX::TXQ_WIDTH:
  case NVPTX::TXQ_HEIGHT:
  case NVPTX::TXQ_DEPTH:
  case NVPTX::TXQ_ARRAY_SIZE:
  case NVPTX::TXQ_NUM_SAMPLES:
  case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
  case NVPTX::SUQ_CHANNEL_ORDER:
  case NVPTX::SUQ_CHANNEL_DATA_TYPE:
  case NVPTX::SUQ_WIDTH:
  case NVPTX::SUQ_HEIGHT:
  case NVPTX::SUQ_DEPTH:
  case NVPTX::SUQ_ARRAY_SIZE:
    FirstHandleIdx = 1;
    break;
  }

  // Replace the handle operand(s) in ascending operand order, so a texture
  // fetch has its texref handled before its samplerref.
  for (unsigned Idx = FirstHandleIdx, End = FirstHandleIdx + NumHandles;
       Idx != End; ++Idx)
    replaceImageHandle(MI.getOperand(Idx), MF);

  return true;
}
|
|
|
|
void NVPTXReplaceImageHandles::
|
|
replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
|
|
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
|
|
// Which instruction defines the handle?
|
|
MachineInstr *MI = MRI.getVRegDef(Op.getReg());
|
|
assert(MI && "No def for image handle vreg?");
|
|
MachineInstr &TexHandleDef = *MI;
|
|
|
|
switch (TexHandleDef.getOpcode()) {
|
|
case NVPTX::LD_i64_avar: {
|
|
// The handle is a parameter value being loaded, replace with the
|
|
// parameter symbol
|
|
assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
|
|
StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
|
|
std::string ParamBaseName = MF.getName();
|
|
ParamBaseName += "_param_";
|
|
assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference");
|
|
unsigned Param = atoi(Sym.data()+ParamBaseName.size());
|
|
std::string NewSym;
|
|
raw_string_ostream NewSymStr(NewSym);
|
|
NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
|
|
Op.ChangeToImmediate(
|
|
MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
|
|
InstrsToRemove.insert(&TexHandleDef);
|
|
break;
|
|
}
|
|
case NVPTX::texsurf_handles: {
|
|
// The handle is a global variable, replace with the global variable name
|
|
assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
|
|
const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
|
|
assert(GV->hasName() && "Global sampler must be named!");
|
|
Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
|
|
InstrsToRemove.insert(&TexHandleDef);
|
|
break;
|
|
}
|
|
default:
|
|
llvm_unreachable("Unknown instruction operating on handle");
|
|
}
|
|
}
|
|
|
|
/// Factory: heap-allocates a new NVPTXReplaceImageHandles pass and returns it
/// through the MachineFunctionPass base pointer.
MachineFunctionPass *llvm::createNVPTXReplaceImageHandlesPass() {
  MachineFunctionPass *Pass = new NVPTXReplaceImageHandles();
  return Pass;
}
|