[NVPTX] Add more surface/texture intrinsics, including CUDA unified texture fetch

This also uses TSFlags to mark machine instructions that are surface/texture
accesses, and to record the vector width of surface operations.  These flags
are used to simplify some of the switch statements that need to detect
surface/texture instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213256 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Justin Holewinski
2014-07-17 11:59:04 +00:00
parent 58589cefee
commit d6663f565c
14 changed files with 8449 additions and 878 deletions

File diff suppressed because it is too large Load Diff

View File

@ -84,6 +84,17 @@ __attribute__((unused))
#endif #endif
static const char *NamedMDForAnnotations = "nvvm.annotations"; static const char *NamedMDForAnnotations = "nvvm.annotations";
namespace NVPTXII {
// Masks and shifts for the surface/texture fields of an instruction's
// TSFlags word.  These must be kept in sync with TSFlags in
// NVPTXInstrFormats.td.
enum {
  // Bit 7: the instruction is a texture fetch.
  IsTexFlag = 1 << 7,
  // Bits 9-8: non-zero for a surface load.  The vector size is recovered as
  // 1 << (((TSFlags & IsSuldMask) >> IsSuldShift) - 1).
  IsSuldMask = 3 << 8,
  IsSuldShift = 8,
  // Bit 10: the instruction is a surface store.
  IsSustFlag = 1 << 10,
  // Bit 11: the instruction is a surface/texture query.
  IsSurfTexQueryFlag = 1 << 11,
  // Bit 12: the texture instruction uses unified mode.
  IsTexModeUnifiedFlag = 1 << 12
};
}
} }
#endif #endif

View File

@ -330,253 +330,51 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
unsigned OpNo, MCOperand &MCOp) { unsigned OpNo, MCOperand &MCOp) {
const MachineOperand &MO = MI->getOperand(OpNo); const MachineOperand &MO = MI->getOperand(OpNo);
const MCInstrDesc &MCID = MI->getDesc();
switch (MI->getOpcode()) { if (MCID.TSFlags & NVPTXII::IsTexFlag) {
default: return false;
case NVPTX::TEX_1D_F32_I32:
case NVPTX::TEX_1D_F32_F32:
case NVPTX::TEX_1D_F32_F32_LEVEL:
case NVPTX::TEX_1D_F32_F32_GRAD:
case NVPTX::TEX_1D_I32_I32:
case NVPTX::TEX_1D_I32_F32:
case NVPTX::TEX_1D_I32_F32_LEVEL:
case NVPTX::TEX_1D_I32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_F32_I32:
case NVPTX::TEX_1D_ARRAY_F32_F32:
case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_I32_I32:
case NVPTX::TEX_1D_ARRAY_I32_F32:
case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_2D_F32_I32:
case NVPTX::TEX_2D_F32_F32:
case NVPTX::TEX_2D_F32_F32_LEVEL:
case NVPTX::TEX_2D_F32_F32_GRAD:
case NVPTX::TEX_2D_I32_I32:
case NVPTX::TEX_2D_I32_F32:
case NVPTX::TEX_2D_I32_F32_LEVEL:
case NVPTX::TEX_2D_I32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_F32_I32:
case NVPTX::TEX_2D_ARRAY_F32_F32:
case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_I32_I32:
case NVPTX::TEX_2D_ARRAY_I32_F32:
case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_3D_F32_I32:
case NVPTX::TEX_3D_F32_F32:
case NVPTX::TEX_3D_F32_F32_LEVEL:
case NVPTX::TEX_3D_F32_F32_GRAD:
case NVPTX::TEX_3D_I32_I32:
case NVPTX::TEX_3D_I32_F32:
case NVPTX::TEX_3D_I32_F32_LEVEL:
case NVPTX::TEX_3D_I32_F32_GRAD:
{
// This is a texture fetch, so operand 4 is a texref and operand 5 is // This is a texture fetch, so operand 4 is a texref and operand 5 is
// a samplerref // a samplerref
if (OpNo == 4) { if (OpNo == 4 && MO.isImm()) {
lowerImageHandleSymbol(MO.getImm(), MCOp); lowerImageHandleSymbol(MO.getImm(), MCOp);
return true; return true;
} }
if (OpNo == 5) { if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
lowerImageHandleSymbol(MO.getImm(), MCOp); lowerImageHandleSymbol(MO.getImm(), MCOp);
return true; return true;
} }
return false; return false;
} } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
case NVPTX::SULD_1D_I8_TRAP: unsigned VecSize =
case NVPTX::SULD_1D_I16_TRAP: 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
case NVPTX::SULD_1D_I32_TRAP:
case NVPTX::SULD_1D_ARRAY_I8_TRAP: // For a surface load of vector size N, the Nth operand will be the surfref
case NVPTX::SULD_1D_ARRAY_I16_TRAP: if (OpNo == VecSize && MO.isImm()) {
case NVPTX::SULD_1D_ARRAY_I32_TRAP:
case NVPTX::SULD_2D_I8_TRAP:
case NVPTX::SULD_2D_I16_TRAP:
case NVPTX::SULD_2D_I32_TRAP:
case NVPTX::SULD_2D_ARRAY_I8_TRAP:
case NVPTX::SULD_2D_ARRAY_I16_TRAP:
case NVPTX::SULD_2D_ARRAY_I32_TRAP:
case NVPTX::SULD_3D_I8_TRAP:
case NVPTX::SULD_3D_I16_TRAP:
case NVPTX::SULD_3D_I32_TRAP: {
// This is a V1 surface load, so operand 1 is a surfref
if (OpNo == 1) {
lowerImageHandleSymbol(MO.getImm(), MCOp); lowerImageHandleSymbol(MO.getImm(), MCOp);
return true; return true;
} }
return false; return false;
} } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
case NVPTX::SULD_1D_V2I8_TRAP:
case NVPTX::SULD_1D_V2I16_TRAP:
case NVPTX::SULD_1D_V2I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_2D_V2I8_TRAP:
case NVPTX::SULD_2D_V2I16_TRAP:
case NVPTX::SULD_2D_V2I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_3D_V2I8_TRAP:
case NVPTX::SULD_3D_V2I16_TRAP:
case NVPTX::SULD_3D_V2I32_TRAP: {
// This is a V2 surface load, so operand 2 is a surfref
if (OpNo == 2) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
}
case NVPTX::SULD_1D_V4I8_TRAP:
case NVPTX::SULD_1D_V4I16_TRAP:
case NVPTX::SULD_1D_V4I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_2D_V4I8_TRAP:
case NVPTX::SULD_2D_V4I16_TRAP:
case NVPTX::SULD_2D_V4I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_3D_V4I8_TRAP:
case NVPTX::SULD_3D_V4I16_TRAP:
case NVPTX::SULD_3D_V4I32_TRAP: {
// This is a V4 surface load, so operand 4 is a surfref
if (OpNo == 4) {
lowerImageHandleSymbol(MO.getImm(), MCOp);
return true;
}
return false;
}
case NVPTX::SUST_B_1D_B8_TRAP:
case NVPTX::SUST_B_1D_B16_TRAP:
case NVPTX::SUST_B_1D_B32_TRAP:
case NVPTX::SUST_B_1D_V2B8_TRAP:
case NVPTX::SUST_B_1D_V2B16_TRAP:
case NVPTX::SUST_B_1D_V2B32_TRAP:
case NVPTX::SUST_B_1D_V4B8_TRAP:
case NVPTX::SUST_B_1D_V4B16_TRAP:
case NVPTX::SUST_B_1D_V4B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_2D_B8_TRAP:
case NVPTX::SUST_B_2D_B16_TRAP:
case NVPTX::SUST_B_2D_B32_TRAP:
case NVPTX::SUST_B_2D_V2B8_TRAP:
case NVPTX::SUST_B_2D_V2B16_TRAP:
case NVPTX::SUST_B_2D_V2B32_TRAP:
case NVPTX::SUST_B_2D_V4B8_TRAP:
case NVPTX::SUST_B_2D_V4B16_TRAP:
case NVPTX::SUST_B_2D_V4B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_3D_B8_TRAP:
case NVPTX::SUST_B_3D_B16_TRAP:
case NVPTX::SUST_B_3D_B32_TRAP:
case NVPTX::SUST_B_3D_V2B8_TRAP:
case NVPTX::SUST_B_3D_V2B16_TRAP:
case NVPTX::SUST_B_3D_V2B32_TRAP:
case NVPTX::SUST_B_3D_V4B8_TRAP:
case NVPTX::SUST_B_3D_V4B16_TRAP:
case NVPTX::SUST_B_3D_V4B32_TRAP:
case NVPTX::SUST_P_1D_B8_TRAP:
case NVPTX::SUST_P_1D_B16_TRAP:
case NVPTX::SUST_P_1D_B32_TRAP:
case NVPTX::SUST_P_1D_V2B8_TRAP:
case NVPTX::SUST_P_1D_V2B16_TRAP:
case NVPTX::SUST_P_1D_V2B32_TRAP:
case NVPTX::SUST_P_1D_V4B8_TRAP:
case NVPTX::SUST_P_1D_V4B16_TRAP:
case NVPTX::SUST_P_1D_V4B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_2D_B8_TRAP:
case NVPTX::SUST_P_2D_B16_TRAP:
case NVPTX::SUST_P_2D_B32_TRAP:
case NVPTX::SUST_P_2D_V2B8_TRAP:
case NVPTX::SUST_P_2D_V2B16_TRAP:
case NVPTX::SUST_P_2D_V2B32_TRAP:
case NVPTX::SUST_P_2D_V4B8_TRAP:
case NVPTX::SUST_P_2D_V4B16_TRAP:
case NVPTX::SUST_P_2D_V4B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_3D_B8_TRAP:
case NVPTX::SUST_P_3D_B16_TRAP:
case NVPTX::SUST_P_3D_B32_TRAP:
case NVPTX::SUST_P_3D_V2B8_TRAP:
case NVPTX::SUST_P_3D_V2B16_TRAP:
case NVPTX::SUST_P_3D_V2B32_TRAP:
case NVPTX::SUST_P_3D_V4B8_TRAP:
case NVPTX::SUST_P_3D_V4B16_TRAP:
case NVPTX::SUST_P_3D_V4B32_TRAP: {
// This is a surface store, so operand 0 is a surfref // This is a surface store, so operand 0 is a surfref
if (OpNo == 0) { if (OpNo == 0 && MO.isImm()) {
lowerImageHandleSymbol(MO.getImm(), MCOp); lowerImageHandleSymbol(MO.getImm(), MCOp);
return true; return true;
} }
return false; return false;
} } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
case NVPTX::TXQ_CHANNEL_ORDER:
case NVPTX::TXQ_CHANNEL_DATA_TYPE:
case NVPTX::TXQ_WIDTH:
case NVPTX::TXQ_HEIGHT:
case NVPTX::TXQ_DEPTH:
case NVPTX::TXQ_ARRAY_SIZE:
case NVPTX::TXQ_NUM_SAMPLES:
case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
case NVPTX::SUQ_CHANNEL_ORDER:
case NVPTX::SUQ_CHANNEL_DATA_TYPE:
case NVPTX::SUQ_WIDTH:
case NVPTX::SUQ_HEIGHT:
case NVPTX::SUQ_DEPTH:
case NVPTX::SUQ_ARRAY_SIZE: {
// This is a query, so operand 1 is a surfref/texref // This is a query, so operand 1 is a surfref/texref
if (OpNo == 1) { if (OpNo == 1 && MO.isImm()) {
lowerImageHandleSymbol(MO.getImm(), MCOp); lowerImageHandleSymbol(MO.getImm(), MCOp);
return true; return true;
} }
return false; return false;
} }
}
return false;
} }
void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -77,54 +77,244 @@ enum NodeType {
StoreRetvalV4, StoreRetvalV4,
// Texture intrinsics // Texture intrinsics
Tex1DFloatI32, Tex1DFloatS32,
Tex1DFloatFloat, Tex1DFloatFloat,
Tex1DFloatFloatLevel, Tex1DFloatFloatLevel,
Tex1DFloatFloatGrad, Tex1DFloatFloatGrad,
Tex1DI32I32, Tex1DS32S32,
Tex1DI32Float, Tex1DS32Float,
Tex1DI32FloatLevel, Tex1DS32FloatLevel,
Tex1DI32FloatGrad, Tex1DS32FloatGrad,
Tex1DArrayFloatI32, Tex1DU32S32,
Tex1DU32Float,
Tex1DU32FloatLevel,
Tex1DU32FloatGrad,
Tex1DArrayFloatS32,
Tex1DArrayFloatFloat, Tex1DArrayFloatFloat,
Tex1DArrayFloatFloatLevel, Tex1DArrayFloatFloatLevel,
Tex1DArrayFloatFloatGrad, Tex1DArrayFloatFloatGrad,
Tex1DArrayI32I32, Tex1DArrayS32S32,
Tex1DArrayI32Float, Tex1DArrayS32Float,
Tex1DArrayI32FloatLevel, Tex1DArrayS32FloatLevel,
Tex1DArrayI32FloatGrad, Tex1DArrayS32FloatGrad,
Tex2DFloatI32, Tex1DArrayU32S32,
Tex1DArrayU32Float,
Tex1DArrayU32FloatLevel,
Tex1DArrayU32FloatGrad,
Tex2DFloatS32,
Tex2DFloatFloat, Tex2DFloatFloat,
Tex2DFloatFloatLevel, Tex2DFloatFloatLevel,
Tex2DFloatFloatGrad, Tex2DFloatFloatGrad,
Tex2DI32I32, Tex2DS32S32,
Tex2DI32Float, Tex2DS32Float,
Tex2DI32FloatLevel, Tex2DS32FloatLevel,
Tex2DI32FloatGrad, Tex2DS32FloatGrad,
Tex2DArrayFloatI32, Tex2DU32S32,
Tex2DU32Float,
Tex2DU32FloatLevel,
Tex2DU32FloatGrad,
Tex2DArrayFloatS32,
Tex2DArrayFloatFloat, Tex2DArrayFloatFloat,
Tex2DArrayFloatFloatLevel, Tex2DArrayFloatFloatLevel,
Tex2DArrayFloatFloatGrad, Tex2DArrayFloatFloatGrad,
Tex2DArrayI32I32, Tex2DArrayS32S32,
Tex2DArrayI32Float, Tex2DArrayS32Float,
Tex2DArrayI32FloatLevel, Tex2DArrayS32FloatLevel,
Tex2DArrayI32FloatGrad, Tex2DArrayS32FloatGrad,
Tex3DFloatI32, Tex2DArrayU32S32,
Tex2DArrayU32Float,
Tex2DArrayU32FloatLevel,
Tex2DArrayU32FloatGrad,
Tex3DFloatS32,
Tex3DFloatFloat, Tex3DFloatFloat,
Tex3DFloatFloatLevel, Tex3DFloatFloatLevel,
Tex3DFloatFloatGrad, Tex3DFloatFloatGrad,
Tex3DI32I32, Tex3DS32S32,
Tex3DI32Float, Tex3DS32Float,
Tex3DI32FloatLevel, Tex3DS32FloatLevel,
Tex3DI32FloatGrad, Tex3DS32FloatGrad,
Tex3DU32S32,
Tex3DU32Float,
Tex3DU32FloatLevel,
Tex3DU32FloatGrad,
TexCubeFloatFloat,
TexCubeFloatFloatLevel,
TexCubeS32Float,
TexCubeS32FloatLevel,
TexCubeU32Float,
TexCubeU32FloatLevel,
TexCubeArrayFloatFloat,
TexCubeArrayFloatFloatLevel,
TexCubeArrayS32Float,
TexCubeArrayS32FloatLevel,
TexCubeArrayU32Float,
TexCubeArrayU32FloatLevel,
Tld4R2DFloatFloat,
Tld4G2DFloatFloat,
Tld4B2DFloatFloat,
Tld4A2DFloatFloat,
Tld4R2DS64Float,
Tld4G2DS64Float,
Tld4B2DS64Float,
Tld4A2DS64Float,
Tld4R2DU64Float,
Tld4G2DU64Float,
Tld4B2DU64Float,
Tld4A2DU64Float,
TexUnified1DFloatS32,
TexUnified1DFloatFloat,
TexUnified1DFloatFloatLevel,
TexUnified1DFloatFloatGrad,
TexUnified1DS32S32,
TexUnified1DS32Float,
TexUnified1DS32FloatLevel,
TexUnified1DS32FloatGrad,
TexUnified1DU32S32,
TexUnified1DU32Float,
TexUnified1DU32FloatLevel,
TexUnified1DU32FloatGrad,
TexUnified1DArrayFloatS32,
TexUnified1DArrayFloatFloat,
TexUnified1DArrayFloatFloatLevel,
TexUnified1DArrayFloatFloatGrad,
TexUnified1DArrayS32S32,
TexUnified1DArrayS32Float,
TexUnified1DArrayS32FloatLevel,
TexUnified1DArrayS32FloatGrad,
TexUnified1DArrayU32S32,
TexUnified1DArrayU32Float,
TexUnified1DArrayU32FloatLevel,
TexUnified1DArrayU32FloatGrad,
TexUnified2DFloatS32,
TexUnified2DFloatFloat,
TexUnified2DFloatFloatLevel,
TexUnified2DFloatFloatGrad,
TexUnified2DS32S32,
TexUnified2DS32Float,
TexUnified2DS32FloatLevel,
TexUnified2DS32FloatGrad,
TexUnified2DU32S32,
TexUnified2DU32Float,
TexUnified2DU32FloatLevel,
TexUnified2DU32FloatGrad,
TexUnified2DArrayFloatS32,
TexUnified2DArrayFloatFloat,
TexUnified2DArrayFloatFloatLevel,
TexUnified2DArrayFloatFloatGrad,
TexUnified2DArrayS32S32,
TexUnified2DArrayS32Float,
TexUnified2DArrayS32FloatLevel,
TexUnified2DArrayS32FloatGrad,
TexUnified2DArrayU32S32,
TexUnified2DArrayU32Float,
TexUnified2DArrayU32FloatLevel,
TexUnified2DArrayU32FloatGrad,
TexUnified3DFloatS32,
TexUnified3DFloatFloat,
TexUnified3DFloatFloatLevel,
TexUnified3DFloatFloatGrad,
TexUnified3DS32S32,
TexUnified3DS32Float,
TexUnified3DS32FloatLevel,
TexUnified3DS32FloatGrad,
TexUnified3DU32S32,
TexUnified3DU32Float,
TexUnified3DU32FloatLevel,
TexUnified3DU32FloatGrad,
TexUnifiedCubeFloatFloat,
TexUnifiedCubeFloatFloatLevel,
TexUnifiedCubeS32Float,
TexUnifiedCubeS32FloatLevel,
TexUnifiedCubeU32Float,
TexUnifiedCubeU32FloatLevel,
TexUnifiedCubeArrayFloatFloat,
TexUnifiedCubeArrayFloatFloatLevel,
TexUnifiedCubeArrayS32Float,
TexUnifiedCubeArrayS32FloatLevel,
TexUnifiedCubeArrayU32Float,
TexUnifiedCubeArrayU32FloatLevel,
Tld4UnifiedR2DFloatFloat,
Tld4UnifiedG2DFloatFloat,
Tld4UnifiedB2DFloatFloat,
Tld4UnifiedA2DFloatFloat,
Tld4UnifiedR2DS64Float,
Tld4UnifiedG2DS64Float,
Tld4UnifiedB2DS64Float,
Tld4UnifiedA2DS64Float,
Tld4UnifiedR2DU64Float,
Tld4UnifiedG2DU64Float,
Tld4UnifiedB2DU64Float,
Tld4UnifiedA2DU64Float,
// Surface intrinsics // Surface intrinsics
Suld1DI8Clamp,
Suld1DI16Clamp,
Suld1DI32Clamp,
Suld1DI64Clamp,
Suld1DV2I8Clamp,
Suld1DV2I16Clamp,
Suld1DV2I32Clamp,
Suld1DV2I64Clamp,
Suld1DV4I8Clamp,
Suld1DV4I16Clamp,
Suld1DV4I32Clamp,
Suld1DArrayI8Clamp,
Suld1DArrayI16Clamp,
Suld1DArrayI32Clamp,
Suld1DArrayI64Clamp,
Suld1DArrayV2I8Clamp,
Suld1DArrayV2I16Clamp,
Suld1DArrayV2I32Clamp,
Suld1DArrayV2I64Clamp,
Suld1DArrayV4I8Clamp,
Suld1DArrayV4I16Clamp,
Suld1DArrayV4I32Clamp,
Suld2DI8Clamp,
Suld2DI16Clamp,
Suld2DI32Clamp,
Suld2DI64Clamp,
Suld2DV2I8Clamp,
Suld2DV2I16Clamp,
Suld2DV2I32Clamp,
Suld2DV2I64Clamp,
Suld2DV4I8Clamp,
Suld2DV4I16Clamp,
Suld2DV4I32Clamp,
Suld2DArrayI8Clamp,
Suld2DArrayI16Clamp,
Suld2DArrayI32Clamp,
Suld2DArrayI64Clamp,
Suld2DArrayV2I8Clamp,
Suld2DArrayV2I16Clamp,
Suld2DArrayV2I32Clamp,
Suld2DArrayV2I64Clamp,
Suld2DArrayV4I8Clamp,
Suld2DArrayV4I16Clamp,
Suld2DArrayV4I32Clamp,
Suld3DI8Clamp,
Suld3DI16Clamp,
Suld3DI32Clamp,
Suld3DI64Clamp,
Suld3DV2I8Clamp,
Suld3DV2I16Clamp,
Suld3DV2I32Clamp,
Suld3DV2I64Clamp,
Suld3DV4I8Clamp,
Suld3DV4I16Clamp,
Suld3DV4I32Clamp,
Suld1DI8Trap, Suld1DI8Trap,
Suld1DI16Trap, Suld1DI16Trap,
Suld1DI32Trap, Suld1DI32Trap,
Suld1DI64Trap,
Suld1DV2I8Trap, Suld1DV2I8Trap,
Suld1DV2I16Trap, Suld1DV2I16Trap,
Suld1DV2I32Trap, Suld1DV2I32Trap,
Suld1DV2I64Trap,
Suld1DV4I8Trap, Suld1DV4I8Trap,
Suld1DV4I16Trap, Suld1DV4I16Trap,
Suld1DV4I32Trap, Suld1DV4I32Trap,
@ -132,9 +322,11 @@ enum NodeType {
Suld1DArrayI8Trap, Suld1DArrayI8Trap,
Suld1DArrayI16Trap, Suld1DArrayI16Trap,
Suld1DArrayI32Trap, Suld1DArrayI32Trap,
Suld1DArrayI64Trap,
Suld1DArrayV2I8Trap, Suld1DArrayV2I8Trap,
Suld1DArrayV2I16Trap, Suld1DArrayV2I16Trap,
Suld1DArrayV2I32Trap, Suld1DArrayV2I32Trap,
Suld1DArrayV2I64Trap,
Suld1DArrayV4I8Trap, Suld1DArrayV4I8Trap,
Suld1DArrayV4I16Trap, Suld1DArrayV4I16Trap,
Suld1DArrayV4I32Trap, Suld1DArrayV4I32Trap,
@ -142,9 +334,11 @@ enum NodeType {
Suld2DI8Trap, Suld2DI8Trap,
Suld2DI16Trap, Suld2DI16Trap,
Suld2DI32Trap, Suld2DI32Trap,
Suld2DI64Trap,
Suld2DV2I8Trap, Suld2DV2I8Trap,
Suld2DV2I16Trap, Suld2DV2I16Trap,
Suld2DV2I32Trap, Suld2DV2I32Trap,
Suld2DV2I64Trap,
Suld2DV4I8Trap, Suld2DV4I8Trap,
Suld2DV4I16Trap, Suld2DV4I16Trap,
Suld2DV4I32Trap, Suld2DV4I32Trap,
@ -152,9 +346,11 @@ enum NodeType {
Suld2DArrayI8Trap, Suld2DArrayI8Trap,
Suld2DArrayI16Trap, Suld2DArrayI16Trap,
Suld2DArrayI32Trap, Suld2DArrayI32Trap,
Suld2DArrayI64Trap,
Suld2DArrayV2I8Trap, Suld2DArrayV2I8Trap,
Suld2DArrayV2I16Trap, Suld2DArrayV2I16Trap,
Suld2DArrayV2I32Trap, Suld2DArrayV2I32Trap,
Suld2DArrayV2I64Trap,
Suld2DArrayV4I8Trap, Suld2DArrayV4I8Trap,
Suld2DArrayV4I16Trap, Suld2DArrayV4I16Trap,
Suld2DArrayV4I32Trap, Suld2DArrayV4I32Trap,
@ -162,12 +358,74 @@ enum NodeType {
Suld3DI8Trap, Suld3DI8Trap,
Suld3DI16Trap, Suld3DI16Trap,
Suld3DI32Trap, Suld3DI32Trap,
Suld3DI64Trap,
Suld3DV2I8Trap, Suld3DV2I8Trap,
Suld3DV2I16Trap, Suld3DV2I16Trap,
Suld3DV2I32Trap, Suld3DV2I32Trap,
Suld3DV2I64Trap,
Suld3DV4I8Trap, Suld3DV4I8Trap,
Suld3DV4I16Trap, Suld3DV4I16Trap,
Suld3DV4I32Trap Suld3DV4I32Trap,
Suld1DI8Zero,
Suld1DI16Zero,
Suld1DI32Zero,
Suld1DI64Zero,
Suld1DV2I8Zero,
Suld1DV2I16Zero,
Suld1DV2I32Zero,
Suld1DV2I64Zero,
Suld1DV4I8Zero,
Suld1DV4I16Zero,
Suld1DV4I32Zero,
Suld1DArrayI8Zero,
Suld1DArrayI16Zero,
Suld1DArrayI32Zero,
Suld1DArrayI64Zero,
Suld1DArrayV2I8Zero,
Suld1DArrayV2I16Zero,
Suld1DArrayV2I32Zero,
Suld1DArrayV2I64Zero,
Suld1DArrayV4I8Zero,
Suld1DArrayV4I16Zero,
Suld1DArrayV4I32Zero,
Suld2DI8Zero,
Suld2DI16Zero,
Suld2DI32Zero,
Suld2DI64Zero,
Suld2DV2I8Zero,
Suld2DV2I16Zero,
Suld2DV2I32Zero,
Suld2DV2I64Zero,
Suld2DV4I8Zero,
Suld2DV4I16Zero,
Suld2DV4I32Zero,
Suld2DArrayI8Zero,
Suld2DArrayI16Zero,
Suld2DArrayI32Zero,
Suld2DArrayI64Zero,
Suld2DArrayV2I8Zero,
Suld2DArrayV2I16Zero,
Suld2DArrayV2I32Zero,
Suld2DArrayV2I64Zero,
Suld2DArrayV4I8Zero,
Suld2DArrayV4I16Zero,
Suld2DArrayV4I32Zero,
Suld3DI8Zero,
Suld3DI16Zero,
Suld3DI32Zero,
Suld3DI64Zero,
Suld3DV2I8Zero,
Suld3DV2I16Zero,
Suld3DV2I32Zero,
Suld3DV2I64Zero,
Suld3DV4I8Zero,
Suld3DV4I16Zero,
Suld3DV4I32Zero
}; };
} }

View File

@ -36,8 +36,24 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
bit IsLoad = 0; bit IsLoad = 0;
bit IsStore = 0; bit IsStore = 0;
let TSFlags{3-0} = VecInstType; bit IsTex = 0;
let TSFlags{4-4} = IsSimpleMove; bit IsSust = 0;
let TSFlags{5-5} = IsLoad; bit IsSurfTexQuery = 0;
let TSFlags{6-6} = IsStore; bit IsTexModeUnified = 0;
// The following field is encoded as log2 of the vector size plus one,
// with 0 meaning the operation is not a surface load (suld) instruction.
// For example, if IsSuld == 2, then the instruction is a suld instruction
// with vector size 2**(2-1) = 2.
bits<2> IsSuld = 0;
let TSFlags{3-0} = VecInstType;
let TSFlags{4-4} = IsSimpleMove;
let TSFlags{5-5} = IsLoad;
let TSFlags{6-6} = IsStore;
let TSFlags{7} = IsTex;
let TSFlags{9-8} = IsSuld;
let TSFlags{10} = IsSust;
let TSFlags{11} = IsSurfTexQuery;
let TSFlags{12} = IsTexModeUnified;
} }

File diff suppressed because it is too large Load Diff

View File

@ -15,6 +15,7 @@
#include "NVPTX.h" #include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h" #include "NVPTXMachineFunctionInfo.h"
#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h"
@ -32,10 +33,16 @@ private:
public: public:
NVPTXReplaceImageHandles(); NVPTXReplaceImageHandles();
bool runOnMachineFunction(MachineFunction &MF) override; bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
return "NVPTX Replace Image Handles";
}
private: private:
bool processInstr(MachineInstr &MI); bool processInstr(MachineInstr &MI);
void replaceImageHandle(MachineOperand &Op, MachineFunction &MF); void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
unsigned &Idx);
}; };
} }
@ -65,242 +72,43 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
E = InstrsToRemove.end(); I != E; ++I) { E = InstrsToRemove.end(); I != E; ++I) {
(*I)->eraseFromParent(); (*I)->eraseFromParent();
} }
return Changed; return Changed;
} }
bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
MachineFunction &MF = *MI.getParent()->getParent(); MachineFunction &MF = *MI.getParent()->getParent();
// Check if we have a surface/texture instruction const MCInstrDesc &MCID = MI.getDesc();
switch (MI.getOpcode()) {
default: return false; if (MCID.TSFlags & NVPTXII::IsTexFlag) {
case NVPTX::TEX_1D_F32_I32:
case NVPTX::TEX_1D_F32_F32:
case NVPTX::TEX_1D_F32_F32_LEVEL:
case NVPTX::TEX_1D_F32_F32_GRAD:
case NVPTX::TEX_1D_I32_I32:
case NVPTX::TEX_1D_I32_F32:
case NVPTX::TEX_1D_I32_F32_LEVEL:
case NVPTX::TEX_1D_I32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_F32_I32:
case NVPTX::TEX_1D_ARRAY_F32_F32:
case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_I32_I32:
case NVPTX::TEX_1D_ARRAY_I32_F32:
case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_2D_F32_I32:
case NVPTX::TEX_2D_F32_F32:
case NVPTX::TEX_2D_F32_F32_LEVEL:
case NVPTX::TEX_2D_F32_F32_GRAD:
case NVPTX::TEX_2D_I32_I32:
case NVPTX::TEX_2D_I32_F32:
case NVPTX::TEX_2D_I32_F32_LEVEL:
case NVPTX::TEX_2D_I32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_F32_I32:
case NVPTX::TEX_2D_ARRAY_F32_F32:
case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_I32_I32:
case NVPTX::TEX_2D_ARRAY_I32_F32:
case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_3D_F32_I32:
case NVPTX::TEX_3D_F32_F32:
case NVPTX::TEX_3D_F32_F32_LEVEL:
case NVPTX::TEX_3D_F32_F32_GRAD:
case NVPTX::TEX_3D_I32_I32:
case NVPTX::TEX_3D_I32_F32:
case NVPTX::TEX_3D_I32_F32_LEVEL:
case NVPTX::TEX_3D_I32_F32_GRAD: {
// This is a texture fetch, so operand 4 is a texref and operand 5 is // This is a texture fetch, so operand 4 is a texref and operand 5 is
// a samplerref // a samplerref
MachineOperand &TexHandle = MI.getOperand(4); MachineOperand &TexHandle = MI.getOperand(4);
MachineOperand &SampHandle = MI.getOperand(5);
replaceImageHandle(TexHandle, MF); replaceImageHandle(TexHandle, MF);
replaceImageHandle(SampHandle, MF);
if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
MachineOperand &SampHandle = MI.getOperand(5);
replaceImageHandle(SampHandle, MF);
}
return true; return true;
} } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
case NVPTX::SULD_1D_I8_TRAP: unsigned VecSize =
case NVPTX::SULD_1D_I16_TRAP: 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
case NVPTX::SULD_1D_I32_TRAP:
case NVPTX::SULD_1D_ARRAY_I8_TRAP: // For a surface load of vector size N, the Nth operand will be the surfref
case NVPTX::SULD_1D_ARRAY_I16_TRAP: MachineOperand &SurfHandle = MI.getOperand(VecSize);
case NVPTX::SULD_1D_ARRAY_I32_TRAP:
case NVPTX::SULD_2D_I8_TRAP:
case NVPTX::SULD_2D_I16_TRAP:
case NVPTX::SULD_2D_I32_TRAP:
case NVPTX::SULD_2D_ARRAY_I8_TRAP:
case NVPTX::SULD_2D_ARRAY_I16_TRAP:
case NVPTX::SULD_2D_ARRAY_I32_TRAP:
case NVPTX::SULD_3D_I8_TRAP:
case NVPTX::SULD_3D_I16_TRAP:
case NVPTX::SULD_3D_I32_TRAP: {
// This is a V1 surface load, so operand 1 is a surfref
MachineOperand &SurfHandle = MI.getOperand(1);
replaceImageHandle(SurfHandle, MF); replaceImageHandle(SurfHandle, MF);
return true; return true;
} } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
case NVPTX::SULD_1D_V2I8_TRAP:
case NVPTX::SULD_1D_V2I16_TRAP:
case NVPTX::SULD_1D_V2I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_2D_V2I8_TRAP:
case NVPTX::SULD_2D_V2I16_TRAP:
case NVPTX::SULD_2D_V2I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_3D_V2I8_TRAP:
case NVPTX::SULD_3D_V2I16_TRAP:
case NVPTX::SULD_3D_V2I32_TRAP: {
// This is a V2 surface load, so operand 2 is a surfref
MachineOperand &SurfHandle = MI.getOperand(2);
replaceImageHandle(SurfHandle, MF);
return true;
}
case NVPTX::SULD_1D_V4I8_TRAP:
case NVPTX::SULD_1D_V4I16_TRAP:
case NVPTX::SULD_1D_V4I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_2D_V4I8_TRAP:
case NVPTX::SULD_2D_V4I16_TRAP:
case NVPTX::SULD_2D_V4I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_3D_V4I8_TRAP:
case NVPTX::SULD_3D_V4I16_TRAP:
case NVPTX::SULD_3D_V4I32_TRAP: {
// This is a V4 surface load, so operand 4 is a surfref
MachineOperand &SurfHandle = MI.getOperand(4);
replaceImageHandle(SurfHandle, MF);
return true;
}
case NVPTX::SUST_B_1D_B8_TRAP:
case NVPTX::SUST_B_1D_B16_TRAP:
case NVPTX::SUST_B_1D_B32_TRAP:
case NVPTX::SUST_B_1D_V2B8_TRAP:
case NVPTX::SUST_B_1D_V2B16_TRAP:
case NVPTX::SUST_B_1D_V2B32_TRAP:
case NVPTX::SUST_B_1D_V4B8_TRAP:
case NVPTX::SUST_B_1D_V4B16_TRAP:
case NVPTX::SUST_B_1D_V4B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_2D_B8_TRAP:
case NVPTX::SUST_B_2D_B16_TRAP:
case NVPTX::SUST_B_2D_B32_TRAP:
case NVPTX::SUST_B_2D_V2B8_TRAP:
case NVPTX::SUST_B_2D_V2B16_TRAP:
case NVPTX::SUST_B_2D_V2B32_TRAP:
case NVPTX::SUST_B_2D_V4B8_TRAP:
case NVPTX::SUST_B_2D_V4B16_TRAP:
case NVPTX::SUST_B_2D_V4B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_3D_B8_TRAP:
case NVPTX::SUST_B_3D_B16_TRAP:
case NVPTX::SUST_B_3D_B32_TRAP:
case NVPTX::SUST_B_3D_V2B8_TRAP:
case NVPTX::SUST_B_3D_V2B16_TRAP:
case NVPTX::SUST_B_3D_V2B32_TRAP:
case NVPTX::SUST_B_3D_V4B8_TRAP:
case NVPTX::SUST_B_3D_V4B16_TRAP:
case NVPTX::SUST_B_3D_V4B32_TRAP:
case NVPTX::SUST_P_1D_B8_TRAP:
case NVPTX::SUST_P_1D_B16_TRAP:
case NVPTX::SUST_P_1D_B32_TRAP:
case NVPTX::SUST_P_1D_V2B8_TRAP:
case NVPTX::SUST_P_1D_V2B16_TRAP:
case NVPTX::SUST_P_1D_V2B32_TRAP:
case NVPTX::SUST_P_1D_V4B8_TRAP:
case NVPTX::SUST_P_1D_V4B16_TRAP:
case NVPTX::SUST_P_1D_V4B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_2D_B8_TRAP:
case NVPTX::SUST_P_2D_B16_TRAP:
case NVPTX::SUST_P_2D_B32_TRAP:
case NVPTX::SUST_P_2D_V2B8_TRAP:
case NVPTX::SUST_P_2D_V2B16_TRAP:
case NVPTX::SUST_P_2D_V2B32_TRAP:
case NVPTX::SUST_P_2D_V4B8_TRAP:
case NVPTX::SUST_P_2D_V4B16_TRAP:
case NVPTX::SUST_P_2D_V4B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_3D_B8_TRAP:
case NVPTX::SUST_P_3D_B16_TRAP:
case NVPTX::SUST_P_3D_B32_TRAP:
case NVPTX::SUST_P_3D_V2B8_TRAP:
case NVPTX::SUST_P_3D_V2B16_TRAP:
case NVPTX::SUST_P_3D_V2B32_TRAP:
case NVPTX::SUST_P_3D_V4B8_TRAP:
case NVPTX::SUST_P_3D_V4B16_TRAP:
case NVPTX::SUST_P_3D_V4B32_TRAP: {
// This is a surface store, so operand 0 is a surfref // This is a surface store, so operand 0 is a surfref
MachineOperand &SurfHandle = MI.getOperand(0); MachineOperand &SurfHandle = MI.getOperand(0);
replaceImageHandle(SurfHandle, MF); replaceImageHandle(SurfHandle, MF);
return true; return true;
} } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
case NVPTX::TXQ_CHANNEL_ORDER:
case NVPTX::TXQ_CHANNEL_DATA_TYPE:
case NVPTX::TXQ_WIDTH:
case NVPTX::TXQ_HEIGHT:
case NVPTX::TXQ_DEPTH:
case NVPTX::TXQ_ARRAY_SIZE:
case NVPTX::TXQ_NUM_SAMPLES:
case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
case NVPTX::SUQ_CHANNEL_ORDER:
case NVPTX::SUQ_CHANNEL_DATA_TYPE:
case NVPTX::SUQ_WIDTH:
case NVPTX::SUQ_HEIGHT:
case NVPTX::SUQ_DEPTH:
case NVPTX::SUQ_ARRAY_SIZE: {
// This is a query, so operand 1 is a surfref/texref // This is a query, so operand 1 is a surfref/texref
MachineOperand &Handle = MI.getOperand(1); MachineOperand &Handle = MI.getOperand(1);
@ -308,22 +116,38 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
return true; return true;
} }
}
return false;
} }
void NVPTXReplaceImageHandles:: void NVPTXReplaceImageHandles::
replaceImageHandle(MachineOperand &Op, MachineFunction &MF) { replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
unsigned Idx;
if (findIndexForHandle(Op, MF, Idx)) {
Op.ChangeToImmediate(Idx);
}
}
bool NVPTXReplaceImageHandles::
findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
const MachineRegisterInfo &MRI = MF.getRegInfo(); const MachineRegisterInfo &MRI = MF.getRegInfo();
NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>(); NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
assert(Op.isReg() && "Handle is not in a reg?");
// Which instruction defines the handle? // Which instruction defines the handle?
MachineInstr *MI = MRI.getVRegDef(Op.getReg()); MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());
assert(MI && "No def for image handle vreg?");
MachineInstr &TexHandleDef = *MI;
switch (TexHandleDef.getOpcode()) { switch (TexHandleDef.getOpcode()) {
case NVPTX::LD_i64_avar: { case NVPTX::LD_i64_avar: {
// The handle is a parameter value being loaded, replace with the // The handle is a parameter value being loaded, replace with the
// parameter symbol // parameter symbol
const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>();
if (ST.getDrvInterface() == NVPTX::CUDA) {
// For CUDA, we preserve the param loads coming from function arguments
return false;
}
assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!"); assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
StringRef Sym = TexHandleDef.getOperand(6).getSymbolName(); StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
std::string ParamBaseName = MF.getName(); std::string ParamBaseName = MF.getName();
@ -333,19 +157,27 @@ replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
std::string NewSym; std::string NewSym;
raw_string_ostream NewSymStr(NewSym); raw_string_ostream NewSymStr(NewSym);
NewSymStr << MF.getFunction()->getName() << "_param_" << Param; NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
Op.ChangeToImmediate(
MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
InstrsToRemove.insert(&TexHandleDef); InstrsToRemove.insert(&TexHandleDef);
break; Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str());
return true;
} }
case NVPTX::texsurf_handles: { case NVPTX::texsurf_handles: {
// The handle is a global variable, replace with the global variable name // The handle is a global variable, replace with the global variable name
assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!"); assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal(); const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
assert(GV->hasName() && "Global sampler must be named!"); assert(GV->hasName() && "Global sampler must be named!");
Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
InstrsToRemove.insert(&TexHandleDef); InstrsToRemove.insert(&TexHandleDef);
break; Idx = MFI->getImageHandleSymbolIndex(GV->getName().data());
return true;
}
case NVPTX::nvvm_move_i64:
case TargetOpcode::COPY: {
bool Res = findIndexForHandle(TexHandleDef.getOperand(1), MF, Idx);
if (Res) {
InstrsToRemove.insert(&TexHandleDef);
}
return Res;
} }
default: default:
llvm_unreachable("Unknown instruction operating on handle"); llvm_unreachable("Unknown instruction operating on handle");

View File

@ -91,7 +91,12 @@ public:
inline bool hasROT64() const { return SmVersion >= 20; } inline bool hasROT64() const { return SmVersion >= 20; }
bool hasImageHandles() const { bool hasImageHandles() const {
// Currently disabled // Enable handles for Kepler+, where CUDA supports indirect surfaces and
// textures
if (getDrvInterface() == NVPTX::CUDA)
return (SmVersion >= 30);
// Disabled, otherwise
return false; return false;
} }
bool is64Bit() const { return Is64Bit; } bool is64Bit() const { return Is64Bit; }

View File

@ -0,0 +1,53 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
target triple = "nvptx-unknown-cuda"
declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
; SM20-LABEL: .entry foo
; SM30-LABEL: .entry foo
; Case 1: the surface handle is the kernel argument %img.
; The sm_20 and sm_30 runs carry identical expectations here: the handle is
; loaded from foo_param_0 into a 64-bit register (captured as SURFREG) and
; that same register is the surface operand of the suld instruction.
define void @foo(i64 %img, float* %red, i32 %idx) {
; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
; The loaded value (captured as RED) must be the input of the int-to-float
; conversion, and the converted value (REDF) must be what gets stored.
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
store float %ret, float* %red
ret void
}
@surf0 = internal addrspace(1) global i64 0, align 8
; SM20-LABEL: .entry bar
; SM30-LABEL: .entry bar
; Case 2: the surface handle is derived from the module-level global @surf0
; through the texsurf.handle.internal intrinsic.
; The two runs expect different code for the handle:
;  - sm_20 run: the symbol surf0 appears directly as the suld surface operand.
;  - sm_30 run: surf0 is first moved into a 64-bit register (captured as
;    SURFHANDLE) and that register is the suld surface operand.
define void @bar(float* %red, i32 %idx) {
; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
%surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}]
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
; The loaded value (captured as RED) must be the input of the int-to-float
; conversion, and the converted value (REDF) must be what gets stored.
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
store float %ret, float* %red
ret void
}
!nvvm.annotations = !{!1, !2, !3}
!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}

View File

@ -0,0 +1,42 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
target triple = "nvptx-unknown-cuda"
declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
; SM20-LABEL: .entry foo
; SM30-LABEL: .entry foo
; Case 1: the surface handle is the kernel argument %img.
; The sm_20 and sm_30 runs carry identical expectations here: the handle is
; loaded from foo_param_0 into a 64-bit register (captured as SURFREG) and
; that same register is the surface operand of the sust instruction.
define void @foo(i64 %img, i32 %val, i32 %idx) {
; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM20: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM30: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
ret void
}
@surf0 = internal addrspace(1) global i64 0, align 8
; SM20-LABEL: .entry bar
; SM30-LABEL: .entry bar
; Case 2: the surface handle is derived from the module-level global @surf0
; through the texsurf.handle.internal intrinsic.
; The two runs expect different code for the handle:
;  - sm_20 run: the symbol surf0 appears directly as the sust surface operand.
;  - sm_30 run: surf0 is first moved into a 64-bit register (captured as
;    SURFHANDLE) and that register is the sust surface operand.
define void @bar(i32 %val, i32 %idx) {
; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
%surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
; The sm_30 store check must reference SURFHANDLE (captured by the mov check
; above). SURFREG is captured while matching @foo, so using it here would only
; pass if both functions happened to be assigned the same register number and
; would leave the captured handle register unverified.
; SM20: sust.b.1d.b32.trap [surf0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
; SM30: sust.b.1d.b32.trap [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %surfHandle, i32 %idx, i32 %val)
ret void
}
!nvvm.annotations = !{!1, !2, !3}
!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (i32, i32)* @bar, metadata !"kernel", i32 1}
!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}

View File

@ -0,0 +1,46 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
target triple = "nvptx-unknown-cuda"
declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
; SM20-LABEL: .entry foo
; SM30-LABEL: .entry foo
; Case 1: the texture handle is the kernel argument %img.
; The sm_20 and sm_30 runs carry identical expectations here: the handle is
; loaded from foo_param_0 into a 64-bit register (captured as TEXREG) and that
; same register is the only handle operand of the tex instruction (the
; unified-mode intrinsic takes a single handle and a coordinate).
define void @foo(i64 %img, float* %red, i32 %idx) {
; SM20: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
; SM30: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx)
; Only element 0 of the result is used; the store must write the register
; captured as RED, the first of the four tex result registers.
%ret = extractvalue { float, float, float, float } %val, 0
; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]]
; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]]
store float %ret, float* %red
ret void
}
@tex0 = internal addrspace(1) global i64 0, align 8
; SM20-LABEL: .entry bar
; SM30-LABEL: .entry bar
; Case 2: the texture handle is derived from the module-level global @tex0
; through the texsurf.handle.internal intrinsic.
; The two runs expect different code for the handle:
;  - sm_20 run: the symbol tex0 appears directly as the tex handle operand.
;  - sm_30 run: tex0 is first moved into a 64-bit register (captured as
;    TEXHANDLE) and that register is the tex handle operand.
define void @bar(float* %red, i32 %idx) {
; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
%texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
; Only element 0 of the result is used; the store must write the register
; captured as RED, the first of the four tex result registers.
%ret = extractvalue { float, float, float, float } %val, 0
; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]]
; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]]
store float %ret, float* %red
ret void
}
!nvvm.annotations = !{!1, !2, !3}
!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
!3 = metadata !{i64 addrspace(1)* @tex0, metadata !"texture", i32 1}

View File

@ -2,12 +2,12 @@
target triple = "nvptx-unknown-nvcl" target triple = "nvptx-unknown-nvcl"
declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64, i64, i32) declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64, i64, i32)
; CHECK: .entry foo ; CHECK: .entry foo
define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) { define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}] ; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64 %img, i64 %sampler, i32 %idx) %val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64 %img, i64 %sampler, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0 %ret = extractvalue { float, float, float, float } %val, 0
; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]] ; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]]
store float %ret, float* %red store float %ret, float* %red