mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-05-24 02:38:42 +00:00
R600: Improve texture handling
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182125 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4109bd8829
commit
d3293b49f9
@ -21,6 +21,7 @@ class FunctionPass;
|
|||||||
class AMDGPUTargetMachine;
|
class AMDGPUTargetMachine;
|
||||||
|
|
||||||
// R600 Passes
|
// R600 Passes
|
||||||
|
FunctionPass* createR600TextureIntrinsicsReplacer();
|
||||||
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
|
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
|
||||||
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
||||||
FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
|
FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
|
||||||
|
@ -126,6 +126,7 @@ enum {
|
|||||||
SMIN,
|
SMIN,
|
||||||
UMIN,
|
UMIN,
|
||||||
URECIP,
|
URECIP,
|
||||||
|
TEXTURE_FETCH,
|
||||||
EXPORT,
|
EXPORT,
|
||||||
CONST_ADDRESS,
|
CONST_ADDRESS,
|
||||||
REGISTER_LOAD,
|
REGISTER_LOAD,
|
||||||
|
@ -111,6 +111,8 @@ AMDGPUPassConfig::addPreISel() {
|
|||||||
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
||||||
addPass(createAMDGPUStructurizeCFGPass());
|
addPass(createAMDGPUStructurizeCFGPass());
|
||||||
addPass(createSIAnnotateControlFlowPass());
|
addPass(createSIAnnotateControlFlowPass());
|
||||||
|
} else {
|
||||||
|
addPass(createR600TextureIntrinsicsReplacer());
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -43,6 +43,7 @@ add_llvm_target(R600CodeGen
|
|||||||
R600MachineScheduler.cpp
|
R600MachineScheduler.cpp
|
||||||
R600Packetizer.cpp
|
R600Packetizer.cpp
|
||||||
R600RegisterInfo.cpp
|
R600RegisterInfo.cpp
|
||||||
|
R600TextureIntrinsicsReplacer.cpp
|
||||||
SIAnnotateControlFlow.cpp
|
SIAnnotateControlFlow.cpp
|
||||||
SIInsertWaits.cpp
|
SIInsertWaits.cpp
|
||||||
SIInstrInfo.cpp
|
SIInstrInfo.cpp
|
||||||
|
@ -198,6 +198,51 @@ void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
|
||||||
|
raw_ostream &O) {
|
||||||
|
unsigned Sel = MI->getOperand(OpNo).getImm();
|
||||||
|
switch (Sel) {
|
||||||
|
case 0:
|
||||||
|
O << "X";
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
O << "Y";
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
O << "Z";
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
O << "W";
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
O << "0";
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
O << "1";
|
||||||
|
break;
|
||||||
|
case 7:
|
||||||
|
O << "_";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
|
||||||
|
raw_ostream &O) {
|
||||||
|
unsigned CT = MI->getOperand(OpNo).getImm();
|
||||||
|
switch (CT) {
|
||||||
|
case 0:
|
||||||
|
O << "U";
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
O << "N";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
|
void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
|
||||||
raw_ostream &O) {
|
raw_ostream &O) {
|
||||||
int KCacheMode = MI->getOperand(OpNo).getImm();
|
int KCacheMode = MI->getOperand(OpNo).getImm();
|
||||||
|
@ -49,6 +49,8 @@ private:
|
|||||||
void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||||
void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||||
void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||||
|
void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||||
|
void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||||
void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -81,21 +81,6 @@ enum FCInstr {
|
|||||||
FC_CONTINUE
|
FC_CONTINUE
|
||||||
};
|
};
|
||||||
|
|
||||||
enum TextureTypes {
|
|
||||||
TEXTURE_1D = 1,
|
|
||||||
TEXTURE_2D,
|
|
||||||
TEXTURE_3D,
|
|
||||||
TEXTURE_CUBE,
|
|
||||||
TEXTURE_RECT,
|
|
||||||
TEXTURE_SHADOW1D,
|
|
||||||
TEXTURE_SHADOW2D,
|
|
||||||
TEXTURE_SHADOWRECT,
|
|
||||||
TEXTURE_1D_ARRAY,
|
|
||||||
TEXTURE_2D_ARRAY,
|
|
||||||
TEXTURE_SHADOW1D_ARRAY,
|
|
||||||
TEXTURE_SHADOW2D_ARRAY
|
|
||||||
};
|
|
||||||
|
|
||||||
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
|
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
|
||||||
const MCRegisterInfo &MRI,
|
const MCRegisterInfo &MRI,
|
||||||
const MCSubtargetInfo &STI) {
|
const MCSubtargetInfo &STI) {
|
||||||
@ -120,55 +105,21 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
|||||||
Emit(InstWord2, OS);
|
Emit(InstWord2, OS);
|
||||||
Emit((u_int32_t) 0, OS);
|
Emit((u_int32_t) 0, OS);
|
||||||
} else if (IS_TEX(Desc)) {
|
} else if (IS_TEX(Desc)) {
|
||||||
unsigned Opcode = MI.getOpcode();
|
int64_t Sampler = MI.getOperand(14).getImm();
|
||||||
bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
|
|
||||||
unsigned OpOffset = HasOffsets ? 3 : 0;
|
|
||||||
int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
|
|
||||||
int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
|
|
||||||
|
|
||||||
uint32_t SrcSelect[4] = {0, 1, 2, 3};
|
uint32_t SrcSelect[4] = {
|
||||||
uint32_t Offsets[3] = {0, 0, 0};
|
MI.getOperand(2).getImm(),
|
||||||
uint64_t CoordType[4] = {1, 1, 1, 1};
|
MI.getOperand(3).getImm(),
|
||||||
|
MI.getOperand(4).getImm(),
|
||||||
|
MI.getOperand(5).getImm()
|
||||||
|
};
|
||||||
|
uint32_t Offsets[3] = {
|
||||||
|
MI.getOperand(6).getImm() & 0x1F,
|
||||||
|
MI.getOperand(7).getImm() & 0x1F,
|
||||||
|
MI.getOperand(8).getImm() & 0x1F
|
||||||
|
};
|
||||||
|
|
||||||
if (HasOffsets)
|
uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups);
|
||||||
for (unsigned i = 0; i < 3; i++) {
|
|
||||||
int SignedOffset = MI.getOperand(i + 2).getImm();
|
|
||||||
Offsets[i] = (SignedOffset & 0x1F);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (TextureType == TEXTURE_RECT ||
|
|
||||||
TextureType == TEXTURE_SHADOWRECT) {
|
|
||||||
CoordType[ELEMENT_X] = 0;
|
|
||||||
CoordType[ELEMENT_Y] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (TextureType == TEXTURE_1D_ARRAY ||
|
|
||||||
TextureType == TEXTURE_SHADOW1D_ARRAY) {
|
|
||||||
if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
|
|
||||||
Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
|
|
||||||
CoordType[ELEMENT_Y] = 0;
|
|
||||||
} else {
|
|
||||||
CoordType[ELEMENT_Z] = 0;
|
|
||||||
SrcSelect[ELEMENT_Z] = ELEMENT_Y;
|
|
||||||
}
|
|
||||||
} else if (TextureType == TEXTURE_2D_ARRAY ||
|
|
||||||
TextureType == TEXTURE_SHADOW2D_ARRAY) {
|
|
||||||
CoordType[ELEMENT_Z] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if ((TextureType == TEXTURE_SHADOW1D ||
|
|
||||||
TextureType == TEXTURE_SHADOW2D ||
|
|
||||||
TextureType == TEXTURE_SHADOWRECT ||
|
|
||||||
TextureType == TEXTURE_SHADOW1D_ARRAY) &&
|
|
||||||
Opcode != AMDGPU::TEX_SAMPLE_C_L &&
|
|
||||||
Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
|
|
||||||
SrcSelect[ELEMENT_W] = ELEMENT_Z;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
|
|
||||||
CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
|
|
||||||
CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
|
|
||||||
uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
|
uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
|
||||||
SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
|
SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
|
||||||
SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
|
SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
|
||||||
|
@ -188,23 +188,99 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
|||||||
case AMDGPU::TXD: {
|
case AMDGPU::TXD: {
|
||||||
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||||
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||||
|
MachineOperand &RID = MI->getOperand(4);
|
||||||
|
MachineOperand &SID = MI->getOperand(5);
|
||||||
|
unsigned TextureId = MI->getOperand(6).getImm();
|
||||||
|
unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
|
||||||
|
unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
|
||||||
|
|
||||||
|
switch (TextureId) {
|
||||||
|
case 5: // Rect
|
||||||
|
CTX = CTY = 0;
|
||||||
|
break;
|
||||||
|
case 6: // Shadow1D
|
||||||
|
SrcW = SrcZ;
|
||||||
|
break;
|
||||||
|
case 7: // Shadow2D
|
||||||
|
SrcW = SrcZ;
|
||||||
|
break;
|
||||||
|
case 8: // ShadowRect
|
||||||
|
CTX = CTY = 0;
|
||||||
|
SrcW = SrcZ;
|
||||||
|
break;
|
||||||
|
case 9: // 1DArray
|
||||||
|
SrcZ = SrcY;
|
||||||
|
CTZ = 0;
|
||||||
|
break;
|
||||||
|
case 10: // 2DArray
|
||||||
|
CTZ = 0;
|
||||||
|
break;
|
||||||
|
case 11: // Shadow1DArray
|
||||||
|
SrcZ = SrcY;
|
||||||
|
CTZ = 0;
|
||||||
|
break;
|
||||||
|
case 12: // Shadow2DArray
|
||||||
|
CTZ = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
|
||||||
.addOperand(MI->getOperand(3))
|
.addOperand(MI->getOperand(3))
|
||||||
.addOperand(MI->getOperand(4))
|
.addImm(SrcX)
|
||||||
.addOperand(MI->getOperand(5))
|
.addImm(SrcY)
|
||||||
.addOperand(MI->getOperand(6));
|
.addImm(SrcZ)
|
||||||
|
.addImm(SrcW)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(1)
|
||||||
|
.addImm(2)
|
||||||
|
.addImm(3)
|
||||||
|
.addOperand(RID)
|
||||||
|
.addOperand(SID)
|
||||||
|
.addImm(CTX)
|
||||||
|
.addImm(CTY)
|
||||||
|
.addImm(CTZ)
|
||||||
|
.addImm(CTW);
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
|
||||||
.addOperand(MI->getOperand(2))
|
.addOperand(MI->getOperand(2))
|
||||||
.addOperand(MI->getOperand(4))
|
.addImm(SrcX)
|
||||||
.addOperand(MI->getOperand(5))
|
.addImm(SrcY)
|
||||||
.addOperand(MI->getOperand(6));
|
.addImm(SrcZ)
|
||||||
|
.addImm(SrcW)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(1)
|
||||||
|
.addImm(2)
|
||||||
|
.addImm(3)
|
||||||
|
.addOperand(RID)
|
||||||
|
.addOperand(SID)
|
||||||
|
.addImm(CTX)
|
||||||
|
.addImm(CTY)
|
||||||
|
.addImm(CTZ)
|
||||||
|
.addImm(CTW);
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
|
||||||
.addOperand(MI->getOperand(0))
|
.addOperand(MI->getOperand(0))
|
||||||
.addOperand(MI->getOperand(1))
|
.addOperand(MI->getOperand(1))
|
||||||
.addOperand(MI->getOperand(4))
|
.addImm(SrcX)
|
||||||
.addOperand(MI->getOperand(5))
|
.addImm(SrcY)
|
||||||
.addOperand(MI->getOperand(6))
|
.addImm(SrcZ)
|
||||||
|
.addImm(SrcW)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(1)
|
||||||
|
.addImm(2)
|
||||||
|
.addImm(3)
|
||||||
|
.addOperand(RID)
|
||||||
|
.addOperand(SID)
|
||||||
|
.addImm(CTX)
|
||||||
|
.addImm(CTY)
|
||||||
|
.addImm(CTZ)
|
||||||
|
.addImm(CTW)
|
||||||
.addReg(T0, RegState::Implicit)
|
.addReg(T0, RegState::Implicit)
|
||||||
.addReg(T1, RegState::Implicit);
|
.addReg(T1, RegState::Implicit);
|
||||||
break;
|
break;
|
||||||
@ -213,23 +289,100 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
|||||||
case AMDGPU::TXD_SHADOW: {
|
case AMDGPU::TXD_SHADOW: {
|
||||||
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||||
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
||||||
|
MachineOperand &RID = MI->getOperand(4);
|
||||||
|
MachineOperand &SID = MI->getOperand(5);
|
||||||
|
unsigned TextureId = MI->getOperand(6).getImm();
|
||||||
|
unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
|
||||||
|
unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
|
||||||
|
|
||||||
|
switch (TextureId) {
|
||||||
|
case 5: // Rect
|
||||||
|
CTX = CTY = 0;
|
||||||
|
break;
|
||||||
|
case 6: // Shadow1D
|
||||||
|
SrcW = SrcZ;
|
||||||
|
break;
|
||||||
|
case 7: // Shadow2D
|
||||||
|
SrcW = SrcZ;
|
||||||
|
break;
|
||||||
|
case 8: // ShadowRect
|
||||||
|
CTX = CTY = 0;
|
||||||
|
SrcW = SrcZ;
|
||||||
|
break;
|
||||||
|
case 9: // 1DArray
|
||||||
|
SrcZ = SrcY;
|
||||||
|
CTZ = 0;
|
||||||
|
break;
|
||||||
|
case 10: // 2DArray
|
||||||
|
CTZ = 0;
|
||||||
|
break;
|
||||||
|
case 11: // Shadow1DArray
|
||||||
|
SrcZ = SrcY;
|
||||||
|
CTZ = 0;
|
||||||
|
break;
|
||||||
|
case 12: // Shadow2DArray
|
||||||
|
CTZ = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
|
||||||
.addOperand(MI->getOperand(3))
|
.addOperand(MI->getOperand(3))
|
||||||
.addOperand(MI->getOperand(4))
|
.addImm(SrcX)
|
||||||
.addOperand(MI->getOperand(5))
|
.addImm(SrcY)
|
||||||
.addOperand(MI->getOperand(6));
|
.addImm(SrcZ)
|
||||||
|
.addImm(SrcW)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(1)
|
||||||
|
.addImm(2)
|
||||||
|
.addImm(3)
|
||||||
|
.addOperand(RID)
|
||||||
|
.addOperand(SID)
|
||||||
|
.addImm(CTX)
|
||||||
|
.addImm(CTY)
|
||||||
|
.addImm(CTZ)
|
||||||
|
.addImm(CTW);
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
|
||||||
.addOperand(MI->getOperand(2))
|
.addOperand(MI->getOperand(2))
|
||||||
.addOperand(MI->getOperand(4))
|
.addImm(SrcX)
|
||||||
.addOperand(MI->getOperand(5))
|
.addImm(SrcY)
|
||||||
.addOperand(MI->getOperand(6));
|
.addImm(SrcZ)
|
||||||
|
.addImm(SrcW)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(1)
|
||||||
|
.addImm(2)
|
||||||
|
.addImm(3)
|
||||||
|
.addOperand(RID)
|
||||||
|
.addOperand(SID)
|
||||||
|
.addImm(CTX)
|
||||||
|
.addImm(CTY)
|
||||||
|
.addImm(CTZ)
|
||||||
|
.addImm(CTW);
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
|
||||||
.addOperand(MI->getOperand(0))
|
.addOperand(MI->getOperand(0))
|
||||||
.addOperand(MI->getOperand(1))
|
.addOperand(MI->getOperand(1))
|
||||||
.addOperand(MI->getOperand(4))
|
.addImm(SrcX)
|
||||||
.addOperand(MI->getOperand(5))
|
.addImm(SrcY)
|
||||||
.addOperand(MI->getOperand(6))
|
.addImm(SrcZ)
|
||||||
|
.addImm(SrcW)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(0)
|
||||||
|
.addImm(1)
|
||||||
|
.addImm(2)
|
||||||
|
.addImm(3)
|
||||||
|
.addOperand(RID)
|
||||||
|
.addOperand(SID)
|
||||||
|
.addImm(CTX)
|
||||||
|
.addImm(CTY)
|
||||||
|
.addImm(CTZ)
|
||||||
|
.addImm(CTW)
|
||||||
.addReg(T0, RegState::Implicit)
|
.addReg(T0, RegState::Implicit)
|
||||||
.addReg(T1, RegState::Implicit);
|
.addReg(T1, RegState::Implicit);
|
||||||
break;
|
break;
|
||||||
@ -409,6 +562,75 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
|||||||
|
|
||||||
return SDValue(interp, slot % 2);
|
return SDValue(interp, slot % 2);
|
||||||
}
|
}
|
||||||
|
case AMDGPUIntrinsic::R600_tex:
|
||||||
|
case AMDGPUIntrinsic::R600_texc:
|
||||||
|
case AMDGPUIntrinsic::R600_txl:
|
||||||
|
case AMDGPUIntrinsic::R600_txlc:
|
||||||
|
case AMDGPUIntrinsic::R600_txb:
|
||||||
|
case AMDGPUIntrinsic::R600_txbc:
|
||||||
|
case AMDGPUIntrinsic::R600_txf:
|
||||||
|
case AMDGPUIntrinsic::R600_txq:
|
||||||
|
case AMDGPUIntrinsic::R600_ddx:
|
||||||
|
case AMDGPUIntrinsic::R600_ddy: {
|
||||||
|
unsigned TextureOp;
|
||||||
|
switch (IntrinsicID) {
|
||||||
|
case AMDGPUIntrinsic::R600_tex:
|
||||||
|
TextureOp = 0;
|
||||||
|
break;
|
||||||
|
case AMDGPUIntrinsic::R600_texc:
|
||||||
|
TextureOp = 1;
|
||||||
|
break;
|
||||||
|
case AMDGPUIntrinsic::R600_txl:
|
||||||
|
TextureOp = 2;
|
||||||
|
break;
|
||||||
|
case AMDGPUIntrinsic::R600_txlc:
|
||||||
|
TextureOp = 3;
|
||||||
|
break;
|
||||||
|
case AMDGPUIntrinsic::R600_txb:
|
||||||
|
TextureOp = 4;
|
||||||
|
break;
|
||||||
|
case AMDGPUIntrinsic::R600_txbc:
|
||||||
|
TextureOp = 5;
|
||||||
|
break;
|
||||||
|
case AMDGPUIntrinsic::R600_txf:
|
||||||
|
TextureOp = 6;
|
||||||
|
break;
|
||||||
|
case AMDGPUIntrinsic::R600_txq:
|
||||||
|
TextureOp = 7;
|
||||||
|
break;
|
||||||
|
case AMDGPUIntrinsic::R600_ddx:
|
||||||
|
TextureOp = 8;
|
||||||
|
break;
|
||||||
|
case AMDGPUIntrinsic::R600_ddy:
|
||||||
|
TextureOp = 9;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
llvm_unreachable("Unknow Texture Operation");
|
||||||
|
}
|
||||||
|
|
||||||
|
SDValue TexArgs[19] = {
|
||||||
|
DAG.getConstant(TextureOp, MVT::i32),
|
||||||
|
Op.getOperand(1),
|
||||||
|
DAG.getConstant(0, MVT::i32),
|
||||||
|
DAG.getConstant(1, MVT::i32),
|
||||||
|
DAG.getConstant(2, MVT::i32),
|
||||||
|
DAG.getConstant(3, MVT::i32),
|
||||||
|
Op.getOperand(2),
|
||||||
|
Op.getOperand(3),
|
||||||
|
Op.getOperand(4),
|
||||||
|
DAG.getConstant(0, MVT::i32),
|
||||||
|
DAG.getConstant(1, MVT::i32),
|
||||||
|
DAG.getConstant(2, MVT::i32),
|
||||||
|
DAG.getConstant(3, MVT::i32),
|
||||||
|
Op.getOperand(5),
|
||||||
|
Op.getOperand(6),
|
||||||
|
Op.getOperand(7),
|
||||||
|
Op.getOperand(8),
|
||||||
|
Op.getOperand(9),
|
||||||
|
Op.getOperand(10)
|
||||||
|
};
|
||||||
|
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
|
||||||
|
}
|
||||||
|
|
||||||
case r600_read_ngroups_x:
|
case r600_read_ngroups_x:
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 0);
|
return LowerImplicitParameter(DAG, VT, DL, 0);
|
||||||
|
@ -96,6 +96,12 @@ def UP : InstFlag <"printUpdatePred">;
|
|||||||
// Once we start using the packetizer in this backend we should have this
|
// Once we start using the packetizer in this backend we should have this
|
||||||
// default to 0.
|
// default to 0.
|
||||||
def LAST : InstFlag<"printLast", 1>;
|
def LAST : InstFlag<"printLast", 1>;
|
||||||
|
// 32-bit immediate operand holding a channel selector. It is rendered in
// assembly output through the instruction printer's printRSel method; the
// R600_TEX class uses it for the $srcx..$srcw and $DST_SEL_X..$DST_SEL_W
// operands.
def RSel : Operand<i32> {
|
||||||
|
let PrintMethod = "printRSel";
|
||||||
|
}
|
||||||
|
// 32-bit immediate operand holding a per-channel coordinate-type flag. It is
// rendered in assembly output through the instruction printer's printCT
// method; the R600_TEX class uses it for $COORD_TYPE_X..$COORD_TYPE_W.
// NOTE(review): printCT emits "U" for 0 and "N" for 1, presumably
// unnormalized / normalized coordinates -- confirm against the ISA docs.
def CT: Operand<i32> {
|
||||||
|
let PrintMethod = "printCT";
|
||||||
|
}
|
||||||
|
|
||||||
def FRAMEri : Operand<iPTR> {
|
def FRAMEri : Operand<iPTR> {
|
||||||
let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
|
let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
|
||||||
@ -463,38 +469,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
|
|||||||
pattern,
|
pattern,
|
||||||
itin>;
|
itin>;
|
||||||
|
|
||||||
class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
|
|
||||||
InstrItinClass itin = AnyALU> :
|
|
||||||
InstR600 <(outs R600_Reg128:$DST_GPR),
|
|
||||||
(ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
|
|
||||||
!strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
|
|
||||||
pattern,
|
|
||||||
itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
|
|
||||||
let Inst{31-0} = Word0;
|
|
||||||
let Inst{63-32} = Word1;
|
|
||||||
|
|
||||||
let TEX_INST = inst{4-0};
|
|
||||||
let SRC_REL = 0;
|
|
||||||
let DST_REL = 0;
|
|
||||||
let DST_SEL_X = 0;
|
|
||||||
let DST_SEL_Y = 1;
|
|
||||||
let DST_SEL_Z = 2;
|
|
||||||
let DST_SEL_W = 3;
|
|
||||||
let LOD_BIAS = 0;
|
|
||||||
|
|
||||||
let INST_MOD = 0;
|
|
||||||
let FETCH_WHOLE_QUAD = 0;
|
|
||||||
let ALT_CONST = 0;
|
|
||||||
let SAMPLER_INDEX_MODE = 0;
|
|
||||||
let RESOURCE_INDEX_MODE = 0;
|
|
||||||
|
|
||||||
let COORD_TYPE_X = 0;
|
|
||||||
let COORD_TYPE_Y = 0;
|
|
||||||
let COORD_TYPE_Z = 0;
|
|
||||||
let COORD_TYPE_W = 0;
|
|
||||||
|
|
||||||
let TEXInst = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
|
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
|
||||||
|
|
||||||
@ -618,6 +593,29 @@ def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
|
|||||||
[SDNPVariadic]
|
[SDNPVariadic]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
|
||||||
|
|
||||||
|
def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
|
||||||
|
|
||||||
|
// Selection pattern for one texture operation.
//
// Matches a TEXTURE_FETCH node whose first operand is the constant TextureOp
// and selects `inst` for it. The remaining 18 operands are forwarded to the
// instruction unchanged and in the same order: the coordinate register
// ($SRC_GPR, of value type `vt`), four source channel selects, three offsets,
// four destination channel selects, the resource and sampler ids, and four
// coordinate-type flags. All operands other than $SRC_GPR must be i32
// immediates for the pattern to match.
multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> {
|
||||||
|
def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
|
||||||
|
(i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw),
|
||||||
|
(i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz),
|
||||||
|
(i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z),
|
||||||
|
(i32 imm:$DST_SEL_W),
|
||||||
|
(i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID),
|
||||||
|
(i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z),
|
||||||
|
(i32 imm:$COORD_TYPE_W)),
|
||||||
|
(inst R600_Reg128:$SRC_GPR,
|
||||||
|
imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw,
|
||||||
|
imm:$offsetx, imm:$offsety, imm:$offsetz,
|
||||||
|
imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z,
|
||||||
|
imm:$DST_SEL_W,
|
||||||
|
imm:$RESOURCE_ID, imm:$SAMPLER_ID,
|
||||||
|
imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z,
|
||||||
|
imm:$COORD_TYPE_W)>;
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Interpolation Instructions
|
// Interpolation Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1132,92 +1130,70 @@ def CNDGT_INT : R600_3OP <
|
|||||||
// Texture instructions
|
// Texture instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def TEX_LD : R600_TEX <
|
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
|
||||||
0x03, "TEX_LD",
|
|
||||||
[(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR,
|
class R600_TEX <bits<11> inst, string opName> :
|
||||||
imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
|
InstR600 <(outs R600_Reg128:$DST_GPR),
|
||||||
imm:$SAMPLER_ID, imm:$textureTarget))]
|
(ins R600_Reg128:$SRC_GPR,
|
||||||
> {
|
RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw,
|
||||||
let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z,"
|
i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz,
|
||||||
"$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
|
RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W,
|
||||||
let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
|
i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
|
||||||
i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
|
CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z,
|
||||||
i32imm:$textureTarget);
|
CT:$COORD_TYPE_W),
|
||||||
|
!strconcat(opName,
|
||||||
|
" $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, "
|
||||||
|
"$SRC_GPR.$srcx$srcy$srcz$srcw "
|
||||||
|
"RID:$RESOURCE_ID SID:$SAMPLER_ID "
|
||||||
|
"CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"),
|
||||||
|
[],
|
||||||
|
NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
|
||||||
|
let Inst{31-0} = Word0;
|
||||||
|
let Inst{63-32} = Word1;
|
||||||
|
|
||||||
|
let TEX_INST = inst{4-0};
|
||||||
|
let SRC_REL = 0;
|
||||||
|
let DST_REL = 0;
|
||||||
|
let LOD_BIAS = 0;
|
||||||
|
|
||||||
|
let INST_MOD = 0;
|
||||||
|
let FETCH_WHOLE_QUAD = 0;
|
||||||
|
let ALT_CONST = 0;
|
||||||
|
let SAMPLER_INDEX_MODE = 0;
|
||||||
|
let RESOURCE_INDEX_MODE = 0;
|
||||||
|
|
||||||
|
let TEXInst = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
def TEX_GET_TEXTURE_RESINFO : R600_TEX <
|
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
|
||||||
0x04, "TEX_GET_TEXTURE_RESINFO",
|
|
||||||
[(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR,
|
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def TEX_GET_GRADIENTS_H : R600_TEX <
|
|
||||||
0x07, "TEX_GET_GRADIENTS_H",
|
|
||||||
[(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR,
|
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def TEX_GET_GRADIENTS_V : R600_TEX <
|
|
||||||
0x08, "TEX_GET_GRADIENTS_V",
|
|
||||||
[(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR,
|
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def TEX_SET_GRADIENTS_H : R600_TEX <
|
def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">;
|
||||||
0x0B, "TEX_SET_GRADIENTS_H",
|
def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">;
|
||||||
[]
|
def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">;
|
||||||
>;
|
def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
|
||||||
|
def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
|
||||||
|
def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
|
||||||
|
def TEX_LD : R600_TEX <0x03, "TEX_LD">;
|
||||||
|
def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
|
||||||
|
def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
|
||||||
|
def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
|
||||||
|
def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">;
|
||||||
|
def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">;
|
||||||
|
def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">;
|
||||||
|
def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">;
|
||||||
|
|
||||||
def TEX_SET_GRADIENTS_V : R600_TEX <
|
defm : TexPattern<0, TEX_SAMPLE>;
|
||||||
0x0C, "TEX_SET_GRADIENTS_V",
|
defm : TexPattern<1, TEX_SAMPLE_C>;
|
||||||
[]
|
defm : TexPattern<2, TEX_SAMPLE_L>;
|
||||||
>;
|
defm : TexPattern<3, TEX_SAMPLE_C_L>;
|
||||||
|
defm : TexPattern<4, TEX_SAMPLE_LB>;
|
||||||
def TEX_SAMPLE : R600_TEX <
|
defm : TexPattern<5, TEX_SAMPLE_C_LB>;
|
||||||
0x10, "TEX_SAMPLE",
|
defm : TexPattern<6, TEX_LD, v4i32>;
|
||||||
[(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
|
defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
|
||||||
>;
|
defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
|
||||||
|
|
||||||
def TEX_SAMPLE_C : R600_TEX <
|
|
||||||
0x18, "TEX_SAMPLE_C",
|
|
||||||
[(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
|
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def TEX_SAMPLE_L : R600_TEX <
|
|
||||||
0x11, "TEX_SAMPLE_L",
|
|
||||||
[(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
|
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def TEX_SAMPLE_C_L : R600_TEX <
|
|
||||||
0x19, "TEX_SAMPLE_C_L",
|
|
||||||
[(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
|
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def TEX_SAMPLE_LB : R600_TEX <
|
|
||||||
0x12, "TEX_SAMPLE_LB",
|
|
||||||
[(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
|
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def TEX_SAMPLE_C_LB : R600_TEX <
|
|
||||||
0x1A, "TEX_SAMPLE_C_LB",
|
|
||||||
[(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
|
|
||||||
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def TEX_SAMPLE_G : R600_TEX <
|
|
||||||
0x14, "TEX_SAMPLE_G",
|
|
||||||
[]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def TEX_SAMPLE_C_G : R600_TEX <
|
|
||||||
0x1C, "TEX_SAMPLE_C_G",
|
|
||||||
[]
|
|
||||||
>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Helper classes for common instructions
|
// Helper classes for common instructions
|
||||||
|
@ -12,12 +12,49 @@
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
let TargetPrefix = "R600", isTarget = 1 in {
|
let TargetPrefix = "R600", isTarget = 1 in {
|
||||||
|
// Common signature for the R600 texture intrinsics that take a v4f32
// coordinate vector and return a v4f32 result. The ten arguments are the
// coordinate vector followed by nine i32 values: three offsets, the resource
// and sampler ids, and one coordinate-type flag per channel. The intrinsic is
// declared IntrNoMem.
class TextureIntrinsicFloatInput :
|
||||||
|
Intrinsic<[llvm_v4f32_ty], [
|
||||||
|
llvm_v4f32_ty, // Coord
|
||||||
|
llvm_i32_ty, // offset_x
|
||||||
|
llvm_i32_ty, // offset_y
|
||||||
|
llvm_i32_ty, // offset_z
|
||||||
|
llvm_i32_ty, // resource_id
|
||||||
|
llvm_i32_ty, // sampler_id
|
||||||
|
llvm_i32_ty, // coord_type_x
|
||||||
|
llvm_i32_ty, // coord_type_y
|
||||||
|
llvm_i32_ty, // coord_type_z
|
||||||
|
llvm_i32_ty // coord_type_w
|
||||||
|
], [IntrNoMem]>;
|
||||||
|
// Common signature for the R600 texture intrinsics that take a v4i32
// coordinate vector and return a v4i32 result. The ten arguments are the
// coordinate vector followed by nine i32 values: three offsets, the resource
// and sampler ids, and one coordinate-type flag per channel. The intrinsic is
// declared IntrNoMem.
class TextureIntrinsicInt32Input :
|
||||||
|
Intrinsic<[llvm_v4i32_ty], [
|
||||||
|
llvm_v4i32_ty, // Coord
|
||||||
|
llvm_i32_ty, // offset_x
|
||||||
|
llvm_i32_ty, // offset_y
|
||||||
|
llvm_i32_ty, // offset_z
|
||||||
|
llvm_i32_ty, // resource_id
|
||||||
|
llvm_i32_ty, // sampler_id
|
||||||
|
llvm_i32_ty, // coord_type_x
|
||||||
|
llvm_i32_ty, // coord_type_y
|
||||||
|
llvm_i32_ty, // coord_type_z
|
||||||
|
llvm_i32_ty // coord_type_w
|
||||||
|
], [IntrNoMem]>;
|
||||||
|
|
||||||
def int_R600_load_input :
|
def int_R600_load_input :
|
||||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||||
def int_R600_interp_input :
|
def int_R600_interp_input :
|
||||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
def int_R600_load_texbuf :
|
def int_R600_load_texbuf :
|
||||||
Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
def int_R600_tex : TextureIntrinsicFloatInput;
|
||||||
|
def int_R600_texc : TextureIntrinsicFloatInput;
|
||||||
|
def int_R600_txl : TextureIntrinsicFloatInput;
|
||||||
|
def int_R600_txlc : TextureIntrinsicFloatInput;
|
||||||
|
def int_R600_txb : TextureIntrinsicFloatInput;
|
||||||
|
def int_R600_txbc : TextureIntrinsicFloatInput;
|
||||||
|
def int_R600_txf : TextureIntrinsicInt32Input;
|
||||||
|
def int_R600_txq : TextureIntrinsicInt32Input;
|
||||||
|
def int_R600_ddx : TextureIntrinsicFloatInput;
|
||||||
|
def int_R600_ddy : TextureIntrinsicFloatInput;
|
||||||
def int_R600_store_swizzle :
|
def int_R600_store_swizzle :
|
||||||
Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
|
Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
|
||||||
def int_R600_store_stream_output :
|
def int_R600_store_stream_output :
|
||||||
|
286
lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
Normal file
286
lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
Normal file
@ -0,0 +1,286 @@
|
|||||||
|
//===-- R600TextureIntrinsicsReplacer.cpp ---------------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
/// \file
|
||||||
|
/// This pass translates TGSI-like texture intrinsics into R600 texture
|
||||||
|
/// intrinsics that are closer to the hardware.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "llvm/Analysis/Passes.h"
|
||||||
|
#include "llvm/ADT/Statistic.h"
|
||||||
|
#include "llvm/IR/Function.h"
|
||||||
|
#include "llvm/InstVisitor.h"
|
||||||
|
#include "llvm/IR/IRBuilder.h"
|
||||||
|
#include "llvm/IR/GlobalValue.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
class R600TextureIntrinsicsReplacer :
|
||||||
|
public FunctionPass, public InstVisitor<R600TextureIntrinsicsReplacer> {
|
||||||
|
static char ID;
|
||||||
|
|
||||||
|
Module *Mod;
|
||||||
|
Type *FloatType;
|
||||||
|
Type *Int32Type;
|
||||||
|
Type *V4f32Type;
|
||||||
|
Type *V4i32Type;
|
||||||
|
FunctionType *TexSign;
|
||||||
|
FunctionType *TexQSign;
|
||||||
|
|
||||||
|
void getAdjustementFromTextureTarget(unsigned TextureType, bool hasLOD,
|
||||||
|
unsigned SrcSelect[4], unsigned CT[4],
|
||||||
|
bool &useShadowVariant) {
|
||||||
|
enum TextureTypes {
|
||||||
|
TEXTURE_1D = 1,
|
||||||
|
TEXTURE_2D,
|
||||||
|
TEXTURE_3D,
|
||||||
|
TEXTURE_CUBE,
|
||||||
|
TEXTURE_RECT,
|
||||||
|
TEXTURE_SHADOW1D,
|
||||||
|
TEXTURE_SHADOW2D,
|
||||||
|
TEXTURE_SHADOWRECT,
|
||||||
|
TEXTURE_1D_ARRAY,
|
||||||
|
TEXTURE_2D_ARRAY,
|
||||||
|
TEXTURE_SHADOW1D_ARRAY,
|
||||||
|
TEXTURE_SHADOW2D_ARRAY,
|
||||||
|
TEXTURE_SHADOWCUBE,
|
||||||
|
TEXTURE_2D_MSAA,
|
||||||
|
TEXTURE_2D_ARRAY_MSAA,
|
||||||
|
TEXTURE_CUBE_ARRAY,
|
||||||
|
TEXTURE_SHADOWCUBE_ARRAY
|
||||||
|
};
|
||||||
|
|
||||||
|
switch (TextureType) {
|
||||||
|
case 0:
|
||||||
|
return;
|
||||||
|
case TEXTURE_RECT:
|
||||||
|
case TEXTURE_1D:
|
||||||
|
case TEXTURE_2D:
|
||||||
|
case TEXTURE_3D:
|
||||||
|
case TEXTURE_CUBE:
|
||||||
|
case TEXTURE_1D_ARRAY:
|
||||||
|
case TEXTURE_2D_ARRAY:
|
||||||
|
case TEXTURE_CUBE_ARRAY:
|
||||||
|
case TEXTURE_2D_MSAA:
|
||||||
|
case TEXTURE_2D_ARRAY_MSAA:
|
||||||
|
useShadowVariant = false;
|
||||||
|
break;
|
||||||
|
case TEXTURE_SHADOW1D:
|
||||||
|
case TEXTURE_SHADOW2D:
|
||||||
|
case TEXTURE_SHADOWRECT:
|
||||||
|
case TEXTURE_SHADOW1D_ARRAY:
|
||||||
|
case TEXTURE_SHADOW2D_ARRAY:
|
||||||
|
case TEXTURE_SHADOWCUBE:
|
||||||
|
case TEXTURE_SHADOWCUBE_ARRAY:
|
||||||
|
useShadowVariant = true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
llvm_unreachable("Unknow Texture Type");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TextureType == TEXTURE_RECT ||
|
||||||
|
TextureType == TEXTURE_SHADOWRECT) {
|
||||||
|
CT[0] = 0;
|
||||||
|
CT[1] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TextureType == TEXTURE_CUBE_ARRAY ||
|
||||||
|
TextureType == TEXTURE_SHADOWCUBE_ARRAY) {
|
||||||
|
CT[2] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TextureType == TEXTURE_1D_ARRAY ||
|
||||||
|
TextureType == TEXTURE_SHADOW1D_ARRAY) {
|
||||||
|
if (hasLOD && useShadowVariant) {
|
||||||
|
CT[1] = 0;
|
||||||
|
} else {
|
||||||
|
CT[2] = 0;
|
||||||
|
SrcSelect[2] = 1;
|
||||||
|
}
|
||||||
|
} else if (TextureType == TEXTURE_2D_ARRAY ||
|
||||||
|
TextureType == TEXTURE_SHADOW2D_ARRAY) {
|
||||||
|
CT[2] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((TextureType == TEXTURE_SHADOW1D ||
|
||||||
|
TextureType == TEXTURE_SHADOW2D ||
|
||||||
|
TextureType == TEXTURE_SHADOWRECT ||
|
||||||
|
TextureType == TEXTURE_SHADOW1D_ARRAY) &&
|
||||||
|
!(hasLOD && useShadowVariant)) {
|
||||||
|
SrcSelect[3] = 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ReplaceCallInst(CallInst &I, FunctionType *FT, const char *Name,
|
||||||
|
unsigned SrcSelect[4], Value *Offset[3], Value *Resource,
|
||||||
|
Value *Sampler, unsigned CT[4], Value *Coord) {
|
||||||
|
IRBuilder<> Builder(&I);
|
||||||
|
Constant *Mask[] = {
|
||||||
|
ConstantInt::get(Int32Type, SrcSelect[0]),
|
||||||
|
ConstantInt::get(Int32Type, SrcSelect[1]),
|
||||||
|
ConstantInt::get(Int32Type, SrcSelect[2]),
|
||||||
|
ConstantInt::get(Int32Type, SrcSelect[3])
|
||||||
|
};
|
||||||
|
Value *SwizzleMask = ConstantVector::get(Mask);
|
||||||
|
Value *SwizzledCoord =
|
||||||
|
Builder.CreateShuffleVector(Coord, Coord, SwizzleMask);
|
||||||
|
|
||||||
|
Value *Args[] = {
|
||||||
|
SwizzledCoord,
|
||||||
|
Offset[0],
|
||||||
|
Offset[1],
|
||||||
|
Offset[2],
|
||||||
|
Resource,
|
||||||
|
Sampler,
|
||||||
|
ConstantInt::get(Int32Type, CT[0]),
|
||||||
|
ConstantInt::get(Int32Type, CT[1]),
|
||||||
|
ConstantInt::get(Int32Type, CT[2]),
|
||||||
|
ConstantInt::get(Int32Type, CT[3])
|
||||||
|
};
|
||||||
|
|
||||||
|
Function *F = Mod->getFunction(Name);
|
||||||
|
if (!F) {
|
||||||
|
F = Function::Create(FT, GlobalValue::ExternalLinkage, Name, Mod);
|
||||||
|
F->addFnAttr(Attribute::ReadNone);
|
||||||
|
}
|
||||||
|
I.replaceAllUsesWith(Builder.CreateCall(F, Args));
|
||||||
|
I.eraseFromParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ReplaceTexIntrinsic(CallInst &I, bool hasLOD, FunctionType *FT,
|
||||||
|
const char *VanillaInt,
|
||||||
|
const char *ShadowInt) {
|
||||||
|
Value *Coord = I.getArgOperand(0);
|
||||||
|
Value *ResourceId = I.getArgOperand(1);
|
||||||
|
Value *SamplerId = I.getArgOperand(2);
|
||||||
|
|
||||||
|
unsigned TextureType =
|
||||||
|
dyn_cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
|
||||||
|
|
||||||
|
unsigned SrcSelect[4] = { 0, 1, 2, 3 };
|
||||||
|
unsigned CT[4] = {1, 1, 1, 1};
|
||||||
|
Value *Offset[3] = {
|
||||||
|
ConstantInt::get(Int32Type, 0),
|
||||||
|
ConstantInt::get(Int32Type, 0),
|
||||||
|
ConstantInt::get(Int32Type, 0)
|
||||||
|
};
|
||||||
|
bool useShadowVariant;
|
||||||
|
|
||||||
|
getAdjustementFromTextureTarget(TextureType, hasLOD, SrcSelect, CT,
|
||||||
|
useShadowVariant);
|
||||||
|
|
||||||
|
ReplaceCallInst(I, FT, useShadowVariant?ShadowInt:VanillaInt, SrcSelect,
|
||||||
|
Offset, ResourceId, SamplerId, CT, Coord);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ReplaceTXF(CallInst &I) {
|
||||||
|
Value *Coord = I.getArgOperand(0);
|
||||||
|
Value *ResourceId = I.getArgOperand(4);
|
||||||
|
Value *SamplerId = I.getArgOperand(5);
|
||||||
|
|
||||||
|
unsigned TextureType =
|
||||||
|
dyn_cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
|
||||||
|
|
||||||
|
unsigned SrcSelect[4] = { 0, 1, 2, 3 };
|
||||||
|
unsigned CT[4] = {1, 1, 1, 1};
|
||||||
|
Value *Offset[3] = {
|
||||||
|
I.getArgOperand(1),
|
||||||
|
I.getArgOperand(2),
|
||||||
|
I.getArgOperand(3),
|
||||||
|
};
|
||||||
|
bool useShadowVariant;
|
||||||
|
|
||||||
|
getAdjustementFromTextureTarget(TextureType, false, SrcSelect, CT,
|
||||||
|
useShadowVariant);
|
||||||
|
|
||||||
|
ReplaceCallInst(I, TexQSign, "llvm.R600.txf", SrcSelect,
|
||||||
|
Offset, ResourceId, SamplerId, CT, Coord);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
R600TextureIntrinsicsReplacer():
|
||||||
|
FunctionPass(ID) {
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool doInitialization(Module &M) {
|
||||||
|
LLVMContext &Ctx = M.getContext();
|
||||||
|
Mod = &M;
|
||||||
|
FloatType = Type::getFloatTy(Ctx);
|
||||||
|
Int32Type = Type::getInt32Ty(Ctx);
|
||||||
|
V4f32Type = VectorType::get(FloatType, 4);
|
||||||
|
V4i32Type = VectorType::get(Int32Type, 4);
|
||||||
|
Type *ArgsType[] = {
|
||||||
|
V4f32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
};
|
||||||
|
TexSign = FunctionType::get(V4f32Type, ArgsType);
|
||||||
|
Type *ArgsQType[] = {
|
||||||
|
V4i32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
Int32Type,
|
||||||
|
};
|
||||||
|
TexQSign = FunctionType::get(V4f32Type, ArgsQType);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool runOnFunction(Function &F) {
|
||||||
|
visit(F);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual const char *getPassName() const {
|
||||||
|
return "R600 Texture Intrinsics Replacer";
|
||||||
|
}
|
||||||
|
|
||||||
|
void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||||
|
}
|
||||||
|
|
||||||
|
void visitCallInst(CallInst &I) {
|
||||||
|
if (I.getCalledFunction()->getName() == "llvm.AMDGPU.tex")
|
||||||
|
ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.tex", "llvm.R600.texc");
|
||||||
|
if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txl")
|
||||||
|
ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txl", "llvm.R600.txlc");
|
||||||
|
if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txb")
|
||||||
|
ReplaceTexIntrinsic(I, true, TexSign, "llvm.R600.txb", "llvm.R600.txbc");
|
||||||
|
if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txf")
|
||||||
|
ReplaceTXF(I);
|
||||||
|
if (I.getCalledFunction()->getName() == "llvm.AMDGPU.txq")
|
||||||
|
ReplaceTexIntrinsic(I, false, TexQSign, "llvm.R600.txq", "llvm.R600.txq");
|
||||||
|
if (I.getCalledFunction()->getName() == "llvm.AMDGPU.ddx")
|
||||||
|
ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddx", "llvm.R600.ddx");
|
||||||
|
if (I.getCalledFunction()->getName() == "llvm.AMDGPU.ddy")
|
||||||
|
ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.ddy", "llvm.R600.ddy");
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
char R600TextureIntrinsicsReplacer::ID = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a newly allocated instance of the R600 texture intrinsics
/// replacer pass.
FunctionPass *llvm::createR600TextureIntrinsicsReplacer() {
  FunctionPass *Replacer = new R600TextureIntrinsicsReplacer();
  return Replacer;
}
|
@ -1,21 +1,21 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||||
|
|
||||||
;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 1
|
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
|
||||||
;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 2
|
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
|
||||||
;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 3
|
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
|
||||||
;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 4
|
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
|
||||||
;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 5
|
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN
|
||||||
;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 6
|
;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
|
||||||
;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 7
|
;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
|
||||||
;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 8
|
;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:UUNN
|
||||||
;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 9
|
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
|
||||||
;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 10
|
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
|
||||||
;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 11
|
;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
|
||||||
;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 12
|
;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
|
||||||
;CHECK: TEX_SAMPLE_CT{{[0-9]+, T[0-9]+}}, 0, 0, 13
|
;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
|
||||||
;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 14
|
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
|
||||||
;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 15
|
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
|
||||||
;CHECK: TEX_SAMPLET{{[0-9]+, T[0-9]+}}, 0, 0, 16
|
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
|
||||||
|
|
||||||
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
|
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
|
||||||
%addr = load <4 x float> addrspace(1)* %in
|
%addr = load <4 x float> addrspace(1)* %in
|
||||||
|
Loading…
x
Reference in New Issue
Block a user