mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-11 00:39:36 +00:00
R600: Optimize and cleanup KILL on SI
We shouldn't insert KILL optimization if we don't have a kill instruction at all. Patch by: Christian König Tested-by: Michel Dänzer <michel.daenzer@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> Signed-off-by: Christian König <deathsimple@vodafone.de> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172845 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
956f13440a
commit
935a91540b
@ -131,9 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
case AMDGPU::SI_INTERP_CONST:
|
||||
LowerSI_INTERP_CONST(MI, *BB, I, MRI);
|
||||
break;
|
||||
case AMDGPU::SI_KIL:
|
||||
LowerSI_KIL(MI, *BB, I, MRI);
|
||||
break;
|
||||
case AMDGPU::SI_WQM:
|
||||
LowerSI_WQM(MI, *BB, I, MRI);
|
||||
break;
|
||||
@ -211,17 +208,6 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
|
||||
// Clear this pixel from the exec mask if the operand is negative
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
|
||||
AMDGPU::VCC)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addOperand(MI->getOperand(0));
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
|
||||
unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||
|
@ -34,8 +34,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
|
||||
void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
|
||||
void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
|
||||
void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
|
||||
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
|
@ -1080,13 +1080,6 @@ def SI_INTERP_CONST : InstSI <
|
||||
imm:$attr, SReg_32:$params))]
|
||||
>;
|
||||
|
||||
def SI_KIL : InstSI <
|
||||
(outs),
|
||||
(ins VReg_32:$src),
|
||||
"SI_KIL $src",
|
||||
[(int_AMDGPU_kill VReg_32:$src)]
|
||||
>;
|
||||
|
||||
def SI_WQM : InstSI <
|
||||
(outs),
|
||||
(ins),
|
||||
@ -1157,11 +1150,23 @@ def SI_END_CF : InstSI <
|
||||
[(int_SI_end_cf SReg_64:$saved)]
|
||||
>;
|
||||
|
||||
def SI_KILL : InstSI <
|
||||
(outs),
|
||||
(ins VReg_32:$src),
|
||||
"SI_KIL $src",
|
||||
[(int_AMDGPU_kill VReg_32:$src)]
|
||||
>;
|
||||
|
||||
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
|
||||
// Uses = [EXEC], Defs = [EXEC]
|
||||
|
||||
} // end IsCodeGenOnly, isPseudo
|
||||
|
||||
def : Pat <
|
||||
(int_AMDGPU_kilp),
|
||||
(SI_KILL (V_MOV_IMM_I32 0xbf800000))
|
||||
>;
|
||||
|
||||
/* int_SI_vs_load_input */
|
||||
def : Pat<
|
||||
(int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
|
||||
@ -1314,11 +1319,6 @@ def : Pat<
|
||||
(V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(int_AMDGPU_kilp),
|
||||
(SI_KIL (V_MOV_IMM_I32 0xbf800000))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(int_AMDGPU_cube VReg_128:$src),
|
||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
|
||||
|
@ -68,7 +68,10 @@ private:
|
||||
static char ID;
|
||||
const TargetInstrInfo *TII;
|
||||
|
||||
void Skip(MachineInstr &MI, MachineOperand &To);
|
||||
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
|
||||
|
||||
void Skip(MachineInstr &From, MachineOperand &To);
|
||||
void SkipIfDead(MachineInstr &MI);
|
||||
|
||||
void If(MachineInstr &MI);
|
||||
void Else(MachineInstr &MI);
|
||||
@ -78,6 +81,7 @@ private:
|
||||
void Loop(MachineInstr &MI);
|
||||
void EndCf(MachineInstr &MI);
|
||||
|
||||
void Kill(MachineInstr &MI);
|
||||
void Branch(MachineInstr &MI);
|
||||
|
||||
public:
|
||||
@ -100,22 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
|
||||
return new SILowerControlFlowPass(tm);
|
||||
}
|
||||
|
||||
void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
|
||||
bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
|
||||
MachineBasicBlock *To) {
|
||||
|
||||
unsigned NumInstr = 0;
|
||||
|
||||
for (MachineBasicBlock *MBB = *From.getParent()->succ_begin();
|
||||
NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty();
|
||||
for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
|
||||
MBB = *MBB->succ_begin()) {
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
|
||||
NumInstr < SkipThreshold && I != E; ++I) {
|
||||
|
||||
if (I->isBundle() || !I->isBundled())
|
||||
++NumInstr;
|
||||
if (++NumInstr >= SkipThreshold)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (NumInstr < SkipThreshold)
|
||||
return false;
|
||||
}
|
||||
|
||||
void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
|
||||
|
||||
if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
|
||||
return;
|
||||
|
||||
DebugLoc DL = From.getDebugLoc();
|
||||
@ -124,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
|
||||
.addReg(AMDGPU::EXEC);
|
||||
}
|
||||
|
||||
void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
|
||||
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
if (!shouldSkip(&MBB, &MBB.getParent()->back()))
|
||||
return;
|
||||
|
||||
MachineBasicBlock::iterator Insert = &MI;
|
||||
++Insert;
|
||||
|
||||
// If the exec mask is non-zero, skip the next two instructions
|
||||
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
|
||||
.addImm(3)
|
||||
.addReg(AMDGPU::EXEC);
|
||||
|
||||
// Exec mask is zero: Export to NULL target...
|
||||
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
|
||||
.addImm(0)
|
||||
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
|
||||
.addImm(0)
|
||||
.addImm(1)
|
||||
.addImm(1)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0);
|
||||
|
||||
// ... and terminate wavefront
|
||||
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
|
||||
}
|
||||
|
||||
void SILowerControlFlowPass::If(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
@ -242,8 +285,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
void SILowerControlFlowPass::Kill(MachineInstr &MI) {
|
||||
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
// Kill is only allowed in pixel shaders
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
||||
assert(Info->ShaderType == ShaderType::PIXEL);
|
||||
|
||||
// Clear this pixel from the exec mask if the operand is negative
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addOperand(MI.getOperand(0));
|
||||
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
bool HaveCf = false;
|
||||
|
||||
bool HaveKill = false;
|
||||
unsigned Depth = 0;
|
||||
|
||||
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
|
||||
BI != BE; ++BI) {
|
||||
@ -257,6 +320,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
switch (MI.getOpcode()) {
|
||||
default: break;
|
||||
case AMDGPU::SI_IF:
|
||||
++Depth;
|
||||
If(MI);
|
||||
break;
|
||||
|
||||
@ -277,14 +341,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_LOOP:
|
||||
++Depth;
|
||||
Loop(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_END_CF:
|
||||
HaveCf = true;
|
||||
if (--Depth == 0 && HaveKill) {
|
||||
SkipIfDead(MI);
|
||||
HaveKill = false;
|
||||
}
|
||||
EndCf(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_KILL:
|
||||
if (Depth == 0)
|
||||
SkipIfDead(MI);
|
||||
else
|
||||
HaveKill = true;
|
||||
Kill(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::S_BRANCH:
|
||||
Branch(MI);
|
||||
break;
|
||||
@ -292,40 +368,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: What is this good for?
|
||||
unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType;
|
||||
if (HaveCf && ShaderType == ShaderType::PIXEL) {
|
||||
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
|
||||
BI != BE; ++BI) {
|
||||
|
||||
MachineBasicBlock &MBB = *BI;
|
||||
if (MBB.succ_empty()) {
|
||||
|
||||
MachineInstr &MI = *MBB.getFirstNonPHI();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
// If the exec mask is non-zero, skip the next two instructions
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
|
||||
.addImm(3)
|
||||
.addReg(AMDGPU::EXEC);
|
||||
|
||||
// Exec mask is zero: Export to NULL target...
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP))
|
||||
.addImm(0)
|
||||
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
|
||||
.addImm(0)
|
||||
.addImm(1)
|
||||
.addImm(1)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0);
|
||||
|
||||
// ... and terminate wavefront
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user