R600/SI: Use V_FRACT_F64 for faster 64-bit floor on SI

Other f64 opcodes not supported on SI can be lowered in a similar way.

v2: use complex VOP3 patterns

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233076 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Marek Olsak
2015-03-24 13:40:15 +00:00
parent 91c066ae15
commit 3f05a5e0ad
6 changed files with 175 additions and 22 deletions

View File

@@ -727,6 +727,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MI->eraseFromParent();
break;
}
// Expand the 64-bit conditional-select pseudo into two 32-bit
// V_CNDMASK_B32_e64 instructions: one writing the sub0 half of the
// destination from the sub0 halves of the sources, one doing the same for
// sub1. Both instructions select on the same condition operand (operand 3).
case AMDGPU::V_CNDMASK_B64_PSEUDO: {
  unsigned Dst = MI->getOperand(0).getReg();
  unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
  unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
  unsigned Src0 = MI->getOperand(1).getReg();
  unsigned Src1 = MI->getOperand(2).getReg();
  const MachineOperand &SrcCond = MI->getOperand(3);

  // The condition is read by both halves. addOperand() copies the operand
  // together with its flags, so reusing SrcCond verbatim on the first
  // instruction would also copy a kill flag and mark the condition register
  // dead before the second instruction reads it. Give the low half a copy
  // with the kill flag cleared; the high half is the last use and keeps the
  // original operand unchanged.
  MachineOperand SrcCondLo = SrcCond;
  if (SrcCondLo.isReg() && SrcCondLo.isUse())
    SrcCondLo.setIsKill(false);

  // Low 32 bits.
  BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
      .addOperand(SrcCondLo);
  // High 32 bits.
  BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
      .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
      .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
      .addOperand(SrcCond);
  // The pseudo has been fully replaced by the two instructions above.
  MI->eraseFromParent();
  break;
}
}
return true;
}