mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-11-01 15:17:25 +00:00
[FastISel][X86] Add support for floating-point select.
This extends the select lowering to support floating-point selects. The lowering depends on SSE instructions being available and on the condition coming from a floating-point compare. Under these conditions it is possible to emit an optimized instruction sequence that doesn't require any branches to simulate the select. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211544 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -113,6 +113,8 @@ private:
|
||||
|
||||
bool X86FastEmitCMoveSelect(const Instruction *I);
|
||||
|
||||
bool X86FastEmitSSESelect(const Instruction *I);
|
||||
|
||||
bool X86SelectSelect(const Instruction *I);
|
||||
|
||||
bool X86SelectTrunc(const Instruction *I);
|
||||
@@ -235,6 +237,41 @@ getX86ConditonCode(CmpInst::Predicate Predicate) {
|
||||
return std::make_pair(CC, NeedSwap);
|
||||
}
|
||||
|
||||
static std::pair<unsigned, bool>
|
||||
getX86SSECondtionCode(CmpInst::Predicate Predicate) {
|
||||
unsigned CC;
|
||||
bool NeedSwap = false;
|
||||
|
||||
// SSE Condition code mapping:
|
||||
// 0 - EQ
|
||||
// 1 - LT
|
||||
// 2 - LE
|
||||
// 3 - UNORD
|
||||
// 4 - NEQ
|
||||
// 5 - NLT
|
||||
// 6 - NLE
|
||||
// 7 - ORD
|
||||
switch (Predicate) {
|
||||
default: llvm_unreachable("Unexpected predicate");
|
||||
case CmpInst::FCMP_OEQ: CC = 0; break;
|
||||
case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
|
||||
case CmpInst::FCMP_OLT: CC = 1; break;
|
||||
case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
|
||||
case CmpInst::FCMP_OLE: CC = 2; break;
|
||||
case CmpInst::FCMP_UNO: CC = 3; break;
|
||||
case CmpInst::FCMP_UNE: CC = 4; break;
|
||||
case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
|
||||
case CmpInst::FCMP_UGE: CC = 5; break;
|
||||
case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
|
||||
case CmpInst::FCMP_UGT: CC = 6; break;
|
||||
case CmpInst::FCMP_ORD: CC = 7; break;
|
||||
case CmpInst::FCMP_UEQ:
|
||||
case CmpInst::FCMP_ONE: CC = 8; break;
|
||||
}
|
||||
|
||||
return std::make_pair(CC, NeedSwap);
|
||||
}
|
||||
|
||||
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
|
||||
EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
|
||||
if (evt == MVT::Other || !evt.isSimple())
|
||||
@@ -1728,6 +1765,93 @@ bool X86FastISel::X86FastEmitCMoveSelect(const Instruction *I) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Emit SSE instructions to lower the select.
|
||||
///
|
||||
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
|
||||
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
|
||||
/// SSE instructions are available.
|
||||
bool X86FastISel::X86FastEmitSSESelect(const Instruction *I) {
|
||||
MVT RetVT;
|
||||
if (!isTypeLegal(I->getType(), RetVT))
|
||||
return false;
|
||||
|
||||
const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
|
||||
if (!CI)
|
||||
return false;
|
||||
|
||||
if (I->getType() != CI->getOperand(0)->getType() ||
|
||||
!((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
|
||||
(Subtarget->hasSSE2() && RetVT == MVT::f64) ))
|
||||
return false;
|
||||
|
||||
const Value *CmpLHS = CI->getOperand(0);
|
||||
const Value *CmpRHS = CI->getOperand(1);
|
||||
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
|
||||
|
||||
// The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
|
||||
// We don't have to materialize a zero constant for this case and can just use
|
||||
// %x again on the RHS.
|
||||
if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
|
||||
const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
|
||||
if (CmpRHSC && CmpRHSC->isNullValue())
|
||||
CmpRHS = CmpLHS;
|
||||
}
|
||||
|
||||
unsigned CC;
|
||||
bool NeedSwap;
|
||||
std::tie(CC, NeedSwap) = getX86SSECondtionCode(Predicate);
|
||||
if (CC > 7)
|
||||
return false;
|
||||
|
||||
if (NeedSwap)
|
||||
std::swap(CmpLHS, CmpRHS);
|
||||
|
||||
static unsigned OpcTable[2][2][4] = {
|
||||
{ { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
|
||||
{ X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
|
||||
{ { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
|
||||
{ X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
|
||||
};
|
||||
|
||||
bool HasAVX = Subtarget->hasAVX();
|
||||
unsigned *Opc = nullptr;
|
||||
switch (RetVT.SimpleTy) {
|
||||
default: return false;
|
||||
case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
|
||||
case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
|
||||
}
|
||||
|
||||
const Value *LHS = I->getOperand(1);
|
||||
const Value *RHS = I->getOperand(2);
|
||||
|
||||
unsigned LHSReg = getRegForValue(LHS);
|
||||
bool LHSIsKill = hasTrivialKill(LHS);
|
||||
|
||||
unsigned RHSReg = getRegForValue(RHS);
|
||||
bool RHSIsKill = hasTrivialKill(RHS);
|
||||
|
||||
unsigned CmpLHSReg = getRegForValue(CmpLHS);
|
||||
bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
|
||||
|
||||
unsigned CmpRHSReg = getRegForValue(CmpRHS);
|
||||
bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
|
||||
|
||||
if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS)
|
||||
return false;
|
||||
|
||||
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
|
||||
unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
|
||||
CmpRHSReg, CmpRHSIsKill, CC);
|
||||
unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
|
||||
LHSReg, LHSIsKill);
|
||||
unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
|
||||
RHSReg, RHSIsKill);
|
||||
unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
|
||||
AndReg, /*IsKill=*/true);
|
||||
UpdateValueMap(I, ResultReg);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool X86FastISel::X86SelectSelect(const Instruction *I) {
|
||||
MVT RetVT;
|
||||
if (!isTypeLegal(I->getType(), RetVT))
|
||||
@@ -1762,6 +1886,10 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
|
||||
if (X86FastEmitCMoveSelect(I))
|
||||
return true;
|
||||
|
||||
// Try to use a sequence of SSE instructions to simulate a conditional move.
|
||||
if (X86FastEmitSSESelect(I))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user