[FastISel][X86] Add support for floating-point select.
This extends the select lowering to support floating-point selects. The lowering requires SSE instructions and that the condition comes from a floating-point compare. Under these conditions it is possible to emit an optimized instruction sequence that doesn't require any branches to simulate the select.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211544 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 5f4e6e1ec0
commit d0976a3d20
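As a standalone illustration of the branch-free idea described in the commit message (this is not LLVM code; the helper name below is made up): an SSE scalar compare materializes an all-ones or all-zero bit mask, and the select is then assembled with AND/ANDN/OR, mirroring the CMP/AND/ANDN/OR sequence this patch emits.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Hypothetical scalar model of the CMPSS/ANDPS/ANDNPS/ORPS sequence.
static float branchlessSelect(bool Cond, float C, float D) {
  uint32_t Mask = Cond ? 0xFFFFFFFFu : 0u;        // what the SSE compare produces
  uint32_t CBits, DBits;
  std::memcpy(&CBits, &C, sizeof(C));
  std::memcpy(&DBits, &D, sizeof(D));
  uint32_t RBits = (Mask & CBits) | (~Mask & DBits); // ANDPS, ANDNPS, ORPS
  float R;
  std::memcpy(&R, &RBits, sizeof(R));
  return R;
}

int main() {
  std::printf("%f\n", branchlessSelect(1.0f == 1.0f, 2.0f, 3.0f)); // prints 2.000000
}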
@@ -113,6 +113,8 @@ private:
  bool X86FastEmitCMoveSelect(const Instruction *I);

  bool X86FastEmitSSESelect(const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);
@@ -235,6 +237,41 @@ getX86ConditonCode(CmpInst::Predicate Predicate) {
  return std::make_pair(CC, NeedSwap);
}

static std::pair<unsigned, bool>
getX86SSECondtionCode(CmpInst::Predicate Predicate) {
  unsigned CC;
  bool NeedSwap = false;

  // SSE Condition code mapping:
  // 0 - EQ
  // 1 - LT
  // 2 - LE
  // 3 - UNORD
  // 4 - NEQ
  // 5 - NLT
  // 6 - NLE
  // 7 - ORD
  switch (Predicate) {
  default: llvm_unreachable("Unexpected predicate");
  case CmpInst::FCMP_OEQ: CC = 0; break;
  case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OLT: CC = 1; break;
  case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OLE: CC = 2; break;
  case CmpInst::FCMP_UNO: CC = 3; break;
  case CmpInst::FCMP_UNE: CC = 4; break;
  case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_UGE: CC = 5; break;
  case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_UGT: CC = 6; break;
  case CmpInst::FCMP_ORD: CC = 7; break;
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_ONE: CC = 8; break;
  }

  return std::make_pair(CC, NeedSwap);
}

bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
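A quick standalone sketch of how predicates without a direct SSE condition code are handled by swapping the compare operands (this is an illustration, not the LLVM implementation; the Pred enum and sseConditionCode helper are made-up stand-ins): FCMP_OGT maps to condition code 1 (LT) with the operands swapped, which is why the ogt tests further down check cmpltss with the registers reversed relative to the olt tests.

#include <cstdio>
#include <utility>

enum class Pred { OEQ, OLT, OGT };             // tiny stand-in for CmpInst::Predicate

// Returns the CMPSS/CMPSD immediate and whether the compare operands must be swapped.
static std::pair<unsigned, bool> sseConditionCode(Pred P) {
  switch (P) {
  case Pred::OEQ: return {0u, false};          // cmpeqss a, b
  case Pred::OLT: return {1u, false};          // cmpltss a, b
  case Pred::OGT: return {1u, true};           // cmpltss b, a  (a > b  <=>  b < a)
  }
  return {8u, false};                          // anything else: unsupported in this sketch
}

int main() {
  std::pair<unsigned, bool> CC = sseConditionCode(Pred::OGT);
  std::printf("OGT -> imm %u, swap=%d\n", CC.first, CC.second); // OGT -> imm 1, swap=1
}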
@@ -1728,6 +1765,93 @@ bool X86FastISel::X86FastEmitCMoveSelect(const Instruction *I) {
  return true;
}

/// \brief Emit SSE instructions to lower the select.
///
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
/// SSE instructions are available.
bool X86FastISel::X86FastEmitSSESelect(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
  if (!CI)
    return false;

  if (I->getType() != CI->getOperand(0)->getType() ||
      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
        (Subtarget->hasSSE2() && RetVT == MVT::f64)))
    return false;

  const Value *CmpLHS = CI->getOperand(0);
  const Value *CmpRHS = CI->getOperand(1);
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just
  // use %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
    if (CmpRHSC && CmpRHSC->isNullValue())
      CmpRHS = CmpLHS;
  }

  unsigned CC;
  bool NeedSwap;
  std::tie(CC, NeedSwap) = getX86SSECondtionCode(Predicate);
  if (CC > 7)
    return false;

  if (NeedSwap)
    std::swap(CmpLHS, CmpRHS);

  static unsigned OpcTable[2][2][4] = {
    { { X86::CMPSSrr,  X86::FsANDPSrr,  X86::FsANDNPSrr,  X86::FsORPSrr  },
      { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
    { { X86::CMPSDrr,  X86::FsANDPDrr,  X86::FsANDNPDrr,  X86::FsORPDrr  },
      { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
  };

  bool HasAVX = Subtarget->hasAVX();
  unsigned *Opc = nullptr;
  switch (RetVT.SimpleTy) {
  default: return false;
  case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
  case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  unsigned LHSReg = getRegForValue(LHS);
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned RHSReg = getRegForValue(RHS);
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned CmpLHSReg = getRegForValue(CmpLHS);
  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);

  unsigned CmpRHSReg = getRegForValue(CmpRHS);
  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);

  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
                                     CmpRHSReg, CmpRHSIsKill, CC);
  unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
                                    LHSReg, LHSIsKill);
  unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
                                     RHSReg, RHSIsKill);
  unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
                                       AndReg, /*IsKill=*/true);
  UpdateValueMap(I, ResultReg);
  return true;
}
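As a standalone sketch of the opcode-table lookup above (an illustration only, not the LLVM code): the outer index picks f32 vs. f64 and the inner index picks the SSE or AVX form. Strings stand in for the X86::* opcode enums used in the real OpcTable.

#include <cstdio>

// Mirrors the shape of OpcTable in X86FastEmitSSESelect: [f32|f64][SSE|AVX][cmp, and, andn, or].
static const char *const OpcNames[2][2][4] = {
  { { "CMPSSrr",  "FsANDPSrr",  "FsANDNPSrr",  "FsORPSrr"  },
    { "VCMPSSrr", "VFsANDPSrr", "VFsANDNPSrr", "VFsORPSrr" } },
  { { "CMPSDrr",  "FsANDPDrr",  "FsANDNPDrr",  "FsORPDrr"  },
    { "VCMPSDrr", "VFsANDPDrr", "VFsANDNPDrr", "VFsORPDrr" } },
};

int main() {
  bool IsF64 = true, HasAVX = true;
  const char *const *Opc = OpcNames[IsF64][HasAVX];
  // Prints the compare/and/andn/or opcodes chosen for an f64 select when AVX is available.
  std::printf("%s %s %s %s\n", Opc[0], Opc[1], Opc[2], Opc[3]);
}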
bool X86FastISel::X86SelectSelect(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
@@ -1762,6 +1886,10 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
  if (X86FastEmitCMoveSelect(I))
    return true;

  // Try to use a sequence of SSE instructions to simulate a conditional move.
  if (X86FastEmitSSESelect(I))
    return true;

  return false;
}
test/CodeGen/X86/fast-isel-select-sse.ll (new file, 391 lines)
@@ -0,0 +1,391 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX

; Test all cmp predicates that can be used with SSE.

define float @select_fcmp_oeq_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_oeq_f32
; CHECK: cmpeqss %xmm1, %xmm0
; CHECK-NEXT: andps %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm0
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_oeq_f32
; AVX: vcmpeqss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp oeq float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_oeq_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_oeq_f64
; CHECK: cmpeqsd %xmm1, %xmm0
; CHECK-NEXT: andpd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm0
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_oeq_f64
; AVX: vcmpeqsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp oeq double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_ogt_f32
; CHECK: cmpltss %xmm0, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm1
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ogt_f32
; AVX: vcmpltss %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp ogt float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_ogt_f64
; CHECK: cmpltsd %xmm0, %xmm1
; CHECK-NEXT: andpd %xmm1, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm1
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ogt_f64
; AVX: vcmpltsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp ogt double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_oge_f32
; CHECK: cmpless %xmm0, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm1
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_oge_f32
; AVX: vcmpless %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp oge float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_oge_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_oge_f64
; CHECK: cmplesd %xmm0, %xmm1
; CHECK-NEXT: andpd %xmm1, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm1
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_oge_f64
; AVX: vcmplesd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp oge double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_olt_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_olt_f32
; CHECK: cmpltss %xmm1, %xmm0
; CHECK-NEXT: andps %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm0
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_olt_f32
; AVX: vcmpltss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp olt float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_olt_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_olt_f64
; CHECK: cmpltsd %xmm1, %xmm0
; CHECK-NEXT: andpd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm0
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_olt_f64
; AVX: vcmpltsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp olt double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_ole_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_ole_f32
; CHECK: cmpless %xmm1, %xmm0
; CHECK-NEXT: andps %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm0
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ole_f32
; AVX: vcmpless %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp ole float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_ole_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_ole_f64
; CHECK: cmplesd %xmm1, %xmm0
; CHECK-NEXT: andpd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm0
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ole_f64
; AVX: vcmplesd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp ole double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_ord_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_ord_f32
; CHECK: cmpordss %xmm1, %xmm0
; CHECK-NEXT: andps %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm0
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ord_f32
; AVX: vcmpordss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp ord float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_ord_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_ord_f64
; CHECK: cmpordsd %xmm1, %xmm0
; CHECK-NEXT: andpd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm0
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ord_f64
; AVX: vcmpordsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp ord double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_uno_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_uno_f32
; CHECK: cmpunordss %xmm1, %xmm0
; CHECK-NEXT: andps %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm0
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uno_f32
; AVX: vcmpunordss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp uno float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_uno_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_uno_f64
; CHECK: cmpunordsd %xmm1, %xmm0
; CHECK-NEXT: andpd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm0
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uno_f64
; AVX: vcmpunordsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp uno double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_ugt_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_ugt_f32
; CHECK: cmpnless %xmm1, %xmm0
; CHECK-NEXT: andps %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm0
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ugt_f32
; AVX: vcmpnless %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp ugt float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_ugt_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_ugt_f64
; CHECK: cmpnlesd %xmm1, %xmm0
; CHECK-NEXT: andpd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm0
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ugt_f64
; AVX: vcmpnlesd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp ugt double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_uge_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_uge_f32
; CHECK: cmpnltss %xmm1, %xmm0
; CHECK-NEXT: andps %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm0
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uge_f32
; AVX: vcmpnltss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp uge float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_uge_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_uge_f64
; CHECK: cmpnltsd %xmm1, %xmm0
; CHECK-NEXT: andpd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm0
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uge_f64
; AVX: vcmpnltsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp uge double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_ult_f32
; CHECK: cmpnless %xmm0, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm1
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ult_f32
; AVX: vcmpnless %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp ult float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_ult_f64
; CHECK: cmpnlesd %xmm0, %xmm1
; CHECK-NEXT: andpd %xmm1, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm1
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ult_f64
; AVX: vcmpnlesd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp ult double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_ule_f32
; CHECK: cmpnltss %xmm0, %xmm1
; CHECK-NEXT: andps %xmm1, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm1
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ule_f32
; AVX: vcmpnltss %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp ule float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_ule_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_ule_f64
; CHECK: cmpnltsd %xmm0, %xmm1
; CHECK-NEXT: andpd %xmm1, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm1
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ule_f64
; AVX: vcmpnltsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp ule double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}

define float @select_fcmp_une_f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: select_fcmp_une_f32
; CHECK: cmpneqss %xmm1, %xmm0
; CHECK-NEXT: andps %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm3, %xmm0
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_une_f32
; AVX: vcmpneqss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
  %1 = fcmp une float %a, %b
  %2 = select i1 %1, float %c, float %d
  ret float %2
}

define double @select_fcmp_une_f64(double %a, double %b, double %c, double %d) {
; CHECK-LABEL: select_fcmp_une_f64
; CHECK: cmpneqsd %xmm1, %xmm0
; CHECK-NEXT: andpd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm3, %xmm0
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_une_f64
; AVX: vcmpneqsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
  %1 = fcmp une double %a, %b
  %2 = select i1 %1, double %c, double %d
  ret double %2
}