X86: Turn fp selects into mask operations.

double test(double a, double b, double c, double d) { return a<b ? c : d; }

before:
_test:
	ucomisd	%xmm0, %xmm1
	ja	LBB0_2
	movaps	%xmm3, %xmm2
LBB0_2:
	movaps	%xmm2, %xmm0

after:
_test:
	cmpltsd	%xmm1, %xmm0
	andpd	%xmm0, %xmm2
	andnpd	%xmm3, %xmm0
	orpd	%xmm2, %xmm0

Small speedup on Benchmarks/SmallPT

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187706 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2013-08-04 12:05:16 +00:00
parent f3e3417e65
commit 75311b7b4d
8 changed files with 381 additions and 118 deletions

View File

@ -517,37 +517,6 @@ to <2 x i64> ops being so bad.
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//
'select' on vectors and scalars could be a whole lot better. We currently
lower them to conditional branches. On x86-64 for example, we compile this:
double test(double a, double b, double c, double d) { return a<b ? c : d; }
to:
_test:
ucomisd %xmm0, %xmm1
ja LBB1_2 # entry
LBB1_1: # entry
movapd %xmm3, %xmm2
LBB1_2: # entry
movapd %xmm2, %xmm0
ret
instead of:
_test:
cmpltsd %xmm1, %xmm0
andpd %xmm0, %xmm2
andnpd %xmm3, %xmm0
orpd %xmm2, %xmm0
ret
For unpredictable branches, the later is much more efficient. This should
just be a matter of having scalar sse map to SELECT_CC and custom expanding
or iseling it.
//===---------------------------------------------------------------------===//
LLVM currently generates stack realignment code, when it is not necessary LLVM currently generates stack realignment code, when it is not necessary
needed. The problem is that we need to know about stack alignment too early, needed. The problem is that we need to know about stack alignment too early,
before RA runs. before RA runs.

View File

@ -9488,6 +9488,51 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
return SDValue(); return SDValue();
} }
/// \brief Map an ISD::CondCode onto the immediate predicate used by the SSE
/// floating point mask compares.
///
/// Op0 and Op1 are exchanged in place whenever the requested condition is only
/// expressible with the comparison operands reversed (e.g. GT becomes LT on
/// swapped operands).
///
/// \returns the predicate encoding:
///   0 - EQ, 1 - LT, 2 - LE, 3 - UNORD, 4 - NEQ, 5 - NLT, 6 - NLE, 7 - ORD,
/// or the out-of-range value 8 for SETUEQ / SETONE, which this mapping does
/// not express as a single predicate.
static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
                              SDValue &Op1) {
  unsigned Predicate;
  bool ReverseOperands = false;

  switch (SetCCOpcode) {
  default:
    llvm_unreachable("Unexpected SETCC condition");

  // EQ
  case ISD::SETOEQ:
  case ISD::SETEQ:
    Predicate = 0;
    break;

  // LT, either directly or as GT with the operands reversed.
  case ISD::SETOGT:
  case ISD::SETGT:
    ReverseOperands = true;
    Predicate = 1;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    Predicate = 1;
    break;

  // LE, either directly or as GE with the operands reversed.
  case ISD::SETOGE:
  case ISD::SETGE:
    ReverseOperands = true;
    Predicate = 2;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    Predicate = 2;
    break;

  // UNORD
  case ISD::SETUO:
    Predicate = 3;
    break;

  // NEQ
  case ISD::SETUNE:
  case ISD::SETNE:
    Predicate = 4;
    break;

  // NLT, either directly (UGE) or as ULE with the operands reversed.
  case ISD::SETULE:
    ReverseOperands = true;
    Predicate = 5;
    break;
  case ISD::SETUGE:
    Predicate = 5;
    break;

  // NLE, either directly (UGT) or as ULT with the operands reversed.
  case ISD::SETULT:
    ReverseOperands = true;
    Predicate = 6;
    break;
  case ISD::SETUGT:
    Predicate = 6;
    break;

  // ORD
  case ISD::SETO:
    Predicate = 7;
    break;

  // No single predicate in the table above; report the sentinel.
  case ISD::SETUEQ:
  case ISD::SETONE:
    Predicate = 8;
    break;
  }

  if (ReverseOperands)
    std::swap(Op0, Op1);

  return Predicate;
}
// Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128 // Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128
// ones, and then concatenate the result back. // ones, and then concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
@ -9535,43 +9580,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
assert(EltVT == MVT::f32 || EltVT == MVT::f64); assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif #endif
unsigned SSECC; unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
bool Swap = false;
// SSE Condition code mapping:
// 0 - EQ
// 1 - LT
// 2 - LE
// 3 - UNORD
// 4 - NEQ
// 5 - NLT
// 6 - NLE
// 7 - ORD
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
case ISD::SETGT: Swap = true; // Fallthrough
case ISD::SETLT:
case ISD::SETOLT: SSECC = 1; break;
case ISD::SETOGE:
case ISD::SETGE: Swap = true; // Fallthrough
case ISD::SETLE:
case ISD::SETOLE: SSECC = 2; break;
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
case ISD::SETULE: Swap = true; // Fallthrough
case ISD::SETUGE: SSECC = 5; break;
case ISD::SETULT: Swap = true; // Fallthrough
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
case ISD::SETUEQ:
case ISD::SETONE: SSECC = 8; break;
}
if (Swap)
std::swap(Op0, Op1);
// In the two special cases we can't handle, emit two comparisons. // In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) { if (SSECC == 8) {
@ -9832,8 +9841,30 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Op1 = Op.getOperand(1); SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2); SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op); SDLoc DL(Op);
EVT VT = Op1.getValueType();
SDValue CC; SDValue CC;
// Lower fp selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
// are available. Otherwise fp cmovs get lowered into a less efficient branch
// sequence later on.
if (Cond.getOpcode() == ISD::SETCC &&
((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
(Subtarget->hasSSE1() && VT == MVT::f32)) &&
VT == Cond.getOperand(0).getValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
int SSECC = translateX86FSETCC(
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
if (SSECC != 8) {
unsigned Opcode = VT == MVT::f32 ? X86ISD::FSETCCss : X86ISD::FSETCCsd;
SDValue Cmp = DAG.getNode(Opcode, DL, VT, CondOp0, CondOp1,
DAG.getConstant(SSECC, MVT::i8));
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);
}
}
if (Cond.getOpcode() == ISD::SETCC) { if (Cond.getOpcode() == ISD::SETCC) {
SDValue NewCond = LowerSETCC(Cond, DAG); SDValue NewCond = LowerSETCC(Cond, DAG);
if (NewCond.getNode()) if (NewCond.getNode())
@ -12980,6 +13011,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SHLD: return "X86ISD::SHLD"; case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD"; case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FAND: return "X86ISD::FAND"; case X86ISD::FAND: return "X86ISD::FAND";
case X86ISD::FANDN: return "X86ISD::FANDN";
case X86ISD::FOR: return "X86ISD::FOR"; case X86ISD::FOR: return "X86ISD::FOR";
case X86ISD::FXOR: return "X86ISD::FXOR"; case X86ISD::FXOR: return "X86ISD::FXOR";
case X86ISD::FSRL: return "X86ISD::FSRL"; case X86ISD::FSRL: return "X86ISD::FSRL";
@ -17760,6 +17792,19 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue(); return SDValue();
} }
/// PerformFANDNCombine - Target-specific DAG combine for X86ISD::FANDN nodes.
/// Folds the node away when either operand is the floating point constant
/// +0.0; returns an empty SDValue when no fold applies.
static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
  SDValue FirstOp = N->getOperand(0);
  SDValue SecondOp = N->getOperand(1);

  // FANDN(0.0, x) -> x
  ConstantFPSDNode *FirstC = dyn_cast<ConstantFPSDNode>(FirstOp);
  if (FirstC && FirstC->getValueAPF().isPosZero())
    return SecondOp;

  // FANDN(x, 0.0) -> 0.0 (the second operand is itself the 0.0 constant)
  ConstantFPSDNode *SecondC = dyn_cast<ConstantFPSDNode>(SecondOp);
  if (SecondC && SecondC->getValueAPF().isPosZero())
    return SecondOp;

  return SDValue();
}
static SDValue PerformBTCombine(SDNode *N, static SDValue PerformBTCombine(SDNode *N,
SelectionDAG &DAG, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) { TargetLowering::DAGCombinerInfo &DCI) {
@ -18214,6 +18259,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMIN: case X86ISD::FMIN:
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG); case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::FANDN: return PerformFANDNCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ANY_EXTEND: case ISD::ANY_EXTEND:

View File

@ -53,6 +53,10 @@ namespace llvm {
/// to X86::XORPS or X86::XORPD. /// to X86::XORPS or X86::XORPD.
FXOR, FXOR,
/// FANDN - Bitwise logical ANDNOT of floating point values. This
/// corresponds to X86::ANDNPS or X86::ANDNPD.
FANDN,
/// FSRL - Bitwise logical right shift of floating point values. These /// FSRL - Bitwise logical right shift of floating point values. These
/// corresponds to X86::PSRLDQ. /// corresponds to X86::PSRLDQ.
FSRL, FSRL,

View File

@ -47,6 +47,8 @@ def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>; [SDNPCommutative, SDNPAssociative]>;
def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>; [SDNPCommutative, SDNPAssociative]>;
// ANDNOT complements only one of its inputs, so unlike X86fand/X86for/X86fxor
// it is neither commutative nor associative: no SDNP properties are attached,
// which keeps instruction selection from matching it with swapped operands.
def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp>;
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;

View File

@ -2843,8 +2843,8 @@ defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor, defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
SSE_BIT_ITINS_P>; SSE_BIT_ITINS_P>;
let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in let isCommutable = 0 in
defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn,
SSE_BIT_ITINS_P>; SSE_BIT_ITINS_P>;
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops

View File

@ -3,74 +3,124 @@
; Convert oeq and une to ole/oge/ule/uge when comparing with infinity ; Convert oeq and une to ole/oge/ule/uge when comparing with infinity
; and negative infinity, because those are more efficient on x86. ; and negative infinity, because those are more efficient on x86.
declare void @f() nounwind
; CHECK-LABEL: oeq_inff: ; CHECK-LABEL: oeq_inff:
; CHECK: ucomiss ; CHECK: ucomiss
; CHECK: jb ; CHECK: jb
define float @oeq_inff(float %x, float %y) nounwind readonly { define void @oeq_inff(float %x) nounwind {
%t0 = fcmp oeq float %x, 0x7FF0000000000000 %t0 = fcmp oeq float %x, 0x7FF0000000000000
%t1 = select i1 %t0, float 1.0, float %y br i1 %t0, label %true, label %false
ret float %t1
true:
call void @f() nounwind
br label %false
false:
ret void
} }
; CHECK-LABEL: oeq_inf: ; CHECK-LABEL: oeq_inf:
; CHECK: ucomisd ; CHECK: ucomisd
; CHECK: jb ; CHECK: jb
define double @oeq_inf(double %x, double %y) nounwind readonly { define void @oeq_inf(double %x) nounwind {
%t0 = fcmp oeq double %x, 0x7FF0000000000000 %t0 = fcmp oeq double %x, 0x7FF0000000000000
%t1 = select i1 %t0, double 1.0, double %y br i1 %t0, label %true, label %false
ret double %t1
true:
call void @f() nounwind
br label %false
false:
ret void
} }
; CHECK-LABEL: une_inff: ; CHECK-LABEL: une_inff:
; CHECK: ucomiss ; CHECK: ucomiss
; CHECK: jae ; CHECK: jae
define float @une_inff(float %x, float %y) nounwind readonly { define void @une_inff(float %x) nounwind {
%t0 = fcmp une float %x, 0x7FF0000000000000 %t0 = fcmp une float %x, 0x7FF0000000000000
%t1 = select i1 %t0, float 1.0, float %y br i1 %t0, label %true, label %false
ret float %t1
true:
call void @f() nounwind
br label %false
false:
ret void
} }
; CHECK-LABEL: une_inf: ; CHECK-LABEL: une_inf:
; CHECK: ucomisd ; CHECK: ucomisd
; CHECK: jae ; CHECK: jae
define double @une_inf(double %x, double %y) nounwind readonly { define void @une_inf(double %x) nounwind {
%t0 = fcmp une double %x, 0x7FF0000000000000 %t0 = fcmp une double %x, 0x7FF0000000000000
%t1 = select i1 %t0, double 1.0, double %y br i1 %t0, label %true, label %false
ret double %t1
true:
call void @f() nounwind
br label %false
false:
ret void
} }
; CHECK-LABEL: oeq_neg_inff: ; CHECK-LABEL: oeq_neg_inff:
; CHECK: ucomiss ; CHECK: ucomiss
; CHECK: jb ; CHECK: jb
define float @oeq_neg_inff(float %x, float %y) nounwind readonly { define void @oeq_neg_inff(float %x) nounwind {
%t0 = fcmp oeq float %x, 0xFFF0000000000000 %t0 = fcmp oeq float %x, 0xFFF0000000000000
%t1 = select i1 %t0, float 1.0, float %y br i1 %t0, label %true, label %false
ret float %t1
true:
call void @f() nounwind
br label %false
false:
ret void
} }
; CHECK-LABEL: oeq_neg_inf: ; CHECK-LABEL: oeq_neg_inf:
; CHECK: ucomisd ; CHECK: ucomisd
; CHECK: jb ; CHECK: jb
define double @oeq_neg_inf(double %x, double %y) nounwind readonly { define void @oeq_neg_inf(double %x) nounwind {
%t0 = fcmp oeq double %x, 0xFFF0000000000000 %t0 = fcmp oeq double %x, 0xFFF0000000000000
%t1 = select i1 %t0, double 1.0, double %y br i1 %t0, label %true, label %false
ret double %t1
true:
call void @f() nounwind
br label %false
false:
ret void
} }
; CHECK-LABEL: une_neg_inff: ; CHECK-LABEL: une_neg_inff:
; CHECK: ucomiss ; CHECK: ucomiss
; CHECK: jae ; CHECK: jae
define float @une_neg_inff(float %x, float %y) nounwind readonly { define void @une_neg_inff(float %x) nounwind {
%t0 = fcmp une float %x, 0xFFF0000000000000 %t0 = fcmp une float %x, 0xFFF0000000000000
%t1 = select i1 %t0, float 1.0, float %y br i1 %t0, label %true, label %false
ret float %t1
true:
call void @f() nounwind
br label %false
false:
ret void
} }
; CHECK-LABEL: une_neg_inf: ; CHECK-LABEL: une_neg_inf:
; CHECK: ucomisd ; CHECK: ucomisd
; CHECK: jae ; CHECK: jae
define double @une_neg_inf(double %x, double %y) nounwind readonly { define void @une_neg_inf(double %x) nounwind {
%t0 = fcmp une double %x, 0xFFF0000000000000 %t0 = fcmp une double %x, 0xFFF0000000000000
%t1 = select i1 %t0, double 1.0, double %y br i1 %t0, label %true, label %false
ret double %t1
true:
call void @f() nounwind
br label %false
false:
ret void
} }

View File

@ -0,0 +1,185 @@
; Checks that a scalar floating point select whose condition is an fcmp is
; emitted as an SSE compare-to-mask followed by and/andn/or instructions, as
; spelled out by the expectations in each function below.
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s

; test1-test4: double, select between %b (true) and 0.0 (false). Only the
; compare and a single 'and' are expected.
define double @test1(double %a, double %b, double %eps) {
%cmp = fcmp olt double %a, %eps
%cond = select i1 %cmp, double %b, double 0.000000e+00
ret double %cond
; CHECK-LABEL: @test1
; CHECK: cmpltsd %xmm2, %xmm0
; CHECK-NEXT: andpd %xmm1, %xmm0
}
define double @test2(double %a, double %b, double %eps) {
%cmp = fcmp ole double %a, %eps
%cond = select i1 %cmp, double %b, double 0.000000e+00
ret double %cond
; CHECK-LABEL: @test2
; CHECK: cmplesd %xmm2, %xmm0
; CHECK-NEXT: andpd %xmm1, %xmm0
}
; ogt is expected as a 'lt' compare with the two compare registers exchanged.
define double @test3(double %a, double %b, double %eps) {
%cmp = fcmp ogt double %a, %eps
%cond = select i1 %cmp, double %b, double 0.000000e+00
ret double %cond
; CHECK-LABEL: @test3
; CHECK: cmpltsd %xmm0, %xmm2
; CHECK-NEXT: andpd %xmm1, %xmm2
}
; oge is expected as a 'le' compare with the two compare registers exchanged.
define double @test4(double %a, double %b, double %eps) {
%cmp = fcmp oge double %a, %eps
%cond = select i1 %cmp, double %b, double 0.000000e+00
ret double %cond
; CHECK-LABEL: @test4
; CHECK: cmplesd %xmm0, %xmm2
; CHECK-NEXT: andpd %xmm1, %xmm2
}
; test5-test8: double, select between 0.0 (true) and %b (false). Only the
; compare and a single 'andn' are expected.
define double @test5(double %a, double %b, double %eps) {
%cmp = fcmp olt double %a, %eps
%cond = select i1 %cmp, double 0.000000e+00, double %b
ret double %cond
; CHECK-LABEL: @test5
; CHECK: cmpltsd %xmm2, %xmm0
; CHECK-NEXT: andnpd %xmm1, %xmm0
}
define double @test6(double %a, double %b, double %eps) {
%cmp = fcmp ole double %a, %eps
%cond = select i1 %cmp, double 0.000000e+00, double %b
ret double %cond
; CHECK-LABEL: @test6
; CHECK: cmplesd %xmm2, %xmm0
; CHECK-NEXT: andnpd %xmm1, %xmm0
}
define double @test7(double %a, double %b, double %eps) {
%cmp = fcmp ogt double %a, %eps
%cond = select i1 %cmp, double 0.000000e+00, double %b
ret double %cond
; CHECK-LABEL: @test7
; CHECK: cmpltsd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm1, %xmm2
}
define double @test8(double %a, double %b, double %eps) {
%cmp = fcmp oge double %a, %eps
%cond = select i1 %cmp, double 0.000000e+00, double %b
ret double %cond
; CHECK-LABEL: @test8
; CHECK: cmplesd %xmm0, %xmm2
; CHECK-NEXT: andnpd %xmm1, %xmm2
}
; test9-test12: float versions of test1-test4 (cmp..ss / andps expected).
define float @test9(float %a, float %b, float %eps) {
%cmp = fcmp olt float %a, %eps
%cond = select i1 %cmp, float %b, float 0.000000e+00
ret float %cond
; CHECK-LABEL: @test9
; CHECK: cmpltss %xmm2, %xmm0
; CHECK-NEXT: andps %xmm1, %xmm0
}
define float @test10(float %a, float %b, float %eps) {
%cmp = fcmp ole float %a, %eps
%cond = select i1 %cmp, float %b, float 0.000000e+00
ret float %cond
; CHECK-LABEL: @test10
; CHECK: cmpless %xmm2, %xmm0
; CHECK-NEXT: andps %xmm1, %xmm0
}
define float @test11(float %a, float %b, float %eps) {
%cmp = fcmp ogt float %a, %eps
%cond = select i1 %cmp, float %b, float 0.000000e+00
ret float %cond
; CHECK-LABEL: @test11
; CHECK: cmpltss %xmm0, %xmm2
; CHECK-NEXT: andps %xmm1, %xmm2
}
define float @test12(float %a, float %b, float %eps) {
%cmp = fcmp oge float %a, %eps
%cond = select i1 %cmp, float %b, float 0.000000e+00
ret float %cond
; CHECK-LABEL: @test12
; CHECK: cmpless %xmm0, %xmm2
; CHECK-NEXT: andps %xmm1, %xmm2
}
; test13-test16: float versions of test5-test8 (cmp..ss / andnps expected).
define float @test13(float %a, float %b, float %eps) {
%cmp = fcmp olt float %a, %eps
%cond = select i1 %cmp, float 0.000000e+00, float %b
ret float %cond
; CHECK-LABEL: @test13
; CHECK: cmpltss %xmm2, %xmm0
; CHECK-NEXT: andnps %xmm1, %xmm0
}
define float @test14(float %a, float %b, float %eps) {
%cmp = fcmp ole float %a, %eps
%cond = select i1 %cmp, float 0.000000e+00, float %b
ret float %cond
; CHECK-LABEL: @test14
; CHECK: cmpless %xmm2, %xmm0
; CHECK-NEXT: andnps %xmm1, %xmm0
}
define float @test15(float %a, float %b, float %eps) {
%cmp = fcmp ogt float %a, %eps
%cond = select i1 %cmp, float 0.000000e+00, float %b
ret float %cond
; CHECK-LABEL: @test15
; CHECK: cmpltss %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm1, %xmm2
}
define float @test16(float %a, float %b, float %eps) {
%cmp = fcmp oge float %a, %eps
%cond = select i1 %cmp, float 0.000000e+00, float %b
ret float %cond
; CHECK-LABEL: @test16
; CHECK: cmpless %xmm0, %xmm2
; CHECK-NEXT: andnps %xmm1, %xmm2
}
; test17-test18: neither select value is a constant, so the full
; compare / and / andn / or sequence is expected (float, then double).
define float @test17(float %a, float %b, float %c, float %eps) {
%cmp = fcmp oge float %a, %eps
%cond = select i1 %cmp, float %c, float %b
ret float %cond
; CHECK-LABEL: @test17
; CHECK: cmpless %xmm0, %xmm3
; CHECK-NEXT: andps %xmm3, %xmm2
; CHECK-NEXT: andnps %xmm1, %xmm3
; CHECK-NEXT: orps %xmm2, %xmm3
}
define double @test18(double %a, double %b, double %c, double %eps) {
%cmp = fcmp oge double %a, %eps
%cond = select i1 %cmp, double %c, double %b
ret double %cond
; CHECK-LABEL: @test18
; CHECK: cmplesd %xmm0, %xmm3
; CHECK-NEXT: andpd %xmm3, %xmm2
; CHECK-NEXT: andnpd %xmm1, %xmm3
; CHECK-NEXT: orpd %xmm2, %xmm3
}

View File

@ -77,7 +77,7 @@ define double @olt_inverse(double %x, double %y) nounwind {
} }
; CHECK-LABEL: oge: ; CHECK-LABEL: oge:
; CHECK-NEXT: ucomisd %xmm1, %xmm0 ; CHECK: cmplesd %xmm0
; UNSAFE-LABEL: oge: ; UNSAFE-LABEL: oge:
; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -91,7 +91,7 @@ define double @oge(double %x, double %y) nounwind {
} }
; CHECK-LABEL: ole: ; CHECK-LABEL: ole:
; CHECK-NEXT: ucomisd %xmm0, %xmm1 ; CHECK: cmplesd %xmm1
; UNSAFE-LABEL: ole: ; UNSAFE-LABEL: ole:
; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
; FINITE-LABEL: ole: ; FINITE-LABEL: ole:
@ -103,7 +103,7 @@ define double @ole(double %x, double %y) nounwind {
} }
; CHECK-LABEL: oge_inverse: ; CHECK-LABEL: oge_inverse:
; CHECK-NEXT: ucomisd %xmm1, %xmm0 ; CHECK: cmplesd %xmm0
; UNSAFE-LABEL: oge_inverse: ; UNSAFE-LABEL: oge_inverse:
; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -118,7 +118,7 @@ define double @oge_inverse(double %x, double %y) nounwind {
} }
; CHECK-LABEL: ole_inverse: ; CHECK-LABEL: ole_inverse:
; CHECK-NEXT: ucomisd %xmm0, %xmm1 ; CHECK: cmplesd %xmm1
; UNSAFE-LABEL: ole_inverse: ; UNSAFE-LABEL: ole_inverse:
; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -213,7 +213,8 @@ define double @olt_inverse_x(double %x) nounwind {
} }
; CHECK-LABEL: oge_x: ; CHECK-LABEL: oge_x:
; CHECK: ucomisd %xmm1, %xmm0 ; CHECK: cmplesd %xmm
; CHECK-NEXT: andpd
; UNSAFE-LABEL: oge_x: ; UNSAFE-LABEL: oge_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1
@ -230,7 +231,8 @@ define double @oge_x(double %x) nounwind {
} }
; CHECK-LABEL: ole_x: ; CHECK-LABEL: ole_x:
; CHECK: ucomisd %xmm0, %xmm1 ; CHECK: cmplesd %xmm
; CHECK-NEXT: andpd
; UNSAFE-LABEL: ole_x: ; UNSAFE-LABEL: ole_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
@ -247,7 +249,8 @@ define double @ole_x(double %x) nounwind {
} }
; CHECK-LABEL: oge_inverse_x: ; CHECK-LABEL: oge_inverse_x:
; CHECK: ucomisd %xmm ; CHECK: cmplesd %xmm
; CHECK-NEXT: andnpd
; UNSAFE-LABEL: oge_inverse_x: ; UNSAFE-LABEL: oge_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
@ -265,7 +268,7 @@ define double @oge_inverse_x(double %x) nounwind {
} }
; CHECK-LABEL: ole_inverse_x: ; CHECK-LABEL: ole_inverse_x:
; CHECK: ucomisd %xmm ; CHECK: cmplesd %xmm
; UNSAFE-LABEL: ole_inverse_x: ; UNSAFE-LABEL: ole_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1
@ -283,7 +286,7 @@ define double @ole_inverse_x(double %x) nounwind {
} }
; CHECK-LABEL: ugt: ; CHECK-LABEL: ugt:
; CHECK: ucomisd %xmm0, %xmm1 ; CHECK: cmpnlesd %xmm1
; UNSAFE-LABEL: ugt: ; UNSAFE-LABEL: ugt:
; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -297,7 +300,7 @@ define double @ugt(double %x, double %y) nounwind {
} }
; CHECK-LABEL: ult: ; CHECK-LABEL: ult:
; CHECK: ucomisd %xmm1, %xmm0 ; CHECK: cmpnlesd %xmm0
; UNSAFE-LABEL: ult: ; UNSAFE-LABEL: ult:
; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -311,7 +314,7 @@ define double @ult(double %x, double %y) nounwind {
} }
; CHECK-LABEL: ugt_inverse: ; CHECK-LABEL: ugt_inverse:
; CHECK: ucomisd %xmm0, %xmm1 ; CHECK: cmpnlesd %xmm1
; UNSAFE-LABEL: ugt_inverse: ; UNSAFE-LABEL: ugt_inverse:
; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -326,7 +329,7 @@ define double @ugt_inverse(double %x, double %y) nounwind {
} }
; CHECK-LABEL: ult_inverse: ; CHECK-LABEL: ult_inverse:
; CHECK: ucomisd %xmm1, %xmm0 ; CHECK: cmpnlesd %xmm0
; UNSAFE-LABEL: ult_inverse: ; UNSAFE-LABEL: ult_inverse:
; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -405,7 +408,8 @@ define double @ule_inverse(double %x, double %y) nounwind {
} }
; CHECK-LABEL: ugt_x: ; CHECK-LABEL: ugt_x:
; CHECK: ucomisd %xmm0, %xmm1 ; CHECK: cmpnlesd %xmm
; CHECK-NEXT: andpd
; UNSAFE-LABEL: ugt_x: ; UNSAFE-LABEL: ugt_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1
@ -422,7 +426,8 @@ define double @ugt_x(double %x) nounwind {
} }
; CHECK-LABEL: ult_x: ; CHECK-LABEL: ult_x:
; CHECK: ucomisd %xmm1, %xmm0 ; CHECK: cmpnlesd %xmm
; CHECK-NEXT: andpd
; UNSAFE-LABEL: ult_x: ; UNSAFE-LABEL: ult_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
@ -439,7 +444,8 @@ define double @ult_x(double %x) nounwind {
} }
; CHECK-LABEL: ugt_inverse_x: ; CHECK-LABEL: ugt_inverse_x:
; CHECK: ucomisd %xmm ; CHECK: cmpnlesd %xmm
; CHECK-NEXT: andnpd
; UNSAFE-LABEL: ugt_inverse_x: ; UNSAFE-LABEL: ugt_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1 ; UNSAFE-NEXT: minsd %xmm0, %xmm1
@ -457,7 +463,8 @@ define double @ugt_inverse_x(double %x) nounwind {
} }
; CHECK-LABEL: ult_inverse_x: ; CHECK-LABEL: ult_inverse_x:
; CHECK: ucomisd %xmm ; CHECK: cmpnlesd %xmm
; CHECK-NEXT: andnpd
; UNSAFE-LABEL: ult_inverse_x: ; UNSAFE-LABEL: ult_inverse_x:
; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1 ; UNSAFE-NEXT: maxsd %xmm0, %xmm1
@ -623,7 +630,7 @@ define double @olt_inverse_y(double %x) nounwind {
} }
; CHECK-LABEL: oge_y: ; CHECK-LABEL: oge_y:
; CHECK: ucomisd %xmm1, %xmm0 ; CHECK: cmplesd %xmm0
; UNSAFE-LABEL: oge_y: ; UNSAFE-LABEL: oge_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -637,7 +644,7 @@ define double @oge_y(double %x) nounwind {
} }
; CHECK-LABEL: ole_y: ; CHECK-LABEL: ole_y:
; CHECK: ucomisd %xmm0, %xmm1 ; CHECK: cmplesd %xmm
; UNSAFE-LABEL: ole_y: ; UNSAFE-LABEL: ole_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -651,7 +658,7 @@ define double @ole_y(double %x) nounwind {
} }
; CHECK-LABEL: oge_inverse_y: ; CHECK-LABEL: oge_inverse_y:
; CHECK: ucomisd %xmm ; CHECK: cmplesd %xmm0
; UNSAFE-LABEL: oge_inverse_y: ; UNSAFE-LABEL: oge_inverse_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -667,7 +674,7 @@ define double @oge_inverse_y(double %x) nounwind {
} }
; CHECK-LABEL: ole_inverse_y: ; CHECK-LABEL: ole_inverse_y:
; CHECK: ucomisd %xmm ; CHECK: cmplesd %xmm
; UNSAFE-LABEL: ole_inverse_y: ; UNSAFE-LABEL: ole_inverse_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -683,7 +690,7 @@ define double @ole_inverse_y(double %x) nounwind {
} }
; CHECK-LABEL: ugt_y: ; CHECK-LABEL: ugt_y:
; CHECK: ucomisd %xmm0, %xmm1 ; CHECK: cmpnlesd %xmm
; UNSAFE-LABEL: ugt_y: ; UNSAFE-LABEL: ugt_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -697,7 +704,7 @@ define double @ugt_y(double %x) nounwind {
} }
; CHECK-LABEL: ult_y: ; CHECK-LABEL: ult_y:
; CHECK: ucomisd %xmm1, %xmm0 ; CHECK: cmpnlesd %xmm0
; UNSAFE-LABEL: ult_y: ; UNSAFE-LABEL: ult_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -711,7 +718,7 @@ define double @ult_y(double %x) nounwind {
} }
; CHECK-LABEL: ugt_inverse_y: ; CHECK-LABEL: ugt_inverse_y:
; CHECK: ucomisd %xmm ; CHECK: cmpnlesd %xmm
; UNSAFE-LABEL: ugt_inverse_y: ; UNSAFE-LABEL: ugt_inverse_y:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret
@ -727,7 +734,7 @@ define double @ugt_inverse_y(double %x) nounwind {
} }
; CHECK-LABEL: ult_inverse_y: ; CHECK-LABEL: ult_inverse_y:
; CHECK: ucomisd %xmm ; CHECK: cmpnlesd %xmm
; UNSAFE-LABEL: ult_inverse_y: ; UNSAFE-LABEL: ult_inverse_y:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; UNSAFE-NEXT: ret ; UNSAFE-NEXT: ret