[SKX] Enable lowering of integer CMP operations.

Added new types to Legalizer.
Fixed getSetCCResultType function
Added lowering tests.

Reviewed by Elena Demikhovsky.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216717 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Robert Khasanov 2014-08-29 08:46:04 +00:00
parent d2323fc295
commit 37e671e894
5 changed files with 1008 additions and 9 deletions

View File

@ -1526,8 +1526,39 @@ void X86TargetLowering::resetOperationActions() {
}// has AVX-512 }// has AVX-512
if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) { if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
addRegisterClass(MVT::v32i1, &X86::VK32RegClass); addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass); addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
// Do not attempt to promote non-256-bit vectors
if (!VT.is512BitVector())
continue;
if ( EltSize < 32) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
}
}
}
if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
} }
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
@ -1665,11 +1696,41 @@ EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector()) if (!VT.isVector())
return Subtarget->hasAVX512() ? MVT::i1: MVT::i8; return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
const unsigned NumElts = VT.getVectorNumElements();
const EVT EltVT = VT.getVectorElementType();
if (VT.is512BitVector()) {
if (Subtarget->hasAVX512()) if (Subtarget->hasAVX512())
switch(VT.getVectorNumElements()) { if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
EltVT == MVT::f32 || EltVT == MVT::f64)
switch(NumElts) {
case 8: return MVT::v8i1; case 8: return MVT::v8i1;
case 16: return MVT::v16i1; case 16: return MVT::v16i1;
} }
if (Subtarget->hasBWI())
if (EltVT == MVT::i8 || EltVT == MVT::i16)
switch(NumElts) {
case 32: return MVT::v32i1;
case 64: return MVT::v64i1;
}
}
if (VT.is256BitVector() || VT.is128BitVector()) {
if (Subtarget->hasVLX())
if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
EltVT == MVT::f32 || EltVT == MVT::f64)
switch(NumElts) {
case 2: return MVT::v2i1;
case 4: return MVT::v4i1;
case 8: return MVT::v8i1;
}
if (Subtarget->hasBWI() && Subtarget->hasVLX())
if (EltVT == MVT::i8 || EltVT == MVT::i16)
switch(NumElts) {
case 8: return MVT::v8i1;
case 16: return MVT::v16i1;
case 32: return MVT::v32i1;
}
}
return VT.changeVectorElementTypeToInteger(); return VT.changeVectorElementTypeToInteger();
} }
@ -10435,6 +10496,8 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
break; break;
case MVT::v8i16: case MVT::v8i16:
case MVT::v16i16: case MVT::v16i16:
if (Subtarget->hasBWI() && Subtarget->hasVLX())
break;
return SDValue(); return SDValue();
} }
@ -12829,7 +12892,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
MVT VT = Op.getSimpleValueType(); MVT VT = Op.getSimpleValueType();
SDLoc dl(Op); SDLoc dl(Op);
assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 && assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 8 &&
Op.getValueType().getScalarType() == MVT::i1 && Op.getValueType().getScalarType() == MVT::i1 &&
"Cannot set masked compare for this operation"); "Cannot set masked compare for this operation");
@ -12943,11 +13006,12 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
EVT OpVT = Op1.getValueType(); EVT OpVT = Op1.getValueType();
if (Subtarget->hasAVX512()) { if (Subtarget->hasAVX512()) {
if (Op1.getValueType().is512BitVector() || if (Op1.getValueType().is512BitVector() ||
(Subtarget->hasBWI() && Subtarget->hasVLX()) ||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32)) (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
return LowerIntVSETCC_AVX512(Op, DAG, Subtarget); return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
// In AVX-512 architecture setcc returns mask with i1 elements, // In AVX-512 architecture setcc returns mask with i1 elements,
// But there is no compare instruction for i8 and i16 elements. // But there is no compare instruction for i8 and i16 elements in KNL.
// We are not talking about 512-bit operands in this case, these // We are not talking about 512-bit operands in this case, these
// types are illegal. // types are illegal.
if (MaskResult && if (MaskResult &&
@ -20218,13 +20282,15 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() && if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1) { CondVT.getVectorElementType() == MVT::i1) {
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
// lowering on AVX-512. In this case we convert it to // lowering on KNL. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction. // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
// The same situation for all 128 and 256-bit vectors of i8 and i16 // The same situation for all 128 and 256-bit vectors of i8 and i16.
// Since SKX these selects have a proper lowering.
EVT OpVT = LHS.getValueType(); EVT OpVT = LHS.getValueType();
if ((OpVT.is128BitVector() || OpVT.is256BitVector()) && if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
(OpVT.getVectorElementType() == MVT::i8 || (OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16)) { OpVT.getVectorElementType() == MVT::i16) &&
!(Subtarget->hasBWI() && Subtarget->hasVLX())) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond); Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
DCI.AddToWorklist(Cond.getNode()); DCI.AddToWorklist(Cond.getNode());
return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS); return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);

View File

@ -162,3 +162,151 @@ define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
ret <8 x i64>%res ret <8 x i64>%res
} }
; CHECK-LABEL: @test16
; CHECK: vpcmpled
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y) nounwind {
%mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
ret <16 x i32> %max
}
; CHECK-LABEL: @test17
; CHECK: vpcmpgtd (%rdi)
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
%y = load <16 x i32>* %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; CHECK-LABEL: @test18
; CHECK: vpcmpled (%rdi)
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
%y = load <16 x i32>* %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; CHECK-LABEL: @test19
; CHECK: vpcmpleud (%rdi)
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
%y = load <16 x i32>* %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; CHECK-LABEL: @test20
; CHECK: vpcmpeqd %zmm{{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
%mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
ret <16 x i32> %max
}
; CHECK-LABEL: @test21
; CHECK: vpcmpleq %zmm{{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
%mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
ret <8 x i64> %max
}
; CHECK-LABEL: @test22
; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
%mask1 = icmp sgt <8 x i64> %x1, %y1
%y = load <8 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
ret <8 x i64> %max
}
; CHECK-LABEL: @test23
; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
%mask1 = icmp sge <16 x i32> %x1, %y1
%y = load <16 x i32>* %y.ptr, align 4
%mask0 = icmp ule <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; CHECK-LABEL: test24
; CHECK: vpcmpeqq (%rdi){1to8}
; CHECK: vmovdqa64
; CHECK: ret
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
%yb = load i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
ret <8 x i64> %max
}
; CHECK-LABEL: test25
; CHECK: vpcmpled (%rdi){1to16}
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
%yb = load i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; CHECK-LABEL: test26
; CHECK: vpcmpgtd (%rdi){1to16}{{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
%mask1 = icmp sge <16 x i32> %x1, %y1
%yb = load i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
%mask0 = icmp sgt <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; CHECK-LABEL: test27
; CHECK: vpcmpleq (%rdi){1to8}{{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
%mask1 = icmp sge <8 x i64> %x1, %y1
%yb = load i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
ret <8 x i64> %max
}

View File

@ -0,0 +1,135 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
; CHECK-LABEL: test1
; CHECK: vpcmpeqb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <64 x i8> @test1(<64 x i8> %x, <64 x i8> %y) nounwind {
%mask = icmp eq <64 x i8> %x, %y
%max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
ret <64 x i8> %max
}
; CHECK-LABEL: test2
; CHECK: vpcmpgtb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <64 x i8> @test2(<64 x i8> %x, <64 x i8> %y) nounwind {
%mask = icmp sgt <64 x i8> %x, %y
%max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
ret <64 x i8> %max
}
; CHECK-LABEL: @test3
; CHECK: vpcmplew {{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test3(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1) nounwind {
%mask = icmp sge <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x1, <32 x i16> %y
ret <32 x i16> %max
}
; CHECK-LABEL: test4
; CHECK: vpcmpnleub {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y) nounwind {
%mask = icmp ugt <64 x i8> %x, %y
%max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
ret <64 x i8> %max
}
; CHECK-LABEL: test5
; CHECK: vpcmpeqw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwind {
%y = load <32 x i16>* %yp, align 4
%mask = icmp eq <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
}
; CHECK-LABEL: @test6
; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
%y = load <32 x i16>* %y.ptr, align 4
%mask = icmp sgt <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
}
; CHECK-LABEL: @test7
; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
%y = load <32 x i16>* %y.ptr, align 4
%mask = icmp sle <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
}
; CHECK-LABEL: @test8
; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
%y = load <32 x i16>* %y.ptr, align 4
%mask = icmp ule <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
}
; CHECK-LABEL: @test9
; CHECK: vpcmpeqw %zmm{{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16> %y1) nounwind {
%mask1 = icmp eq <32 x i16> %x1, %y1
%mask0 = icmp eq <32 x i16> %x, %y
%mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %y
ret <32 x i16> %max
}
; CHECK-LABEL: @test10
; CHECK: vpcmpleb %zmm{{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y1) nounwind {
%mask1 = icmp sge <64 x i8> %x1, %y1
%mask0 = icmp sle <64 x i8> %x, %y
%mask = select <64 x i1> %mask0, <64 x i1> %mask1, <64 x i1> zeroinitializer
%max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %x1
ret <64 x i8> %max
}
; CHECK-LABEL: @test11
; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i8> %y1) nounwind {
%mask1 = icmp sgt <64 x i8> %x1, %y1
%y = load <64 x i8>* %y.ptr, align 4
%mask0 = icmp sgt <64 x i8> %x, %y
%mask = select <64 x i1> %mask0, <64 x i1> %mask1, <64 x i1> zeroinitializer
%max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %x1
ret <64 x i8> %max
}
; CHECK-LABEL: @test12
; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test12(<32 x i16> %x, <32 x i16>* %y.ptr, <32 x i16> %x1, <32 x i16> %y1) nounwind {
%mask1 = icmp sge <32 x i16> %x1, %y1
%y = load <32 x i16>* %y.ptr, align 4
%mask0 = icmp ule <32 x i16> %x, %y
%mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
}

View File

@ -0,0 +1,269 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
; CHECK-LABEL: test256_1
; CHECK: vpcmpeqb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <32 x i8> @test256_1(<32 x i8> %x, <32 x i8> %y) nounwind {
%mask = icmp eq <32 x i8> %x, %y
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %y
ret <32 x i8> %max
}
; CHECK-LABEL: test256_2
; CHECK: vpcmpgtb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <32 x i8> @test256_2(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind {
%mask = icmp sgt <32 x i8> %x, %y
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
ret <32 x i8> %max
}
; CHECK-LABEL: @test256_3
; CHECK: vpcmplew {{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_3(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1) nounwind {
%mask = icmp sge <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x1, <16 x i16> %y
ret <16 x i16> %max
}
; CHECK-LABEL: test256_4
; CHECK: vpcmpnleub {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <32 x i8> @test256_4(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind {
%mask = icmp ugt <32 x i8> %x, %y
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
ret <32 x i8> %max
}
; CHECK-LABEL: test256_5
; CHECK: vpcmpeqw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nounwind {
%y = load <16 x i16>* %yp, align 4
%mask = icmp eq <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
}
; CHECK-LABEL: @test256_6
; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
%y = load <16 x i16>* %y.ptr, align 4
%mask = icmp sgt <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
}
; CHECK-LABEL: @test256_7
; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
%y = load <16 x i16>* %y.ptr, align 4
%mask = icmp sle <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
}
; CHECK-LABEL: @test256_8
; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
%y = load <16 x i16>* %y.ptr, align 4
%mask = icmp ule <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
}
; CHECK-LABEL: @test256_9
; CHECK: vpcmpeqw %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x i16> %y1) nounwind {
%mask1 = icmp eq <16 x i16> %x1, %y1
%mask0 = icmp eq <16 x i16> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %y
ret <16 x i16> %max
}
; CHECK-LABEL: @test256_10
; CHECK: vpcmpleb %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8> %y1) nounwind {
%mask1 = icmp sge <32 x i8> %x1, %y1
%mask0 = icmp sle <32 x i8> %x, %y
%mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
ret <32 x i8> %max
}
; CHECK-LABEL: @test256_11
; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32 x i8> %y1) nounwind {
%mask1 = icmp sgt <32 x i8> %x1, %y1
%y = load <32 x i8>* %y.ptr, align 4
%mask0 = icmp sgt <32 x i8> %x, %y
%mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
ret <32 x i8> %max
}
; CHECK-LABEL: @test256_12
; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1, <16 x i16> %y1) nounwind {
%mask1 = icmp sge <16 x i16> %x1, %y1
%y = load <16 x i16>* %y.ptr, align 4
%mask0 = icmp ule <16 x i16> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
}
; CHECK-LABEL: test128_1
; CHECK: vpcmpeqb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <16 x i8> @test128_1(<16 x i8> %x, <16 x i8> %y) nounwind {
%mask = icmp eq <16 x i8> %x, %y
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %y
ret <16 x i8> %max
}
; CHECK-LABEL: test128_2
; CHECK: vpcmpgtb {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <16 x i8> @test128_2(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind {
%mask = icmp sgt <16 x i8> %x, %y
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
ret <16 x i8> %max
}
; CHECK-LABEL: @test128_3
; CHECK: vpcmplew {{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_3(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1) nounwind {
%mask = icmp sge <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x1, <8 x i16> %y
ret <8 x i16> %max
}
; CHECK-LABEL: test128_4
; CHECK: vpcmpnleub {{.*%k[0-7]}}
; CHECK: vmovdqu8 {{.*}}%k1
; CHECK: ret
define <16 x i8> @test128_4(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind {
%mask = icmp ugt <16 x i8> %x, %y
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
ret <16 x i8> %max
}
; CHECK-LABEL: test128_5
; CHECK: vpcmpeqw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwind {
%y = load <8 x i16>* %yp, align 4
%mask = icmp eq <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
}
; CHECK-LABEL: @test128_6
; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
%y = load <8 x i16>* %y.ptr, align 4
%mask = icmp sgt <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
}
; CHECK-LABEL: @test128_7
; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
%y = load <8 x i16>* %y.ptr, align 4
%mask = icmp sle <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
}
; CHECK-LABEL: @test128_8
; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
%y = load <8 x i16>* %y.ptr, align 4
%mask = icmp ule <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
}
; CHECK-LABEL: @test128_9
; CHECK: vpcmpeqw %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16> %y1) nounwind {
%mask1 = icmp eq <8 x i16> %x1, %y1
%mask0 = icmp eq <8 x i16> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y
ret <8 x i16> %max
}
; CHECK-LABEL: @test128_10
; CHECK: vpcmpleb %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8> %y1) nounwind {
%mask1 = icmp sge <16 x i8> %x1, %y1
%mask0 = icmp sle <16 x i8> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
ret <16 x i8> %max
}
; CHECK-LABEL: @test128_11
; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu8
; CHECK: ret
define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16 x i8> %y1) nounwind {
%mask1 = icmp sgt <16 x i8> %x1, %y1
%y = load <16 x i8>* %y.ptr, align 4
%mask0 = icmp sgt <16 x i8> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
ret <16 x i8> %max
}
; CHECK-LABEL: @test128_12
; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_12(<8 x i16> %x, <8 x i16>* %y.ptr, <8 x i16> %x1, <8 x i16> %y1) nounwind {
%mask1 = icmp sge <8 x i16> %x1, %y1
%y = load <8 x i16>* %y.ptr, align 4
%mask0 = icmp ule <8 x i16> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
}

View File

@ -0,0 +1,381 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
; CHECK-LABEL: test256_1
; CHECK: vpcmpeqq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind {
%mask = icmp eq <4 x i64> %x, %y
%max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y
ret <4 x i64> %max
}
; CHECK-LABEL: test256_2
; CHECK: vpcmpgtq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y) nounwind {
%mask = icmp sgt <4 x i64> %x, %y
%max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y
ret <4 x i64> %max
}
; CHECK-LABEL: @test256_3
; CHECK: vpcmpled {{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_3(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1) nounwind {
%mask = icmp sge <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y
ret <8 x i32> %max
}
; CHECK-LABEL: test256_4
; CHECK: vpcmpnleuq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y) nounwind {
%mask = icmp ugt <4 x i64> %x, %y
%max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y
ret <4 x i64> %max
}
; CHECK-LABEL: test256_5
; CHECK: vpcmpeqd (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind {
%y = load <8 x i32>* %yp, align 4
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: @test256_6
; CHECK: vpcmpgtd (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
%y = load <8 x i32>* %y.ptr, align 4
%mask = icmp sgt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: @test256_7
; CHECK: vpcmpled (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
%y = load <8 x i32>* %y.ptr, align 4
%mask = icmp sle <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: @test256_8
; CHECK: vpcmpleud (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
%y = load <8 x i32>* %y.ptr, align 4
%mask = icmp ule <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: @test256_9
; CHECK: vpcmpeqd %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32> %y1) nounwind {
%mask1 = icmp eq <8 x i32> %x1, %y1
%mask0 = icmp eq <8 x i32> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
}
; CHECK-LABEL: @test256_10
; CHECK: vpcmpleq %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) nounwind {
%mask1 = icmp sge <4 x i64> %x1, %y1
%mask0 = icmp sle <4 x i64> %x, %y
%mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
%max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1
ret <4 x i64> %max
}
; CHECK-LABEL: @test256_11
; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind {
%mask1 = icmp sgt <4 x i64> %x1, %y1
%y = load <4 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <4 x i64> %x, %y
%mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
%max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1
ret <4 x i64> %max
}
; CHECK-LABEL: @test256_12
; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind {
%mask1 = icmp sge <8 x i32> %x1, %y1
%y = load <8 x i32>* %y.ptr, align 4
%mask0 = icmp ule <8 x i32> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: test256_13
; CHECK: vpcmpeqq (%rdi){1to4}, %ymm
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind {
%yb = load i64* %yb.ptr, align 4
%y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
%y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer
%mask = icmp eq <4 x i64> %x, %y
%max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1
ret <4 x i64> %max
}
; CHECK-LABEL: test256_14
; CHECK: vpcmpled (%rdi){1to8}, %ymm
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind {
%yb = load i32* %yb.ptr, align 4
%y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
%y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer
%mask = icmp sle <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: test256_15
; CHECK: vpcmpgtd (%rdi){1to8}, %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind {
%mask1 = icmp sge <8 x i32> %x1, %y1
%yb = load i32* %yb.ptr, align 4
%y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
%y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer
%mask0 = icmp sgt <8 x i32> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
}
; CHECK-LABEL: test256_16
; CHECK: vpcmpgtq (%rdi){1to4}, %ymm{{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind {
%mask1 = icmp sge <4 x i64> %x1, %y1
%yb = load i64* %yb.ptr, align 4
%y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
%y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer
%mask0 = icmp sgt <4 x i64> %x, %y
%mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
%max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1
ret <4 x i64> %max
}
; CHECK-LABEL: test128_1
; CHECK: vpcmpeqq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <2 x i64> @test128_1(<2 x i64> %x, <2 x i64> %y) nounwind {
%mask = icmp eq <2 x i64> %x, %y
%max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y
ret <2 x i64> %max
}
; CHECK-LABEL: test128_2
; CHECK: vpcmpgtq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y) nounwind {
%mask = icmp sgt <2 x i64> %x, %y
%max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y
ret <2 x i64> %max
}
; CHECK-LABEL: @test128_3
; CHECK: vpcmpled {{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_3(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1) nounwind {
%mask = icmp sge <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y
ret <4 x i32> %max
}
; CHECK-LABEL: test128_4
; CHECK: vpcmpnleuq {{.*%k[0-7]}}
; CHECK: vmovdqa64 {{.*}}%k1
; CHECK: ret
define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y) nounwind {
%mask = icmp ugt <2 x i64> %x, %y
%max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y
ret <2 x i64> %max
}
; CHECK-LABEL: test128_5
; CHECK: vpcmpeqd (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwind {
%y = load <4 x i32>* %yp, align 4
%mask = icmp eq <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: @test128_6
; CHECK: vpcmpgtd (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
%y = load <4 x i32>* %y.ptr, align 4
%mask = icmp sgt <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: @test128_7
; CHECK: vpcmpled (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
%y = load <4 x i32>* %y.ptr, align 4
%mask = icmp sle <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: @test128_8
; CHECK: vpcmpleud (%rdi){{.*%k[0-7]}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
%y = load <4 x i32>* %y.ptr, align 4
%mask = icmp ule <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: @test128_9
; CHECK: vpcmpeqd %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32> %y1) nounwind {
%mask1 = icmp eq <4 x i32> %x1, %y1
%mask0 = icmp eq <4 x i32> %x, %y
%mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %max
}
; CHECK-LABEL: @test128_10
; CHECK: vpcmpleq %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) nounwind {
%mask1 = icmp sge <2 x i64> %x1, %y1
%mask0 = icmp sle <2 x i64> %x, %y
%mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer
%max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1
ret <2 x i64> %max
}
; CHECK-LABEL: @test128_11
; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
%mask1 = icmp sgt <2 x i64> %x1, %y1
%y = load <2 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <2 x i64> %x, %y
%mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer
%max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1
ret <2 x i64> %max
}
; CHECK-LABEL: @test128_12
; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
%mask1 = icmp sge <4 x i32> %x1, %y1
%y = load <4 x i32>* %y.ptr, align 4
%mask0 = icmp ule <4 x i32> %x, %y
%mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: test128_13
; CHECK: vpcmpeqq (%rdi){1to2}, %xmm
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind {
%yb = load i64* %yb.ptr, align 4
%y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
%y = insertelement <2 x i64> %y.0, i64 %yb, i32 1
%mask = icmp eq <2 x i64> %x, %y
%max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1
ret <2 x i64> %max
}
; CHECK-LABEL: test128_14
; CHECK: vpcmpled (%rdi){1to4}, %xmm
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind {
%yb = load i32* %yb.ptr, align 4
%y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
%y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer
%mask = icmp sle <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: test128_15
; CHECK: vpcmpgtd (%rdi){1to4}, %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
%mask1 = icmp sge <4 x i32> %x1, %y1
%yb = load i32* %yb.ptr, align 4
%y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
%y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer
%mask0 = icmp sgt <4 x i32> %x, %y
%mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
}
; CHECK-LABEL: test128_16
; CHECK: vpcmpgtq (%rdi){1to2}, %xmm{{.*{%k[1-7]}}}
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
%mask1 = icmp sge <2 x i64> %x1, %y1
%yb = load i64* %yb.ptr, align 4
%y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
%y = insertelement <2 x i64> %y.0, i64 %yb, i32 1
%mask0 = icmp sgt <2 x i64> %x, %y
%mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer
%max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1
ret <2 x i64> %max
}