diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 69de3a7513b..e5d3c91fd00 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8560,8 +8560,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8)); EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8)); return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ); - } - else if (SetCCOpcode == ISD::SETONE) { + } else if (SetCCOpcode == ISD::SETONE) { SDValue ORD, NEQ; ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8)); NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8)); @@ -8574,7 +8573,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { } // Break 256-bit integer vector compare into smaller ones. - if (!isFP && VT.getSizeInBits() == 256) + if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) return Lower256IntVSETCC(Op, DAG); // We are handling one of the integer comparisons here. Since SSE only has @@ -8583,12 +8582,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = 0, EQOpc = 0, GTOpc = 0; bool Swap = false, Invert = false, FlipSigns = false; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { default: break; - case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; - case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; - case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; - case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; + case MVT::i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; + case MVT::i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; + case MVT::i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; + case MVT::i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; } switch (SetCCOpcode) { diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 4b6ba5d6932..8648d48cdb9 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3917,6 +3917,32 @@ let Predicates = [HasAVX2] in { VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; defm VPCMPGTDY : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_avx2_pcmpgt_d, VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; + + def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, VR256:$src2)), + (VPCMPEQBYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, (memop addr:$src2))), + (VPCMPEQBYrm VR256:$src1, addr:$src2)>; + def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, VR256:$src2)), + (VPCMPEQWYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, (memop addr:$src2))), + (VPCMPEQWYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, VR256:$src2)), + (VPCMPEQDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, (memop addr:$src2))), + (VPCMPEQDYrm VR256:$src1, addr:$src2)>; + + def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, VR256:$src2)), + (VPCMPGTBYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, (memop addr:$src2))), + (VPCMPGTBYrm VR256:$src1, addr:$src2)>; + def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, VR256:$src2)), + (VPCMPGTWYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, (memop addr:$src2))), + (VPCMPGTWYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, VR256:$src2)), + (VPCMPGTDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, (memop addr:$src2))), + (VPCMPGTDYrm VR256:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { @@ -6325,6 +6351,11 @@ let Predicates = [HasAVX2] in { int_x86_avx2_pmaxu_w>, VEX_4V; defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", int_x86_avx2_pmul_dq>, VEX_4V; + + def : Pat<(v4i64 (X86pcmpeqq VR256:$src1, VR256:$src2)), + (VPCMPEQQYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86pcmpeqq VR256:$src1, (memop addr:$src2))), + (VPCMPEQQYrm VR256:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { @@ -6647,6 +6678,11 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { defm VPCMPGTQ : SS42I_binop_rm_int_y<0x37, "vpcmpgtq", int_x86_avx2_pcmpgt_q>, VEX_4V; + + def : Pat<(v4i64 (X86pcmpgtq VR256:$src1, VR256:$src2)), + (VPCMPGTQYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86pcmpgtq VR256:$src1, (memop addr:$src2))), + (VPCMPGTQYrm VR256:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in diff --git a/test/CodeGen/X86/avx2-cmp.ll b/test/CodeGen/X86/avx2-cmp.ll new file mode 100644 index 00000000000..df30d9efed1 --- /dev/null +++ b/test/CodeGen/X86/avx2-cmp.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s + +; CHECK: vpcmpgtd %ymm +define <8 x i32> @int256-cmp(<8 x i32> %i, <8 x i32> %j) nounwind readnone { + %bincmp = icmp slt <8 x i32> %i, %j + %x = sext <8 x i1> %bincmp to <8 x i32> + ret <8 x i32> %x +} + +; CHECK: vpcmpgtq %ymm +define <4 x i64> @v4i64-cmp(<4 x i64> %i, <4 x i64> %j) nounwind readnone { + %bincmp = icmp slt <4 x i64> %i, %j + %x = sext <4 x i1> %bincmp to <4 x i64> + ret <4 x i64> %x +} + +; CHECK: vpcmpgtw %ymm +define <16 x i16> @v16i16-cmp(<16 x i16> %i, <16 x i16> %j) nounwind readnone { + %bincmp = icmp slt <16 x i16> %i, %j + %x = sext <16 x i1> %bincmp to <16 x i16> + ret <16 x i16> %x +} + +; CHECK: vpcmpgtb %ymm +define <32 x i8> @v32i8-cmp(<32 x i8> %i, <32 x i8> %j) nounwind readnone { + %bincmp = icmp slt <32 x i8> %i, %j + %x = sext <32 x i1> %bincmp to <32 x i8> + ret <32 x i8> %x +} + +; CHECK: vpcmpeqd %ymm +define <8 x i32> @int256-cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone { + %bincmp = icmp eq <8 x i32> %i, %j + %x = sext <8 x i1> %bincmp to <8 x i32> + ret <8 x i32> %x +} + +; CHECK: vpcmpeqq %ymm +define <4 x i64> @v4i64-cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone { + %bincmp = icmp eq <4 x i64> %i, %j + %x = sext <4 x i1> %bincmp to <4 x i64> + ret <4 x i64> %x +} + +; CHECK: vpcmpeqw %ymm +define <16 x i16> @v16i16-cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone { + %bincmp = icmp eq <16 x i16> %i, %j + %x = sext <16 x i1> %bincmp to <16 x i16> + ret <16 x i16> %x +} + +; CHECK: vpcmpeqb %ymm +define <32 x i8> @v32i8-cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone { + %bincmp = icmp eq <32 x i8> %i, %j + %x = sext <32 x i1> %bincmp to <32 x i8> + ret <32 x i8> %x +} +