diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e261fca9105..dd78aa943f7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -989,6 +989,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v8i32, Custom); setOperationAction(ISD::SRA, MVT::v16i16, Custom); + setOperationAction(ISD::VSETCC, MVT::v32i8, Custom); + setOperationAction(ISD::VSETCC, MVT::v16i16, Custom); setOperationAction(ISD::VSETCC, MVT::v8i32, Custom); setOperationAction(ISD::VSETCC, MVT::v4i64, Custom); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d189ed37acd..d1f74842c37 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4889,6 +4889,11 @@ let Predicates = [HasAVX] in { 0>, VEX_4V; defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, 0>, VEX_4V; + + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), + (VPCMPEQQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), + (VPCMPEQQrm VR128:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { @@ -5099,9 +5104,16 @@ multiclass SS42I_binop_rm_int opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX] in { defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, 0>, VEX_4V; + + def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), + (VPCMPGTQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), + (VPCMPGTQrm VR128:$src1, addr:$src2)>; +} + let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; @@ -5660,6 +5672,11 @@ def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))), def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))), (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>; +def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))), + (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>; +def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))), + (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>; + //===----------------------------------------------------------------------===// // VMASKMOV - Conditional SIMD Packed Loads and Stores diff --git a/test/CodeGen/X86/avx-cmp.ll b/test/CodeGen/X86/avx-cmp.ll index 7782b5a82ed..53310b290d9 100644 --- a/test/CodeGen/X86/avx-cmp.ll +++ b/test/CodeGen/X86/avx-cmp.ll @@ -53,3 +53,80 @@ define <8 x i32> @int256-cmp(<8 x i32> %i, <8 x i32> %j) nounwind readnone { ret <8 x i32> %x } +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpgtq %xmm +; CHECK-NEXT: vpcmpgtq %xmm +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @v4i64-cmp(<4 x i64> %i, <4 x i64> %j) nounwind readnone { + %bincmp = icmp slt <4 x i64> %i, %j + %x = sext <4 x i1> %bincmp to <4 x i64> + ret <4 x i64> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpgtw %xmm +; CHECK-NEXT: vpcmpgtw %xmm +; CHECK-NEXT: vinsertf128 $1 +define <16 x i16> @v16i16-cmp(<16 x i16> %i, <16 x i16> %j) nounwind readnone { + %bincmp = icmp slt <16 x i16> %i, %j + %x = sext <16 x i1> %bincmp to <16 x i16> + ret <16 x i16> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpgtb %xmm +; CHECK-NEXT: vpcmpgtb %xmm +; CHECK-NEXT: vinsertf128 $1 +define <32 x i8> @v32i8-cmp(<32 x i8> %i, <32 x i8> %j) nounwind readnone { + %bincmp = icmp slt <32 x i8> %i, %j + %x = sext <32 x i1> %bincmp to <32 x i8> + ret <32 x i8> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpeqd %xmm +; CHECK-NEXT: vpcmpeqd %xmm +; CHECK-NEXT: vinsertf128 $1 +define <8 x i32> @int256-cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone { + %bincmp = icmp eq <8 x i32> %i, %j + %x = sext <8 x i1> %bincmp to <8 x i32> + ret <8 x i32> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpeqq %xmm +; CHECK-NEXT: vpcmpeqq %xmm +; CHECK-NEXT: vinsertf128 $1 +define <4 x i64> @v4i64-cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone { + %bincmp = icmp eq <4 x i64> %i, %j + %x = sext <4 x i1> %bincmp to <4 x i64> + ret <4 x i64> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpeqw %xmm +; CHECK-NEXT: vpcmpeqw %xmm +; CHECK-NEXT: vinsertf128 $1 +define <16 x i16> @v16i16-cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone { + %bincmp = icmp eq <16 x i16> %i, %j + %x = sext <16 x i1> %bincmp to <16 x i16> + ret <16 x i16> %x +} + +; CHECK: vextractf128 $1 +; CHECK: vextractf128 $1 +; CHECK-NEXT: vpcmpeqb %xmm +; CHECK-NEXT: vpcmpeqb %xmm +; CHECK-NEXT: vinsertf128 $1 +define <32 x i8> @v32i8-cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone { + %bincmp = icmp eq <32 x i8> %i, %j + %x = sext <32 x i1> %bincmp to <32 x i8> + ret <32 x i8> %x +} + diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll index 57ff01b59f2..38b2ff3b069 100644 --- a/test/CodeGen/X86/avx-splat.ll +++ b/test/CodeGen/X86/avx-splat.ll @@ -1,10 +1,8 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s -; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd -; CHECK: vextractf128 $0 -; CHECK-NEXT: punpcklbw -; CHECK-NEXT: punpckhbw +; CHECK: vpunpcklbw %xmm +; CHECK-NEXT: vpunpckhbw %xmm ; CHECK-NEXT: vinsertf128 $1 ; CHECK-NEXT: vpermilps $85 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp { @@ -13,8 +11,7 @@ entry: ret <32 x i8> %shuffle } -; CHECK: vextractf128 $0 -; CHECK-NEXT: punpckhwd +; CHECK: vpunpckhwd %xmm ; CHECK-NEXT: vinsertf128 $1 ; CHECK-NEXT: vpermilps $85 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {