From bb539bf973bb861ffe8179ac8791fad19448cd52 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 9 Nov 2011 13:21:28 +0000 Subject: [PATCH] Add AVX2 support for vselect of v32i8 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144187 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 ++ lib/Target/X86/X86InstrSSE.td | 6 ++++++ test/CodeGen/X86/avx2-logic.ll | 19 +++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e5d3c91fd00..c34f225dc53 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1050,6 +1050,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v4i64, Custom); setOperationAction(ISD::MUL, MVT::v8i32, Legal); setOperationAction(ISD::MUL, MVT::v16i16, Legal); + + setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); // Don't lower v32i8 because there is no 128-bit byte mul } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8648d48cdb9..068e223e59e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6568,6 +6568,12 @@ let Predicates = [HasAVX] in { (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; } +let Predicates = [HasAVX2] in { + def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), + (v32i8 VR256:$src2))), + (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; +} + /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { multiclass SS41I_ternary_int opc, string OpcodeStr, Intrinsic IntId> { diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll index a763bc0010c..944849cf4ff 100644 --- a/test/CodeGen/X86/avx2-logic.ll +++ b/test/CodeGen/X86/avx2-logic.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s +; CHECK: vpandn ; CHECK: vpandn %ymm +; CHECK: ret define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { entry: ; Force the execution domain with an add. @@ -10,7 +12,9 @@ entry: ret <4 x i64> %x } +; CHECK: vpand ; CHECK: vpand %ymm +; CHECK: ret define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { entry: ; Force the execution domain with an add. @@ -19,7 +23,9 @@ entry: ret <4 x i64> %x } +; CHECK: vpor ; CHECK: vpor %ymm +; CHECK: ret define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { entry: ; Force the execution domain with an add. @@ -28,7 +34,9 @@ entry: ret <4 x i64> %x } +; CHECK: vpxor ; CHECK: vpxor %ymm +; CHECK: ret define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { entry: ; Force the execution domain with an add. @@ -36,3 +44,14 @@ entry: %x = xor <4 x i64> %a2, %b ret <4 x i64> %x } + + + +; CHECK: vpblendvb +; CHECK: vpblendvb %ymm +; CHECK: ret +define <32 x i8> @vpblendvb(<32 x i8> %x, <32 x i8> %y) { + %min_is_x = icmp ult <32 x i8> %x, %y + %min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y + ret <32 x i8> %min +}