From 6f4f46cf112567e273504b41b47301f922347577 Mon Sep 17 00:00:00 2001
From: Adam Nemet
Date: Tue, 25 Mar 2014 17:47:06 +0000
Subject: [PATCH] [X86] Generate VPSHUFB for in-place v16i16 shuffles

This used to resort to splitting the 256-bit operation into two 128-bit
shuffles and then recombining the results.

Fixes

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204735 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 25 +++++++++++++++++++++++++
 test/CodeGen/X86/vec_shuffle-40.ll | 22 ++++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 test/CodeGen/X86/vec_shuffle-40.ll

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 14d7be31306..ec923ba7aa2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6576,6 +6576,25 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
   return NewV;
 }
 
+/// \brief v16i16 shuffles
+///
+/// FIXME: We only support generation of a single pshufb currently. We can
+/// generalize the other applicable cases from LowerVECTOR_SHUFFLEv8i16 as
+/// well (e.g 2 x pshufb + 1 x por).
+static SDValue
+LowerVECTOR_SHUFFLEv16i16(SDValue Op, SelectionDAG &DAG) {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  SDValue V1 = SVOp->getOperand(0);
+  SDValue V2 = SVOp->getOperand(1);
+  SDLoc dl(SVOp);
+
+  if (V2.getOpcode() != ISD::UNDEF)
+    return SDValue();
+
+  SmallVector<int, 16> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
+  return getPSHUFB(MaskVals, V1, dl, DAG);
+}
+
 // v16i8 shuffles - Prefer shuffles in the following order:
 //   1. [ssse3] 1 x pshufb
 //   2. 
[ssse3] 2 x pshufb + 1 x por
@@ -7635,6 +7654,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
       return NewOp;
   }
 
+  if (VT == MVT::v16i16 && Subtarget->hasInt256()) {
+    SDValue NewOp = LowerVECTOR_SHUFFLEv16i16(Op, DAG);
+    if (NewOp.getNode())
+      return NewOp;
+  }
+
   if (VT == MVT::v16i8) {
     SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, Subtarget, DAG);
     if (NewOp.getNode())
diff --git a/test/CodeGen/X86/vec_shuffle-40.ll b/test/CodeGen/X86/vec_shuffle-40.ll
new file mode 100644
index 00000000000..75b45e3df11
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-40.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
+
+define void @shuffle_v16i16(<16 x i16>* %a) {
+; CHECK-LABEL: shuffle_v16i16:
+; CHECK: vpshufb {{.*}}%ymm
+; CHECK-NOT: vpshufb {{.*}}%xmm
+entry:
+  %0 = load <16 x i16>* %a, align 32
+  %shuffle = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32>
+  store <16 x i16> %shuffle, <16 x i16>* %a, align 32
+  ret void
+}
+
+define void @shuffle_v16i16_lanecrossing(<16 x i16>* %a) {
+; CHECK-LABEL: shuffle_v16i16_lanecrossing:
+; CHECK-NOT: vpshufb {{.*}}%ymm
+entry:
+  %0 = load <16 x i16>* %a, align 32
+  %shuffle = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32>
+  store <16 x i16> %shuffle, <16 x i16>* %a, align 32
+  ret void
+}