From 293f71ddd2a821a6aad4e84efb41f01ac4ad9035 Mon Sep 17 00:00:00 2001
From: Adam Nemet
Date: Tue, 13 Jan 2015 22:20:18 +0000
Subject: [PATCH] [AVX512] Unpack support in new shuffle lowering

This now handles both 32 and 64-bit element sizes.

In this version, the tests are in vector-shuffle-512-v8.ll, canonicalized by
Chandler's update_llc_test_checks.py.

Part of

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225838 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp        | 34 +++++++++++++++++++++
 test/CodeGen/X86/vector-shuffle-512-v8.ll | 36 +++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c36bd52fc55..a1fd34ea800 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10750,6 +10750,13 @@ static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
+  // X86 has dedicated unpack instructions that can handle specific blend
+  // operations: UNPCKH and UNPCKL.
+  if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
+  if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
+
   // FIXME: Implement direct support for this type!
   return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
 }
@@ -10765,6 +10772,16 @@ static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
 
+  // Use dedicated unpack instructions for masks that match their pattern.
+  if (isShuffleEquivalent(Mask,
+                          0, 16, 1, 17, 4, 20, 5, 21,
+                          8, 24, 9, 25, 12, 28, 13, 29))
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
+  if (isShuffleEquivalent(Mask,
+                          2, 18, 3, 19, 6, 22, 7, 23,
+                          10, 26, 11, 27, 14, 30, 15, 31))
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
+
   // FIXME: Implement direct support for this type!
   return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
 }
@@ -10780,6 +10797,13 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
+  // X86 has dedicated unpack instructions that can handle specific blend
+  // operations: UNPCKH and UNPCKL.
+  if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
+  if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
+
   // FIXME: Implement direct support for this type!
   return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
 }
@@ -10795,6 +10819,16 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
 
+  // Use dedicated unpack instructions for masks that match their pattern.
+  if (isShuffleEquivalent(Mask,
+                          0, 16, 1, 17, 4, 20, 5, 21,
+                          8, 24, 9, 25, 12, 28, 13, 29))
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
+  if (isShuffleEquivalent(Mask,
+                          2, 18, 3, 19, 6, 22, 7, 23,
+                          10, 26, 11, 27, 14, 30, 15, 31))
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
+
   // FIXME: Implement direct support for this type!
   return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
 }
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 050fb34b661..e0ec567cd71 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1418,3 +1418,39 @@ define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
   ret <8 x i64> %shuffle
 }
+
+define <8 x double> @shuffle_v8f64_082a4c6e(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_082a4c6e:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x double> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_082a4c6e(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_082a4c6e:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_193b5d7f(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_193b5d7f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x double> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_193b5d7f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i64> %shuffle
+}
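
The new tests above only cover the v8 (64-bit element) unpacks. A v16f32 case
exercising the same new path might look like the sketch below; it is not part
of the commit. The mask is the UNPCKL index pattern matched in
lowerV16F32VectorShuffle, while the function name and the expectation of a
single vunpcklps are illustrative; real CHECK lines would be regenerated with
update_llc_test_checks.py.

; Hypothetical test, not in this patch. The mask matches the v16f32 UNPCKL
; pattern above, so with this change llc should be able to select a single
; vunpcklps instead of splitting the shuffle into two 256-bit halves.
define <16 x float> @shuffle_v16f32_unpcklo(<16 x float> %a, <16 x float> %b) {
  %shuffle = shufflevector <16 x float> %a, <16 x float> %b,
      <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21,
                  i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
  ret <16 x float> %shuffle
}

The shape of the mask reflects how the 512-bit unpacks work: they interleave
within each 128-bit lane, so the 0, 16, 1, 17 pattern repeats at element
offsets 4, 8, and 12 rather than running 0..7 against 16..23 across the whole
vector.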