From 15d82b7d33812d0df391c50e524d789f00fa4a4d Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 7 Aug 2014 08:11:31 +0000 Subject: [PATCH] [x86] Fix a miscompile in the new shuffle lowering found through the new fuzz testing. The function which tested for adjacency did what it said on the tin, but when I called it, I wanted it to do something more thorough: I wanted to know if the *pairs* of shuffle elements were adjacent and started at 0 mod 2. In one place I had the decency to try to test for this, but in the other it was completely skipped, miscompiling this test case. Fix this by making the helper actually do what I wanted it to do everywhere I called it (and removing the now redundant code in one place). I *really* dislike the name "canWidenShuffleElements" for this predicate. If anyone can come up with a better name, please let me know. The other name I thought about was "canWidenShuffleMask" but is it really widening the mask to reduce the number of lanes shuffled? I don't know. Naming things is hard. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215089 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 12 ++++++------ test/CodeGen/X86/vector-shuffle-128-v4.ll | 7 +++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 47e3a5a6dac..38cb996b8d7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8234,10 +8234,11 @@ static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, } } -/// \brief Tiny helper function to test whether adjacent masks are sequential. -static bool areAdjacentMasksSequential(ArrayRef Mask) { +/// \brief Tiny helper function to test whether a shuffle mask could be +/// simplified by widening the elements being shuffled. +static bool canWidenShuffleElements(ArrayRef Mask) { for (int i = 0, Size = Mask.size(); i < Size; i += 2) - if (Mask[i] + 1 != Mask[i+1]) + if (Mask[i] % 2 != 0 || Mask[i] + 1 != Mask[i+1]) return false; return true; @@ -8291,7 +8292,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, // but it might be interesting to form i128 integers to handle flipping the // low and high halves of AVX 256-bit vectors. if (VT.isInteger() && VT.getScalarSizeInBits() < 64 && - areAdjacentMasksSequential(Mask)) { + canWidenShuffleElements(Mask)) { SmallVector NewMask; for (int i = 0, Size = Mask.size(); i < Size; i += 2) NewMask.push_back(Mask[i] / 2); @@ -19517,8 +19518,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, // See if this reduces to a PSHUFD which is no more expensive and can // combine with more operations. - if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 && - areAdjacentMasksSequential(Mask)) { + if (canWidenShuffleElements(Mask)) { int DMask[] = {-1, -1, -1, -1}; int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2; DMask[DOffset + 0] = DOffset + Mask[0] / 2; diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll index 7d496fa19f1..210d672b5c0 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -17,6 +17,13 @@ define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) { %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } +define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) { +; CHECK-SSE2-LABEL: @shuffle_v4i32_0112 +; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,1,1,2] +; CHECK-SSE2-NEXT: retq + %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle +} define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) { ; CHECK-SSE2-LABEL: @shuffle_v4i32_0300 ; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,3,0,0]