X86 lowerVectorShuffleAsBitShift: try each shift direction and each input separately.

Before this patch, the MatchBitShift lambda accumulated MatchLeft and
MatchRight in a single pass and then selected element positions and the opcode
with `MatchLeft ? ... : ...`. When the zeroable elements satisfied both
directions, only the left-shift layout was compared against the mask, and the
lambda returned an empty SDValue if that comparison failed. The input was
chosen the same way, with `MatchV1 ? V1 : V2`.

After this patch:
  * CheckZeros(Shift, Scale, Left) tests that, in every Scale-element group,
    the first Shift elements (Left) or the last Shift elements (!Left) are
    Zeroable.
  * MatchBitShift(Shift, Scale, Left, V) checks one direction against one
    input, offsetting the expected indices by Size when V is not V1, and
    builds the BITCAST / VSHLI-or-VSRLI / BITCAST sequence on success.
  * The driver loop tries Left in {true, false} and V in {V1, V2} for each
    Scale/Shift pair and returns the first match.

The added test expects a single `psrld $8` (SSE) / `vpsrld $8` (AVX).

NOTE(review): both shufflevector lines in the test hunk end at `<16 x i32>`
with no mask operand; the mask constants look like they were lost when this
patch text was extracted. Confirm against the original commit.
NOTE(review): line breaks and indentation were restored from whitespace-collapsed
text. Blank-line placement is inferred; only the line totals are confirmed
by the hunk headers.

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 063d7b8c4f7..20d018835b9 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7946,39 +7946,34 @@ static SDValue lowerVectorShuffleAsBitShift(SDLoc DL, MVT VT, SDValue V1,
   // PSHL : (little-endian) left bit shift.
   // [ zz, 0, zz, 2 ]
   // [ -1, 4, zz, -1 ]
-  auto MatchBitShift = [&](int Shift, int Scale) -> SDValue {
+
+  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
+    for (int i = 0; i < Size; i += Scale)
+      for (int j = 0; j < Shift; ++j)
+        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
+          return false;
+
+    return true;
+  };
+
+  auto MatchBitShift = [&](int Shift, int Scale, bool Left, SDValue V) {
     MVT ShiftSVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * Scale);
     MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale);
     assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
            "Illegal integer vector type");
 
-    bool MatchLeft = true, MatchRight = true;
     for (int i = 0; i != Size; i += Scale) {
-      for (int j = 0; j != Shift; ++j) {
-        MatchLeft &= Zeroable[i + j];
-      }
-      for (int j = Scale - Shift; j != Scale; ++j) {
-        MatchRight &= Zeroable[i + j];
-      }
-    }
-    if (!(MatchLeft || MatchRight))
-      return SDValue();
-
-    bool MatchV1 = true, MatchV2 = true;
-    for (int i = 0; i != Size; i += Scale) {
-      unsigned Pos = MatchLeft ? i + Shift : i;
-      unsigned Low = MatchLeft ? i : i + Shift;
+      unsigned Pos = Left ? i + Shift : i;
+      unsigned Low = Left ? i : i + Shift;
       unsigned Len = Scale - Shift;
-      MatchV1 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low);
-      MatchV2 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low + Size);
+      if (!isSequentialOrUndefInRange(Mask, Pos, Len,
+                                      Low + (V == V1 ? 0 : Size)))
+        return SDValue();
     }
-    if (!(MatchV1 || MatchV2))
-      return SDValue();
 
     // Cast the inputs to ShiftVT to match VSRLI/VSHLI and back again.
-    unsigned OpCode = MatchLeft ? X86ISD::VSHLI : X86ISD::VSRLI;
+    unsigned OpCode = Left ? X86ISD::VSHLI : X86ISD::VSRLI;
     int ShiftAmt = Shift * VT.getScalarSizeInBits();
-    SDValue V = MatchV1 ? V1 : V2;
     V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
     V = DAG.getNode(OpCode, DL, ShiftVT, V, DAG.getConstant(ShiftAmt, MVT::i8));
     return DAG.getNode(ISD::BITCAST, DL, VT, V);
@@ -7992,8 +7987,11 @@ static SDValue lowerVectorShuffleAsBitShift(SDLoc DL, MVT VT, SDValue V1,
   // and that the shifted in elements are all zeroable.
   for (int Scale = 2; Scale * VT.getScalarSizeInBits() <= 64; Scale *= 2)
     for (int Shift = 1; Shift != Scale; ++Shift)
-      if (SDValue BitShift = MatchBitShift(Shift, Scale))
-        return BitShift;
+      for (bool Left : {true, false})
+        if (CheckZeros(Shift, Scale, Left))
+          for (SDValue V : {V1, V2})
+            if (SDValue BitShift = MatchBitShift(Shift, Scale, Left, V))
+              return BitShift;
 
   // no match
   return SDValue();
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 269ef6a8fe7..3fa3b1a4e30 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1434,3 +1434,17 @@ entry:
   %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32>
   ret <16 x i8> %0
 }
+
+define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) {
+; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
+; SSE: # BB#0:
+; SSE-NEXT: psrld $8, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrld $8, %xmm0, %xmm0
+; AVX-NEXT: retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32>
+  ret <16 x i8> %shuffle
+}