diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index cb120633a40..472dbfa62f8 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -628,14 +628,16 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, const char *SrcName = isSrc1 ? Src1Name : Src2Name; OS << (SrcName ? SrcName : "mem") << '['; bool IsFirst = true; - while (i != e && - (int)ShuffleMask[i] >= 0 && + while (i != e && (int)ShuffleMask[i] != SM_SentinelZero && (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) { if (!IsFirst) OS << ','; else IsFirst = false; - OS << ShuffleMask[i] % ShuffleMask.size(); + if (ShuffleMask[i] == SM_SentinelUndef) + OS << "u"; + else + OS << ShuffleMask[i] % ShuffleMask.size(); ++i; } OS << ']'; diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 9aca2da4902..713e147fbf5 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -224,8 +224,7 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, } } -void DecodePSHUFBMask(const ConstantDataSequential *C, - SmallVectorImpl &ShuffleMask) { +void DecodePSHUFBMask(const Constant *C, SmallVectorImpl &ShuffleMask) { Type *MaskTy = C->getType(); assert(MaskTy->isVectorTy() && "Expected a vector constant mask!"); assert(MaskTy->getVectorElementType()->isIntegerTy(8) && @@ -234,22 +233,48 @@ void DecodePSHUFBMask(const ConstantDataSequential *C, // FIXME: Add support for AVX-512. assert((NumElements == 16 || NumElements == 32) && "Only 128-bit and 256-bit vectors supported!"); - assert((unsigned)NumElements == C->getNumElements() && - "Constant mask has a different number of elements!"); - ShuffleMask.reserve(NumElements); - for (int i = 0; i < NumElements; ++i) { - // For AVX vectors with 32 bytes the base of the shuffle is the half of the - // vector we're inside. - int Base = i < 16 ? 0 : 16; - uint64_t Element = C->getElementAsInteger(i); - // If the high bit (7) of the byte is set, the element is zeroed. - if (Element & (1 << 7)) - ShuffleMask.push_back(SM_SentinelZero); - else { - // Only the least significant 4 bits of the byte are used. - int Index = Base + (Element & 0xf); - ShuffleMask.push_back(Index); + + if (auto *CDS = dyn_cast(C)) { + assert((unsigned)NumElements == CDS->getNumElements() && + "Constant mask has a different number of elements!"); + + for (int i = 0; i < NumElements; ++i) { + // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte + // lane of the vector we're inside. + int Base = i < 16 ? 0 : 16; + uint64_t Element = CDS->getElementAsInteger(i); + // If the high bit (7) of the byte is set, the element is zeroed. + if (Element & (1 << 7)) + ShuffleMask.push_back(SM_SentinelZero); + else { + // Only the least significant 4 bits of the byte are used. + int Index = Base + (Element & 0xf); + ShuffleMask.push_back(Index); + } + } + } else if (auto *CV = dyn_cast(C)) { + assert((unsigned)NumElements == CV->getNumOperands() && + "Constant mask has a different number of elements!"); + + for (int i = 0; i < NumElements; ++i) { + // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte + // lane of the vector we're inside. + int Base = i < 16 ? 0 : 16; + Constant *COp = CV->getOperand(i); + if (isa(COp)) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + uint64_t Element = cast(COp)->getZExtValue(); + // If the high bit (7) of the byte is set, the element is zeroed. + if (Element & (1 << 7)) + ShuffleMask.push_back(SM_SentinelZero); + else { + // Only the least significant 4 bits of the byte are used. + int Index = Base + (Element & 0xf); + ShuffleMask.push_back(Index); + } } } } @@ -258,6 +283,10 @@ void DecodePSHUFBMask(ArrayRef RawMask, SmallVectorImpl &ShuffleMask) { for (int i = 0, e = RawMask.size(); i < e; ++i) { uint64_t M = RawMask[i]; + if (M == (uint64_t)SM_SentinelUndef) { + ShuffleMask.push_back(M); + continue; + } // For AVX vectors with 32 bytes the base of the shuffle is the half of // the vector we're inside. int Base = i < 16 ? 0 : 16; @@ -287,8 +316,7 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { } } -void DecodeVPERMILPMask(const ConstantDataSequential *C, - SmallVectorImpl &ShuffleMask) { +void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl &ShuffleMask) { Type *MaskTy = C->getType(); assert(MaskTy->isVectorTy() && "Expected a vector constant mask!"); assert(MaskTy->getVectorElementType()->isIntegerTy() && @@ -297,16 +325,34 @@ void DecodeVPERMILPMask(const ConstantDataSequential *C, int NumElements = MaskTy->getVectorNumElements(); assert((NumElements == 2 || NumElements == 4 || NumElements == 8) && "Unexpected number of vector elements."); - assert((unsigned)NumElements == C->getNumElements() && - "Constant mask has a different number of elements!"); - ShuffleMask.reserve(NumElements); - for (int i = 0; i < NumElements; ++i) { - int Base = (i * ElementBits / 128) * (128 / ElementBits); - uint64_t Element = C->getElementAsInteger(i); - // Only the least significant 2 bits of the integer are used. - int Index = Base + (Element & 0x3); - ShuffleMask.push_back(Index); + if (auto *CDS = dyn_cast(C)) { + assert((unsigned)NumElements == CDS->getNumElements() && + "Constant mask has a different number of elements!"); + + for (int i = 0; i < NumElements; ++i) { + int Base = (i * ElementBits / 128) * (128 / ElementBits); + uint64_t Element = CDS->getElementAsInteger(i); + // Only the least significant 2 bits of the integer are used. + int Index = Base + (Element & 0x3); + ShuffleMask.push_back(Index); + } + } else if (auto *CV = dyn_cast(C)) { + assert((unsigned)NumElements == C->getNumOperands() && + "Constant mask has a different number of elements!"); + + for (int i = 0; i < NumElements; ++i) { + int Base = (i * ElementBits / 128) * (128 / ElementBits); + Constant *COp = CV->getOperand(i); + if (isa(COp)) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + uint64_t Element = cast(COp)->getZExtValue(); + // Only the least significant 2 bits of the integer are used. + int Index = Base + (Element & 0x3); + ShuffleMask.push_back(Index); + } } } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 8034d209ac3..ece895d77cc 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -23,12 +23,10 @@ //===----------------------------------------------------------------------===// namespace llvm { -class ConstantDataSequential; +class Constant; class MVT; -enum { - SM_SentinelZero = -1 -}; +enum { SM_SentinelZero = -1, SM_SentinelUndef = -2 }; void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl &ShuffleMask); @@ -66,8 +64,7 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl &ShuffleMask); void DecodeUNPCKLMask(MVT VT, SmallVectorImpl &ShuffleMask); /// \brief Decode a PSHUFB mask from an IR-level vector constant. -void DecodePSHUFBMask(const ConstantDataSequential *C, - SmallVectorImpl &ShuffleMask); +void DecodePSHUFBMask(const Constant *C, SmallVectorImpl &ShuffleMask); /// \brief Decode a PSHUFB mask from a raw array of constants such as from /// BUILD_VECTOR. @@ -85,8 +82,7 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, void DecodeVPERMMask(unsigned Imm, SmallVectorImpl &ShuffleMask); /// \brief Decode a VPERMILP variable mask from an IR-level vector constant. -void DecodeVPERMILPMask(const ConstantDataSequential *C, - SmallVectorImpl &ShuffleMask); +void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl &ShuffleMask); } // llvm namespace diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 40ab77aaaa0..a3fa78a701a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5347,7 +5347,12 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, SmallVector RawMask; for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) { - auto *CN = dyn_cast(MaskNode->getOperand(i)); + SDValue Op = MaskNode->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + RawMask.push_back((uint64_t)SM_SentinelUndef); + continue; + } + auto *CN = dyn_cast(Op.getNode()); if (!CN) return false; APInt MaskElement = CN->getAPIntValue(); @@ -5377,13 +5382,13 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (!MaskCP || MaskCP->isMachineConstantPoolEntry()) return false; - if (auto *C = dyn_cast(MaskCP->getConstVal())) { + if (auto *C = dyn_cast(MaskCP->getConstVal())) { // FIXME: Support AVX-512 here. - if (!C->getType()->isVectorTy() || - (C->getNumElements() != 16 && C->getNumElements() != 32)) + Type *Ty = C->getType(); + if (!Ty->isVectorTy() || (Ty->getVectorNumElements() != 16 && + Ty->getVectorNumElements() != 32)) return false; - assert(C->getType()->isVectorTy() && "Expected a vector constant."); DecodePSHUFBMask(C, Mask); break; } @@ -8994,7 +8999,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, SDValue V2Mask[16]; for (int i = 0; i < 16; ++i) if (Mask[i] == -1) { - V1Mask[i] = V2Mask[i] = DAG.getConstant(0x80, MVT::i8); + V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8); } else { V1Mask[i] = DAG.getConstant(Mask[i] < 16 ? Mask[i] : 0x80, MVT::i8); V2Mask[i] = @@ -20167,6 +20172,10 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!"); int Ratio = 16 / Mask.size(); for (unsigned i = 0; i < 16; ++i) { + if (Mask[i / Ratio] == SM_SentinelUndef) { + PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8)); + continue; + } int M = Mask[i / Ratio] != SM_SentinelZero ? Ratio * Mask[i / Ratio] + i % Ratio : 255; @@ -20277,17 +20286,18 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, // for this order is that we are recursing up the operation chain. for (int i = 0, e = std::max(OpMask.size(), RootMask.size()); i < e; ++i) { int RootIdx = i / RootRatio; - if (RootMask[RootIdx] == SM_SentinelZero) { - // This is a zero-ed lane, we're done. - Mask.push_back(SM_SentinelZero); + if (RootMask[RootIdx] < 0) { + // This is a zero or undef lane, we're done. + Mask.push_back(RootMask[RootIdx]); continue; } int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio; int OpIdx = RootMaskedIdx / OpRatio; - if (OpMask[OpIdx] == SM_SentinelZero) { - // The incoming lanes are zero, it doesn't matter which ones we are using. - Mask.push_back(SM_SentinelZero); + if (OpMask[OpIdx] < 0) { + // The incoming lanes are zero or undef, it doesn't matter which ones we + // are using. + Mask.push_back(OpMask[OpIdx]); continue; } diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 5665a012606..bc02d6bac3e 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -1060,8 +1060,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { Type *MaskTy = MaskConstantEntry.getType(); (void)MaskTy; if (!MaskConstantEntry.isMachineConstantPoolEntry()) - if (auto *C = dyn_cast( - MaskConstantEntry.Val.ConstVal)) { + if (auto *C = dyn_cast(MaskConstantEntry.Val.ConstVal)) { assert(MaskTy == C->getType() && "Expected a constant of the same type!"); @@ -1077,8 +1076,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { DecodeVPERMILPMask(C, Mask); } - assert(Mask.size() == MaskTy->getVectorNumElements() && - "Shuffle mask has a different size than its type!"); + assert( + (Mask.empty() || Mask.size() == MaskTy->getVectorNumElements()) && + "Shuffle mask has a different size than its type!"); } } @@ -1104,7 +1104,10 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { InSrc = true; CS << SrcName << "["; } - CS << M; + if (M == SM_SentinelUndef) + CS << "u"; + else + CS << M; } } if (InSrc) diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll index 36575463da3..b7991bbff99 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -301,12 +301,12 @@ define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) { ; ; SSSE3-LABEL: @trunc_v4i32_shuffle ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: @trunc_v4i32_shuffle ; SSE41: # BB#0: -; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] ; SSE41-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> ret <16 x i8> %shuffle diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll index df40df2a325..1922150f900 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -498,7 +498,7 @@ define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_002u6u44 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,u,6,u,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle @@ -507,7 +507,7 @@ define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_00uu66uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,u,u,6,6,u,u] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle @@ -516,7 +516,7 @@ define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_103245uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,3,2,4,5,u,u] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle @@ -525,7 +525,7 @@ define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_1133uu67 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,1,3,3,u,u,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle @@ -534,7 +534,7 @@ define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_0uu354uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,u,u,3,5,4,u,u] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle @@ -543,7 +543,7 @@ define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_uuu3uu66 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[u,u,u,3,u,u,6,6] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle @@ -1044,7 +1044,7 @@ define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_002u6u44 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,u,6,u,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle @@ -1053,7 +1053,7 @@ define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_00uu66uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,u,u,6,6,u,u] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle @@ -1062,7 +1062,7 @@ define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_103245uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,3,2,4,5,u,u] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle @@ -1071,7 +1071,7 @@ define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_1133uu67 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,1,3,3,u,u,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle @@ -1080,7 +1080,7 @@ define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_0uu354uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,u,u,3,5,4,u,u] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle @@ -1089,7 +1089,7 @@ define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_uuu3uu66 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[u,u,u,3,u,u,6,6] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle