diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 6d5ee4dc516..e4c58d42c5d 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -13,10 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86ShuffleDecode.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/CodeGen/MachineValueType.h" //===----------------------------------------------------------------------===// @@ -256,8 +253,7 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, } } -void DecodePSHUFBMask(const Constant *C, const DataLayout *TD, - SmallVectorImpl &ShuffleMask) { +void DecodePSHUFBMask(const Constant *C, SmallVectorImpl &ShuffleMask) { Type *MaskTy = C->getType(); // It is not an error for the PSHUFB mask to not be a vector of i8 because the // constant pool uniques constants by their bit representation. @@ -266,56 +262,43 @@ void DecodePSHUFBMask(const Constant *C, const DataLayout *TD, // // <2 x i64> // - // <4 x i32> + // <4 x i32> unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); - VectorType *DestTy = nullptr; - if (MaskTySize == 128) - DestTy = VectorType::get(Type::getInt8Ty(MaskTy->getContext()), 16); - else if (MaskTySize == 256) - DestTy = VectorType::get(Type::getInt8Ty(MaskTy->getContext()), 32); - - // FIXME: Add support for AVX-512. - if (!DestTy) + if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512. return; - if (DestTy != MaskTy) { - if (!CastInst::castIsValid(Instruction::BitCast, const_cast(C), - DestTy)) - return; + // This is a straightforward byte vector. + if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) { + int NumElements = MaskTy->getVectorNumElements(); + ShuffleMask.reserve(NumElements); - C = ConstantFoldInstOperands(Instruction::BitCast, DestTy, - const_cast(C), TD); - MaskTy = DestTy; - } - - int NumElements = MaskTy->getVectorNumElements(); - ShuffleMask.reserve(NumElements); - - for (int i = 0; i < NumElements; ++i) { - // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte - // lane of the vector we're inside. - int Base = i < 16 ? 0 : 16; - Constant *COp = C->getAggregateElement(i); - if (!COp) { - ShuffleMask.clear(); - return; - } else if (isa(COp)) { - ShuffleMask.push_back(SM_SentinelUndef); - continue; - } - uint64_t Element = cast(COp)->getZExtValue(); - // If the high bit (7) of the byte is set, the element is zeroed. - if (Element & (1 << 7)) - ShuffleMask.push_back(SM_SentinelZero); - else { - // Only the least significant 4 bits of the byte are used. - int Index = Base + (Element & 0xf); - ShuffleMask.push_back(Index); + for (int i = 0; i < NumElements; ++i) { + // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte + // lane of the vector we're inside. + int Base = i < 16 ? 0 : 16; + Constant *COp = C->getAggregateElement(i); + if (!COp) { + ShuffleMask.clear(); + return; + } else if (isa(COp)) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + uint64_t Element = cast(COp)->getZExtValue(); + // If the high bit (7) of the byte is set, the element is zeroed. + if (Element & (1 << 7)) + ShuffleMask.push_back(SM_SentinelZero); + else { + // Only the least significant 4 bits of the byte are used. + int Index = Base + (Element & 0xf); + ShuffleMask.push_back(Index); + } } } + // TODO: Handle funny-looking vectors too. } void DecodePSHUFBMask(ArrayRef RawMask, diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 48ba32354f9..6ba3c64f8ec 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -24,7 +24,6 @@ namespace llvm { class Constant; -class DataLayout; class MVT; enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 }; @@ -69,8 +68,7 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl &ShuffleMask); void DecodeUNPCKLMask(MVT VT, SmallVectorImpl &ShuffleMask); /// \brief Decode a PSHUFB mask from an IR-level vector constant. -void DecodePSHUFBMask(const Constant *C, const DataLayout *TD, - SmallVectorImpl &ShuffleMask); +void DecodePSHUFBMask(const Constant *C, SmallVectorImpl &ShuffleMask); /// \brief Decode a PSHUFB mask from a raw array of constants such as from /// BUILD_VECTOR. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ceb4ac2ae49..65440a0d22a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5365,7 +5365,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, /// IsUnary to true if only uses one source. Note that this will set IsUnary for /// shuffles which use a single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. -static bool getTargetShuffleMask(SDNode *N, MVT VT, const DataLayout *TD, +static bool getTargetShuffleMask(SDNode *N, MVT VT, SmallVectorImpl &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); SDValue ImmN; @@ -5472,7 +5472,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, const DataLayout *TD, return false; if (auto *C = dyn_cast(MaskCP->getConstVal())) { - DecodePSHUFBMask(C, TD, Mask); + DecodePSHUFBMask(C, Mask); break; } @@ -5535,7 +5535,6 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, SDValue V = SDValue(N, 0); EVT VT = V.getValueType(); unsigned Opcode = V.getOpcode(); - const DataLayout *TD = DAG.getSubtarget().getDataLayout(); // Recurse into ISD::VECTOR_SHUFFLE node to find scalars. if (const ShuffleVectorSDNode *SV = dyn_cast(N)) { @@ -5557,7 +5556,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, SmallVector ShuffleMask; bool IsUnary; - if (!getTargetShuffleMask(N, ShufVT, TD, ShuffleMask, IsUnary)) + if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary)) return SDValue(); int Elt = ShuffleMask[Index]; @@ -22112,8 +22111,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, return false; SmallVector OpMask; bool IsUnary; - bool HaveMask = getTargetShuffleMask( - Op.getNode(), VT, Subtarget->getDataLayout(), OpMask, IsUnary); + bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary); // We only can combine unary shuffles which we can decode the mask for. if (!HaveMask || !IsUnary) return false; @@ -22204,12 +22202,10 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, /// /// This is a very minor wrapper around getTargetShuffleMask to easy forming v4 /// PSHUF-style masks that can be reused with such instructions. -static SmallVector getPSHUFShuffleMask(SDValue N, - const DataLayout *TD) { +static SmallVector getPSHUFShuffleMask(SDValue N) { SmallVector Mask; bool IsUnary; - bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), TD, - Mask, IsUnary); + bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary); (void)HaveMask; assert(HaveMask); @@ -22241,7 +22237,6 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, assert(N.getOpcode() == X86ISD::PSHUFD && "Called with something other than an x86 128-bit half shuffle!"); SDLoc DL(N); - const DataLayout *TD = DAG.getSubtarget().getDataLayout(); // Walk up a single-use chain looking for a combinable shuffle. Keep a stack // of the shuffles in the chain so that we can form a fresh chain to replace @@ -22327,7 +22322,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, return SDValue(); // Merge this node's mask and our incoming mask. - SmallVector VMask = getPSHUFShuffleMask(V, TD); + SmallVector VMask = getPSHUFShuffleMask(V); for (int &M : Mask) M = VMask[M]; V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0), @@ -22376,7 +22371,6 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef Mask, "Called with something other than an x86 128-bit half shuffle!"); SDLoc DL(N); unsigned CombineOpcode = N.getOpcode(); - const DataLayout *TD = DAG.getSubtarget().getDataLayout(); // Walk up a single-use chain looking for a combinable shuffle. SDValue V = N.getOperand(0); @@ -22415,7 +22409,7 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef Mask, // Merge this node's mask and our incoming mask (adjusted to account for all // the pshufd instructions encountered). - SmallVector VMask = getPSHUFShuffleMask(V, TD); + SmallVector VMask = getPSHUFShuffleMask(V); for (int &M : Mask) M = VMask[M]; V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0), @@ -22437,14 +22431,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { SDLoc DL(N); MVT VT = N.getSimpleValueType(); - const DataLayout *TD = Subtarget->getDataLayout(); SmallVector Mask; switch (N.getOpcode()) { case X86ISD::PSHUFD: case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: - Mask = getPSHUFShuffleMask(N, TD); + Mask = getPSHUFShuffleMask(N); assert(Mask.size() == 4); break; default: @@ -22496,8 +22489,8 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, while (D.getOpcode() == ISD::BITCAST && D.hasOneUse()) D = D.getOperand(0); if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) { - SmallVector VMask = getPSHUFShuffleMask(V, TD); - SmallVector DMask = getPSHUFShuffleMask(D, TD); + SmallVector VMask = getPSHUFShuffleMask(V); + SmallVector DMask = getPSHUFShuffleMask(D); int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; int WordMask[8]; @@ -22750,10 +22743,9 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, if (!InVec.hasOneUse()) return SDValue(); - const DataLayout *TD = DAG.getSubtarget().getDataLayout(); SmallVector ShuffleMask; bool UnaryShuffle; - if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), TD, + if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), ShuffleMask, UnaryShuffle)) return SDValue(); diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 460f88b0dd5..63ccded5ac9 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -1161,7 +1161,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (auto *C = getConstantFromPool(*MI, MaskOp)) { SmallVector Mask; - DecodePSHUFBMask(C, TM.getSubtargetImpl()->getDataLayout(), Mask); + DecodePSHUFBMask(C, Mask); if (!Mask.empty()) OutStreamer.AddComment(getShuffleComment(DstOp, SrcOp, Mask)); } diff --git a/test/CodeGen/X86/pshufb-mask-comments.ll b/test/CodeGen/X86/pshufb-mask-comments.ll index 8f2617068ae..303c4a68476 100644 --- a/test/CodeGen/X86/pshufb-mask-comments.ll +++ b/test/CodeGen/X86/pshufb-mask-comments.ll @@ -31,7 +31,7 @@ define <16 x i8> @test3(<16 x i8> %V) { define <16 x i8> @test4(<2 x i64>* %V) { ; CHECK-LABEL: test4 -; CHECK: pshufb {{.*}}# xmm0 = xmm0[8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7] +; CHECK: pshufb {{.*}} store <2 x i64> , <2 x i64>* %V, align 16 %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> undef, <16 x i8> ) ret <16 x i8> %1