From d2f4460ee73d452bb109db2f678fd837c92d2b22 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sun, 11 Jan 2015 05:08:57 +0000 Subject: [PATCH] X86: Properly decode shuffle masks when the constant pool type is weird It's possible for the constant pool entry for the shuffle mask to come from a completely different operation. This occurs when Constants have the same bit pattern but have different types. Make DecodePSHUFBMask tolerant of types which, after a bitcast, are appropriately sized vector types. This fixes PR22188. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225597 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 102 ++++++++++++---------- lib/Target/X86/Utils/X86ShuffleDecode.h | 4 +- lib/Target/X86/X86ISelLowering.cpp | 38 ++++---- lib/Target/X86/X86MCInstLower.cpp | 2 +- test/CodeGen/X86/pshufb-mask-comments.ll | 10 +++ 5 files changed, 90 insertions(+), 66 deletions(-) diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index ba6cbc8bc21..6d5ee4dc516 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -13,7 +13,10 @@ //===----------------------------------------------------------------------===// #include "X86ShuffleDecode.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/CodeGen/MachineValueType.h" //===----------------------------------------------------------------------===// @@ -253,57 +256,64 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, } } -void DecodePSHUFBMask(const Constant *C, SmallVectorImpl &ShuffleMask) { +void DecodePSHUFBMask(const Constant *C, const DataLayout *TD, + SmallVectorImpl &ShuffleMask) { Type *MaskTy = C->getType(); - assert(MaskTy->isVectorTy() && "Expected a vector constant mask!"); - assert(MaskTy->getVectorElementType()->isIntegerTy(8) && - "Expected i8 constant mask elements!"); - int NumElements = MaskTy->getVectorNumElements(); + // It is not an error for the PSHUFB mask to not be a vector of i8 because the + // constant pool uniques constants by their bit representation. + // e.g. the following take up the same space in the constant pool: + // i128 -170141183420855150465331762880109871104 + // + // <2 x i64> + // + // <4 x i32> + + unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits(); + + VectorType *DestTy = nullptr; + if (MaskTySize == 128) + DestTy = VectorType::get(Type::getInt8Ty(MaskTy->getContext()), 16); + else if (MaskTySize == 256) + DestTy = VectorType::get(Type::getInt8Ty(MaskTy->getContext()), 32); + // FIXME: Add support for AVX-512. - assert((NumElements == 16 || NumElements == 32) && - "Only 128-bit and 256-bit vectors supported!"); + if (!DestTy) + return; + + if (DestTy != MaskTy) { + if (!CastInst::castIsValid(Instruction::BitCast, const_cast(C), + DestTy)) + return; + + C = ConstantFoldInstOperands(Instruction::BitCast, DestTy, + const_cast(C), TD); + MaskTy = DestTy; + } + + int NumElements = MaskTy->getVectorNumElements(); ShuffleMask.reserve(NumElements); - if (auto *CDS = dyn_cast(C)) { - assert((unsigned)NumElements == CDS->getNumElements() && - "Constant mask has a different number of elements!"); - - for (int i = 0; i < NumElements; ++i) { - // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte - // lane of the vector we're inside. - int Base = i < 16 ? 0 : 16; - uint64_t Element = CDS->getElementAsInteger(i); - // If the high bit (7) of the byte is set, the element is zeroed. - if (Element & (1 << 7)) - ShuffleMask.push_back(SM_SentinelZero); - else { - // Only the least significant 4 bits of the byte are used. - int Index = Base + (Element & 0xf); - ShuffleMask.push_back(Index); - } + for (int i = 0; i < NumElements; ++i) { + // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte + // lane of the vector we're inside. + int Base = i < 16 ? 0 : 16; + Constant *COp = C->getAggregateElement(i); + if (!COp) { + ShuffleMask.clear(); + return; + } else if (isa(COp)) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; } - } else if (auto *CV = dyn_cast(C)) { - assert((unsigned)NumElements == CV->getNumOperands() && - "Constant mask has a different number of elements!"); - - for (int i = 0; i < NumElements; ++i) { - // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte - // lane of the vector we're inside. - int Base = i < 16 ? 0 : 16; - Constant *COp = CV->getOperand(i); - if (isa(COp)) { - ShuffleMask.push_back(SM_SentinelUndef); - continue; - } - uint64_t Element = cast(COp)->getZExtValue(); - // If the high bit (7) of the byte is set, the element is zeroed. - if (Element & (1 << 7)) - ShuffleMask.push_back(SM_SentinelZero); - else { - // Only the least significant 4 bits of the byte are used. - int Index = Base + (Element & 0xf); - ShuffleMask.push_back(Index); - } + uint64_t Element = cast(COp)->getZExtValue(); + // If the high bit (7) of the byte is set, the element is zeroed. + if (Element & (1 << 7)) + ShuffleMask.push_back(SM_SentinelZero); + else { + // Only the least significant 4 bits of the byte are used. + int Index = Base + (Element & 0xf); + ShuffleMask.push_back(Index); } } } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 6ba3c64f8ec..48ba32354f9 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -24,6 +24,7 @@ namespace llvm { class Constant; +class DataLayout; class MVT; enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 }; @@ -68,7 +69,8 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl &ShuffleMask); void DecodeUNPCKLMask(MVT VT, SmallVectorImpl &ShuffleMask); /// \brief Decode a PSHUFB mask from an IR-level vector constant. -void DecodePSHUFBMask(const Constant *C, SmallVectorImpl &ShuffleMask); +void DecodePSHUFBMask(const Constant *C, const DataLayout *TD, + SmallVectorImpl &ShuffleMask); /// \brief Decode a PSHUFB mask from a raw array of constants such as from /// BUILD_VECTOR. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9525ed0a220..ceb4ac2ae49 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5365,7 +5365,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, /// IsUnary to true if only uses one source. Note that this will set IsUnary for /// shuffles which use a single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. -static bool getTargetShuffleMask(SDNode *N, MVT VT, +static bool getTargetShuffleMask(SDNode *N, MVT VT, const DataLayout *TD, SmallVectorImpl &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); SDValue ImmN; @@ -5472,13 +5472,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, return false; if (auto *C = dyn_cast(MaskCP->getConstVal())) { - // FIXME: Support AVX-512 here. - Type *Ty = C->getType(); - if (!Ty->isVectorTy() || (Ty->getVectorNumElements() != 16 && - Ty->getVectorNumElements() != 32)) - return false; - - DecodePSHUFBMask(C, Mask); + DecodePSHUFBMask(C, TD, Mask); break; } @@ -5541,6 +5535,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, SDValue V = SDValue(N, 0); EVT VT = V.getValueType(); unsigned Opcode = V.getOpcode(); + const DataLayout *TD = DAG.getSubtarget().getDataLayout(); // Recurse into ISD::VECTOR_SHUFFLE node to find scalars. if (const ShuffleVectorSDNode *SV = dyn_cast(N)) { @@ -5562,7 +5557,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, SmallVector ShuffleMask; bool IsUnary; - if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary)) + if (!getTargetShuffleMask(N, ShufVT, TD, ShuffleMask, IsUnary)) return SDValue(); int Elt = ShuffleMask[Index]; @@ -22117,7 +22112,8 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, return false; SmallVector OpMask; bool IsUnary; - bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary); + bool HaveMask = getTargetShuffleMask( + Op.getNode(), VT, Subtarget->getDataLayout(), OpMask, IsUnary); // We only can combine unary shuffles which we can decode the mask for. if (!HaveMask || !IsUnary) return false; @@ -22208,10 +22204,12 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, /// /// This is a very minor wrapper around getTargetShuffleMask to easy forming v4 /// PSHUF-style masks that can be reused with such instructions. -static SmallVector getPSHUFShuffleMask(SDValue N) { +static SmallVector getPSHUFShuffleMask(SDValue N, + const DataLayout *TD) { SmallVector Mask; bool IsUnary; - bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary); + bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), TD, + Mask, IsUnary); (void)HaveMask; assert(HaveMask); @@ -22243,6 +22241,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, assert(N.getOpcode() == X86ISD::PSHUFD && "Called with something other than an x86 128-bit half shuffle!"); SDLoc DL(N); + const DataLayout *TD = DAG.getSubtarget().getDataLayout(); // Walk up a single-use chain looking for a combinable shuffle. Keep a stack // of the shuffles in the chain so that we can form a fresh chain to replace @@ -22328,7 +22327,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, return SDValue(); // Merge this node's mask and our incoming mask. - SmallVector VMask = getPSHUFShuffleMask(V); + SmallVector VMask = getPSHUFShuffleMask(V, TD); for (int &M : Mask) M = VMask[M]; V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0), @@ -22377,6 +22376,7 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef Mask, "Called with something other than an x86 128-bit half shuffle!"); SDLoc DL(N); unsigned CombineOpcode = N.getOpcode(); + const DataLayout *TD = DAG.getSubtarget().getDataLayout(); // Walk up a single-use chain looking for a combinable shuffle. SDValue V = N.getOperand(0); @@ -22415,7 +22415,7 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef Mask, // Merge this node's mask and our incoming mask (adjusted to account for all // the pshufd instructions encountered). - SmallVector VMask = getPSHUFShuffleMask(V); + SmallVector VMask = getPSHUFShuffleMask(V, TD); for (int &M : Mask) M = VMask[M]; V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0), @@ -22437,13 +22437,14 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { SDLoc DL(N); MVT VT = N.getSimpleValueType(); + const DataLayout *TD = Subtarget->getDataLayout(); SmallVector Mask; switch (N.getOpcode()) { case X86ISD::PSHUFD: case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: - Mask = getPSHUFShuffleMask(N); + Mask = getPSHUFShuffleMask(N, TD); assert(Mask.size() == 4); break; default: @@ -22495,8 +22496,8 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, while (D.getOpcode() == ISD::BITCAST && D.hasOneUse()) D = D.getOperand(0); if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) { - SmallVector VMask = getPSHUFShuffleMask(V); - SmallVector DMask = getPSHUFShuffleMask(D); + SmallVector VMask = getPSHUFShuffleMask(V, TD); + SmallVector DMask = getPSHUFShuffleMask(D, TD); int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; int WordMask[8]; @@ -22749,9 +22750,10 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, if (!InVec.hasOneUse()) return SDValue(); + const DataLayout *TD = DAG.getSubtarget().getDataLayout(); SmallVector ShuffleMask; bool UnaryShuffle; - if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), + if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), TD, ShuffleMask, UnaryShuffle)) return SDValue(); diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 63ccded5ac9..460f88b0dd5 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -1161,7 +1161,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (auto *C = getConstantFromPool(*MI, MaskOp)) { SmallVector Mask; - DecodePSHUFBMask(C, Mask); + DecodePSHUFBMask(C, TM.getSubtargetImpl()->getDataLayout(), Mask); if (!Mask.empty()) OutStreamer.AddComment(getShuffleComment(DstOp, SrcOp, Mask)); } diff --git a/test/CodeGen/X86/pshufb-mask-comments.ll b/test/CodeGen/X86/pshufb-mask-comments.ll index 7fc9890e802..8f2617068ae 100644 --- a/test/CodeGen/X86/pshufb-mask-comments.ll +++ b/test/CodeGen/X86/pshufb-mask-comments.ll @@ -27,4 +27,14 @@ define <16 x i8> @test3(<16 x i8> %V) { ret <16 x i8> %1 } +; Test that we won't crash when the constant was reused for another instruction. + +define <16 x i8> @test4(<2 x i64>* %V) { +; CHECK-LABEL: test4 +; CHECK: pshufb {{.*}}# xmm0 = xmm0[8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7] + store <2 x i64> , <2 x i64>* %V, align 16 + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> undef, <16 x i8> ) + ret <16 x i8> %1 +} + declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone