From dd6353073f8345e76716e5785da5a48bc964eac9 Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Fri, 29 Jul 2011 01:31:15 +0000
Subject: [PATCH] Match VPERMIL masks more strictly and update the target specific mask generation to always catch the weird cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@136453 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 24 +++++++++++++++++-------
 test/CodeGen/X86/avx-vpermil.ll    | 10 ++++++++++
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d229592587d..e35d9e98130 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3465,11 +3465,14 @@ static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
     return false;
 
   // The mask on the high lane should be the same as the low. Actually,
-  // they can differ if any of the corresponding index in a lane is undef.
+  // they can differ if any of the corresponding index in a lane is undef
+  // and the other stays in range.
   int LaneSize = NumElts/NumLanes;
   for (int i = 0; i < LaneSize; ++i) {
     int HighElt = i+LaneSize;
-    if (Mask[i] < 0 || Mask[HighElt] < 0)
+    if (Mask[i] < 0 && (isUndefOrInRange(Mask[HighElt], LaneSize, NumElts)))
+      continue;
+    if (Mask[HighElt] < 0 && (isUndefOrInRange(Mask[i], 0, LaneSize)))
       continue;
     if (Mask[HighElt]-Mask[i] != LaneSize)
       return false;
@@ -3486,13 +3489,20 @@ static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
 
   int NumElts = VT.getVectorNumElements();
   int NumLanes = VT.getSizeInBits()/128;
+  int LaneSize = NumElts/NumLanes;
 
+  // Although the mask is equal for both lanes do it twice to get the cases
+  // where a mask will match because the same mask element is undef on the
+  // first half but valid on the second. This would get pathological cases
+  // such as: shuffle , which is completely valid.
   unsigned Mask = 0;
-  for (int i = 0; i < NumElts/NumLanes /* lane size */; ++i) {
-    int MaskElt = SVOp->getMaskElt(i);
-    if (MaskElt < 0)
-      continue;
-    Mask |= MaskElt << (i*2);
+  for (int l = 0; l < NumLanes; ++l) {
+    for (int i = 0; i < LaneSize; ++i) {
+      int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
+      if (MaskElt < 0)
+        continue;
+      Mask |= MaskElt << (i*2);
+    }
   }
 
   return Mask;
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index f059948a799..a3e916dc1eb 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -27,3 +27,13 @@ entry:
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32>
   ret <4 x i64> %shuffle
 }
+
+; vpermil should match masks like this: . Check that the
+; target specific mask was correctly generated.
+; CHECK: vpermilps $-100
+define <8 x float> @funcA(<8 x float> %a) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32>
+  ret <8 x float> %shuffle
+}
+