mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-11 16:37:42 +00:00
Match VPERMIL masks more strictly and update the target specific mask
generation to always catch the weird cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@136453 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2eb4c2bcad
commit
dd6353073f
@ -3465,11 +3465,14 @@ static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
|
||||
return false;
|
||||
|
||||
// The mask on the high lane should be the same as the low. Actually,
|
||||
// they can differ if any of the corresponding index in a lane is undef.
|
||||
// they can differ if any of the corresponding index in a lane is undef
|
||||
// and the other stays in range.
|
||||
int LaneSize = NumElts/NumLanes;
|
||||
for (int i = 0; i < LaneSize; ++i) {
|
||||
int HighElt = i+LaneSize;
|
||||
if (Mask[i] < 0 || Mask[HighElt] < 0)
|
||||
if (Mask[i] < 0 && (isUndefOrInRange(Mask[HighElt], LaneSize, NumElts)))
|
||||
continue;
|
||||
if (Mask[HighElt] < 0 && (isUndefOrInRange(Mask[i], 0, LaneSize)))
|
||||
continue;
|
||||
if (Mask[HighElt]-Mask[i] != LaneSize)
|
||||
return false;
|
||||
@ -3486,13 +3489,20 @@ static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
|
||||
|
||||
int NumElts = VT.getVectorNumElements();
|
||||
int NumLanes = VT.getSizeInBits()/128;
|
||||
int LaneSize = NumElts/NumLanes;
|
||||
|
||||
// Although the mask is equal for both lanes, do it twice to get the cases
|
||||
// where a mask will match because the same mask element is undef on the
|
||||
// first half but valid on the second. This would get pathological cases
|
||||
// such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
|
||||
unsigned Mask = 0;
|
||||
for (int i = 0; i < NumElts/NumLanes /* lane size */; ++i) {
|
||||
int MaskElt = SVOp->getMaskElt(i);
|
||||
if (MaskElt < 0)
|
||||
continue;
|
||||
Mask |= MaskElt << (i*2);
|
||||
for (int l = 0; l < NumLanes; ++l) {
|
||||
for (int i = 0; i < LaneSize; ++i) {
|
||||
int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
|
||||
if (MaskElt < 0)
|
||||
continue;
|
||||
Mask |= MaskElt << (i*2);
|
||||
}
|
||||
}
|
||||
|
||||
return Mask;
|
||||
|
@ -27,3 +27,13 @@ entry:
|
||||
%shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
|
||||
ret <4 x i64> %shuffle
|
||||
}
|
||||
|
||||
; vpermil should match masks like this: <u,3,1,2,4,u,5,6> (index 8 in the
; shuffle below selects from the undef second operand). Check that the
|
||||
; target specific mask was correctly generated.
|
||||
; CHECK: vpermilps $-100
|
||||
define <8 x float> @funcA(<8 x float> %a) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 8, i32 3, i32 1, i32 2, i32 4, i32 8, i32 5, i32 6>
|
||||
ret <8 x float> %shuffle
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user