Revert r188449 as it turns out we're just missing the instructions that need the v16i32/v16f32 matching.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188454 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2013-08-15 08:38:25 +00:00
parent d9767021f8
commit d36e1efa4b
2 changed files with 26 additions and 17 deletions

View File

@@ -4102,26 +4102,41 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
return (FstHalf | (SndHalf << 4));
}
// Symmetric in-lane mask. Each lane has 4 elements (for imm8)
// Symetric in-lane mask. Each lane has 4 elements (for imm8)
static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
unsigned NumElts = VT.getVectorNumElements();
if (!(VT.is256BitVector() && NumElts == 4) &&
!(VT.is512BitVector() && NumElts == 8))
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize < 32)
return false;
unsigned NumElts = VT.getVectorNumElements();
Imm8 = 0;
if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
for (unsigned i = 0; i != NumElts; ++i) {
if (Mask[i] < 0)
continue;
Imm8 |= Mask[i] << (i*2);
}
return true;
}
unsigned LaneSize = 4;
SmallVector<int, 4> MaskVal(LaneSize, -1);
for (unsigned l = 0; l != NumElts; l += LaneSize) {
for (unsigned i = 0; i != LaneSize; ++i) {
if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
return false;
if (Mask[i] >= 0 && !isUndefOrEqual(Mask[i+l], Mask[i]+l))
if (Mask[i+l] < 0)
continue;
if (MaskVal[i] < 0) {
MaskVal[i] = Mask[i+l] - l;
Imm8 |= MaskVal[i] << (i*2);
continue;
}
if (Mask[i+l] != (signed)(MaskVal[i]+l))
return false;
if (Mask[i+l] >= 0)
Imm8 |= (Mask[i+l] - l) << (i*2);
}
}
return true;
}
@@ -4150,7 +4165,9 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
if (NumElts != 8 || l == 0)
continue;
// VPERMILPS handling
if (Mask[i] >= 0 && !isUndefOrEqual(Mask[i+l], Mask[i]+l))
if (Mask[i] < 0)
continue;
if (!isUndefOrEqual(Mask[i+l], Mask[i]+l))
return false;
}
}

View File

@@ -32,14 +32,6 @@ define <16 x i32> @test2(<16 x i32> %a) nounwind {
ret <16 x i32> %c
}
; CHECK: test2b:
; CHECK: vpermd
; CHECK: ret
define <16 x i32> @test2b(<16 x i32> %a) nounwind {
%c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
ret <16 x i32> %c
}
; CHECK: test3:
; CHECK: vpermq
; CHECK: ret