mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 04:30:23 +00:00
[X86][AVX] Add support for shuffle decoding of vperm2f128/vperm2i128 with zero'd lanes
The vperm2f128/vperm2i128 shuffle mask decoding was not attempting to deal with shuffles that give zero lanes. This patch fixes this so that the assembly printer can provide shuffle comments. As this decoder is also used in X86ISelLowering for shuffle combining, I've added an early-out to match existing behaviour. The hope is that we can add zero support in the future; this would allow other ops' decodes (e.g. insertps) to be combined as well. Differential Revision: http://reviews.llvm.org/D10593 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241516 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
75a2ce3271
commit
315fd86400
@ -255,15 +255,13 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
|
||||
|
||||
/// Decode a VPERM2F128/VPERM2I128 immediate into a shuffle mask.
/// Each 128-bit destination lane l is controlled by the nibble (Imm >> (l*4)):
/// bits [1:0] select which source half to copy, and bit 3, when set, zeroes
/// the whole lane (emitted as SM_SentinelZero so the assembly printer can
/// show "zero,zero" comments).
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
                          SmallVectorImpl<int> &ShuffleMask) {
  unsigned HalfSize = VT.getVectorNumElements() / 2;

  for (unsigned l = 0; l != 2; ++l) {
    // Control nibble for destination lane l.
    unsigned HalfMask = Imm >> (l * 4);
    // First element index of the selected source half.
    unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
    for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
      // Bit 3 of the nibble zeroes the lane instead of copying a half.
      ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : i);
  }
}
|
||||
|
||||
|
@ -4390,6 +4390,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
|
||||
/// IsUnary to true if only uses one source. Note that this will set IsUnary for
|
||||
/// shuffles which use a single input multiple times, and in those cases it will
|
||||
/// adjust the mask to only have indices within that single input.
|
||||
/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
|
||||
static bool getTargetShuffleMask(SDNode *N, MVT VT,
|
||||
SmallVectorImpl<int> &Mask, bool &IsUnary) {
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
@ -4519,6 +4520,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
|
||||
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
|
||||
if (Mask.empty()) return false;
|
||||
// Mask only contains negative index if an element is zero.
|
||||
if (std::any_of(Mask.begin(), Mask.end(),
|
||||
[](int M){ return M == SM_SentinelZero; }))
|
||||
return false;
|
||||
break;
|
||||
case X86ISD::MOVSLDUP:
|
||||
DecodeMOVSLDUPMask(VT, Mask);
|
||||
|
@ -269,7 +269,7 @@ entry:
|
||||
define <4 x double> @vperm2z_0x08(<4 x double> %a) {
|
||||
; ALL-LABEL: vperm2z_0x08:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0
|
||||
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
|
||||
; ALL-NEXT: retq
|
||||
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
|
||||
ret <4 x double> %s
|
||||
@ -279,7 +279,7 @@ define <4 x double> @vperm2z_0x18(<4 x double> %a) {
|
||||
; ALL-LABEL: vperm2z_0x18:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; ALL-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
|
||||
; ALL-NEXT: retq
|
||||
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
ret <4 x double> %s
|
||||
@ -288,7 +288,7 @@ define <4 x double> @vperm2z_0x18(<4 x double> %a) {
|
||||
define <4 x double> @vperm2z_0x28(<4 x double> %a) {
|
||||
; ALL-LABEL: vperm2z_0x28:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0
|
||||
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
|
||||
; ALL-NEXT: retq
|
||||
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
ret <4 x double> %s
|
||||
@ -298,7 +298,7 @@ define <4 x double> @vperm2z_0x38(<4 x double> %a) {
|
||||
; ALL-LABEL: vperm2z_0x38:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; ALL-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
|
||||
; ALL-NEXT: retq
|
||||
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||
ret <4 x double> %s
|
||||
@ -307,7 +307,7 @@ define <4 x double> @vperm2z_0x38(<4 x double> %a) {
|
||||
define <4 x double> @vperm2z_0x80(<4 x double> %a) {
|
||||
; ALL-LABEL: vperm2z_0x80:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vperm2f128 $128, %ymm0, %ymm0, %ymm0
|
||||
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
|
||||
; ALL-NEXT: retq
|
||||
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
ret <4 x double> %s
|
||||
@ -316,7 +316,7 @@ define <4 x double> @vperm2z_0x80(<4 x double> %a) {
|
||||
define <4 x double> @vperm2z_0x81(<4 x double> %a) {
|
||||
; ALL-LABEL: vperm2z_0x81:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0
|
||||
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
|
||||
; ALL-NEXT: retq
|
||||
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
|
||||
ret <4 x double> %s
|
||||
@ -325,7 +325,7 @@ define <4 x double> @vperm2z_0x81(<4 x double> %a) {
|
||||
define <4 x double> @vperm2z_0x82(<4 x double> %a) {
|
||||
; ALL-LABEL: vperm2z_0x82:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vperm2f128 $128, %ymm0, %ymm0, %ymm0
|
||||
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
|
||||
; ALL-NEXT: retq
|
||||
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
|
||||
ret <4 x double> %s
|
||||
@ -334,7 +334,7 @@ define <4 x double> @vperm2z_0x82(<4 x double> %a) {
|
||||
define <4 x double> @vperm2z_0x83(<4 x double> %a) {
|
||||
; ALL-LABEL: vperm2z_0x83:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0
|
||||
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
|
||||
; ALL-NEXT: retq
|
||||
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
|
||||
ret <4 x double> %s
|
||||
@ -345,8 +345,8 @@ define <4 x double> @vperm2z_0x83(<4 x double> %a) {
|
||||
define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
|
||||
; ALL-LABEL: vperm2z_int_0x83:
|
||||
; ALL: # BB#0:
|
||||
; AVX1: vperm2f128 $129, %ymm0, %ymm0, %ymm0
|
||||
; AVX2: vperm2i128 $129, %ymm0, %ymm0, %ymm0
|
||||
; AVX1: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
|
||||
; AVX2: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
|
||||
%s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
|
||||
%c = add <4 x i64> %b, %s
|
||||
ret <4 x i64> %c
|
||||
|
Loading…
Reference in New Issue
Block a user