Add support for matching shuffle patterns with palignr.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@84459 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nate Begeman 2009-10-19 02:17:23 +00:00
parent 4d21ae7cf4
commit a09008bf6d
6 changed files with 186 additions and 27 deletions

View File

@ -2389,6 +2389,56 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
return ::isPSHUFLWMask(M, N->getValueType(0)); return ::isPSHUFLWMask(M, N->getValueType(0));
} }
/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PALIGNR.
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool hasSSSE3) {
int i, e = VT.getVectorNumElements();
// Do not handle v2i64 / v2f64 shuffles with palignr.
if (e < 4 || !hasSSSE3)
return false;
// Skip leading undef elements to find the first defined mask entry.
for (i = 0; i != e; ++i)
if (Mask[i] >= 0)
break;
// All undef, not a palignr.
if (i == e)
return false;
// Determine if it's ok to perform a palignr with only the LHS, since we
// don't have access to the actual shuffle elements to see if RHS is undef.
// A mask element < e selects from the first operand, so the shuffle may be
// unary; the rotation amount s is implied by the first defined element.
bool Unary = Mask[i] < (int)e;
bool NeedsUnary = false;
int s = Mask[i] - i;
// Check the rest of the elements to see if they are consecutive.
for (++i; i != e; ++i) {
int m = Mask[i];
if (m < 0)
continue;
// Once any element selects from the second operand the shuffle is binary.
Unary = Unary && (m < (int)e);
// An element below the rotation start requires wrap-around, which is only
// expressible when both PALIGNR inputs are the same vector (unary case).
NeedsUnary = NeedsUnary || (m < s);
if (NeedsUnary && !Unary)
return false;
// Unary: indices wrap modulo the element count.
// NOTE(review): the (e-1) mask assumes e is a power of two -- true for the
// SSE vector types handled here; confirm if wider types are added.
if (Unary && m != ((s+i) & (e-1)))
return false;
// Binary: indices must be strictly consecutive with no wrapping.
if (!Unary && m != (s+i))
return false;
}
return true;
}
// Public wrapper: extract the shuffle mask from the node and defer to the
// static mask predicate above.  SSSE3 availability is asserted by the caller
// context, hence the hard-coded 'true'.
bool X86::isPALIGNRMask(ShuffleVectorSDNode *N) {
SmallVector<int, 8> MaskElts;
N->getMask(MaskElts);
return ::isPALIGNRMask(MaskElts, N->getValueType(0), /*hasSSSE3=*/true);
}
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*. /// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
@ -2733,8 +2783,7 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
} }
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
/// instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) { unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
int NumOperands = SVOp->getValueType(0).getVectorNumElements(); int NumOperands = SVOp->getValueType(0).getVectorNumElements();
@ -2753,8 +2802,7 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
} }
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
/// instructions.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
unsigned Mask = 0; unsigned Mask = 0;
@ -2770,8 +2818,7 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
} }
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
/// instructions.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
unsigned Mask = 0; unsigned Mask = 0;
@ -2786,6 +2833,23 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
return Mask; return Mask;
} }
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
EVT VVT = N->getValueType(0);
// PALIGNR's immediate is a byte count, so scale the element offset by the
// element size in bytes.
unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
int Val = 0;
unsigned i, e;
// Find the first non-undef mask element; its distance from its slot is the
// rotation amount.  NOTE(review): assumes the mask was already accepted by
// isPALIGNRMask (so at least one element is >= 0 and Val >= i); otherwise
// the unsigned subtraction below would wrap -- confirm all callers check.
for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
Val = SVOp->getMaskElt(i);
if (Val >= 0)
break;
}
return (Val - i) * EltSize;
}
/// isZeroNode - Returns true if Elt is a constant zero or a floating point /// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0. /// constant +0.0.
bool X86::isZeroNode(SDValue Elt) { bool X86::isZeroNode(SDValue Elt) {
@ -7274,7 +7338,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
if (VT.getSizeInBits() == 64) if (VT.getSizeInBits() == 64)
return false; return false;
// FIXME: pshufb, blends, palignr, shifts. // FIXME: pshufb, blends, shifts.
return (VT.getVectorNumElements() == 2 || return (VT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) || ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, VT) || isMOVLMask(M, VT) ||
@ -7282,6 +7346,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isPSHUFDMask(M, VT) || isPSHUFDMask(M, VT) ||
isPSHUFHWMask(M, VT) || isPSHUFHWMask(M, VT) ||
isPSHUFLWMask(M, VT) || isPSHUFLWMask(M, VT) ||
isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
isUNPCKLMask(M, VT) || isUNPCKLMask(M, VT) ||
isUNPCKHMask(M, VT) || isUNPCKHMask(M, VT) ||
isUNPCKL_v_undef_Mask(M, VT) || isUNPCKL_v_undef_Mask(M, VT) ||

View File

@ -323,21 +323,27 @@ namespace llvm {
/// specifies a shuffle of elements that is suitable for input to MOVDDUP. /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool isMOVDDUPMask(ShuffleVectorSDNode *N); bool isMOVDDUPMask(ShuffleVectorSDNode *N);
/// isPALIGNRMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PALIGNR.
bool isPALIGNRMask(ShuffleVectorSDNode *N);
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions. /// instructions.
unsigned getShuffleSHUFImmediate(SDNode *N); unsigned getShuffleSHUFImmediate(SDNode *N);
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW /// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction.
/// instructions.
unsigned getShufflePSHUFHWImmediate(SDNode *N); unsigned getShufflePSHUFHWImmediate(SDNode *N);
/// getShufflePSHUFKWImmediate - Return the appropriate immediate to shuffle /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
/// instructions.
unsigned getShufflePSHUFLWImmediate(SDNode *N); unsigned getShufflePSHUFLWImmediate(SDNode *N);
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
unsigned getShufflePALIGNRImmediate(SDNode *N);
/// isZeroNode - Returns true if Elt is a constant zero or a floating point /// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0. /// constant +0.0.
bool isZeroNode(SDValue Elt); bool isZeroNode(SDValue Elt);

View File

@ -197,6 +197,12 @@ def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>; }]>;
// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
// a PALIGNR imm.  The returned value is the byte-shift count encoded in the
// instruction's 8-bit immediate (see X86::getShufflePALIGNRImmediate).
def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePALIGNRImmediate(N));
}]>;
def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{ (vector_shuffle node:$lhs, node:$rhs), [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
@ -283,6 +289,11 @@ def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_pshuflw_imm>; }], SHUFFLE_get_pshuflw_imm>;
// palign - Match a vector_shuffle whose mask is expressible as a PALIGNR.
// The attached SHUFFLE_get_palign_imm xform computes the byte-shift
// immediate from the matched mask.
def palign : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_palign_imm>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// SSE scalar FP Instructions // SSE scalar FP Instructions
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -2062,6 +2073,7 @@ defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
// Shuffle and unpack instructions // Shuffle and unpack instructions
let AddedComplexity = 5 in {
def PSHUFDri : PDIi8<0x70, MRMSrcReg, def PSHUFDri : PDIi8<0x70, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@ -2073,6 +2085,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
[(set VR128:$dst, (v4i32 (pshufd:$src2 [(set VR128:$dst, (v4i32 (pshufd:$src2
(bc_v4i32(memopv2i64 addr:$src1)), (bc_v4i32(memopv2i64 addr:$src1)),
(undef))))]>; (undef))))]>;
}
// SSE2 with ImmT == Imm8 and XS prefix. // SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg, def PSHUFHWri : Ii8<0x70, MRMSrcReg,
@ -2839,6 +2852,26 @@ let Constraints = "$src1 = $dst" in {
imm:$src3))]>, OpSize; imm:$src3))]>, OpSize;
} }
// palignr patterns.
// NOTE(review): each result passes ($src2, $src1) -- operands deliberately
// reversed relative to the shuffle -- to match PALIGNR's operand
// concatenation order; the immediate is derived from the matched mask via
// SHUFFLE_get_palign_imm.  Confirm against the SSSE3 PALIGNR definition.
// AddedComplexity = 5 prefers these over plainer shuffle patterns.
let AddedComplexity = 5 in {
def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
Requires<[HasSSSE3]>;
def : Pat<(v4f32 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
Requires<[HasSSSE3]>;
def : Pat<(v8i16 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
Requires<[HasSSSE3]>;
def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
Requires<[HasSSSE3]>;
}
def : Pat<(X86pshufb VR128:$src, VR128:$mask), def : Pat<(X86pshufb VR128:$src, VR128:$mask),
(PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),

View File

@ -0,0 +1,58 @@
; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=YONAH %s
; Unary rotate of %A: should stay a pshufd on both CPUs.
; Fix: the second RUN line uses --check-prefix=YONAH, so the directive must be
; spelled "YONAH:"; "CHECK-YONAH:" is silently ignored by FileCheck.
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK: pshufd
; YONAH: pshufd
%C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
ret <4 x i32> %C
}
; Cross-input consecutive mask: palignr with SSSE3 (core2), shufps fallback
; without it (yonah).
; Fix: the second RUN line uses --check-prefix=YONAH, so the directive must be
; spelled "YONAH:"; "CHECK-YONAH:" is silently ignored by FileCheck.
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK: palignr
; YONAH: shufps
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >
ret <4 x i32> %C
}
; An undef lane inside an otherwise consecutive cross-input run still matches.
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK: palignr
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
ret <4 x i32> %C
}
; Mask selects from both inputs with wrap-around; presumably matched after the
; DAG commutes the shuffle operands -- the final element index (1) is below
; the run start (6).
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK: palignr
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
ret <4 x i32> %C
}
; Same mask as test4 but on v4f32: palignr also covers float vectors.
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK: palignr
%C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
ret <4 x float> %C
}
; v8i16 consecutive run 3..10 spanning both inputs, with one undef lane.
define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK: palignr
%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
ret <8 x i16> %C
}
; Leading undef lanes: the rotation amount is inferred from the first defined
; element (6 at slot 1).
define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK: palignr
%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
ret <8 x i16> %C
}
; Unary case: all defined indices select %A (indices < 8), with wrap-around
; (final index 0 below the run start 2).
define <8 x i16> @test8(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK: palignr
%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
ret <8 x i16> %C
}
; v16i8 consecutive run 5..20 across both inputs -- the byte-granular case
; PALIGNR handles natively.
define <16 x i8> @test9(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK: palignr
%C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
ret <16 x i8> %C
}

View File

@ -1,19 +1,15 @@
; RUN: llc < %s -march=x86 -mcpu=pentium-m -o %t ; RUN: llc < %s -march=x86 -mcpu=pentium-m | FileCheck %s
; RUN: grep movlhps %t | count 1
; RUN: grep pshufd %t | count 1
; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
; RUN: grep movlhps %t | count 1
; RUN: grep movddup %t | count 1
define <4 x float> @t1(<4 x float> %a) nounwind { define <4 x float> @t1(<4 x float> %a) nounwind {
entry: ; CHECK: movlhps
%tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x float>> [#uses=1] %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp1 ret <4 x float> %tmp1
} }
define <4 x i32> @t2(<4 x i32>* %a) nounwind { define <4 x i32> @t2(<4 x i32>* %a) nounwind {
entry: ; CHECK: pshufd
%tmp1 = load <4 x i32>* %a; ; CHECK: ret
%tmp1 = load <4 x i32>* %a;
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x i32>> [#uses=1] %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 > ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp2 ret <4 x i32> %tmp2
} }

View File

@ -1,9 +1,10 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; RUN: grep punpck %t | count 2
; RUN: not grep pextrw %t
define <4 x i32> @test(i8** %ptr) { define <4 x i32> @test(i8** %ptr) {
entry: ; CHECK: xorps
; CHECK: punpcklbw
; CHECK: punpcklwd
%tmp = load i8** %ptr ; <i8*> [#uses=1] %tmp = load i8** %ptr ; <i8*> [#uses=1]
%tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1] %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1]
%tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1] %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1]