X86: Make shuffle -> shift conversion more aggressive about undefs.

Shuffles that only move an element into position 0 of the vector are common in
the output of the loop vectorizer and often generate suboptimal code when SSSE3
is not available. Lower them to vector shifts if possible.

We still prefer palignr over psrldq because it has higher throughput on
Sandy Bridge.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182102 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer
2013-05-17 14:48:34 +00:00
parent c032d1aca0
commit a0de26ce34
3 changed files with 54 additions and 20 deletions

View File

@@ -4756,19 +4756,27 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
/// getNumOfConsecutiveZeros - Return the number of elements of a vector /// getNumOfConsecutiveZeros - Return the number of elements of a vector
/// shuffle operation which come from a consecutively from a zero. The /// shuffle operation which come from a consecutively from a zero. The
/// search can start in two different directions, from left or right. /// search can start in two different directions, from left or right.
static /// We count undefs as zeros until PreferredNum is reached.
unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems, static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp,
bool ZerosFromLeft, SelectionDAG &DAG) { unsigned NumElems, bool ZerosFromLeft,
unsigned i; SelectionDAG &DAG,
for (i = 0; i != NumElems; ++i) { unsigned PreferredNum = -1U) {
unsigned Index = ZerosFromLeft ? i : NumElems-i-1; unsigned NumZeros = 0;
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Index = ZerosFromLeft ? i : NumElems - i - 1;
SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0); SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
if (!(Elt.getNode() && if (!Elt.getNode())
(Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt)))) break;
if (X86::isZeroNode(Elt))
++NumZeros;
else if (Elt.getOpcode() == ISD::UNDEF) // Undef as zero up to PreferredNum.
NumZeros = std::min(NumZeros + 1, PreferredNum);
else
break; break;
} }
return i; return NumZeros;
} }
/// isShuffleMaskConsecutive - Check if the shuffle mask indicies [MaskI, MaskE) /// isShuffleMaskConsecutive - Check if the shuffle mask indicies [MaskI, MaskE)
@@ -4806,8 +4814,9 @@ bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, unsigned NumZeros = getNumOfConsecutiveZeros(
false /* check zeros from right */, DAG); SVOp, NumElems, false /* check zeros from right */, DAG,
SVOp->getMaskElt(0));
unsigned OpSrc; unsigned OpSrc;
if (!NumZeros) if (!NumZeros)
@@ -4839,8 +4848,9 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, unsigned NumZeros = getNumOfConsecutiveZeros(
true /* check zeros from left */, DAG); SVOp, NumElems, true /* check zeros from left */, DAG,
NumElems - SVOp->getMaskElt(NumElems - 1) - 1);
unsigned OpSrc; unsigned OpSrc;
if (!NumZeros) if (!NumZeros)
@@ -6871,6 +6881,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
TargetMask, DAG); TargetMask, DAG);
} }
if (isPALIGNRMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
getShufflePALIGNRImmediate(SVOp),
DAG);
// Check if this can be converted into a logical shift. // Check if this can be converted into a logical shift.
bool isLeft = false; bool isLeft = false;
unsigned ShAmt = 0; unsigned ShAmt = 0;
@@ -6988,11 +7003,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// inlined here right now to enable us to directly emit target specific // inlined here right now to enable us to directly emit target specific
// nodes, and remove one by one until they don't return Op anymore. // nodes, and remove one by one until they don't return Op anymore.
if (isPALIGNRMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
getShufflePALIGNRImmediate(SVOp),
DAG);
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) { SVOp->getSplatIndex() == 0 && V2IsUndef) {
if (VT == MVT::v2f64 || VT == MVT::v2i64) if (VT == MVT::v2f64 || VT == MVT::v2i64)

View File

@@ -46,7 +46,7 @@ entry:
} }
; CHECK: palignr $8 ; CHECK: palignr $8
; CHECK: psrldq $8 ; CHECK: palignr $8
define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp { define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
entry: entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9> %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>

View File

@@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s ; RUN: llc < %s -march=x86 -mattr=+sse2,+ssse3 | FileCheck %s
; There are no MMX operations in @t1 ; There are no MMX operations in @t1
define void @t1(i32 %a, x86_mmx* %P) nounwind { define void @t1(i32 %a, x86_mmx* %P) nounwind {
@@ -41,3 +41,27 @@ define <4 x float> @t4(<4 x float>* %P) nounwind {
; CHECK: t4: ; CHECK: t4:
; CHECK: psrldq $12 ; CHECK: psrldq $12
} }
; t5: only result lane 0 (element 1 of %x) and lane 15 (mask index 17, which
; selects element 1 of the zeroinitializer operand, i.e. a zero byte) are
; defined; every lane in between is undef. The undef lanes may be treated as
; zeros, so the whole shuffle is expected to lower to a single 1-byte logical
; right shift of the register.
define <16 x i8> @t5(<16 x i8> %x) nounwind {
%s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
ret <16 x i8> %s
; CHECK: t5:
; CHECK: psrldq $1
}
; t6: only result lane 0 (element 1 of %x) is defined; the second shuffle
; operand and all remaining mask entries are undef. The test expects this to
; be matched as a palignr by 1 byte rather than as a byte shift.
define <16 x i8> @t6(<16 x i8> %x) nounwind {
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %s
; CHECK: t6:
; CHECK: palignr $1
}
; t7: result lanes 14 and 15 take elements 1 and 2 of %x; lanes 0-13 and the
; second shuffle operand are undef. Moving element 1 up to lane 14 is a
; displacement of 13 bytes, so the expected lowering is a 13-byte logical
; left shift of the register.
define <16 x i8> @t7(<16 x i8> %x) nounwind {
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
ret <16 x i8> %s
; CHECK: t7:
; CHECK: pslldq $13
}