Allow 256-bit shuffles to be split if a 128-bit lane contains elements from a single source. This is a rewrite of the 256-bit shuffle splitting code based on similar code from legalize types. Fixes PR12413.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154166 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2012-04-06 07:45:23 +00:00
parent e45cddfa08
commit 9a2b6e1d7b
2 changed files with 66 additions and 82 deletions

View File

@ -5836,96 +5836,79 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLaneElems = NumElems / 2;
int MinRange[2][2] = { { static_cast<int>(NumElems),
static_cast<int>(NumElems) },
{ static_cast<int>(NumElems),
static_cast<int>(NumElems) } };
int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } };
// Collect used ranges for each source in each lane
for (unsigned l = 0; l < 2; ++l) {
unsigned LaneStart = l*NumLaneElems;
for (unsigned i = 0; i != NumLaneElems; ++i) {
int Idx = SVOp->getMaskElt(i+LaneStart);
if (Idx < 0)
continue;
int Input = 0;
if (Idx >= (int)NumElems) {
Idx -= NumElems;
Input = 1;
}
if (Idx > MaxRange[l][Input])
MaxRange[l][Input] = Idx;
if (Idx < MinRange[l][Input])
MinRange[l][Input] = Idx;
}
}
// Make sure each range is 128-bits
int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } };
for (unsigned l = 0; l < 2; ++l) {
for (unsigned Input = 0; Input < 2; ++Input) {
if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0)
continue;
if (MinRange[l][Input] >= 0 && MaxRange[l][Input] < (int)NumLaneElems)
ExtractIdx[l][Input] = 0;
else if (MinRange[l][Input] >= (int)NumLaneElems &&
MaxRange[l][Input] < (int)NumElems)
ExtractIdx[l][Input] = NumLaneElems;
else
return SDValue();
}
}
DebugLoc dl = SVOp->getDebugLoc();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NVT = MVT::getVectorVT(EltVT, NumElems/2);
EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
SDValue Shufs[2];
SDValue Ops[2][2];
SmallVector<int, 16> Mask;
for (unsigned l = 0; l < 2; ++l) {
for (unsigned Input = 0; Input < 2; ++Input) {
if (ExtractIdx[l][Input] >= 0)
Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input),
DAG.getConstant(ExtractIdx[l][Input], MVT::i32),
DAG, dl);
else
Ops[l][Input] = DAG.getUNDEF(NVT);
}
}
// Build a shuffle mask for the output, discovering on the fly which
// input vectors to use as shuffle operands (recorded in InputUsed).
// If a lane would need more than two input vectors, then bail out by
// returning an empty SDValue.
int InputUsed[2] = { -1U, -1U }; // Not yet discovered.
unsigned LaneStart = l * NumLaneElems;
for (unsigned i = 0; i != NumLaneElems; ++i) {
// The mask element. This indexes into the input.
int Idx = SVOp->getMaskElt(i+LaneStart);
if (Idx < 0) {
// The mask element does not index into any input vector.
Mask.push_back(-1);
continue;
}
// Generate 128-bit shuffles
SmallVector<int, 16> Mask1, Mask2;
for (unsigned i = 0; i != NumLaneElems; ++i) {
int Elt = SVOp->getMaskElt(i);
if (Elt >= (int)NumElems) {
Elt %= NumLaneElems;
Elt += NumLaneElems;
} else if (Elt >= 0) {
Elt %= NumLaneElems;
}
Mask1.push_back(Elt);
}
for (unsigned i = NumLaneElems; i != NumElems; ++i) {
int Elt = SVOp->getMaskElt(i);
if (Elt >= (int)NumElems) {
Elt %= NumLaneElems;
Elt += NumLaneElems;
} else if (Elt >= 0) {
Elt %= NumLaneElems;
}
Mask2.push_back(Elt);
}
// The input vector this mask element indexes into.
int Input = Idx / NumLaneElems;
SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]);
SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]);
// Turn the index into an offset from the start of the input vector.
Idx -= Input * NumLaneElems;
// Find or create a shuffle vector operand to hold this input.
unsigned OpNo;
for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
if (InputUsed[OpNo] == Input)
// This input vector is already an operand.
break;
if (InputUsed[OpNo] < 0) {
// Create a new operand for this input vector.
InputUsed[OpNo] = Input;
break;
}
}
if (OpNo >= array_lengthof(InputUsed)) {
// More than two input vectors used! Give up.
return SDValue();
}
// Add the mask index for the new shuffle vector.
Mask.push_back(Idx + OpNo * NumLaneElems);
}
if (InputUsed[0] < 0) {
// No input vectors were used! The result is undefined.
Shufs[l] = DAG.getUNDEF(NVT);
} else {
SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2),
DAG.getConstant((InputUsed[0] % 2) * NumLaneElems, MVT::i32),
DAG, dl);
// If only one input was used, use an undefined vector for the other.
SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) :
Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2),
DAG.getConstant((InputUsed[1] % 2) * NumLaneElems, MVT::i32),
DAG, dl);
// At least one input vector was used. Create a new shuffle vector.
Shufs[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
}
Mask.clear();
}
// Concatenate the result back
SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1,
SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shufs[0],
DAG.getConstant(0, MVT::i32), DAG, dl);
return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32),
return Insert128BitVector(V, Shufs[1],DAG.getConstant(NumLaneElems, MVT::i32),
DAG, dl);
}

View File

@ -45,7 +45,8 @@ entry:
ret <8 x float> %shuffle
}
; CHECK: vpermilps
; CHECK: palignr
; CHECK: palignr
define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>