Improved shuffle normalization to avoid using extract/build when we

can extract using different indexes for two vectors. Added a few tests
for vector shuffles.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@59399 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Mon P Wang
2008-11-16 05:06:27 +00:00
parent 29cd5ba621
commit c7849c22f4
4 changed files with 203 additions and 97 deletions

View File

@@ -2292,8 +2292,8 @@ void SelectionDAGLowering::visitExtractElement(User &I) {
// Utility for visitShuffleVector - Returns true if the mask is mask starting
// from SIndx and increasing to the element length (undefs are allowed).
static bool SequentialMask(SDValue Mask, unsigned SIndx) {
unsigned NumElems = Mask.getNumOperands();
for (unsigned i = 0; i != NumElems; ++i) {
unsigned MaskNumElts = Mask.getNumOperands();
for (unsigned i = 0; i != MaskNumElts; ++i) {
if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) {
unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
if (Idx != i + SIndx)
@@ -2304,161 +2304,187 @@ static bool SequentialMask(SDValue Mask, unsigned SIndx) {
}
void SelectionDAGLowering::visitShuffleVector(User &I) {
SDValue V1 = getValue(I.getOperand(0));
SDValue V2 = getValue(I.getOperand(1));
SDValue Srcs[2];
Srcs[0] = getValue(I.getOperand(0));
Srcs[1] = getValue(I.getOperand(1));
SDValue Mask = getValue(I.getOperand(2));
MVT VT = TLI.getValueType(I.getType());
MVT VT1 = V1.getValueType();
unsigned MaskNumElts = Mask.getNumOperands();
unsigned Src1NumElts = VT1.getVectorNumElements();
MVT SrcVT = Srcs[0].getValueType();
int MaskNumElts = Mask.getNumOperands();
int SrcNumElts = SrcVT.getVectorNumElements();
if (Src1NumElts == MaskNumElts) {
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0], Srcs[1], Mask));
return;
}
// Normalize the shuffle vector since mask and vector length don't match.
if (Src1NumElts < MaskNumElts && MaskNumElts % Src1NumElts == 0) {
// We can concat vectors to make the mask and input vector match.
if (Src1NumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
// The shuffle is concatenating two vectors.
setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, VT, V1, V2));
MVT MaskEltVT = Mask.getValueType().getVectorElementType();
if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
// Mask is longer than the source vectors and is a multiple of the source
// vectors. We can use concatenate vector to make the mask and vectors
// length match.
if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
// The shuffle is concatenating two vectors together.
setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, VT, Srcs[0], Srcs[1]));
return;
}
// Pad both vectors with undefs to the same size as the mask.
unsigned NumConcat = MaskNumElts / Src1NumElts;
std::vector<SDValue> UnOps(Src1NumElts,
DAG.getNode(ISD::UNDEF,
VT1.getVectorElementType()));
SDValue UndefVal = DAG.getNode(ISD::BUILD_VECTOR, VT1,
&UnOps[0], UnOps.size());
// Pad both vectors with undefs to make them the same length as the mask.
unsigned NumConcat = MaskNumElts / SrcNumElts;
SDValue UndefVal = DAG.getNode(ISD::UNDEF, SrcVT);
SmallVector<SDValue, 8> MOps1, MOps2;
MOps1.push_back(V1);
MOps2.push_back(V2);
MOps1.push_back(Srcs[0]);
MOps2.push_back(Srcs[1]);
for (unsigned i = 1; i != NumConcat; ++i) {
MOps1.push_back(UndefVal);
MOps2.push_back(UndefVal);
}
V1 = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps1[0], MOps1.size());
V2 = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps2[0], MOps2.size());
Srcs[0] = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps1[0], MOps1.size());
Srcs[1] = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps2[0], MOps2.size());
// Readjust mask for new input vector length.
SmallVector<SDValue, 8> MappedOps;
for (unsigned i = 0; i != MaskNumElts; ++i) {
for (int i = 0; i != MaskNumElts; ++i) {
if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
MappedOps.push_back(Mask.getOperand(i));
} else {
unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
if (Idx < Src1NumElts) {
MappedOps.push_back(DAG.getConstant(Idx,
Mask.getOperand(i).getValueType()));
} else {
MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - Src1NumElts,
Mask.getOperand(i).getValueType()));
}
int Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
if (Idx < SrcNumElts)
MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
else
MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - SrcNumElts,
MaskEltVT));
}
}
Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
&MappedOps[0], MappedOps.size());
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0], Srcs[1], Mask));
return;
}
if (Src1NumElts > MaskNumElts) {
if (SrcNumElts > MaskNumElts) {
// Resulting vector is shorter than the incoming vector.
if (Src1NumElts == MaskNumElts && SequentialMask(Mask,0)) {
if (SrcNumElts == MaskNumElts && SequentialMask(Mask,0)) {
// Shuffle extracts 1st vector.
setValue(&I, V1);
setValue(&I, Srcs[0]);
return;
}
if (Src1NumElts == MaskNumElts && SequentialMask(Mask,MaskNumElts)) {
if (SrcNumElts == MaskNumElts && SequentialMask(Mask,MaskNumElts)) {
// Shuffle extracts 2nd vector.
setValue(&I, V2);
setValue(&I, Srcs[1]);
return;
}
// Analyze the access pattern of the vector to see if we can extract each
// subvector and then do the shuffle. The analysis is done by calculating
// the range of elements the mask access on both vectors. If it is useful,
// we could do better by considering separate what elements are accessed
// in each vector (i.e., have min/max for each vector).
int MinRange = Src1NumElts+1;
int MaxRange = -1;
for (unsigned i = 0; i != MaskNumElts; ++i) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle. The analysis is done by calculating
// the range of elements the mask access on both vectors.
int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
int MaxRange[2] = {-1, -1};
for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i);
if (Arg.getOpcode() != ISD::UNDEF) {
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
int Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
if (Idx > (int) Src1NumElts)
Idx -= Src1NumElts;
if (Idx > MaxRange)
MaxRange = Idx;
if (Idx < MinRange)
MinRange = Idx;
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
int Input = 0;
if (Idx >= SrcNumElts) {
Input = 1;
Idx -= SrcNumElts;
}
if (Idx > MaxRange[Input])
MaxRange[Input] = Idx;
if (Idx < MinRange[Input])
MinRange[Input] = Idx;
}
}
// Adjust MinRange to start at an even boundary since this give us
// better quality splits later.
if ((unsigned) MinRange < Src1NumElts && MinRange%2 != 0)
MinRange = MinRange - 1;
if (MaxRange - MinRange < (int) MaskNumElts) {
// Extract subvector because the range is less than the new vector length
unsigned StartIdx = (MinRange/MaskNumElts)*MaskNumElts;
if (MaxRange - StartIdx < MaskNumElts) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, V1,
DAG.getIntPtrConstant(MinRange));
V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, V2,
DAG.getIntPtrConstant(MinRange));
// Readjust mask for new input vector length.
SmallVector<SDValue, 8> MappedOps;
for (unsigned i = 0; i != MaskNumElts; ++i) {
if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
MappedOps.push_back(Mask.getOperand(i));
} else {
unsigned Idx =
cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
if (Idx < Src1NumElts) {
MappedOps.push_back(DAG.getConstant(Idx - StartIdx,
Mask.getOperand(i).getValueType()));
} else {
Idx = Idx - Src1NumElts - StartIdx + MaskNumElts;
MappedOps.push_back(DAG.getConstant(Idx,
Mask.getOperand(i).getValueType()));
}
}
}
Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
&MappedOps[0], MappedOps.size());
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
return;
// Check if the access is smaller than the vector size and can we find
// a reasonable extract index.
int RangeUse[2]; // 0 = Unused, 1 = Extract, 2 = Can not Extract.
int StartIdx[2]; // StartIdx to extract from
for (int Input=0; Input < 2; ++Input) {
if (MinRange[Input] == SrcNumElts+1 && MaxRange[Input] == -1) {
RangeUse[Input] = 0; // Unused
StartIdx[Input] = 0;
} else if (MaxRange[Input] - MinRange[Input] < MaskNumElts) {
// Fits within range but we should see if we can find a good
// start index that a multiple of the mask length.
if (MaxRange[Input] < MaskNumElts) {
RangeUse[Input] = 1; // Extract from beginning of the vector
StartIdx[Input] = 0;
} else {
StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
if (MaxRange[Input] - StartIdx[Input] < MaskNumElts)
RangeUse[Input] = 1; // Extract from a multiple of the mask length.
else
RangeUse[Input] = 2; // Can not extract
}
} else
RangeUse[Input] = 2; // Access doesn't fit within range
}
if (RangeUse[0] == 0 && RangeUse[0] == 0) {
setValue(&I, DAG.getNode(ISD::UNDEF, VT)); // Vectors are not used.
return;
}
else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
// Extract appropriate subvector and generate a vector shuffle
for (int Input=0; Input < 2; ++Input) {
if (RangeUse[Input] == 0) {
Srcs[Input] = DAG.getNode(ISD::UNDEF, VT);
} else {
Srcs[Input] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, Srcs[Input],
DAG.getIntPtrConstant(StartIdx[Input]));
}
}
// Calculate new mask.
SmallVector<SDValue, 8> MappedOps;
for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) {
MappedOps.push_back(Arg);
} else {
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
if (Idx < SrcNumElts)
MappedOps.push_back(DAG.getConstant(Idx - StartIdx[0], MaskEltVT));
else {
Idx = Idx - SrcNumElts - StartIdx[1] + MaskNumElts;
MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
}
}
}
Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
&MappedOps[0], MappedOps.size());
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Srcs[0], Srcs[1], Mask));
return;
}
}
// We can't use either concat vectors or extract subvectors so we fall back
// to insert and extracts.
// We can't use either concat vectors or extract subvectors so fall back to
// replacing the shuffle with extract and build vector.
// to insert and build vector.
MVT EltVT = VT.getVectorElementType();
MVT PtrVT = TLI.getPointerTy();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) {
Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
} else {
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
if (Idx < Src1NumElts)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, V1,
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
if (Idx < SrcNumElts)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Srcs[0],
DAG.getConstant(Idx, PtrVT)));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, V2,
DAG.getConstant(Idx - Src1NumElts, PtrVT)));
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, Srcs[1],
DAG.getConstant(Idx - SrcNumElts, PtrVT)));
}
}
setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size()));