mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-21 21:29:41 +00:00
Improve 256-bit shuffle splitting to allow 2 sources in each 128-bit lane, as long as only a single lane of each source is used in each lane of the destination. This makes the splitting match much more closely what happens with 256-bit shuffles when AVX is disabled and only 128-bit XMM is allowed.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148537 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
38b6d9dd22
commit
8f35c13842
@ -3162,17 +3162,6 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) {
|
|||||||
return (Val < 0) || (Val >= Low && Val < Hi);
|
return (Val < 0) || (Val >= Low && Val < Hi);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// isUndefOrInRange - Return true if every element in Mask, begining
|
|
||||||
/// from position Pos and ending in Pos+Size, falls within the specified
|
|
||||||
/// range (L, L+Pos]. or is undef.
|
|
||||||
static bool isUndefOrInRange(ArrayRef<int> Mask,
|
|
||||||
int Pos, int Size, int Low, int Hi) {
|
|
||||||
for (int i = Pos, e = Pos+Size; i != e; ++i)
|
|
||||||
if (!isUndefOrInRange(Mask[i], Low, Hi))
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
|
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
|
||||||
/// specified value.
|
/// specified value.
|
||||||
static bool isUndefOrEqual(int Val, int CmpVal) {
|
static bool isUndefOrEqual(int Val, int CmpVal) {
|
||||||
@ -5948,95 +5937,106 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
|
|||||||
OpVT, SrcOp)));
|
OpVT, SrcOp)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector
|
|
||||||
/// shuffle node referes to only one lane in the sources.
|
|
||||||
static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) {
|
|
||||||
EVT VT = SVOp->getValueType(0);
|
|
||||||
int NumElems = VT.getVectorNumElements();
|
|
||||||
int HalfSize = NumElems/2;
|
|
||||||
ArrayRef<int> M = SVOp->getMask();
|
|
||||||
bool MatchA = false, MatchB = false;
|
|
||||||
|
|
||||||
for (int l = 0; l < NumElems*2; l += HalfSize) {
|
|
||||||
if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) {
|
|
||||||
MatchA = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int l = 0; l < NumElems*2; l += HalfSize) {
|
|
||||||
if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) {
|
|
||||||
MatchB = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return MatchA && MatchB;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles
|
/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles
|
||||||
/// which could not be matched by any known target speficic shuffle
|
/// which could not be matched by any known target speficic shuffle
|
||||||
static SDValue
|
static SDValue
|
||||||
LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
|
LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
|
||||||
if (areShuffleHalvesWithinDisjointLanes(SVOp)) {
|
EVT VT = SVOp->getValueType(0);
|
||||||
// If each half of a vector shuffle node referes to only one lane in the
|
|
||||||
// source vectors, extract each used 128-bit lane and shuffle them using
|
|
||||||
// 128-bit shuffles. Then, concatenate the results. Otherwise leave
|
|
||||||
// the work to the legalizer.
|
|
||||||
DebugLoc dl = SVOp->getDebugLoc();
|
|
||||||
EVT VT = SVOp->getValueType(0);
|
|
||||||
int NumElems = VT.getVectorNumElements();
|
|
||||||
int HalfSize = NumElems/2;
|
|
||||||
|
|
||||||
// Extract the reference for each half
|
unsigned NumElems = VT.getVectorNumElements();
|
||||||
int FstVecExtractIdx = 0, SndVecExtractIdx = 0;
|
unsigned NumLaneElems = NumElems / 2;
|
||||||
int FstVecOpNum = 0, SndVecOpNum = 0;
|
|
||||||
for (int i = 0; i < HalfSize; ++i) {
|
int MinRange[2][2] = { { static_cast<int>(NumElems),
|
||||||
int Elt = SVOp->getMaskElt(i);
|
static_cast<int>(NumElems) },
|
||||||
if (SVOp->getMaskElt(i) < 0)
|
{ static_cast<int>(NumElems),
|
||||||
|
static_cast<int>(NumElems) } };
|
||||||
|
int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } };
|
||||||
|
|
||||||
|
// Collect used ranges for each source in each lane
|
||||||
|
for (unsigned l = 0; l < 2; ++l) {
|
||||||
|
unsigned LaneStart = l*NumLaneElems;
|
||||||
|
for (unsigned i = 0; i != NumLaneElems; ++i) {
|
||||||
|
int Idx = SVOp->getMaskElt(i+LaneStart);
|
||||||
|
if (Idx < 0)
|
||||||
continue;
|
continue;
|
||||||
FstVecOpNum = Elt/NumElems;
|
|
||||||
FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
for (int i = HalfSize; i < NumElems; ++i) {
|
|
||||||
int Elt = SVOp->getMaskElt(i);
|
|
||||||
if (SVOp->getMaskElt(i) < 0)
|
|
||||||
continue;
|
|
||||||
SndVecOpNum = Elt/NumElems;
|
|
||||||
SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract the subvectors
|
int Input = 0;
|
||||||
SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum),
|
if (Idx >= (int)NumElems) {
|
||||||
DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl);
|
Idx -= NumElems;
|
||||||
SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum),
|
Input = 1;
|
||||||
DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl);
|
}
|
||||||
|
|
||||||
// Generate 128-bit shuffles
|
if (Idx > MaxRange[l][Input])
|
||||||
SmallVector<int, 16> MaskV1, MaskV2;
|
MaxRange[l][Input] = Idx;
|
||||||
for (int i = 0; i < HalfSize; ++i) {
|
if (Idx < MinRange[l][Input])
|
||||||
int Elt = SVOp->getMaskElt(i);
|
MinRange[l][Input] = Idx;
|
||||||
MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize);
|
|
||||||
}
|
}
|
||||||
for (int i = HalfSize; i < NumElems; ++i) {
|
|
||||||
int Elt = SVOp->getMaskElt(i);
|
|
||||||
MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
EVT NVT = V1.getValueType();
|
|
||||||
V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]);
|
|
||||||
V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]);
|
|
||||||
|
|
||||||
// Concatenate the result back
|
|
||||||
SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1,
|
|
||||||
DAG.getConstant(0, MVT::i32), DAG, dl);
|
|
||||||
return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
|
|
||||||
DAG, dl);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return SDValue();
|
// Make sure each range is 128-bits
|
||||||
|
int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } };
|
||||||
|
for (unsigned l = 0; l < 2; ++l) {
|
||||||
|
for (unsigned Input = 0; Input < 2; ++Input) {
|
||||||
|
if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (MinRange[l][Input] >= 0 && MinRange[l][Input] < (int)NumLaneElems)
|
||||||
|
ExtractIdx[l][Input] = 0;
|
||||||
|
else if (MinRange[l][Input] >= (int)NumLaneElems &&
|
||||||
|
MinRange[l][Input] < (int)NumElems)
|
||||||
|
ExtractIdx[l][Input] = NumLaneElems;
|
||||||
|
else
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DebugLoc dl = SVOp->getDebugLoc();
|
||||||
|
MVT EltVT = VT.getVectorElementType().getSimpleVT();
|
||||||
|
EVT NVT = MVT::getVectorVT(EltVT, NumElems/2);
|
||||||
|
|
||||||
|
SDValue Ops[2][2];
|
||||||
|
for (unsigned l = 0; l < 2; ++l) {
|
||||||
|
for (unsigned Input = 0; Input < 2; ++Input) {
|
||||||
|
if (ExtractIdx[l][Input] >= 0)
|
||||||
|
Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input),
|
||||||
|
DAG.getConstant(ExtractIdx[l][Input], MVT::i32),
|
||||||
|
DAG, dl);
|
||||||
|
else
|
||||||
|
Ops[l][Input] = DAG.getUNDEF(NVT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate 128-bit shuffles
|
||||||
|
SmallVector<int, 16> Mask1, Mask2;
|
||||||
|
for (unsigned i = 0; i != NumLaneElems; ++i) {
|
||||||
|
int Elt = SVOp->getMaskElt(i);
|
||||||
|
if (Elt >= (int)NumElems) {
|
||||||
|
Elt %= NumLaneElems;
|
||||||
|
Elt += NumLaneElems;
|
||||||
|
} else if (Elt >= 0) {
|
||||||
|
Elt %= NumLaneElems;
|
||||||
|
}
|
||||||
|
Mask1.push_back(Elt);
|
||||||
|
}
|
||||||
|
for (unsigned i = NumLaneElems; i != NumElems; ++i) {
|
||||||
|
int Elt = SVOp->getMaskElt(i);
|
||||||
|
if (Elt >= (int)NumElems) {
|
||||||
|
Elt %= NumLaneElems;
|
||||||
|
Elt += NumLaneElems;
|
||||||
|
} else if (Elt >= 0) {
|
||||||
|
Elt %= NumLaneElems;
|
||||||
|
}
|
||||||
|
Mask2.push_back(Elt);
|
||||||
|
}
|
||||||
|
|
||||||
|
SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]);
|
||||||
|
SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]);
|
||||||
|
|
||||||
|
// Concatenate the result back
|
||||||
|
SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1,
|
||||||
|
DAG.getConstant(0, MVT::i32), DAG, dl);
|
||||||
|
return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32),
|
||||||
|
DAG, dl);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
|
/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
|
||||||
|
Loading…
x
Reference in New Issue
Block a user