Merge detecting and handling for VSHUFPSY and VSHUFPDY since a lot of the code was similar for both.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145199 91177308-0d34-0410-b5e6-96231b3b80d8

commit 9d7025b56b
parent 064caf9f07
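
The two instructions encode their selectors differently, which is the one
real asymmetry the merged helper has to absorb. A rough summary of the
immediate layouts, as implied by the code in the hunks that follow (the
"sel" names are illustrative, not from the source):

    // VSHUFPSY (v8f32/v8i32): 2 bits per selector, and the mask of the
    // high 128-bit half must mirror the low half:
    //   imm8 = sel0 | (sel1 << 2) | (sel2 << 4) | (sel3 << 6)
    //
    // VSHUFPDY (v4f64/v4i64): 1 bit per selector, all four independent:
    //   imm8 = sel0 | (sel1 << 1) | (sel2 << 2) | (sel3 << 3)
    //
    // The merged code folds both into Mask |= Elt << (Shamt*Mul), with
    // Mul = 2 for eight elements and Mul = 1 for four.
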
@@ -3241,17 +3241,17 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
   return true;
 }
 
-/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// isVSHUFPYMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to 256-bit
 /// VSHUFPSY.
-static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+static bool isVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
                            const X86Subtarget *Subtarget) {
   int NumElems = VT.getVectorNumElements();
 
   if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
     return false;
 
-  if (NumElems != 8)
+  if (NumElems != 4 && NumElems != 8)
     return false;
 
   // VSHUFPSY divides the resulting vector into 4 chunks.
@@ -3264,6 +3264,15 @@ static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
   // DST  =>  Y7..Y4,   Y7..Y4,   X7..X4,   X7..X4,
   //          Y3..Y0,   Y3..Y0,   X3..X0,   X3..X0
   //
+  // VSHUFPDY divides the resulting vector into 4 chunks.
+  // The sources are also splitted into 4 chunks, and each destination
+  // chunk must come from a different source chunk.
+  //
+  // SRC1 => X3 X2 X1 X0
+  // SRC2 => Y3 Y2 Y1 Y0
+  //
+  // DST  =>  Y3..Y2,  X3..X2,  Y1..Y0,  X1..X0
+  //
   int QuarterSize = NumElems/4;
   int HalfSize = QuarterSize*2;
   for (int i = 0; i < QuarterSize; ++i)
@@ -3273,12 +3282,15 @@ static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
     if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
       return false;
 
-  // The mask of the second half must be the same as the first but with
-  // the appropriate offsets. This works in the same way as VPERMILPS
-  // works with masks.
+  // For VSHUFPSY, the mask of the second half must be the same as the first
+  // but with // the appropriate offsets. This works in the same way as
+  // VPERMILPS // works with masks.
   for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
     if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
       return false;
+    if (NumElems == 4)
+      continue;
+    // VSHUFPSY handling
     int FstHalfIdx = i-HalfSize;
     if (Mask[FstHalfIdx] < 0)
       continue;
@@ -3289,6 +3301,9 @@ static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
     if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
       return false;
     int FstHalfIdx = i-HalfSize;
+    if (NumElems == 4)
+      continue;
+    // VSHUFPSY handling
     if (Mask[FstHalfIdx] < 0)
       continue;
     if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
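
The two hunks above fold the VSHUFPDY case into the former isVSHUFPSYMask by
skipping the half-mirroring constraint when NumElems == 4. A self-contained
sketch of the merged predicate, assuming plain std::vector<int> masks with -1
for undef; the helpers reimplement (and merely shadow the names of) LLVM's
isUndefOrInRange/isUndefOrEqual, and the AVX/256-bit subtarget checks are
taken as already passed:

    #include <vector>

    // -1 encodes an undef element, as in LLVM shuffle masks.
    static bool isUndefOrInRange(int Val, int Low, int Hi) {
      return Val < 0 || (Val >= Low && Val < Hi);
    }
    static bool isUndefOrEqual(int Val, int CmpVal) {
      return Val < 0 || Val == CmpVal;
    }

    // Merged check for NumElems == 4 (VSHUFPDY) or NumElems == 8 (VSHUFPSY).
    static bool isVSHUFPYMaskSketch(const std::vector<int> &Mask) {
      int NumElems = (int)Mask.size();
      if (NumElems != 4 && NumElems != 8)
        return false;
      int QuarterSize = NumElems/4;
      int HalfSize = QuarterSize*2;
      // Each destination quarter reads one half of one source, in the order
      // SRC1-low, SRC2-low, SRC1-high, SRC2-high.
      int Lo[4] = {0, NumElems, HalfSize, NumElems + HalfSize};
      for (int q = 0; q < 4; ++q)
        for (int i = q*QuarterSize; i < (q+1)*QuarterSize; ++i) {
          if (!isUndefOrInRange(Mask[i], Lo[q], Lo[q] + HalfSize))
            return false;
          // VSHUFPSY only: the high-half selectors must repeat the low-half
          // ones, offset by HalfSize. VSHUFPDY has no such binding.
          if (NumElems == 4 || q < 2)
            continue;
          int FstHalfIdx = i - HalfSize;
          if (Mask[FstHalfIdx] < 0)
            continue;
          if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx] + HalfSize))
            return false;
        }
      return true;
    }
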
@@ -3299,89 +3314,28 @@ static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
   return true;
 }
 
-/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPSY instruction.
-static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
+/// getShuffleVSHUFPYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VSHUFPSY/VSHUFPDY instructions.
+static unsigned getShuffleVSHUFPYImmediate(SDNode *N) {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
   EVT VT = SVOp->getValueType(0);
   int NumElems = VT.getVectorNumElements();
 
-  assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
-         "Only supports v8i32 and v8f32 types");
+  assert(VT.getSizeInBits() == 256 && "Only supports 256-bit types");
+  assert((NumElems == 4 || NumElems == 8) && "Only supports v4 and v8 types");
 
   int HalfSize = NumElems/2;
+  unsigned Mul = (NumElems == 8) ? 2 : 1;
   unsigned Mask = 0;
   for (int i = 0; i != NumElems ; ++i) {
-    if (SVOp->getMaskElt(i) < 0)
+    int Elt = SVOp->getMaskElt(i);
+    if (Elt < 0)
       continue;
-    // The mask of the first half must be equal to the second one.
-    unsigned Shamt = (i%HalfSize)*2;
-    unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
-    Mask |= Elt << Shamt;
-  }
-
-  return Mask;
-}
-
-/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 256-bit
-/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
-/// version and the mask of the second half isn't binded with the first
-/// one.
-static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
-                           const X86Subtarget *Subtarget) {
-  int NumElems = VT.getVectorNumElements();
-
-  if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
-    return false;
-
-  if (NumElems != 4)
-    return false;
-
-  // VSHUFPSY divides the resulting vector into 4 chunks.
-  // The sources are also splitted into 4 chunks, and each destination
-  // chunk must come from a different source chunk.
-  //
-  // SRC1 => X3 X2 X1 X0
-  // SRC2 => Y3 Y2 Y1 Y0
-  //
-  // DST  =>  Y2..Y3,  X2..X3,  Y1..Y0,  X1..X0
-  //
-  int QuarterSize = NumElems/4;
-  int HalfSize = QuarterSize*2;
-  for (int i = 0; i < QuarterSize; ++i)
-    if (!isUndefOrInRange(Mask[i], 0, HalfSize))
-      return false;
-  for (int i = QuarterSize; i < QuarterSize*2; ++i)
-    if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
-      return false;
-  for (int i = QuarterSize*2; i < QuarterSize*3; ++i)
-    if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
-      return false;
-  for (int i = QuarterSize*3; i < NumElems; ++i)
-    if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
-      return false;
-
-  return true;
-}
-
-/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPDY instruction.
-static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
-  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-  EVT VT = SVOp->getValueType(0);
-  int NumElems = VT.getVectorNumElements();
-
-  assert(NumElems == 4 && VT.getSizeInBits() == 256 &&
-         "Only supports v4i64 and v4f64 types");
-
-  int HalfSize = NumElems/2;
-  unsigned Mask = 0;
-  for (int i = 0; i != NumElems ; ++i) {
-    if (SVOp->getMaskElt(i) < 0)
-      continue;
-    int Elt = SVOp->getMaskElt(i) % HalfSize;
-    Mask |= Elt << i;
+    Elt %= HalfSize;
+    unsigned Shamt = i;
+    // For VSHUFPSY, the mask of the first half must be equal to the second one.
+    if (NumElems == 8) Shamt %= HalfSize;
+    Mask |= Elt << (Shamt*Mul);
   }
 
   return Mask;
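
The merged immediate computation above can be exercised in isolation. A
minimal sketch of the same arithmetic (this mirrors the function above, not
the LLVM API, and the masks in main are made-up examples):

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Mul widens each selector to 2 bits for the 8 x 32-bit case and
    // 1 bit for the 4 x 64-bit case; -1 marks an undef element.
    static unsigned vshufpyImmediate(const std::vector<int> &MaskElts) {
      int NumElems = (int)MaskElts.size();
      assert(NumElems == 4 || NumElems == 8);
      int HalfSize = NumElems/2;
      unsigned Mul = (NumElems == 8) ? 2 : 1;
      unsigned Mask = 0;
      for (int i = 0; i != NumElems; ++i) {
        int Elt = MaskElts[i];
        if (Elt < 0)
          continue;                 // undef contributes no bits
        Elt %= HalfSize;            // selector is relative to a source half
        unsigned Shamt = i;
        if (NumElems == 8)
          Shamt %= HalfSize;        // PS: high half repeats the low half
        Mask |= Elt << (Shamt*Mul);
      }
      return Mask;
    }

    int main() {
      // VSHUFPDY, v4f64 mask <1,5,3,7>: one bit per element -> 0xf
      std::printf("%#x\n", vshufpyImmediate({1, 5, 3, 7}));
      // VSHUFPSY, v8f32 mask <2,3,8,9,6,7,12,13>: two bits each -> 0x4e
      std::printf("%#x\n", vshufpyImmediate({2, 3, 8, 9, 6, 7, 12, 13}));
      return 0;
    }
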
@@ -3417,8 +3371,7 @@ static bool isCommutedVSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT,
     CommutedMask.push_back(Mask[i]);
 
   CommuteVectorShuffleMask(CommutedMask, VT);
-  return (NumElems == 4) ? isVSHUFPDYMask(CommutedMask, VT, Subtarget):
-                           isVSHUFPSYMask(CommutedMask, VT, Subtarget);
+  return isVSHUFPYMask(CommutedMask, VT, Subtarget);
 }
 
 
@@ -6917,14 +6870,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
                                 getShuffleVPERM2F128Immediate(SVOp), DAG);
 
   // Handle VSHUFPSY permutations
-  if (isVSHUFPSYMask(M, VT, Subtarget))
+  if (isVSHUFPYMask(M, VT, Subtarget))
     return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
-                                getShuffleVSHUFPSYImmediate(SVOp), DAG);
+                                getShuffleVSHUFPYImmediate(SVOp), DAG);
 
-  // Handle VSHUFPDY permutations
-  if (isVSHUFPDYMask(M, VT, Subtarget))
-    return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
-                                getShuffleVSHUFPDYImmediate(SVOp), DAG);
-
   // Try to swap operands in the node to match x86 shuffle ops
   if (isCommutedVSHUFPMask(M, VT, Subtarget)) {
@@ -6932,9 +6880,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     SVOp = cast<ShuffleVectorSDNode>(CommuteVectorShuffle(SVOp, DAG));
     V1 = SVOp->getOperand(0);
     V2 = SVOp->getOperand(1);
-    unsigned Immediate = (NumElems == 4) ? getShuffleVSHUFPDYImmediate(SVOp):
-                                           getShuffleVSHUFPSYImmediate(SVOp);
-    return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, Immediate, DAG);
+    return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
+                                getShuffleVSHUFPYImmediate(SVOp), DAG);
   }
 
   //===--------------------------------------------------------------------===//
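
For the commuted path in the last two hunks: swapping the operands of a
shuffle re-points every mask index at the other source, after which the one
merged predicate and immediate helper apply unchanged. A sketch of that swap
under the same conventions as above (the in-tree CommuteVectorShuffleMask
also deals with the EVT plumbing this ignores):

    #include <vector>

    static void commuteShuffleMask(std::vector<int> &Mask) {
      int NumElems = (int)Mask.size();
      for (int &Elt : Mask) {
        if (Elt < 0)
          continue;                // undef stays undef
        Elt = (Elt < NumElems) ? Elt + NumElems : Elt - NumElems;
      }
    }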