mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-13 09:33:50 +00:00
PR2957
ISD::VECTOR_SHUFFLE now stores an array of integers representing the shuffle mask internal to the node, rather than taking a BUILD_VECTOR of ConstantSDNodes as the shuffle mask. A value of -1 represents UNDEF.

In addition to eliminating the creation of illegal BUILD_VECTORs just to represent shuffle masks, we are better about canonicalizing the shuffle mask, resulting in substantially better code for some classes of shuffles.

A cleanup of the x86 shuffle code, and some canonicalizing in DAGCombiner, is next.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@69952 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
98d07102d6
commit
b706d29f9c
@ -353,6 +353,13 @@ public:
|
||||
SDValue getConvertRndSat(MVT VT, DebugLoc dl, SDValue Val, SDValue DTy,
|
||||
SDValue STy,
|
||||
SDValue Rnd, SDValue Sat, ISD::CvtCode Code);
|
||||
|
||||
/// getVectorShuffle - Return an ISD::VECTOR_SHUFFLE node. The number of
|
||||
/// elements in VT, which must be a vector type, must match the number of
|
||||
/// mask elements in MaskElts. A negative integer mask element is treated as
|
||||
/// undefined.
|
||||
SDValue getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, SDValue N2,
|
||||
const int *MaskElts);
|
||||
|
||||
/// getZeroExtendInReg - Return the expression required to zero extend the Op
|
||||
/// value assuming it was the smaller SrcTy value.
|
||||
|
@ -1703,6 +1703,32 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class ShuffleVectorSDNode : public SDNode {
|
||||
SDUse Ops[2];
|
||||
int *Mask;
|
||||
protected:
|
||||
friend class SelectionDAG;
|
||||
ShuffleVectorSDNode(MVT VT, DebugLoc dl, SDValue N1, SDValue N2, int *M)
|
||||
: SDNode(ISD::VECTOR_SHUFFLE, dl, getSDVTList(VT)), Mask(M) {
|
||||
InitOperands(Ops, N1, N2);
|
||||
}
|
||||
public:
|
||||
|
||||
const int * getMask() const { return Mask; }
|
||||
|
||||
bool isSplat() { return isSplatMask(Mask, getValueType(0)); }
|
||||
int getSplatIndex() {
|
||||
assert(isSplat() && "Cannot get splat index for non-splat!");
|
||||
return Mask[0];
|
||||
}
|
||||
static bool isSplatMask(const int *Mask, MVT VT);
|
||||
|
||||
static bool classof(const ShuffleVectorSDNode *) { return true; }
|
||||
static bool classof(const SDNode *N) {
|
||||
return N->getOpcode() == ISD::VECTOR_SHUFFLE;
|
||||
}
|
||||
};
|
||||
|
||||
class ConstantSDNode : public SDNode {
|
||||
const ConstantInt *Value;
|
||||
friend class SelectionDAG;
|
||||
@ -2084,7 +2110,7 @@ public:
|
||||
return N->getOpcode() == ISD::CONDCODE;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// CvtRndSatSDNode - NOTE: avoid using this node as this may disappear in the
|
||||
/// future and most targets don't support it.
|
||||
class CvtRndSatSDNode : public SDNode {
|
||||
|
@ -328,7 +328,7 @@ public:
|
||||
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
|
||||
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
|
||||
/// are assumed to be legal.
|
||||
virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const {
|
||||
virtual bool isShuffleMaskLegal(const int *Mask, MVT VT) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -336,9 +336,7 @@ public:
|
||||
/// used by Targets can use this to indicate if there is a suitable
|
||||
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
|
||||
/// pool entry.
|
||||
virtual bool isVectorClearMaskLegal(const std::vector<SDValue> &BVOps,
|
||||
MVT EVT,
|
||||
SelectionDAG &DAG) const {
|
||||
virtual bool isVectorClearMaskLegal(const int *Mask, MVT VT) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -51,15 +51,6 @@ class SDTCisOpSmallerThanOp<int SmallOp, int BigOp> : SDTypeConstraint<SmallOp>{
|
||||
int BigOperandNum = BigOp;
|
||||
}
|
||||
|
||||
/// SDTCisIntVectorOfSameSize - This indicates that ThisOp and OtherOp are
|
||||
/// vector types, and that ThisOp is the result of
|
||||
/// MVT::getIntVectorWithNumElements with the number of elements
|
||||
/// that ThisOp has.
|
||||
class SDTCisIntVectorOfSameSize<int ThisOp, int OtherOp>
|
||||
: SDTypeConstraint<ThisOp> {
|
||||
int OtherOpNum = OtherOp;
|
||||
}
|
||||
|
||||
/// SDTCisEltOfVec - This indicates that ThisOp is a scalar type of the same
|
||||
/// type as the element type of OtherOp, which is a vector type.
|
||||
class SDTCisEltOfVec<int ThisOp, int OtherOp>
|
||||
@ -175,8 +166,8 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store
|
||||
SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3>
|
||||
]>;
|
||||
|
||||
// Vector shuffle: one result and two operands, all constrained to the same
// type.  The shuffle mask is stored on the node, so it is not an operand.
def SDTVecShuffle : SDTypeProfile<1, 2, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>;
|
||||
def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract
|
||||
SDTCisEltOfVec<0, 1>, SDTCisPtrTy<2>
|
||||
|
@ -5098,7 +5098,21 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
|
||||
InVec.getValueType(), &Ops[0], Ops.size());
|
||||
}
|
||||
// If the invec is an UNDEF and if EltNo is a constant, create a new
|
||||
// BUILD_VECTOR with undef elements and the inserted element.
|
||||
if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
|
||||
isa<ConstantSDNode>(EltNo)) {
|
||||
MVT VT = InVec.getValueType();
|
||||
MVT EVT = VT.getVectorElementType();
|
||||
unsigned NElts = VT.getVectorNumElements();
|
||||
SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT));
|
||||
|
||||
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
|
||||
if (Elt < Ops.size())
|
||||
Ops[Elt] = InVal;
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
|
||||
InVec.getValueType(), &Ops[0], Ops.size());
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
@ -5160,9 +5174,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
|
||||
// to examine the mask.
|
||||
if (BCNumEltsChanged)
|
||||
return SDValue();
|
||||
unsigned Idx = cast<ConstantSDNode>(InVec.getOperand(2).
|
||||
getOperand(Elt))->getZExtValue();
|
||||
unsigned NumElems = InVec.getOperand(2).getNumOperands();
|
||||
int Idx = cast<ShuffleVectorSDNode>(InVec)->getMask()[Elt];
|
||||
int NumElems = InVec.getValueType().getVectorNumElements();
|
||||
InVec = (Idx < NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
|
||||
if (InVec.getOpcode() == ISD::BIT_CONVERT)
|
||||
InVec = InVec.getOperand(0);
|
||||
@ -5209,7 +5222,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
|
||||
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
|
||||
unsigned NumInScalars = N->getNumOperands();
|
||||
MVT VT = N->getValueType(0);
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
MVT EltType = VT.getVectorElementType();
|
||||
|
||||
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
|
||||
@ -5252,56 +5264,36 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
|
||||
}
|
||||
|
||||
// If everything is good, we can make a shuffle operation.
|
||||
MVT IndexVT = MVT::i32;
|
||||
if (VecIn1.getNode()) {
|
||||
SmallVector<SDValue, 8> BuildVecIndices;
|
||||
SmallVector<int, 8> Mask;
|
||||
for (unsigned i = 0; i != NumInScalars; ++i) {
|
||||
if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
|
||||
BuildVecIndices.push_back(DAG.getUNDEF(IndexVT));
|
||||
Mask.push_back(-1);
|
||||
continue;
|
||||
}
|
||||
|
||||
SDValue Extract = N->getOperand(i);
|
||||
|
||||
// If extracting from the first vector, just use the index directly.
|
||||
SDValue Extract = N->getOperand(i);
|
||||
SDValue ExtVal = Extract.getOperand(1);
|
||||
if (Extract.getOperand(0) == VecIn1) {
|
||||
if (ExtVal.getValueType() == IndexVT)
|
||||
BuildVecIndices.push_back(ExtVal);
|
||||
else {
|
||||
unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
|
||||
BuildVecIndices.push_back(DAG.getConstant(Idx, IndexVT));
|
||||
}
|
||||
Mask.push_back(cast<ConstantSDNode>(ExtVal)->getZExtValue());
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise, use InIdx + VecSize
|
||||
unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
|
||||
BuildVecIndices.push_back(DAG.getConstant(Idx+NumInScalars, IndexVT));
|
||||
Mask.push_back(Idx+NumInScalars);
|
||||
}
|
||||
|
||||
// Add count and size info.
|
||||
MVT BuildVecVT = MVT::getVectorVT(IndexVT, NumElts);
|
||||
if (!TLI.isTypeLegal(BuildVecVT) && LegalTypes)
|
||||
if (!TLI.isTypeLegal(VT) && LegalTypes)
|
||||
return SDValue();
|
||||
|
||||
// Return the new VECTOR_SHUFFLE node.
|
||||
SDValue Ops[5];
|
||||
SDValue Ops[2];
|
||||
Ops[0] = VecIn1;
|
||||
if (VecIn2.getNode()) {
|
||||
Ops[1] = VecIn2;
|
||||
} else {
|
||||
// Use an undef build_vector as input for the second operand.
|
||||
std::vector<SDValue> UnOps(NumInScalars,
|
||||
DAG.getUNDEF(EltType));
|
||||
Ops[1] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
|
||||
&UnOps[0], UnOps.size());
|
||||
AddToWorkList(Ops[1].getNode());
|
||||
}
|
||||
|
||||
Ops[2] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), BuildVecVT,
|
||||
&BuildVecIndices[0], BuildVecIndices.size());
|
||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), VT, Ops, 3);
|
||||
Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
|
||||
return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
@ -5321,8 +5313,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
SDValue ShufMask = N->getOperand(2);
|
||||
unsigned NumElts = ShufMask.getNumOperands();
|
||||
return SDValue();
|
||||
|
||||
MVT VT = N->getValueType(0);
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
@ -5330,60 +5324,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
assert(N0.getValueType().getVectorNumElements() == NumElts &&
|
||||
"Vector shuffle must be normalized in DAG");
|
||||
|
||||
// If the shuffle mask is an identity operation on the LHS, return the LHS.
|
||||
bool isIdentity = true;
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
|
||||
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() != i) {
|
||||
isIdentity = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isIdentity) return N->getOperand(0);
|
||||
|
||||
// If the shuffle mask is an identity operation on the RHS, return the RHS.
|
||||
isIdentity = true;
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
|
||||
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() !=
|
||||
i+NumElts) {
|
||||
isIdentity = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isIdentity) return N->getOperand(1);
|
||||
|
||||
// Check if the shuffle is a unary shuffle, i.e. one of the vectors is not
|
||||
// needed at all.
|
||||
bool isUnary = true;
|
||||
bool isSplat = true;
|
||||
int VecNum = -1;
|
||||
unsigned BaseIdx = 0;
|
||||
for (unsigned i = 0; i != NumElts; ++i)
|
||||
if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF) {
|
||||
unsigned Idx=cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue();
|
||||
int V = (Idx < NumElts) ? 0 : 1;
|
||||
if (VecNum == -1) {
|
||||
VecNum = V;
|
||||
BaseIdx = Idx;
|
||||
} else {
|
||||
if (BaseIdx != Idx)
|
||||
isSplat = false;
|
||||
if (VecNum != V) {
|
||||
isUnary = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Normalize unary shuffle so the RHS is undef.
|
||||
if (isUnary && VecNum == 1)
|
||||
std::swap(N0, N1);
|
||||
// FIXME: implement canonicalizations from DAG.getVectorShuffle()
|
||||
|
||||
// If it is a splat, check if the argument vector is a build_vector with
|
||||
// all scalar elements the same.
|
||||
if (isSplat) {
|
||||
if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
|
||||
SDNode *V = N0.getNode();
|
||||
|
||||
|
||||
// If this is a bit convert that changes the element type of the vector but
|
||||
// not the number of vector elements, look through it. Be careful not to
|
||||
@ -5397,6 +5344,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
|
||||
if (V->getOpcode() == ISD::BUILD_VECTOR) {
|
||||
unsigned NumElems = V->getNumOperands();
|
||||
unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
|
||||
if (NumElems > BaseIdx) {
|
||||
SDValue Base;
|
||||
bool AllSame = true;
|
||||
@ -5421,38 +5369,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If it is a unary or the LHS and the RHS are the same node, turn the RHS
|
||||
// into an undef.
|
||||
if (isUnary || N0 == N1) {
|
||||
// Check the SHUFFLE mask, mapping any inputs from the 2nd operand into the
|
||||
// first operand.
|
||||
SmallVector<SDValue, 8> MappedOps;
|
||||
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF ||
|
||||
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() <
|
||||
NumElts) {
|
||||
MappedOps.push_back(ShufMask.getOperand(i));
|
||||
} else {
|
||||
unsigned NewIdx =
|
||||
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() -
|
||||
NumElts;
|
||||
MappedOps.push_back(DAG.getConstant(NewIdx,
|
||||
ShufMask.getOperand(i).getValueType()));
|
||||
}
|
||||
}
|
||||
|
||||
ShufMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
|
||||
ShufMask.getValueType(),
|
||||
&MappedOps[0], MappedOps.size());
|
||||
AddToWorkList(ShufMask.getNode());
|
||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(),
|
||||
N->getValueType(0), N0,
|
||||
DAG.getUNDEF(N->getValueType(0)),
|
||||
ShufMask);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
@ -5461,52 +5377,42 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
||||
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
|
||||
/// vector_shuffle V, Zero, <0, 4, 2, 4>
|
||||
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
|
||||
MVT VT = N->getValueType(0);
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = N->getOperand(1);
|
||||
if (N->getOpcode() == ISD::AND) {
|
||||
if (RHS.getOpcode() == ISD::BIT_CONVERT)
|
||||
RHS = RHS.getOperand(0);
|
||||
if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
|
||||
std::vector<SDValue> IdxOps;
|
||||
unsigned NumOps = RHS.getNumOperands();
|
||||
unsigned NumElts = NumOps;
|
||||
SmallVector<int, 8> Indices;
|
||||
unsigned NumElts = RHS.getNumOperands();
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
SDValue Elt = RHS.getOperand(i);
|
||||
if (!isa<ConstantSDNode>(Elt))
|
||||
return SDValue();
|
||||
else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
|
||||
IdxOps.push_back(DAG.getIntPtrConstant(i));
|
||||
Indices.push_back(i);
|
||||
else if (cast<ConstantSDNode>(Elt)->isNullValue())
|
||||
IdxOps.push_back(DAG.getIntPtrConstant(NumElts));
|
||||
Indices.push_back(NumElts);
|
||||
else
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Let's see if the target supports this vector_shuffle.
|
||||
if (!TLI.isVectorClearMaskLegal(IdxOps, TLI.getPointerTy(), DAG))
|
||||
MVT RVT = RHS.getValueType();
|
||||
if (!TLI.isVectorClearMaskLegal(&Indices[0], RVT))
|
||||
return SDValue();
|
||||
|
||||
// Return the new VECTOR_SHUFFLE node.
|
||||
MVT EVT = RHS.getValueType().getVectorElementType();
|
||||
MVT VT = MVT::getVectorVT(EVT, NumElts);
|
||||
MVT MaskVT = MVT::getVectorVT(TLI.getPointerTy(), NumElts);
|
||||
std::vector<SDValue> Ops;
|
||||
LHS = DAG.getNode(ISD::BIT_CONVERT, LHS.getDebugLoc(), VT, LHS);
|
||||
Ops.push_back(LHS);
|
||||
AddToWorkList(LHS.getNode());
|
||||
std::vector<SDValue> ZeroOps(NumElts, DAG.getConstant(0, EVT));
|
||||
Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
|
||||
VT, &ZeroOps[0], ZeroOps.size()));
|
||||
Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
|
||||
MaskVT, &IdxOps[0], IdxOps.size()));
|
||||
SDValue Result = DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(),
|
||||
VT, &Ops[0], Ops.size());
|
||||
|
||||
if (VT != N->getValueType(0))
|
||||
Result = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
|
||||
N->getValueType(0), Result);
|
||||
|
||||
return Result;
|
||||
MVT EVT = RVT.getVectorElementType();
|
||||
SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
|
||||
DAG.getConstant(0, EVT));
|
||||
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
|
||||
RVT, &ZeroOps[0], ZeroOps.size());
|
||||
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
|
||||
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -267,16 +267,10 @@ private:
|
||||
bool isVolatile, SDValue ValOp,
|
||||
unsigned StWidth, DebugLoc dl);
|
||||
|
||||
/// isShuffleLegal - Return non-null if a vector shuffle is legal with the
|
||||
/// specified mask and type. Targets can specify exactly which masks they
|
||||
/// support and the code generator is tasked with not creating illegal masks.
|
||||
///
|
||||
/// Note that this will also return true for shuffles that are promoted to a
|
||||
/// different type.
|
||||
///
|
||||
/// If this is a legal shuffle, this method returns the (possibly promoted)
|
||||
/// build_vector Mask. If it's not a legal shuffle, it returns null.
|
||||
SDNode *isShuffleLegal(MVT VT, SDValue Mask) const;
|
||||
/// promoteShuffle - Promote a shuffle mask of a vector VT to perform the
|
||||
/// same shuffle on a vector of NVT. Must not create an illegal shuffle mask.
|
||||
SDValue promoteShuffle(MVT NVT, MVT VT, DebugLoc dl, SDValue N1, SDValue N2,
|
||||
const int *Mask) const;
|
||||
|
||||
bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
|
||||
SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
|
||||
@ -319,50 +313,35 @@ private:
|
||||
};
|
||||
}
|
||||
|
||||
/// isVectorShuffleLegal - Return true if a vector shuffle is legal with the
|
||||
/// specified mask and type. Targets can specify exactly which masks they
|
||||
/// support and the code generator is tasked with not creating illegal masks.
|
||||
///
|
||||
/// Note that this will also return true for shuffles that are promoted to a
|
||||
/// different type.
|
||||
SDNode *SelectionDAGLegalize::isShuffleLegal(MVT VT, SDValue Mask) const {
|
||||
switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) {
|
||||
default: return 0;
|
||||
case TargetLowering::Legal:
|
||||
case TargetLowering::Custom:
|
||||
break;
|
||||
case TargetLowering::Promote: {
|
||||
// If this is promoted to a different type, convert the shuffle mask and
|
||||
// ask if it is legal in the promoted type!
|
||||
MVT NVT = TLI.getTypeToPromoteTo(ISD::VECTOR_SHUFFLE, VT);
|
||||
MVT EltVT = NVT.getVectorElementType();
|
||||
/// promoteShuffle - Promote a shuffle mask of a vector VT to perform the
|
||||
/// same shuffle on a vector of NVT. Must not create an illegal shuffle mask.
|
||||
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
|
||||
SDValue SelectionDAGLegalize::promoteShuffle(MVT NVT, MVT VT, DebugLoc dl,
|
||||
SDValue N1, SDValue N2,
|
||||
const int *Mask) const {
|
||||
MVT EltVT = NVT.getVectorElementType();
|
||||
int NumMaskElts = VT.getVectorNumElements();
|
||||
int NumDestElts = NVT.getVectorNumElements();
|
||||
unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
|
||||
|
||||
// If we changed # elements, change the shuffle mask.
|
||||
unsigned NumEltsGrowth =
|
||||
NVT.getVectorNumElements() / VT.getVectorNumElements();
|
||||
assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
|
||||
if (NumEltsGrowth > 1) {
|
||||
// Renumber the elements.
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
for (unsigned i = 0, e = Mask.getNumOperands(); i != e; ++i) {
|
||||
SDValue InOp = Mask.getOperand(i);
|
||||
for (unsigned j = 0; j != NumEltsGrowth; ++j) {
|
||||
if (InOp.getOpcode() == ISD::UNDEF)
|
||||
Ops.push_back(DAG.getUNDEF(EltVT));
|
||||
else {
|
||||
unsigned InEltNo = cast<ConstantSDNode>(InOp)->getZExtValue();
|
||||
Ops.push_back(DAG.getConstant(InEltNo*NumEltsGrowth+j, EltVT));
|
||||
}
|
||||
}
|
||||
}
|
||||
Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(),
|
||||
NVT, &Ops[0], Ops.size());
|
||||
assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
|
||||
|
||||
if (NumEltsGrowth == 1)
|
||||
return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask);
|
||||
|
||||
SmallVector<int, 8> NewMask;
|
||||
for (int i = 0; i != NumMaskElts; ++i) {
|
||||
int Idx = Mask[i];
|
||||
for (unsigned j = 0; j != NumEltsGrowth; ++j) {
|
||||
if (Idx < 0)
|
||||
NewMask.push_back(-1);
|
||||
else
|
||||
NewMask.push_back(Idx * NumEltsGrowth + j);
|
||||
}
|
||||
VT = NVT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return TLI.isShuffleMaskLegal(Mask, VT) ? Mask.getNode() : 0;
|
||||
assert((int)NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
|
||||
assert(TLI.isShuffleMaskLegal(&Mask[0], NVT) && "Shuffle not legal?");
|
||||
return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
|
||||
}
|
||||
|
||||
SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
|
||||
@ -1652,25 +1631,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
|
||||
Tmp1.getValueType(), Tmp2);
|
||||
|
||||
unsigned NumElts = Tmp1.getValueType().getVectorNumElements();
|
||||
MVT ShufMaskVT =
|
||||
MVT::getIntVectorWithNumElements(NumElts);
|
||||
MVT ShufMaskEltVT = ShufMaskVT.getVectorElementType();
|
||||
|
||||
// We generate a shuffle of InVec and ScVec, so the shuffle mask
|
||||
// should be 0,1,2,3,4,5... with the appropriate element replaced with
|
||||
// elt 0 of the RHS.
|
||||
SmallVector<SDValue, 8> ShufOps;
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
if (i != InsertPos->getZExtValue())
|
||||
ShufOps.push_back(DAG.getConstant(i, ShufMaskEltVT));
|
||||
else
|
||||
ShufOps.push_back(DAG.getConstant(NumElts, ShufMaskEltVT));
|
||||
}
|
||||
SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, ShufMaskVT,
|
||||
&ShufOps[0], ShufOps.size());
|
||||
|
||||
Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Tmp1.getValueType(),
|
||||
Tmp1, ScVec, ShufMask);
|
||||
SmallVector<int, 8> ShufOps;
|
||||
for (unsigned i = 0; i != NumElts; ++i)
|
||||
ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
|
||||
|
||||
Result = DAG.getVectorShuffle(Tmp1.getValueType(), dl, Tmp1, ScVec,
|
||||
&ShufOps[0]);
|
||||
Result = LegalizeOp(Result);
|
||||
break;
|
||||
}
|
||||
@ -1708,13 +1677,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
|
||||
case ISD::VECTOR_SHUFFLE:
|
||||
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the input vectors,
|
||||
Tmp2 = LegalizeOp(Node->getOperand(1)); // but not the shuffle mask.
|
||||
Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
|
||||
Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
|
||||
|
||||
// Allow targets to custom lower the SHUFFLEs they support.
|
||||
switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, Result.getValueType())){
|
||||
default: assert(0 && "Unknown operation action!");
|
||||
case TargetLowering::Legal:
|
||||
assert(isShuffleLegal(Result.getValueType(), Node->getOperand(2)) &&
|
||||
assert(TLI.isShuffleMaskLegal(cast<ShuffleVectorSDNode>(Node)->getMask(),
|
||||
Result.getValueType()) &&
|
||||
"vector shuffle should not be created if not legal!");
|
||||
break;
|
||||
case TargetLowering::Custom:
|
||||
@ -1728,23 +1698,21 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
|
||||
MVT VT = Node->getValueType(0);
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
MVT PtrVT = TLI.getPointerTy();
|
||||
SDValue Mask = Node->getOperand(2);
|
||||
unsigned NumElems = Mask.getNumOperands();
|
||||
const int *Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
|
||||
int NumElems = VT.getVectorNumElements();
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
for (unsigned i = 0; i != NumElems; ++i) {
|
||||
SDValue Arg = Mask.getOperand(i);
|
||||
if (Arg.getOpcode() == ISD::UNDEF) {
|
||||
for (int i = 0; i != NumElems; ++i) {
|
||||
if (Mask[i] < 0) {
|
||||
Ops.push_back(DAG.getUNDEF(EltVT));
|
||||
} else {
|
||||
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
|
||||
unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
|
||||
if (Idx < NumElems)
|
||||
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp1,
|
||||
DAG.getConstant(Idx, PtrVT)));
|
||||
else
|
||||
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp2,
|
||||
DAG.getConstant(Idx - NumElems, PtrVT)));
|
||||
continue;
|
||||
}
|
||||
int Idx = Mask[i];
|
||||
if (Idx < NumElems)
|
||||
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp1,
|
||||
DAG.getConstant(Idx, PtrVT)));
|
||||
else
|
||||
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp2,
|
||||
DAG.getConstant(Idx - NumElems, PtrVT)));
|
||||
}
|
||||
Result = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
|
||||
break;
|
||||
@ -1759,9 +1727,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
|
||||
Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Tmp2);
|
||||
|
||||
// Convert the shuffle mask to the right # elements.
|
||||
Tmp3 = SDValue(isShuffleLegal(OVT, Node->getOperand(2)), 0);
|
||||
assert(Tmp3.getNode() && "Shuffle not legal?");
|
||||
Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NVT, Tmp1, Tmp2, Tmp3);
|
||||
Result = promoteShuffle(NVT, OVT, dl, Tmp1, Tmp2,
|
||||
cast<ShuffleVectorSDNode>(Node)->getMask());
|
||||
Result = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Result);
|
||||
break;
|
||||
}
|
||||
@ -5490,6 +5457,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
|
||||
|
||||
// FIXME: it would be far nicer to change this into map<SDValue,uint64_t>
|
||||
// and use a bitmask instead of a list of elements.
|
||||
// FIXME: this doesn't treat <0, u, 0, u> for example, as a splat.
|
||||
std::map<SDValue, std::vector<unsigned> > Values;
|
||||
Values[SplatValue].push_back(0);
|
||||
bool isConstant = true;
|
||||
@ -5546,21 +5514,17 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
|
||||
|
||||
if (SplatValue.getNode()) { // Splat of one value?
|
||||
// Build the shuffle constant vector: <0, 0, 0, 0>
|
||||
MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
|
||||
SDValue Zero = DAG.getConstant(0, MaskVT.getVectorElementType());
|
||||
std::vector<SDValue> ZeroVec(NumElems, Zero);
|
||||
SDValue SplatMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
|
||||
&ZeroVec[0], ZeroVec.size());
|
||||
SmallVector<int, 8> ZeroVec(NumElems, 0);
|
||||
|
||||
// If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
|
||||
if (isShuffleLegal(VT, SplatMask)) {
|
||||
if (TLI.isShuffleMaskLegal(&ZeroVec[0], Node->getValueType(0))) {
|
||||
// Get the splatted value into the low element of a vector register.
|
||||
SDValue LowValVec =
|
||||
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue);
|
||||
|
||||
// Return shuffle(LowValVec, undef, <0,0,0,0>)
|
||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LowValVec,
|
||||
DAG.getUNDEF(VT), SplatMask);
|
||||
return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT),
|
||||
&ZeroVec[0]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5582,35 +5546,25 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
|
||||
std::swap(Val1, Val2);
|
||||
|
||||
// Build the shuffle constant vector: e.g. <0, 4, 0, 4>
|
||||
MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
|
||||
MVT MaskEltVT = MaskVT.getVectorElementType();
|
||||
std::vector<SDValue> MaskVec(NumElems);
|
||||
SmallVector<int, 8> ShuffleMask(NumElems, -1);
|
||||
|
||||
// Set elements of the shuffle mask for Val1.
|
||||
std::vector<unsigned> &Val1Elts = Values[Val1];
|
||||
for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i)
|
||||
MaskVec[Val1Elts[i]] = DAG.getConstant(0, MaskEltVT);
|
||||
ShuffleMask[Val1Elts[i]] = 0;
|
||||
|
||||
// Set elements of the shuffle mask for Val2.
|
||||
std::vector<unsigned> &Val2Elts = Values[Val2];
|
||||
for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i)
|
||||
if (Val2.getOpcode() != ISD::UNDEF)
|
||||
MaskVec[Val2Elts[i]] = DAG.getConstant(NumElems, MaskEltVT);
|
||||
else
|
||||
MaskVec[Val2Elts[i]] = DAG.getUNDEF(MaskEltVT);
|
||||
|
||||
SDValue ShuffleMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
|
||||
&MaskVec[0], MaskVec.size());
|
||||
ShuffleMask[Val2Elts[i]] = NumElems;
|
||||
|
||||
// If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it.
|
||||
if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) &&
|
||||
isShuffleLegal(VT, ShuffleMask)) {
|
||||
TLI.isShuffleMaskLegal(&ShuffleMask[0], VT)) {
|
||||
Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1);
|
||||
Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2);
|
||||
SDValue Ops[] = { Val1, Val2, ShuffleMask };
|
||||
|
||||
// Return shuffle(LoValVec, HiValVec, <0,1,0,1>)
|
||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Ops, 3);
|
||||
return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -8066,36 +8020,19 @@ SDValue SelectionDAGLegalize::WidenVectorOp(SDValue Op, MVT WidenVT) {
|
||||
case ISD::VECTOR_SHUFFLE: {
|
||||
SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
|
||||
SDValue Tmp2 = WidenVectorOp(Node->getOperand(1), WidenVT);
|
||||
// VECTOR_SHUFFLE 3rd operand must be a constant build vector that is
|
||||
// used as permutation array. We build the vector here instead of widening
|
||||
// because we don't want to legalize and have it turned to something else.
|
||||
SDValue PermOp = Node->getOperand(2);
|
||||
SDValueVector NewOps;
|
||||
MVT PVT = PermOp.getValueType().getVectorElementType();
|
||||
const int *Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
|
||||
SmallVector<int, 8> NewMask;
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
if (PermOp.getOperand(i).getOpcode() == ISD::UNDEF) {
|
||||
NewOps.push_back(PermOp.getOperand(i));
|
||||
} else {
|
||||
unsigned Idx =
|
||||
cast<ConstantSDNode>(PermOp.getOperand(i))->getZExtValue();
|
||||
if (Idx < NumElts) {
|
||||
NewOps.push_back(PermOp.getOperand(i));
|
||||
}
|
||||
else {
|
||||
NewOps.push_back(DAG.getConstant(Idx + NewNumElts - NumElts,
|
||||
PermOp.getOperand(i).getValueType()));
|
||||
}
|
||||
}
|
||||
int Idx = Mask[i];
|
||||
if (Idx < (int)NumElts)
|
||||
NewMask.push_back(Idx);
|
||||
else
|
||||
NewMask.push_back(Idx + NewNumElts - NumElts);
|
||||
}
|
||||
for (unsigned i = NumElts; i < NewNumElts; ++i) {
|
||||
NewOps.push_back(DAG.getUNDEF(PVT));
|
||||
}
|
||||
|
||||
SDValue Tmp3 = DAG.getNode(ISD::BUILD_VECTOR, dl,
|
||||
MVT::getVectorVT(PVT, NewOps.size()),
|
||||
&NewOps[0], NewOps.size());
|
||||
|
||||
Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, Tmp1, Tmp2, Tmp3);
|
||||
for (unsigned i = NumElts; i < NewNumElts; ++i)
|
||||
NewMask.push_back(-1);
|
||||
|
||||
Result = DAG.getVectorShuffle(WidenVT, dl, Tmp1, Tmp2, &NewMask[0]);
|
||||
break;
|
||||
}
|
||||
case ISD::LOAD: {
|
||||
|
@ -772,10 +772,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
|
||||
// If Lo or Hi uses elements from at most two of the four input vectors, then
|
||||
// express it as a vector shuffle of those two inputs. Otherwise extract the
|
||||
// input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
|
||||
SDValue Mask = N->getOperand(2);
|
||||
MVT IdxVT = Mask.getValueType().getVectorElementType();
|
||||
SmallVector<SDValue, 16> Ops;
|
||||
Ops.reserve(NewElts);
|
||||
const int *Mask = cast<ShuffleVectorSDNode>(N)->getMask();
|
||||
SmallVector<int, 16> Ops;
|
||||
for (unsigned High = 0; High < 2; ++High) {
|
||||
SDValue &Output = High ? Hi : Lo;
|
||||
|
||||
@ -787,18 +785,15 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
|
||||
unsigned FirstMaskIdx = High * NewElts;
|
||||
bool useBuildVector = false;
|
||||
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
|
||||
SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
|
||||
|
||||
// The mask element. This indexes into the input.
|
||||
unsigned Idx = Arg.getOpcode() == ISD::UNDEF ?
|
||||
-1U : cast<ConstantSDNode>(Arg)->getZExtValue();
|
||||
int Idx = Mask[FirstMaskIdx + MaskOffset];
|
||||
|
||||
// The input vector this mask element indexes into.
|
||||
unsigned Input = Idx / NewElts;
|
||||
unsigned Input = (unsigned)Idx / NewElts;
|
||||
|
||||
if (Input >= array_lengthof(Inputs)) {
|
||||
// The mask element does not index into any input vector.
|
||||
Ops.push_back(DAG.getUNDEF(IdxVT));
|
||||
Ops.push_back(-1);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -826,27 +821,24 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
|
||||
}
|
||||
|
||||
// Add the mask index for the new shuffle vector.
|
||||
Ops.push_back(DAG.getConstant(Idx + OpNo * NewElts, IdxVT));
|
||||
Ops.push_back(Idx + OpNo * NewElts);
|
||||
}
|
||||
|
||||
if (useBuildVector) {
|
||||
MVT EltVT = NewVT.getVectorElementType();
|
||||
Ops.clear();
|
||||
SmallVector<SDValue, 16> SVOps;
|
||||
|
||||
// Extract the input elements by hand.
|
||||
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
|
||||
SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
|
||||
|
||||
// The mask element. This indexes into the input.
|
||||
unsigned Idx = Arg.getOpcode() == ISD::UNDEF ?
|
||||
-1U : cast<ConstantSDNode>(Arg)->getZExtValue();
|
||||
int Idx = Mask[FirstMaskIdx + MaskOffset];
|
||||
|
||||
// The input vector this mask element indexes into.
|
||||
unsigned Input = Idx / NewElts;
|
||||
unsigned Input = (unsigned)Idx / NewElts;
|
||||
|
||||
if (Input >= array_lengthof(Inputs)) {
|
||||
// The mask element is "undef" or indexes off the end of the input.
|
||||
Ops.push_back(DAG.getUNDEF(EltVT));
|
||||
SVOps.push_back(DAG.getUNDEF(EltVT));
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -854,25 +846,22 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
|
||||
Idx -= Input * NewElts;
|
||||
|
||||
// Extract the vector element by hand.
|
||||
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
|
||||
Inputs[Input], DAG.getIntPtrConstant(Idx)));
|
||||
SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
|
||||
Inputs[Input], DAG.getIntPtrConstant(Idx)));
|
||||
}
|
||||
|
||||
// Construct the Lo/Hi output using a BUILD_VECTOR.
|
||||
Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, &Ops[0], Ops.size());
|
||||
Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size());
|
||||
} else if (InputUsed[0] == -1U) {
|
||||
// No input vectors were used! The result is undefined.
|
||||
Output = DAG.getUNDEF(NewVT);
|
||||
} else {
|
||||
// At least one input vector was used. Create a new shuffle vector.
|
||||
SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, dl,
|
||||
MVT::getVectorVT(IdxVT, Ops.size()),
|
||||
&Ops[0], Ops.size());
|
||||
SDValue Op0 = Inputs[InputUsed[0]];
|
||||
// If only one input was used, use an undefined vector for the other.
|
||||
SDValue Op1 = InputUsed[1] == -1U ?
|
||||
DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
|
||||
Output = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, Op0, Op1, NewMask);
|
||||
// At least one input vector was used. Create a new shuffle vector.
|
||||
Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
|
||||
}
|
||||
|
||||
Ops.clear();
|
||||
@ -1473,18 +1462,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
|
||||
|
||||
if (NumOperands == 2) {
|
||||
// Replace concat of two operands with a shuffle.
|
||||
MVT PtrVT = TLI.getPointerTy();
|
||||
SmallVector<SDValue, 16> MaskOps(WidenNumElts);
|
||||
SmallVector<int, 16> MaskOps(WidenNumElts);
|
||||
for (unsigned i=0; i < WidenNumElts/2; ++i) {
|
||||
MaskOps[i] = DAG.getConstant(i, PtrVT);
|
||||
MaskOps[i+WidenNumElts/2] = DAG.getConstant(i+WidenNumElts, PtrVT);
|
||||
MaskOps[i] = i;
|
||||
MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
|
||||
}
|
||||
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl,
|
||||
MVT::getVectorVT(PtrVT, WidenNumElts),
|
||||
&MaskOps[0], WidenNumElts);
|
||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT,
|
||||
GetWidenedVector(N->getOperand(0)),
|
||||
GetWidenedVector(N->getOperand(1)), Mask);
|
||||
return DAG.getVectorShuffle(WidenVT, dl,
|
||||
GetWidenedVector(N->getOperand(0)),
|
||||
GetWidenedVector(N->getOperand(1)),
|
||||
&MaskOps[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1762,7 +1748,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
|
||||
|
||||
SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) {
|
||||
MVT VT = N->getValueType(0);
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
int NumElts = VT.getVectorNumElements();
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
MVT WidenVT = TLI.getTypeToTransformTo(VT);
|
||||
@ -1772,28 +1758,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) {
|
||||
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
|
||||
|
||||
// Adjust mask based on new input vector length.
|
||||
SDValue Mask = N->getOperand(2);
|
||||
SmallVector<SDValue, 16> MaskOps(WidenNumElts);
|
||||
MVT IdxVT = Mask.getValueType().getVectorElementType();
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
SDValue Arg = Mask.getOperand(i);
|
||||
if (Arg.getOpcode() == ISD::UNDEF)
|
||||
MaskOps[i] = Arg;
|
||||
else {
|
||||
unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
|
||||
if (Idx < NumElts)
|
||||
MaskOps[i] = Arg;
|
||||
else
|
||||
MaskOps[i] = DAG.getConstant(Idx - NumElts + WidenNumElts, IdxVT);
|
||||
}
|
||||
const int *Mask = cast<ShuffleVectorSDNode>(N)->getMask();
|
||||
SmallVector<int, 16> NewMask;
|
||||
for (int i = 0; i < NumElts; ++i) {
|
||||
if (Mask[i] < NumElts)
|
||||
NewMask.push_back(Mask[i]);
|
||||
else
|
||||
NewMask.push_back(Mask[i] - NumElts + WidenNumElts);
|
||||
}
|
||||
for (unsigned i = NumElts; i < WidenNumElts; ++i)
|
||||
MaskOps[i] = DAG.getUNDEF(IdxVT);
|
||||
SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, dl,
|
||||
MVT::getVectorVT(IdxVT, WidenNumElts),
|
||||
&MaskOps[0], WidenNumElts);
|
||||
|
||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, InOp1, InOp2, NewMask);
|
||||
NewMask.push_back(-1);
|
||||
return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
|
||||
|
@ -456,6 +456,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
|
||||
ID.AddInteger(AT->getRawSubclassData());
|
||||
break;
|
||||
}
|
||||
case ISD::VECTOR_SHUFFLE: {
|
||||
const int *Mask = cast<ShuffleVectorSDNode>(N)->getMask();
|
||||
for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
|
||||
i != e; ++i)
|
||||
ID.AddInteger(Mask[i]);
|
||||
break;
|
||||
}
|
||||
} // end switch (N->getOpcode())
|
||||
}
|
||||
|
||||
@ -762,12 +769,6 @@ void SelectionDAG::VerifyNode(SDNode *N) {
|
||||
assert(N->getValueType(0).isVector() && "Wrong return type!");
|
||||
assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
|
||||
"Wrong number of operands!");
|
||||
MVT EltVT = N->getValueType(0).getVectorElementType();
|
||||
for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
|
||||
assert((I->getValueType() == EltVT ||
|
||||
(EltVT.isInteger() && I->getValueType().isInteger() &&
|
||||
EltVT.bitsLE(I->getValueType()))) &&
|
||||
"Wrong operand type!");
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1126,6 +1127,110 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
|
||||
return SDValue(CondCodeNodes[Cond], 0);
|
||||
}
|
||||
|
||||
/// commuteShuffle - Exchange the two shuffle operands and rewrite the mask so
/// that every defined element keeps referring to the same input value: indices
/// into the first operand are moved into the second operand's range and vice
/// versa. Negative (undef) mask elements are left untouched.
static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
  std::swap(N1, N2);

  const int NumElts = M.size();
  for (int Pos = 0; Pos < NumElts; ++Pos) {
    int &Elt = M[Pos];
    if (Elt < 0)
      continue;  // Undef element, nothing to remap.
    // Elements at or above NumElts pointed at the old second operand, which is
    // now first; everything else pointed at the old first operand.
    Elt += (Elt >= NumElts) ? -NumElts : NumElts;
  }
}
|
||||
|
||||
/// getVectorShuffle - Return an ISD::VECTOR_SHUFFLE node of type VT that
/// shuffles N1 and N2 according to Mask, or a simpler equivalent value when
/// the shuffle canonicalizes away.
///
/// Mask must provide VT.getVectorNumElements() entries. A negative entry is
/// treated as undef; entries below the element count select from N1 and the
/// remaining entries select from N2. N1 and N2 must have the same vector type,
/// and that type must share its element type with VT.
///
/// The mask is copied, so the caller keeps ownership of the Mask array.
/// Identical shuffles are uniqued through the CSE map.
SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
                                       SDValue N2, const int *Mask) {
  assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
  assert(VT.isVector() && N1.getValueType().isVector() &&
         "Vector Shuffle VTs must be a vectors");
  assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
         && "Vector Shuffle VTs must have same element type");

  // Canonicalize shuffle undef, undef -> undef.  The asserts above only
  // require matching element types, so N1 is not guaranteed to have type VT;
  // build the undef with the result type instead of returning N1.
  if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
    return getUNDEF(VT);

  // Validate that all the indices passed in Mask are within the range of
  // elements input to the shuffle, and take a private, mutable copy.
  int NElts = VT.getVectorNumElements();
  SmallVector<int, 8> MaskVec;
  for (int i = 0; i != NElts; ++i) {
    if (Mask[i] >= (NElts * 2)) {
      assert(0 && "Index out of range");
      return SDValue();
    }
    MaskVec.push_back(Mask[i]);
  }

  // Canonicalize shuffle v, v -> v, undef
  if (N1 == N2) {
    N2 = getUNDEF(VT);
    for (int i = 0; i != NElts; ++i)
      if (MaskVec[i] >= NElts) MaskVec[i] -= NElts;
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N1.getOpcode() == ISD::UNDEF)
    commuteShuffle(N1, N2, MaskVec);

  // Canonicalize all index into lhs, -> shuffle lhs, undef
  // Canonicalize all index into rhs, -> shuffle rhs, undef
  bool AllLHS = true, AllRHS = true;
  bool N2Undef = N2.getOpcode() == ISD::UNDEF;
  for (int i = 0; i != NElts; ++i) {
    if (MaskVec[i] >= NElts) {
      // Selecting from an undef right-hand side is the same as an undef
      // mask element.
      if (N2Undef)
        MaskVec[i] = -1;
      else
        AllLHS = false;
    } else if (MaskVec[i] >= 0) {
      AllRHS = false;
    }
  }
  // Neither side is referenced: every mask element is undef.
  if (AllLHS && AllRHS)
    return getUNDEF(VT);
  if (AllLHS)
    N2 = getUNDEF(VT);
  if (AllRHS) {
    N1 = getUNDEF(VT);
    commuteShuffle(N1, N2, MaskVec);
  }

  // If Identity shuffle, or all shuffle in to undef, return that node.
  bool AllUndef = true;
  bool Identity = true;
  for (int i = 0; i < NElts; ++i) {
    if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false;
    if (MaskVec[i] >= 0) AllUndef = false;
  }
  // An identity mask may only fold to N1 when N1 already has the result type;
  // otherwise the caller would receive a value of the wrong vector type.
  if (Identity && N1.getValueType() == VT)
    return N1;
  if (AllUndef)
    return getUNDEF(VT);

  // The mask is part of the node's identity, so fold it into the CSE key
  // together with the opcode, result type and operands.
  FoldingSetNodeID ID;
  SDValue Ops[2] = { N1, N2 };
  AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
  for (int i = 0; i != NElts; ++i)
    ID.AddInteger(MaskVec[i]);

  void* IP = 0;
  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
    return SDValue(E, 0);

  // Allocate the mask array for the node out of the BumpPtrAllocator, since
  // SDNode doesn't have access to it.  This memory will be "leaked" when
  // the node is deallocated, but recovered when the NodeAllocator is released.
  int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
  memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));

  ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
  new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
  CSEMap.InsertNode(N, IP);
  AllNodes.push_back(N);
  return SDValue(N, 0);
}
|
||||
|
||||
SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl,
|
||||
SDValue Val, SDValue DTy,
|
||||
SDValue STy, SDValue Rnd, SDValue Sat,
|
||||
@ -2087,19 +2192,18 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
|
||||
SDValue SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned i) {
|
||||
MVT VT = N->getValueType(0);
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
SDValue PermMask = N->getOperand(2);
|
||||
SDValue Idx = PermMask.getOperand(i);
|
||||
if (Idx.getOpcode() == ISD::UNDEF)
|
||||
const int *PermMask = cast<ShuffleVectorSDNode>(N)->getMask();
|
||||
if (PermMask[i] < 0)
|
||||
return getUNDEF(VT.getVectorElementType());
|
||||
unsigned Index = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
unsigned NumElems = PermMask.getNumOperands();
|
||||
int Index = PermMask[i];
|
||||
int NumElems = VT.getVectorNumElements();
|
||||
SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
|
||||
Index %= NumElems;
|
||||
|
||||
if (V.getOpcode() == ISD::BIT_CONVERT) {
|
||||
V = V.getOperand(0);
|
||||
MVT VVT = V.getValueType();
|
||||
if (!VVT.isVector() || VVT.getVectorNumElements() != NumElems)
|
||||
if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
|
||||
return SDValue();
|
||||
}
|
||||
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
|
||||
@ -2793,12 +2897,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
|
||||
}
|
||||
break;
|
||||
case ISD::VECTOR_SHUFFLE:
|
||||
assert(N1.getValueType() == N2.getValueType() &&
|
||||
N1.getValueType().isVector() &&
|
||||
VT.isVector() && N3.getValueType().isVector() &&
|
||||
N3.getOpcode() == ISD::BUILD_VECTOR &&
|
||||
VT.getVectorNumElements() == N3.getNumOperands() &&
|
||||
"Illegal VECTOR_SHUFFLE node!");
|
||||
assert(0 && "should use getVectorShuffle constructor!");
|
||||
break;
|
||||
case ISD::BIT_CONVERT:
|
||||
// Fold bit_convert nodes from a type to themselves.
|
||||
@ -5322,14 +5421,14 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
|
||||
|
||||
void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
|
||||
if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) {
|
||||
SDNode *Mask = getOperand(2).getNode();
|
||||
const int *Mask = cast<ShuffleVectorSDNode>(this)->getMask();
|
||||
OS << "<";
|
||||
for (unsigned i = 0, e = Mask->getNumOperands(); i != e; ++i) {
|
||||
for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
|
||||
if (i) OS << ",";
|
||||
if (Mask->getOperand(i).getOpcode() == ISD::UNDEF)
|
||||
if (Mask[i] < 0)
|
||||
OS << "u";
|
||||
else
|
||||
OS << cast<ConstantSDNode>(Mask->getOperand(i))->getZExtValue();
|
||||
OS << Mask[i];
|
||||
}
|
||||
OS << ">";
|
||||
}
|
||||
@ -5610,3 +5709,13 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
|
||||
SplatBitSize = sz;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) {
|
||||
int Idx = -1;
|
||||
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
|
||||
if (Idx < 0) Idx = Mask[i];
|
||||
if (Mask[i] >= 0 && Mask[i] != Idx)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -870,8 +870,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
|
||||
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
|
||||
return N = DAG.getConstantFP(*CFP, VT);
|
||||
|
||||
if (isa<UndefValue>(C) && !isa<VectorType>(V->getType()) &&
|
||||
!V->getType()->isAggregateType())
|
||||
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
|
||||
return N = DAG.getUNDEF(VT);
|
||||
|
||||
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
|
||||
@ -925,14 +924,11 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
|
||||
for (unsigned i = 0; i != NumElements; ++i)
|
||||
Ops.push_back(getValue(CP->getOperand(i)));
|
||||
} else {
|
||||
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
|
||||
"Unknown vector constant!");
|
||||
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
|
||||
MVT EltVT = TLI.getValueType(VecTy->getElementType());
|
||||
|
||||
SDValue Op;
|
||||
if (isa<UndefValue>(C))
|
||||
Op = DAG.getUNDEF(EltVT);
|
||||
else if (EltVT.isFloatingPoint())
|
||||
if (EltVT.isFloatingPoint())
|
||||
Op = DAG.getConstantFP(0, EltVT);
|
||||
else
|
||||
Op = DAG.getConstant(0, EltVT);
|
||||
@ -2435,37 +2431,42 @@ void SelectionDAGLowering::visitExtractElement(User &I) {
|
||||
|
||||
// Utility for visitShuffleVector - Returns true if the mask is a sequential mask starting
|
||||
// from SIndx and increasing to the element length (undefs are allowed).
|
||||
static bool SequentialMask(SDValue Mask, unsigned SIndx) {
|
||||
unsigned MaskNumElts = Mask.getNumOperands();
|
||||
for (unsigned i = 0; i != MaskNumElts; ++i) {
|
||||
if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) {
|
||||
unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
|
||||
if (Idx != i + SIndx)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
static bool SequentialMask(SmallVectorImpl<int> &Mask, int SIndx) {
|
||||
int MaskNumElts = Mask.size();
|
||||
for (int i = 0; i != MaskNumElts; ++i)
|
||||
if ((Mask[i] >= 0) && (Mask[i] != i + SIndx))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void SelectionDAGLowering::visitShuffleVector(User &I) {
|
||||
SmallVector<int, 8> Mask;
|
||||
SDValue Src1 = getValue(I.getOperand(0));
|
||||
SDValue Src2 = getValue(I.getOperand(1));
|
||||
SDValue Mask = getValue(I.getOperand(2));
|
||||
|
||||
// Convert the ConstantVector mask operand into an array of ints, with -1
|
||||
// representing undef values.
|
||||
SmallVector<Constant*, 8> MaskElts;
|
||||
cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
|
||||
int MaskNumElts = MaskElts.size();
|
||||
for (int i = 0; i != MaskNumElts; ++i) {
|
||||
if (isa<UndefValue>(MaskElts[i]))
|
||||
Mask.push_back(-1);
|
||||
else
|
||||
Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
|
||||
}
|
||||
|
||||
MVT VT = TLI.getValueType(I.getType());
|
||||
MVT SrcVT = Src1.getValueType();
|
||||
int MaskNumElts = Mask.getNumOperands();
|
||||
int SrcNumElts = SrcVT.getVectorNumElements();
|
||||
|
||||
if (SrcNumElts == MaskNumElts) {
|
||||
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
|
||||
VT, Src1, Src2, Mask));
|
||||
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
|
||||
&Mask[0]));
|
||||
return;
|
||||
}
|
||||
|
||||
// Normalize the shuffle vector since mask and vector length don't match.
|
||||
MVT MaskEltVT = Mask.getValueType().getVectorElementType();
|
||||
|
||||
if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
|
||||
// Mask is longer than the source vectors and is a multiple of the source
|
||||
// vectors. We can use concatenate vector to make the mask and vectors
|
||||
@ -2479,44 +2480,33 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
|
||||
|
||||
// Pad both vectors with undefs to make them the same length as the mask.
|
||||
unsigned NumConcat = MaskNumElts / SrcNumElts;
|
||||
bool Src1U = Src1.getOpcode() == ISD::UNDEF;
|
||||
bool Src2U = Src2.getOpcode() == ISD::UNDEF;
|
||||
SDValue UndefVal = DAG.getUNDEF(SrcVT);
|
||||
|
||||
SDValue* MOps1 = new SDValue[NumConcat];
|
||||
SDValue* MOps2 = new SDValue[NumConcat];
|
||||
SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
|
||||
SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
|
||||
MOps1[0] = Src1;
|
||||
MOps2[0] = Src2;
|
||||
for (unsigned i = 1; i != NumConcat; ++i) {
|
||||
MOps1[i] = UndefVal;
|
||||
MOps2[i] = UndefVal;
|
||||
}
|
||||
Src1 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
|
||||
VT, MOps1, NumConcat);
|
||||
Src2 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
|
||||
VT, MOps2, NumConcat);
|
||||
|
||||
delete [] MOps1;
|
||||
delete [] MOps2;
|
||||
|
||||
Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
|
||||
getCurDebugLoc(), VT,
|
||||
&MOps1[0], NumConcat);
|
||||
Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
|
||||
getCurDebugLoc(), VT,
|
||||
&MOps2[0], NumConcat);
|
||||
|
||||
// Readjust mask for new input vector length.
|
||||
SmallVector<SDValue, 8> MappedOps;
|
||||
SmallVector<int, 8> MappedOps;
|
||||
for (int i = 0; i != MaskNumElts; ++i) {
|
||||
if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
|
||||
MappedOps.push_back(Mask.getOperand(i));
|
||||
} else {
|
||||
int Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
|
||||
if (Idx < SrcNumElts)
|
||||
MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
|
||||
else
|
||||
MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - SrcNumElts,
|
||||
MaskEltVT));
|
||||
}
|
||||
int Idx = Mask[i];
|
||||
if (Idx < SrcNumElts)
|
||||
MappedOps.push_back(Idx);
|
||||
else
|
||||
MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
|
||||
}
|
||||
Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
|
||||
Mask.getValueType(),
|
||||
&MappedOps[0], MappedOps.size());
|
||||
|
||||
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
|
||||
VT, Src1, Src2, Mask));
|
||||
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
|
||||
&MappedOps[0]));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2541,20 +2531,19 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
|
||||
int MaxRange[2] = {-1, -1};
|
||||
|
||||
for (int i = 0; i != MaskNumElts; ++i) {
|
||||
SDValue Arg = Mask.getOperand(i);
|
||||
if (Arg.getOpcode() != ISD::UNDEF) {
|
||||
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
|
||||
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
|
||||
int Input = 0;
|
||||
if (Idx >= SrcNumElts) {
|
||||
Input = 1;
|
||||
Idx -= SrcNumElts;
|
||||
}
|
||||
if (Idx > MaxRange[Input])
|
||||
MaxRange[Input] = Idx;
|
||||
if (Idx < MinRange[Input])
|
||||
MinRange[Input] = Idx;
|
||||
int Idx = Mask[i];
|
||||
int Input = 0;
|
||||
if (Idx < 0)
|
||||
continue;
|
||||
|
||||
if (Idx >= SrcNumElts) {
|
||||
Input = 1;
|
||||
Idx -= SrcNumElts;
|
||||
}
|
||||
if (Idx > MaxRange[Input])
|
||||
MaxRange[Input] = Idx;
|
||||
if (Idx < MinRange[Input])
|
||||
MinRange[Input] = Idx;
|
||||
}
|
||||
|
||||
// Check if the access is smaller than the vector size and can we find
|
||||
@ -2596,26 +2585,18 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
|
||||
}
|
||||
}
|
||||
// Calculate new mask.
|
||||
SmallVector<SDValue, 8> MappedOps;
|
||||
SmallVector<int, 8> MappedOps;
|
||||
for (int i = 0; i != MaskNumElts; ++i) {
|
||||
SDValue Arg = Mask.getOperand(i);
|
||||
if (Arg.getOpcode() == ISD::UNDEF) {
|
||||
MappedOps.push_back(Arg);
|
||||
} else {
|
||||
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
|
||||
if (Idx < SrcNumElts)
|
||||
MappedOps.push_back(DAG.getConstant(Idx - StartIdx[0], MaskEltVT));
|
||||
else {
|
||||
Idx = Idx - SrcNumElts - StartIdx[1] + MaskNumElts;
|
||||
MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
|
||||
}
|
||||
}
|
||||
int Idx = Mask[i];
|
||||
if (Idx < 0)
|
||||
MappedOps.push_back(Idx);
|
||||
else if (Idx < SrcNumElts)
|
||||
MappedOps.push_back(Idx - StartIdx[0]);
|
||||
else
|
||||
MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
|
||||
}
|
||||
Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
|
||||
Mask.getValueType(),
|
||||
&MappedOps[0], MappedOps.size());
|
||||
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
|
||||
VT, Src1, Src2, Mask));
|
||||
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
|
||||
&MappedOps[0]));
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -2627,12 +2608,10 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
|
||||
MVT PtrVT = TLI.getPointerTy();
|
||||
SmallVector<SDValue,8> Ops;
|
||||
for (int i = 0; i != MaskNumElts; ++i) {
|
||||
SDValue Arg = Mask.getOperand(i);
|
||||
if (Arg.getOpcode() == ISD::UNDEF) {
|
||||
if (Mask[i] < 0) {
|
||||
Ops.push_back(DAG.getUNDEF(EltVT));
|
||||
} else {
|
||||
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
|
||||
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
|
||||
int Idx = Mask[i];
|
||||
if (Idx < SrcNumElts)
|
||||
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
|
||||
EltVT, Src1, DAG.getConstant(Idx, PtrVT)));
|
||||
|
@ -1672,7 +1672,7 @@ SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
|
||||
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
SDValue V1 = Op.getOperand(0);
|
||||
SDValue V2 = Op.getOperand(1);
|
||||
SDValue PermMask = Op.getOperand(2);
|
||||
const int *PermMask = cast<ShuffleVectorSDNode>(Op)->getMask();
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
|
||||
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
|
||||
@ -1703,39 +1703,40 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
} else
|
||||
assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
|
||||
|
||||
for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
|
||||
if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
|
||||
unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
|
||||
for (unsigned i = 0; i != MaxElts; ++i) {
|
||||
if (PermMask[i] < 0)
|
||||
continue;
|
||||
|
||||
unsigned SrcElt = PermMask[i];
|
||||
|
||||
if (monotonic) {
|
||||
if (SrcElt >= V2EltIdx0) {
|
||||
if (1 >= (++EltsFromV2)) {
|
||||
V2Elt = (V2EltIdx0 - SrcElt) << 2;
|
||||
}
|
||||
} else if (CurrElt != SrcElt) {
|
||||
monotonic = false;
|
||||
if (monotonic) {
|
||||
if (SrcElt >= V2EltIdx0) {
|
||||
if (1 >= (++EltsFromV2)) {
|
||||
V2Elt = (V2EltIdx0 - SrcElt) << 2;
|
||||
}
|
||||
|
||||
++CurrElt;
|
||||
} else if (CurrElt != SrcElt) {
|
||||
monotonic = false;
|
||||
}
|
||||
|
||||
if (rotate) {
|
||||
if (PrevElt > 0 && SrcElt < MaxElts) {
|
||||
if ((PrevElt == SrcElt - 1)
|
||||
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) {
|
||||
PrevElt = SrcElt;
|
||||
if (SrcElt == 0)
|
||||
V0Elt = i;
|
||||
} else {
|
||||
rotate = false;
|
||||
}
|
||||
} else if (PrevElt == 0) {
|
||||
// First time through, need to keep track of previous element
|
||||
++CurrElt;
|
||||
}
|
||||
|
||||
if (rotate) {
|
||||
if (PrevElt > 0 && SrcElt < MaxElts) {
|
||||
if ((PrevElt == SrcElt - 1)
|
||||
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) {
|
||||
PrevElt = SrcElt;
|
||||
if (SrcElt == 0)
|
||||
V0Elt = i;
|
||||
} else {
|
||||
// This isn't a rotation, takes elements from vector 2
|
||||
rotate = false;
|
||||
}
|
||||
} else if (PrevElt == 0) {
|
||||
// First time through, need to keep track of previous element
|
||||
PrevElt = SrcElt;
|
||||
} else {
|
||||
// This isn't a rotation, takes elements from vector 2
|
||||
rotate = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1768,12 +1769,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
|
||||
|
||||
SmallVector<SDValue, 16> ResultMask;
|
||||
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
|
||||
unsigned SrcElt;
|
||||
if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
|
||||
SrcElt = 0;
|
||||
else
|
||||
SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
|
||||
for (unsigned i = 0, e = MaxElts; i != e; ++i) {
|
||||
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
|
||||
|
||||
for (unsigned j = 0; j < BytesPerElement; ++j) {
|
||||
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
|
||||
|
@ -456,22 +456,22 @@ static bool isFloatingPointZero(SDValue Op) {
|
||||
|
||||
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
|
||||
/// true if Op is undef or if it matches the specified value.
|
||||
static bool isConstantOrUndef(SDValue Op, unsigned Val) {
|
||||
return Op.getOpcode() == ISD::UNDEF ||
|
||||
cast<ConstantSDNode>(Op)->getZExtValue() == Val;
|
||||
static bool isConstantOrUndef(int Op, int Val) {
|
||||
return Op < 0 || Op == Val;
|
||||
}
|
||||
|
||||
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
|
||||
/// VPKUHUM instruction.
|
||||
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
|
||||
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
|
||||
const int *Mask = N->getMask();
|
||||
if (!isUnary) {
|
||||
for (unsigned i = 0; i != 16; ++i)
|
||||
if (!isConstantOrUndef(N->getOperand(i), i*2+1))
|
||||
if (!isConstantOrUndef(Mask[i], i*2+1))
|
||||
return false;
|
||||
} else {
|
||||
for (unsigned i = 0; i != 8; ++i)
|
||||
if (!isConstantOrUndef(N->getOperand(i), i*2+1) ||
|
||||
!isConstantOrUndef(N->getOperand(i+8), i*2+1))
|
||||
if (!isConstantOrUndef(Mask[i], i*2+1) ||
|
||||
!isConstantOrUndef(Mask[i+8], i*2+1))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -479,18 +479,19 @@ bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
|
||||
|
||||
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
|
||||
/// VPKUWUM instruction.
|
||||
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
|
||||
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
|
||||
const int *Mask = N->getMask();
|
||||
if (!isUnary) {
|
||||
for (unsigned i = 0; i != 16; i += 2)
|
||||
if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||
|
||||
!isConstantOrUndef(N->getOperand(i+1), i*2+3))
|
||||
if (!isConstantOrUndef(Mask[i ], i*2+2) ||
|
||||
!isConstantOrUndef(Mask[i+1], i*2+3))
|
||||
return false;
|
||||
} else {
|
||||
for (unsigned i = 0; i != 8; i += 2)
|
||||
if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||
|
||||
!isConstantOrUndef(N->getOperand(i+1), i*2+3) ||
|
||||
!isConstantOrUndef(N->getOperand(i+8), i*2+2) ||
|
||||
!isConstantOrUndef(N->getOperand(i+9), i*2+3))
|
||||
if (!isConstantOrUndef(Mask[i ], i*2+2) ||
|
||||
!isConstantOrUndef(Mask[i+1], i*2+3) ||
|
||||
!isConstantOrUndef(Mask[i+8], i*2+2) ||
|
||||
!isConstantOrUndef(Mask[i+9], i*2+3))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -498,27 +499,29 @@ bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
|
||||
|
||||
/// isVMerge - Common function, used to match vmrg* shuffles.
|
||||
///
|
||||
static bool isVMerge(SDNode *N, unsigned UnitSize,
|
||||
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
|
||||
unsigned LHSStart, unsigned RHSStart) {
|
||||
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
|
||||
N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
|
||||
assert(N->getValueType(0) == MVT::v16i8 &&
|
||||
"PPC only supports shuffles by bytes!");
|
||||
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
|
||||
"Unsupported merge size!");
|
||||
|
||||
const int *Mask = N->getMask();
|
||||
for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
|
||||
for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
|
||||
if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
|
||||
if (!isConstantOrUndef(Mask[i*UnitSize*2+j],
|
||||
LHSStart+j+i*UnitSize) ||
|
||||
!isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
|
||||
!isConstantOrUndef(Mask[i*UnitSize*2+UnitSize+j],
|
||||
RHSStart+j+i*UnitSize))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
|
||||
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
|
||||
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
|
||||
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
|
||||
bool isUnary) {
|
||||
if (!isUnary)
|
||||
return isVMerge(N, UnitSize, 8, 24);
|
||||
return isVMerge(N, UnitSize, 8, 8);
|
||||
@ -526,7 +529,8 @@ bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
|
||||
|
||||
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
|
||||
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
|
||||
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
|
||||
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
|
||||
bool isUnary) {
|
||||
if (!isUnary)
|
||||
return isVMerge(N, UnitSize, 0, 16);
|
||||
return isVMerge(N, UnitSize, 0, 0);
|
||||
@ -536,91 +540,92 @@ bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
|
||||
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
|
||||
/// amount, otherwise return -1.
|
||||
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
|
||||
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
|
||||
N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
|
||||
assert(N->getValueType(0) == MVT::v16i8 &&
|
||||
"PPC only supports shuffles by bytes!");
|
||||
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
|
||||
|
||||
// Find the first non-undef value in the shuffle mask.
|
||||
const int *Mask = SVOp->getMask();
|
||||
unsigned i;
|
||||
for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
|
||||
for (i = 0; i != 16 && Mask[i] < 0; ++i)
|
||||
/*search*/;
|
||||
|
||||
if (i == 16) return -1; // all undef.
|
||||
|
||||
// Otherwise, check to see if the rest of the elements are consequtively
|
||||
// Otherwise, check to see if the rest of the elements are consecutively
|
||||
// numbered from this value.
|
||||
unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getZExtValue();
|
||||
unsigned ShiftAmt = Mask[i];
|
||||
if (ShiftAmt < i) return -1;
|
||||
ShiftAmt -= i;
|
||||
|
||||
if (!isUnary) {
|
||||
// Check the rest of the elements to see if they are consequtive.
|
||||
// Check the rest of the elements to see if they are consecutive.
|
||||
for (++i; i != 16; ++i)
|
||||
if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
|
||||
if (!isConstantOrUndef(Mask[i], ShiftAmt+i))
|
||||
return -1;
|
||||
} else {
|
||||
// Check the rest of the elements to see if they are consequtive.
|
||||
// Check the rest of the elements to see if they are consecutive.
|
||||
for (++i; i != 16; ++i)
|
||||
if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
|
||||
if (!isConstantOrUndef(Mask[i], (ShiftAmt+i) & 15))
|
||||
return -1;
|
||||
}
|
||||
|
||||
return ShiftAmt;
|
||||
}
|
||||
|
||||
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a splat of a single element that is suitable for input to
|
||||
/// VSPLTB/VSPLTH/VSPLTW.
|
||||
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
|
||||
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
|
||||
N->getNumOperands() == 16 &&
|
||||
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
|
||||
assert(N->getValueType(0) == MVT::v16i8 &&
|
||||
(EltSize == 1 || EltSize == 2 || EltSize == 4));
|
||||
|
||||
// This is a splat operation if each element of the permute is the same, and
|
||||
// if the value doesn't reference the second vector.
|
||||
unsigned ElementBase = 0;
|
||||
SDValue Elt = N->getOperand(0);
|
||||
if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
|
||||
ElementBase = EltV->getZExtValue();
|
||||
else
|
||||
return false; // FIXME: Handle UNDEF elements too!
|
||||
|
||||
if (cast<ConstantSDNode>(Elt)->getZExtValue() >= 16)
|
||||
const int *Mask = N->getMask();
|
||||
unsigned ElementBase = Mask[0];
|
||||
|
||||
// FIXME: Handle UNDEF elements too!
|
||||
if (ElementBase >= 16)
|
||||
return false;
|
||||
|
||||
// Check that they are consequtive.
|
||||
for (unsigned i = 1; i != EltSize; ++i) {
|
||||
if (!isa<ConstantSDNode>(N->getOperand(i)) ||
|
||||
cast<ConstantSDNode>(N->getOperand(i))->getZExtValue() != i+ElementBase)
|
||||
// Check that the indices are consecutive, in the case of a multi-byte element
|
||||
// splatted with a v16i8 mask.
|
||||
for (unsigned i = 1; i != EltSize; ++i)
|
||||
if (Mask[i] < 0 || Mask[i] != (int)(i+ElementBase))
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
|
||||
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
|
||||
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
|
||||
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
|
||||
"Invalid VECTOR_SHUFFLE mask!");
|
||||
if (Mask[i] < 0) continue;
|
||||
for (unsigned j = 0; j != EltSize; ++j)
|
||||
if (N->getOperand(i+j) != N->getOperand(j))
|
||||
if (Mask[i+j] != Mask[j])
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
|
||||
/// are -0.0.
|
||||
bool PPC::isAllNegativeZeroVector(SDNode *N) {
|
||||
assert(N->getOpcode() == ISD::BUILD_VECTOR);
|
||||
if (PPC::isSplatShuffleMask(N, N->getNumOperands()))
|
||||
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N))
|
||||
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
|
||||
|
||||
APInt APVal, APUndef;
|
||||
unsigned BitSize;
|
||||
bool HasAnyUndefs;
|
||||
|
||||
if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
|
||||
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
|
||||
return CFP->getValueAPF().isNegZero();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
|
||||
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
|
||||
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
|
||||
assert(isSplatShuffleMask(N, EltSize));
|
||||
return cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() / EltSize;
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
|
||||
assert(isSplatShuffleMask(SVOp, EltSize));
|
||||
return SVOp->getMask()[0] / EltSize;
|
||||
}
|
||||
|
||||
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
|
||||
@ -3149,11 +3154,10 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
|
||||
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
|
||||
RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
|
||||
|
||||
SDValue Ops[16];
|
||||
int Ops[16];
|
||||
for (unsigned i = 0; i != 16; ++i)
|
||||
Ops[i] = DAG.getConstant(i+Amt, MVT::i8);
|
||||
SDValue T = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, LHS, RHS,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops,16));
|
||||
Ops[i] = i + Amt;
|
||||
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
|
||||
}
|
||||
|
||||
@ -3354,7 +3358,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
|
||||
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
|
||||
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
|
||||
|
||||
unsigned ShufIdxs[16];
|
||||
int ShufIdxs[16];
|
||||
switch (OpNum) {
|
||||
default: assert(0 && "Unknown i32 permute!");
|
||||
case OP_VMRGHW:
|
||||
@ -3392,13 +3396,11 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
|
||||
case OP_VSLDOI12:
|
||||
return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
|
||||
}
|
||||
SDValue Ops[16];
|
||||
for (unsigned i = 0; i != 16; ++i)
|
||||
Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i8);
|
||||
|
||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, OpLHS.getValueType(),
|
||||
OpLHS, OpRHS,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16));
|
||||
MVT VT = OpLHS.getValueType();
|
||||
OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
|
||||
OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
|
||||
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
|
||||
}
|
||||
|
||||
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
|
||||
@ -3406,28 +3408,30 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
|
||||
/// return the code it can be lowered into. Worst case, it can always be
|
||||
/// lowered into a vperm.
|
||||
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
||||
SelectionDAG &DAG) {
|
||||
SelectionDAG &DAG) {
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
SDValue V1 = Op.getOperand(0);
|
||||
SDValue V2 = Op.getOperand(1);
|
||||
SDValue PermMask = Op.getOperand(2);
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
||||
const int *PermMask = SVOp->getMask();
|
||||
MVT VT = Op.getValueType();
|
||||
|
||||
// Cases that are handled by instructions that take permute immediates
|
||||
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
|
||||
// selected by the instruction selector.
|
||||
if (V2.getOpcode() == ISD::UNDEF) {
|
||||
if (PPC::isSplatShuffleMask(PermMask.getNode(), 1) ||
|
||||
PPC::isSplatShuffleMask(PermMask.getNode(), 2) ||
|
||||
PPC::isSplatShuffleMask(PermMask.getNode(), 4) ||
|
||||
PPC::isVPKUWUMShuffleMask(PermMask.getNode(), true) ||
|
||||
PPC::isVPKUHUMShuffleMask(PermMask.getNode(), true) ||
|
||||
PPC::isVSLDOIShuffleMask(PermMask.getNode(), true) != -1 ||
|
||||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, true) ||
|
||||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, true) ||
|
||||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, true) ||
|
||||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, true) ||
|
||||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, true) ||
|
||||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, true)) {
|
||||
if (PPC::isSplatShuffleMask(SVOp, 1) ||
|
||||
PPC::isSplatShuffleMask(SVOp, 2) ||
|
||||
PPC::isSplatShuffleMask(SVOp, 4) ||
|
||||
PPC::isVPKUWUMShuffleMask(SVOp, true) ||
|
||||
PPC::isVPKUHUMShuffleMask(SVOp, true) ||
|
||||
PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
|
||||
PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
|
||||
PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
|
||||
PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
|
||||
return Op;
|
||||
}
|
||||
}
|
||||
@ -3435,15 +3439,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
||||
// Altivec has a variety of "shuffle immediates" that take two vector inputs
|
||||
// and produce a fixed permutation. If any of these match, do not lower to
|
||||
// VPERM.
|
||||
if (PPC::isVPKUWUMShuffleMask(PermMask.getNode(), false) ||
|
||||
PPC::isVPKUHUMShuffleMask(PermMask.getNode(), false) ||
|
||||
PPC::isVSLDOIShuffleMask(PermMask.getNode(), false) != -1 ||
|
||||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, false) ||
|
||||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, false) ||
|
||||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, false) ||
|
||||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, false) ||
|
||||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, false) ||
|
||||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, false))
|
||||
if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
|
||||
PPC::isVPKUHUMShuffleMask(SVOp, false) ||
|
||||
PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
|
||||
PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
|
||||
PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
|
||||
PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
|
||||
PPC::isVMRGHShuffleMask(SVOp, 4, false))
|
||||
return Op;
|
||||
|
||||
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
|
||||
@ -3453,11 +3457,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
||||
for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
|
||||
unsigned EltNo = 8; // Start out undef.
|
||||
for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
|
||||
if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
|
||||
if (PermMask[i*4+j] < 0)
|
||||
continue; // Undef, ignore it.
|
||||
|
||||
unsigned ByteSource =
|
||||
cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getZExtValue();
|
||||
unsigned ByteSource = PermMask[i*4+j];
|
||||
if ((ByteSource & 3) != j) {
|
||||
isFourElementShuffle = false;
|
||||
break;
|
||||
@ -3509,12 +3512,8 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
||||
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
|
||||
|
||||
SmallVector<SDValue, 16> ResultMask;
|
||||
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
|
||||
unsigned SrcElt;
|
||||
if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
|
||||
SrcElt = 0;
|
||||
else
|
||||
SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
|
||||
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
|
||||
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
|
||||
|
||||
for (unsigned j = 0; j != BytesPerElement; ++j)
|
||||
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
|
||||
@ -3704,13 +3703,12 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
|
||||
OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);
|
||||
|
||||
// Merge the results together.
|
||||
SDValue Ops[16];
|
||||
int Ops[16];
|
||||
for (unsigned i = 0; i != 8; ++i) {
|
||||
Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8);
|
||||
Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8);
|
||||
Ops[i*2 ] = 2*i+1;
|
||||
Ops[i*2+1] = 2*i+1+16;
|
||||
}
|
||||
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, EvenParts, OddParts,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16));
|
||||
return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
|
||||
} else {
|
||||
assert(0 && "Unknown mul to lower!");
|
||||
abort();
|
||||
|
@ -175,19 +175,21 @@ namespace llvm {
|
||||
namespace PPC {
|
||||
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
|
||||
/// VPKUHUM instruction.
|
||||
bool isVPKUHUMShuffleMask(SDNode *N, bool isUnary);
|
||||
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
|
||||
|
||||
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
|
||||
/// VPKUWUM instruction.
|
||||
bool isVPKUWUMShuffleMask(SDNode *N, bool isUnary);
|
||||
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
|
||||
|
||||
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
|
||||
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
|
||||
bool isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary);
|
||||
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
|
||||
bool isUnary);
|
||||
|
||||
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
|
||||
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
|
||||
bool isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary);
|
||||
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
|
||||
bool isUnary);
|
||||
|
||||
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
|
||||
/// amount, otherwise return -1.
|
||||
@ -196,7 +198,7 @@ namespace llvm {
|
||||
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a splat of a single element that is suitable for input to
|
||||
/// VSPLTB/VSPLTH/VSPLTW.
|
||||
bool isSplatShuffleMask(SDNode *N, unsigned EltSize);
|
||||
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
|
||||
|
||||
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
|
||||
/// are -0.0.
|
||||
|
@ -15,96 +15,118 @@
|
||||
// Altivec transformation functions and pattern fragments.
|
||||
//
|
||||
|
||||
/// VPKUHUM_shuffle_mask/VPKUWUM_shuffle_mask - Return true if this is a valid
|
||||
/// shuffle mask for the VPKUHUM or VPKUWUM instructions.
|
||||
def VPKUHUM_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVPKUHUMShuffleMask(N, false);
|
||||
}]>;
|
||||
def VPKUWUM_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVPKUWUMShuffleMask(N, false);
|
||||
}]>;
|
||||
|
||||
def VPKUHUM_unary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVPKUHUMShuffleMask(N, true);
|
||||
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
|
||||
}]>;
|
||||
def VPKUWUM_unary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVPKUWUMShuffleMask(N, true);
|
||||
def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
|
||||
}]>;
|
||||
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
|
||||
}]>;
|
||||
def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
|
||||
}]>;
|
||||
|
||||
|
||||
def VMRGLB_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGLShuffleMask(N, 1, false);
|
||||
def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
|
||||
}]>;
|
||||
def VMRGLH_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGLShuffleMask(N, 2, false);
|
||||
def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
|
||||
}]>;
|
||||
def VMRGLW_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGLShuffleMask(N, 4, false);
|
||||
def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
|
||||
}]>;
|
||||
def VMRGHB_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGHShuffleMask(N, 1, false);
|
||||
def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
|
||||
}]>;
|
||||
def VMRGHH_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGHShuffleMask(N, 2, false);
|
||||
def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
|
||||
}]>;
|
||||
def VMRGHW_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGHShuffleMask(N, 4, false);
|
||||
def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
|
||||
}]>;
|
||||
|
||||
def VMRGLB_unary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGLShuffleMask(N, 1, true);
|
||||
|
||||
def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
|
||||
}]>;
|
||||
def VMRGLH_unary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGLShuffleMask(N, 2, true);
|
||||
def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
|
||||
}]>;
|
||||
def VMRGLW_unary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGLShuffleMask(N, 4, true);
|
||||
def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
|
||||
}]>;
|
||||
def VMRGHB_unary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGHShuffleMask(N, 1, true);
|
||||
def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
|
||||
}]>;
|
||||
def VMRGHH_unary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGHShuffleMask(N, 2, true);
|
||||
def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
|
||||
}]>;
|
||||
def VMRGHW_unary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isVMRGHShuffleMask(N, 4, true);
|
||||
def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
|
||||
}]>;
|
||||
|
||||
def VSLDOI_get_imm : SDNodeXForm<build_vector, [{
|
||||
|
||||
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
|
||||
return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
|
||||
}]>;
|
||||
def VSLDOI_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVSLDOIShuffleMask(N, false) != -1;
|
||||
}], VSLDOI_get_imm>;
|
||||
|
||||
|
||||
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
|
||||
/// vector_shuffle(X,undef,mask) by the dag combiner.
|
||||
def VSLDOI_unary_get_imm : SDNodeXForm<build_vector, [{
|
||||
def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
|
||||
return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
|
||||
}]>;
|
||||
def VSLDOI_unary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isVSLDOIShuffleMask(N, true) != -1;
|
||||
}], VSLDOI_unary_get_imm>;
|
||||
|
||||
|
||||
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
|
||||
def VSPLTB_get_imm : SDNodeXForm<build_vector, [{
|
||||
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
|
||||
return getI32Imm(PPC::getVSPLTImmediate(N, 1));
|
||||
}]>;
|
||||
def VSPLTB_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isSplatShuffleMask(N, 1);
|
||||
def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
|
||||
}], VSPLTB_get_imm>;
|
||||
def VSPLTH_get_imm : SDNodeXForm<build_vector, [{
|
||||
def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
|
||||
return getI32Imm(PPC::getVSPLTImmediate(N, 2));
|
||||
}]>;
|
||||
def VSPLTH_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isSplatShuffleMask(N, 2);
|
||||
def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
|
||||
}], VSPLTH_get_imm>;
|
||||
def VSPLTW_get_imm : SDNodeXForm<build_vector, [{
|
||||
def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
|
||||
return getI32Imm(PPC::getVSPLTImmediate(N, 4));
|
||||
}]>;
|
||||
def VSPLTW_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return PPC::isSplatShuffleMask(N, 4);
|
||||
def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
|
||||
}], VSPLTW_get_imm>;
|
||||
|
||||
|
||||
@ -268,8 +290,7 @@ def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
|
||||
def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
|
||||
"vsldoi $vD, $vA, $vB, $SH", VecFP,
|
||||
[(set VRRC:$vD,
|
||||
(vector_shuffle (v16i8 VRRC:$vA), VRRC:$vB,
|
||||
VSLDOI_shuffle_mask:$SH))]>;
|
||||
(vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
|
||||
|
||||
// VX-Form instructions. AltiVec arithmetic ops.
|
||||
def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
|
||||
@ -345,28 +366,22 @@ def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
|
||||
|
||||
def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
|
||||
"vmrghb $vD, $vA, $vB", VecFP,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
|
||||
VRRC:$vB, VMRGHB_shuffle_mask))]>;
|
||||
[(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
|
||||
def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
|
||||
"vmrghh $vD, $vA, $vB", VecFP,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
|
||||
VRRC:$vB, VMRGHH_shuffle_mask))]>;
|
||||
[(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
|
||||
def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
|
||||
"vmrghw $vD, $vA, $vB", VecFP,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
|
||||
VRRC:$vB, VMRGHW_shuffle_mask))]>;
|
||||
[(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
|
||||
def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
|
||||
"vmrglb $vD, $vA, $vB", VecFP,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
|
||||
VRRC:$vB, VMRGLB_shuffle_mask))]>;
|
||||
[(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
|
||||
def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
|
||||
"vmrglh $vD, $vA, $vB", VecFP,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
|
||||
VRRC:$vB, VMRGLH_shuffle_mask))]>;
|
||||
[(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
|
||||
def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
|
||||
"vmrglw $vD, $vA, $vB", VecFP,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
|
||||
VRRC:$vB, VMRGLW_shuffle_mask))]>;
|
||||
[(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
|
||||
|
||||
def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
|
||||
def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
|
||||
@ -440,16 +455,16 @@ def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
|
||||
|
||||
def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
|
||||
"vspltb $vD, $vB, $UIMM", VecPerm,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef),
|
||||
VSPLTB_shuffle_mask:$UIMM))]>;
|
||||
[(set VRRC:$vD,
|
||||
(vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
|
||||
def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
|
||||
"vsplth $vD, $vB, $UIMM", VecPerm,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef),
|
||||
VSPLTH_shuffle_mask:$UIMM))]>;
|
||||
[(set VRRC:$vD,
|
||||
(vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
|
||||
def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
|
||||
"vspltw $vD, $vB, $UIMM", VecPerm,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef),
|
||||
VSPLTW_shuffle_mask:$UIMM))]>;
|
||||
[(set VRRC:$vD,
|
||||
(vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
|
||||
|
||||
def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
|
||||
def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
|
||||
@ -479,13 +494,13 @@ def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
|
||||
def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
|
||||
def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
|
||||
"vpkuhum $vD, $vA, $vB", VecFP,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
|
||||
VRRC:$vB, VPKUHUM_shuffle_mask))]>;
|
||||
[(set VRRC:$vD,
|
||||
(vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
|
||||
def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
|
||||
def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
|
||||
"vpkuwum $vD, $vA, $vB", VecFP,
|
||||
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
|
||||
VRRC:$vB, VPKUWUM_shuffle_mask))]>;
|
||||
[(set VRRC:$vD,
|
||||
(vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
|
||||
def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
|
||||
|
||||
// Vector Unpack.
|
||||
@ -603,25 +618,25 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||
// Shuffles.
|
||||
|
||||
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
|
||||
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VSLDOI_unary_shuffle_mask:$in),
|
||||
(VSLDOI VRRC:$vA, VRRC:$vA, VSLDOI_unary_shuffle_mask:$in)>;
|
||||
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUWUM_unary_shuffle_mask:$in),
|
||||
def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
|
||||
(VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
|
||||
def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
|
||||
(VPKUWUM VRRC:$vA, VRRC:$vA)>;
|
||||
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUHUM_unary_shuffle_mask:$in),
|
||||
def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
|
||||
(VPKUHUM VRRC:$vA, VRRC:$vA)>;
|
||||
|
||||
// Match vmrg*(x,x)
|
||||
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLB_unary_shuffle_mask:$in),
|
||||
def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
|
||||
(VMRGLB VRRC:$vA, VRRC:$vA)>;
|
||||
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLH_unary_shuffle_mask:$in),
|
||||
def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
|
||||
(VMRGLH VRRC:$vA, VRRC:$vA)>;
|
||||
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLW_unary_shuffle_mask:$in),
|
||||
def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
|
||||
(VMRGLW VRRC:$vA, VRRC:$vA)>;
|
||||
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHB_unary_shuffle_mask:$in),
|
||||
def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
|
||||
(VMRGHB VRRC:$vA, VRRC:$vA)>;
|
||||
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHH_unary_shuffle_mask:$in),
|
||||
def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
|
||||
(VMRGHH VRRC:$vA, VRRC:$vA)>;
|
||||
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHW_unary_shuffle_mask:$in),
|
||||
def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
|
||||
(VMRGHW VRRC:$vA, VRRC:$vA)>;
|
||||
|
||||
// Logical Operations
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -230,7 +230,8 @@ namespace llvm {
|
||||
|
||||
// VSHL, VSRL - Vector logical left / right shift.
|
||||
VSHL, VSRL,
|
||||
|
||||
|
||||
// CMPPD, CMPPS - Vector double/float comparison.
|
||||
// CMPPD, CMPPS - Vector double/float comparison.
|
||||
CMPPD, CMPPS,
|
||||
|
||||
@ -251,80 +252,72 @@ namespace llvm {
|
||||
namespace X86 {
|
||||
/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
|
||||
bool isPSHUFDMask(SDNode *N);
|
||||
bool isPSHUFDMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
|
||||
bool isPSHUFHWMask(SDNode *N);
|
||||
bool isPSHUFHWMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
|
||||
bool isPSHUFLWMask(SDNode *N);
|
||||
bool isPSHUFLWMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
|
||||
bool isSHUFPMask(SDNode *N);
|
||||
bool isSHUFPMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
|
||||
bool isMOVHLPSMask(SDNode *N);
|
||||
bool isMOVHLPSMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
|
||||
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
|
||||
/// <2, 3, 2, 3>
|
||||
bool isMOVHLPS_v_undef_Mask(SDNode *N);
|
||||
bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
|
||||
bool isMOVLPMask(SDNode *N);
|
||||
/// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
|
||||
bool isMOVLPMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
|
||||
/// specifies a shuffle of elements that is suitable for MOVHP{S|D}
|
||||
/// as well as MOVLHPS.
|
||||
bool isMOVHPMask(SDNode *N);
|
||||
bool isMOVHPMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
|
||||
bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false);
|
||||
bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
|
||||
|
||||
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
|
||||
bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false);
|
||||
bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
|
||||
|
||||
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
|
||||
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
|
||||
/// <0, 0, 1, 1>
|
||||
bool isUNPCKL_v_undef_Mask(SDNode *N);
|
||||
bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
|
||||
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
|
||||
/// <2, 2, 3, 3>
|
||||
bool isUNPCKH_v_undef_Mask(SDNode *N);
|
||||
bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVSS,
|
||||
/// MOVSD, and MOVD, i.e. setting the lowest element.
|
||||
bool isMOVLMask(SDNode *N);
|
||||
bool isMOVLMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
|
||||
bool isMOVSHDUPMask(SDNode *N);
|
||||
bool isMOVSHDUPMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
|
||||
bool isMOVSLDUPMask(SDNode *N);
|
||||
|
||||
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a splat of a single element.
|
||||
bool isSplatMask(SDNode *N);
|
||||
|
||||
/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a splat of zero element.
|
||||
bool isSplatLoMask(SDNode *N);
|
||||
bool isMOVSLDUPMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
|
||||
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
|
||||
bool isMOVDDUPMask(SDNode *N);
|
||||
bool isMOVDDUPMask(ShuffleVectorSDNode *N);
|
||||
|
||||
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
|
||||
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
|
||||
@ -477,14 +470,13 @@ namespace llvm {
|
||||
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
|
||||
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
|
||||
/// values are assumed to be legal.
|
||||
virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const;
|
||||
virtual bool isShuffleMaskLegal(const int *Mask, MVT VT) const;
|
||||
|
||||
/// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
|
||||
/// used by targets to indicate if there is a suitable
|
||||
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
|
||||
/// pool entry.
|
||||
virtual bool isVectorClearMaskLegal(const std::vector<SDValue> &BVOps,
|
||||
MVT EVT, SelectionDAG &DAG) const;
|
||||
virtual bool isVectorClearMaskLegal(const int *Mask, MVT VT) const;
|
||||
|
||||
/// ShouldShrinkFPConstant - If true, then instruction selection should
|
||||
/// seek to shrink the FP constant of the specified type to a smaller type
|
||||
|
@ -3801,6 +3801,7 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
|
||||
(implicit EFLAGS)),
|
||||
(DEC32m addr:$dst)>, Requires<[In32BitMode]>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Floating Point Stack Support
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -30,33 +30,37 @@ def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
|
||||
|
||||
// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
|
||||
// PSHUFW imm.
|
||||
def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
|
||||
def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
|
||||
return getI8Imm(X86::getShuffleSHUFImmediate(N));
|
||||
}]>;
|
||||
|
||||
// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
|
||||
def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isUNPCKHMask(N);
|
||||
def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
// Patterns for: vector_shuffle v1, v2, <0, 4, 1, 5, ...>
|
||||
def MMX_UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isUNPCKLMask(N);
|
||||
def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
|
||||
def MMX_UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isUNPCKH_v_undef_Mask(N);
|
||||
def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
|
||||
def MMX_UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isUNPCKL_v_undef_Mask(N);
|
||||
def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
// Patterns for shuffling.
|
||||
def MMX_PSHUFW_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isPSHUFDMask(N);
|
||||
def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
|
||||
}], MMX_SHUFFLE_get_shuf_imm>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -185,9 +189,8 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src),
|
||||
def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
|
||||
"movq2dq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (vector_shuffle immAllZerosV,
|
||||
(v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))),
|
||||
MOVL_shuffle_mask)))]>;
|
||||
(movl immAllZerosV,
|
||||
(v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>;
|
||||
|
||||
let neverHasSideEffects = 1 in
|
||||
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src),
|
||||
@ -319,86 +322,74 @@ let isTwoAddress = 1 in {
|
||||
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
|
||||
"punpckhbw\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
(v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
|
||||
def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
|
||||
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
|
||||
"punpckhbw\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v8i8 (vector_shuffle VR64:$src1,
|
||||
(bc_v8i8 (load_mmx addr:$src2)),
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
(v8i8 (mmx_unpckh VR64:$src1,
|
||||
(bc_v8i8 (load_mmx addr:$src2)))))]>;
|
||||
|
||||
def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
|
||||
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
|
||||
"punpckhwd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
(v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
|
||||
def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
|
||||
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
|
||||
"punpckhwd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v4i16 (vector_shuffle VR64:$src1,
|
||||
(bc_v4i16 (load_mmx addr:$src2)),
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
(v4i16 (mmx_unpckh VR64:$src1,
|
||||
(bc_v4i16 (load_mmx addr:$src2)))))]>;
|
||||
|
||||
def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
|
||||
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
|
||||
"punpckhdq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
(v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
|
||||
def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
|
||||
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
|
||||
"punpckhdq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v2i32 (vector_shuffle VR64:$src1,
|
||||
(bc_v2i32 (load_mmx addr:$src2)),
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
(v2i32 (mmx_unpckh VR64:$src1,
|
||||
(bc_v2i32 (load_mmx addr:$src2)))))]>;
|
||||
|
||||
// Unpack Low Packed Data Instructions
|
||||
def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
|
||||
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
|
||||
"punpcklbw\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
|
||||
MMX_UNPCKL_shuffle_mask)))]>;
|
||||
(v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
|
||||
def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
|
||||
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
|
||||
"punpcklbw\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v8i8 (vector_shuffle VR64:$src1,
|
||||
(bc_v8i8 (load_mmx addr:$src2)),
|
||||
MMX_UNPCKL_shuffle_mask)))]>;
|
||||
(v8i8 (mmx_unpckl VR64:$src1,
|
||||
(bc_v8i8 (load_mmx addr:$src2)))))]>;
|
||||
|
||||
def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
|
||||
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
|
||||
"punpcklwd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
|
||||
MMX_UNPCKL_shuffle_mask)))]>;
|
||||
(v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
|
||||
def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
|
||||
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
|
||||
"punpcklwd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v4i16 (vector_shuffle VR64:$src1,
|
||||
(bc_v4i16 (load_mmx addr:$src2)),
|
||||
MMX_UNPCKL_shuffle_mask)))]>;
|
||||
(v4i16 (mmx_unpckl VR64:$src1,
|
||||
(bc_v4i16 (load_mmx addr:$src2)))))]>;
|
||||
|
||||
def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
|
||||
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
|
||||
"punpckldq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
|
||||
MMX_UNPCKL_shuffle_mask)))]>;
|
||||
(v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
|
||||
def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
|
||||
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
|
||||
"punpckldq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v2i32 (vector_shuffle VR64:$src1,
|
||||
(bc_v2i32 (load_mmx addr:$src2)),
|
||||
MMX_UNPCKL_shuffle_mask)))]>;
|
||||
(v2i32 (mmx_unpckl VR64:$src1,
|
||||
(bc_v2i32 (load_mmx addr:$src2)))))]>;
|
||||
}
|
||||
|
||||
// -- Pack Instructions
|
||||
@ -411,17 +402,13 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
|
||||
(outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
|
||||
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v4i16 (vector_shuffle
|
||||
VR64:$src1, (undef),
|
||||
MMX_PSHUFW_shuffle_mask:$src2)))]>;
|
||||
(v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
|
||||
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
|
||||
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
|
||||
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v4i16 (vector_shuffle
|
||||
(bc_v4i16 (load_mmx addr:$src1)),
|
||||
(undef),
|
||||
MMX_PSHUFW_shuffle_mask:$src2)))]>;
|
||||
(mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
|
||||
(undef)))]>;
|
||||
|
||||
// -- Conversion Instructions
|
||||
let neverHasSideEffects = 1 in {
|
||||
@ -627,34 +614,27 @@ def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
|
||||
|
||||
// Patterns to perform canonical versions of vector shuffling.
|
||||
let AddedComplexity = 10 in {
|
||||
def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef),
|
||||
MMX_UNPCKL_v_undef_shuffle_mask)),
|
||||
def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
|
||||
(MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
|
||||
def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef),
|
||||
MMX_UNPCKL_v_undef_shuffle_mask)),
|
||||
def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
|
||||
(MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
|
||||
def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef),
|
||||
MMX_UNPCKL_v_undef_shuffle_mask)),
|
||||
def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
|
||||
(MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 10 in {
|
||||
def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef),
|
||||
MMX_UNPCKH_v_undef_shuffle_mask)),
|
||||
def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))),
|
||||
(MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
|
||||
def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef),
|
||||
MMX_UNPCKH_v_undef_shuffle_mask)),
|
||||
def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
|
||||
(MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
|
||||
def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef),
|
||||
MMX_UNPCKH_v_undef_shuffle_mask)),
|
||||
def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
|
||||
(MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
|
||||
}
|
||||
|
||||
// Patterns to perform vector shuffling with a zeroed out vector.
|
||||
let AddedComplexity = 20 in {
|
||||
def : Pat<(bc_v2i32 (vector_shuffle immAllZerosV,
|
||||
(v2i32 (scalar_to_vector (load_mmx addr:$src))),
|
||||
MMX_UNPCKL_shuffle_mask)),
|
||||
def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV,
|
||||
(v2i32 (scalar_to_vector (load_mmx addr:$src))))),
|
||||
(MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
|
||||
}
|
||||
|
||||
|
@ -175,103 +175,108 @@ def PSxLDQ_imm : SDNodeXForm<imm, [{
|
||||
|
||||
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
|
||||
// SHUFP* etc. imm.
|
||||
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
|
||||
def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
|
||||
return getI8Imm(X86::getShuffleSHUFImmediate(N));
|
||||
}]>;
|
||||
|
||||
// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
|
||||
// PSHUFHW imm.
|
||||
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
|
||||
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
|
||||
return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
|
||||
}]>;
|
||||
|
||||
// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
|
||||
// PSHUFLW imm.
|
||||
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
|
||||
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
|
||||
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
|
||||
}]>;
|
||||
|
||||
def SSE_splat_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isSplatMask(N);
|
||||
def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
|
||||
return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
|
||||
}]>;
|
||||
|
||||
def movddup : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def movhp : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def movlp : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def movl : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
|
||||
}], SHUFFLE_get_shuf_imm>;
|
||||
|
||||
def SSE_splat_lo_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isSplatLoMask(N);
|
||||
}]>;
|
||||
|
||||
def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isMOVDDUPMask(N);
|
||||
}]>;
|
||||
|
||||
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isMOVHLPSMask(N);
|
||||
}]>;
|
||||
|
||||
def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isMOVHLPS_v_undef_Mask(N);
|
||||
}]>;
|
||||
|
||||
def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isMOVHPMask(N);
|
||||
}]>;
|
||||
|
||||
def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isMOVLPMask(N);
|
||||
}]>;
|
||||
|
||||
def MOVL_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isMOVLMask(N);
|
||||
}]>;
|
||||
|
||||
def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isMOVSHDUPMask(N);
|
||||
}]>;
|
||||
|
||||
def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isMOVSLDUPMask(N);
|
||||
}]>;
|
||||
|
||||
def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isUNPCKLMask(N);
|
||||
}]>;
|
||||
|
||||
def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isUNPCKHMask(N);
|
||||
}]>;
|
||||
|
||||
def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isUNPCKL_v_undef_Mask(N);
|
||||
}]>;
|
||||
|
||||
def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isUNPCKH_v_undef_Mask(N);
|
||||
}]>;
|
||||
|
||||
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isPSHUFDMask(N);
|
||||
def shufp : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
|
||||
}], SHUFFLE_get_shuf_imm>;
|
||||
|
||||
def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isPSHUFHWMask(N);
|
||||
def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
|
||||
}], SHUFFLE_get_pshufhw_imm>;
|
||||
|
||||
def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isPSHUFLWMask(N);
|
||||
def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(vector_shuffle node:$lhs, node:$rhs), [{
|
||||
return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
|
||||
}], SHUFFLE_get_pshuflw_imm>;
|
||||
|
||||
def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isPSHUFDMask(N);
|
||||
}], SHUFFLE_get_shuf_imm>;
|
||||
|
||||
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isSHUFPMask(N);
|
||||
}], SHUFFLE_get_shuf_imm>;
|
||||
|
||||
def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isSHUFPMask(N);
|
||||
}], SHUFFLE_get_shuf_imm>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE scalar FP Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -704,16 +709,14 @@ let Constraints = "$src1 = $dst" in {
|
||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||
"movlps\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle VR128:$src1,
|
||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
|
||||
MOVLP_shuffle_mask)))]>;
|
||||
(movlp VR128:$src1,
|
||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
|
||||
def MOVHPSrm : PSI<0x16, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||
"movhps\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle VR128:$src1,
|
||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
|
||||
MOVHP_shuffle_mask)))]>;
|
||||
(movhp VR128:$src1,
|
||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
|
||||
} // AddedComplexity
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
@ -728,29 +731,25 @@ def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
||||
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
||||
"movhps\t{$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract
|
||||
(v2f64 (vector_shuffle
|
||||
(bc_v2f64 (v4f32 VR128:$src)), (undef),
|
||||
UNPCKH_shuffle_mask)), (iPTR 0))),
|
||||
addr:$dst)]>;
|
||||
(unpckh (bc_v2f64 (v4f32 VR128:$src)),
|
||||
(undef)), (iPTR 0))), addr:$dst)]>;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
let AddedComplexity = 20 in {
|
||||
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"movlhps\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
MOVHP_shuffle_mask)))]>;
|
||||
(v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
|
||||
|
||||
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"movhlps\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
MOVHLPS_shuffle_mask)))]>;
|
||||
(v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
|
||||
} // AddedComplexity
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
let AddedComplexity = 20 in
|
||||
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)),
|
||||
def : Pat<(v4f32 (movddup VR128:$src, (undef))),
|
||||
(MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
|
||||
|
||||
|
||||
@ -908,51 +907,41 @@ let Constraints = "$src1 = $dst" in {
|
||||
let isConvertibleToThreeAddress = 1 in // Convert to pshufd
|
||||
def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1,
|
||||
VR128:$src2, i32i8imm:$src3),
|
||||
VR128:$src2, i8imm:$src3),
|
||||
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle
|
||||
VR128:$src1, VR128:$src2,
|
||||
SHUFP_shuffle_mask:$src3)))]>;
|
||||
(v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
|
||||
def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1,
|
||||
f128mem:$src2, i32i8imm:$src3),
|
||||
f128mem:$src2, i8imm:$src3),
|
||||
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle
|
||||
VR128:$src1, (memopv4f32 addr:$src2),
|
||||
SHUFP_shuffle_mask:$src3)))]>;
|
||||
(v4f32 (shufp:$src3
|
||||
VR128:$src1, (memopv4f32 addr:$src2))))]>;
|
||||
|
||||
let AddedComplexity = 10 in {
|
||||
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"unpckhps\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle
|
||||
VR128:$src1, VR128:$src2,
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||
"unpckhps\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle
|
||||
VR128:$src1, (memopv4f32 addr:$src2),
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(v4f32 (unpckh VR128:$src1,
|
||||
(memopv4f32 addr:$src2))))]>;
|
||||
|
||||
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"unpcklps\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle
|
||||
VR128:$src1, VR128:$src2,
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||
"unpcklps\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle
|
||||
VR128:$src1, (memopv4f32 addr:$src2),
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
|
||||
} // AddedComplexity
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
@ -1044,8 +1033,7 @@ let neverHasSideEffects = 1 in
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"movss\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
MOVL_shuffle_mask)))]>;
|
||||
(v4f32 (movl VR128:$src1, VR128:$src2)))]>;
|
||||
}
|
||||
|
||||
// Move to lower bits of a VR128 and zeroing upper bits.
|
||||
@ -1451,16 +1439,14 @@ let Constraints = "$src1 = $dst" in {
|
||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||
"movlpd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (vector_shuffle VR128:$src1,
|
||||
(scalar_to_vector (loadf64 addr:$src2)),
|
||||
MOVLP_shuffle_mask)))]>;
|
||||
(v2f64 (movlp VR128:$src1,
|
||||
(scalar_to_vector (loadf64 addr:$src2)))))]>;
|
||||
def MOVHPDrm : PDI<0x16, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||
"movhpd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (vector_shuffle VR128:$src1,
|
||||
(scalar_to_vector (loadf64 addr:$src2)),
|
||||
MOVHP_shuffle_mask)))]>;
|
||||
(v2f64 (movhp VR128:$src1,
|
||||
(scalar_to_vector (loadf64 addr:$src2)))))]>;
|
||||
} // AddedComplexity
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
@ -1474,9 +1460,8 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
||||
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
||||
"movhpd\t{$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract
|
||||
(v2f64 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKH_shuffle_mask)), (iPTR 0))),
|
||||
addr:$dst)]>;
|
||||
(v2f64 (unpckh VR128:$src, (undef))),
|
||||
(iPTR 0))), addr:$dst)]>;
|
||||
|
||||
// SSE2 instructions without OpSize prefix
|
||||
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
@ -1744,48 +1729,39 @@ let Constraints = "$src1 = $dst" in {
|
||||
def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||
[(set VR128:$dst, (v2f64 (vector_shuffle
|
||||
VR128:$src1, VR128:$src2,
|
||||
SHUFP_shuffle_mask:$src3)))]>;
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
|
||||
def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1,
|
||||
f128mem:$src2, i8imm:$src3),
|
||||
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (vector_shuffle
|
||||
VR128:$src1, (memopv2f64 addr:$src2),
|
||||
SHUFP_shuffle_mask:$src3)))]>;
|
||||
(v2f64 (shufp:$src3
|
||||
VR128:$src1, (memopv2f64 addr:$src2))))]>;
|
||||
|
||||
let AddedComplexity = 10 in {
|
||||
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"unpckhpd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (vector_shuffle
|
||||
VR128:$src1, VR128:$src2,
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||
"unpckhpd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (vector_shuffle
|
||||
VR128:$src1, (memopv2f64 addr:$src2),
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(v2f64 (unpckh VR128:$src1,
|
||||
(memopv2f64 addr:$src2))))]>;
|
||||
|
||||
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"unpcklpd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (vector_shuffle
|
||||
VR128:$src1, VR128:$src2,
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||
"unpcklpd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (vector_shuffle
|
||||
VR128:$src1, (memopv2f64 addr:$src2),
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
|
||||
} // AddedComplexity
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
@ -2043,49 +2019,43 @@ defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
|
||||
def PSHUFDri : PDIi8<0x70, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
|
||||
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst, (v4i32 (vector_shuffle
|
||||
VR128:$src1, (undef),
|
||||
PSHUFD_shuffle_mask:$src2)))]>;
|
||||
[(set VR128:$dst, (v4i32 (pshufd:$src2
|
||||
VR128:$src1, (undef))))]>;
|
||||
def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
|
||||
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst, (v4i32 (vector_shuffle
|
||||
[(set VR128:$dst, (v4i32 (pshufd:$src2
|
||||
(bc_v4i32(memopv2i64 addr:$src1)),
|
||||
(undef),
|
||||
PSHUFD_shuffle_mask:$src2)))]>;
|
||||
(undef))))]>;
|
||||
|
||||
// SSE2 with ImmT == Imm8 and XS prefix.
|
||||
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
|
||||
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst, (v8i16 (vector_shuffle
|
||||
VR128:$src1, (undef),
|
||||
PSHUFHW_shuffle_mask:$src2)))]>,
|
||||
[(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
|
||||
(undef))))]>,
|
||||
XS, Requires<[HasSSE2]>;
|
||||
def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
|
||||
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst, (v8i16 (vector_shuffle
|
||||
(bc_v8i16 (memopv2i64 addr:$src1)),
|
||||
(undef),
|
||||
PSHUFHW_shuffle_mask:$src2)))]>,
|
||||
[(set VR128:$dst, (v8i16 (pshufhw:$src2
|
||||
(bc_v8i16 (memopv2i64 addr:$src1)),
|
||||
(undef))))]>,
|
||||
XS, Requires<[HasSSE2]>;
|
||||
|
||||
// SSE2 with ImmT == Imm8 and XD prefix.
|
||||
def PSHUFLWri : Ii8<0x70, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
|
||||
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
|
||||
"pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst, (v8i16 (vector_shuffle
|
||||
VR128:$src1, (undef),
|
||||
PSHUFLW_shuffle_mask:$src2)))]>,
|
||||
[(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
|
||||
(undef))))]>,
|
||||
XD, Requires<[HasSSE2]>;
|
||||
def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2),
|
||||
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
|
||||
"pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst, (v8i16 (vector_shuffle
|
||||
(bc_v8i16 (memopv2i64 addr:$src1)),
|
||||
(undef),
|
||||
PSHUFLW_shuffle_mask:$src2)))]>,
|
||||
[(set VR128:$dst, (v8i16 (pshuflw:$src2
|
||||
(bc_v8i16 (memopv2i64 addr:$src1)),
|
||||
(undef))))]>,
|
||||
XD, Requires<[HasSSE2]>;
|
||||
|
||||
|
||||
@ -2094,107 +2064,91 @@ let Constraints = "$src1 = $dst" in {
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"punpcklbw\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||
"punpcklbw\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v16i8 (vector_shuffle VR128:$src1,
|
||||
(bc_v16i8 (memopv2i64 addr:$src2)),
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(unpckl VR128:$src1,
|
||||
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
|
||||
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"punpcklwd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||
"punpcklwd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v8i16 (vector_shuffle VR128:$src1,
|
||||
(bc_v8i16 (memopv2i64 addr:$src2)),
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(unpckl VR128:$src1,
|
||||
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
|
||||
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"punpckldq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||
"punpckldq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (vector_shuffle VR128:$src1,
|
||||
(bc_v4i32 (memopv2i64 addr:$src2)),
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(unpckl VR128:$src1,
|
||||
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
|
||||
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (vector_shuffle VR128:$src1,
|
||||
(memopv2i64 addr:$src2),
|
||||
UNPCKL_shuffle_mask)))]>;
|
||||
(v2i64 (unpckl VR128:$src1,
|
||||
(memopv2i64 addr:$src2))))]>;
|
||||
|
||||
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"punpckhbw\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||
"punpckhbw\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v16i8 (vector_shuffle VR128:$src1,
|
||||
(bc_v16i8 (memopv2i64 addr:$src2)),
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
[(set VR128:$dst,
|
||||
(unpckh VR128:$src1,
|
||||
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
|
||||
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"punpckhwd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||
def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||
"punpckhwd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v8i16 (vector_shuffle VR128:$src1,
|
||||
(bc_v8i16 (memopv2i64 addr:$src2)),
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(unpckh VR128:$src1,
|
||||
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
|
||||
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"punpckhdq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||
def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||
"punpckhdq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (vector_shuffle VR128:$src1,
|
||||
(bc_v4i32 (memopv2i64 addr:$src2)),
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(unpckh VR128:$src1,
|
||||
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
|
||||
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (vector_shuffle VR128:$src1,
|
||||
(memopv2i64 addr:$src2),
|
||||
UNPCKH_shuffle_mask)))]>;
|
||||
(v2i64 (unpckh VR128:$src1,
|
||||
(memopv2i64 addr:$src2))))]>;
|
||||
}
|
||||
|
||||
// Extract / Insert
|
||||
@ -2357,8 +2311,7 @@ let Constraints = "$src1 = $dst" in {
|
||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||
"movsd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
MOVL_shuffle_mask)))]>;
|
||||
(v2f64 (movl VR128:$src1, VR128:$src2)))]>;
|
||||
}
|
||||
|
||||
// Store / copy lower 64-bits of a XMM register.
|
||||
@ -2449,44 +2402,35 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
|
||||
// Move Instructions
|
||||
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"movshdup\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (v4f32 (vector_shuffle
|
||||
VR128:$src, (undef),
|
||||
MOVSHDUP_shuffle_mask)))]>;
|
||||
[(set VR128:$dst, (v4f32 (movshdup
|
||||
VR128:$src, (undef))))]>;
|
||||
def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
"movshdup\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (v4f32 (vector_shuffle
|
||||
(memopv4f32 addr:$src), (undef),
|
||||
MOVSHDUP_shuffle_mask)))]>;
|
||||
[(set VR128:$dst, (movshdup
|
||||
(memopv4f32 addr:$src), (undef)))]>;
|
||||
|
||||
def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"movsldup\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (v4f32 (vector_shuffle
|
||||
VR128:$src, (undef),
|
||||
MOVSLDUP_shuffle_mask)))]>;
|
||||
[(set VR128:$dst, (v4f32 (movsldup
|
||||
VR128:$src, (undef))))]>;
|
||||
def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
"movsldup\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (v4f32 (vector_shuffle
|
||||
(memopv4f32 addr:$src), (undef),
|
||||
MOVSLDUP_shuffle_mask)))]>;
|
||||
[(set VR128:$dst, (movsldup
|
||||
(memopv4f32 addr:$src), (undef)))]>;
|
||||
|
||||
def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"movddup\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (vector_shuffle VR128:$src, (undef),
|
||||
MOVDDUP_shuffle_mask)))]>;
|
||||
[(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
|
||||
def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
|
||||
"movddup\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (vector_shuffle
|
||||
(scalar_to_vector (loadf64 addr:$src)),
|
||||
(undef), MOVDDUP_shuffle_mask)))]>;
|
||||
(v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
|
||||
(undef))))]>;
|
||||
|
||||
def : Pat<(vector_shuffle
|
||||
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
|
||||
(undef), MOVDDUP_shuffle_mask),
|
||||
def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
|
||||
(undef)),
|
||||
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
|
||||
def : Pat<(vector_shuffle
|
||||
(memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask),
|
||||
def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
|
||||
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
|
||||
|
||||
|
||||
@ -2555,22 +2499,18 @@ def MWAIT : I<0xC9, RawFrm, (outs), (ins), "mwait",
|
||||
|
||||
// vector_shuffle v1, <undef> <1, 1, 3, 3>
|
||||
let AddedComplexity = 15 in
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
|
||||
MOVSHDUP_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
|
||||
(MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
|
||||
let AddedComplexity = 20 in
|
||||
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
|
||||
MOVSHDUP_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
|
||||
(MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
|
||||
|
||||
// vector_shuffle v1, <undef> <0, 0, 2, 2>
|
||||
let AddedComplexity = 15 in
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
|
||||
MOVSLDUP_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
|
||||
(MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
|
||||
let AddedComplexity = 20 in
|
||||
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
|
||||
MOVSLDUP_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
|
||||
(MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -2911,207 +2851,173 @@ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
|
||||
|
||||
// Splat v2f64 / v2i64
|
||||
let AddedComplexity = 10 in {
|
||||
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
|
||||
def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
|
||||
(UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
|
||||
def : Pat<(unpckh (v2f64 VR128:$src), (undef)),
|
||||
(UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
|
||||
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
|
||||
(PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
|
||||
def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
|
||||
(PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
}
|
||||
|
||||
// Special unary SHUFPSrri case.
|
||||
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
|
||||
SHUFP_unary_shuffle_mask:$sm)),
|
||||
(SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
|
||||
def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
|
||||
(SHUFPSrri VR128:$src1, VR128:$src1,
|
||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||
Requires<[HasSSE1]>;
|
||||
let AddedComplexity = 5 in
|
||||
def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
|
||||
(PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
|
||||
Requires<[HasSSE2]>;
|
||||
// Special unary SHUFPDrri case.
|
||||
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef),
|
||||
SHUFP_unary_shuffle_mask:$sm)),
|
||||
(SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
|
||||
def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
|
||||
(SHUFPDrri VR128:$src1, VR128:$src1,
|
||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||
Requires<[HasSSE2]>;
|
||||
// Special unary SHUFPDrri case.
|
||||
def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
|
||||
(SHUFPDrri VR128:$src1, VR128:$src1,
|
||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||
Requires<[HasSSE2]>;
|
||||
// Unary v4f32 shuffle with PSHUF* in order to fold a load.
|
||||
def : Pat<(vector_shuffle (bc_v4i32 (memopv4f32 addr:$src1)), (undef),
|
||||
SHUFP_unary_shuffle_mask:$sm),
|
||||
(PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
|
||||
def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
|
||||
(PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
|
||||
Requires<[HasSSE2]>;
|
||||
|
||||
// Special binary v4i32 shuffle cases with SHUFPS.
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2),
|
||||
PSHUFD_binary_shuffle_mask:$sm)),
|
||||
(SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
|
||||
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
|
||||
(SHUFPSrri VR128:$src1, VR128:$src2,
|
||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||
Requires<[HasSSE2]>;
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1,
|
||||
(bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm)),
|
||||
(SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
|
||||
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||
(SHUFPSrmi VR128:$src1, addr:$src2,
|
||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||
Requires<[HasSSE2]>;
|
||||
// Special binary v2i64 shuffle cases using SHUFPDrri.
|
||||
def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
SHUFP_shuffle_mask:$sm)),
|
||||
(SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>,
|
||||
def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
|
||||
(SHUFPDrri VR128:$src1, VR128:$src2,
|
||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||
Requires<[HasSSE2]>;
|
||||
// Special unary SHUFPDrri case.
|
||||
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef),
|
||||
SHUFP_unary_shuffle_mask:$sm)),
|
||||
(SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
|
||||
Requires<[HasSSE2]>;
|
||||
|
||||
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
|
||||
let AddedComplexity = 15 in {
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKL_v_undef_shuffle_mask:$sm)),
|
||||
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
|
||||
def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))),
|
||||
(PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
|
||||
Requires<[OptForSpeed, HasSSE2]>;
|
||||
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKL_v_undef_shuffle_mask:$sm)),
|
||||
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
|
||||
def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
|
||||
(PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
|
||||
Requires<[OptForSpeed, HasSSE2]>;
|
||||
}
|
||||
let AddedComplexity = 10 in {
|
||||
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKL_v_undef_shuffle_mask)),
|
||||
def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
|
||||
(UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKL_v_undef_shuffle_mask)),
|
||||
def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
|
||||
(PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKL_v_undef_shuffle_mask)),
|
||||
def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
|
||||
(PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKL_v_undef_shuffle_mask)),
|
||||
def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
|
||||
(PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
}
|
||||
|
||||
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
|
||||
let AddedComplexity = 15 in {
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKH_v_undef_shuffle_mask:$sm)),
|
||||
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
|
||||
def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))),
|
||||
(PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
|
||||
Requires<[OptForSpeed, HasSSE2]>;
|
||||
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKH_v_undef_shuffle_mask:$sm)),
|
||||
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
|
||||
def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
|
||||
(PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
|
||||
Requires<[OptForSpeed, HasSSE2]>;
|
||||
}
|
||||
let AddedComplexity = 10 in {
|
||||
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKH_v_undef_shuffle_mask)),
|
||||
def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
|
||||
(UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKH_v_undef_shuffle_mask)),
|
||||
def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
|
||||
(PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKH_v_undef_shuffle_mask)),
|
||||
def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
|
||||
(PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
|
||||
UNPCKH_v_undef_shuffle_mask)),
|
||||
def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
|
||||
(PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 20 in {
|
||||
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
MOVHP_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)),
|
||||
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
MOVHLPS_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
|
||||
(MOVHLPSrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
|
||||
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
|
||||
MOVHLPS_v_undef_shuffle_mask)),
|
||||
def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
|
||||
(MOVHLPSrr VR128:$src1, VR128:$src1)>;
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef),
|
||||
MOVHLPS_v_undef_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
|
||||
(MOVHLPSrr VR128:$src1, VR128:$src1)>;
|
||||
}
|
||||
|
||||
let AddedComplexity = 20 in {
|
||||
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
|
||||
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
|
||||
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
|
||||
MOVLP_shuffle_mask)),
|
||||
def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
|
||||
MOVLP_shuffle_mask)),
|
||||
def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
|
||||
MOVHP_shuffle_mask)),
|
||||
def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))),
|
||||
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
|
||||
MOVHP_shuffle_mask)),
|
||||
def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))),
|
||||
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
|
||||
MOVLP_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
|
||||
MOVLP_shuffle_mask)),
|
||||
def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
|
||||
MOVHP_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))),
|
||||
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
|
||||
MOVHP_shuffle_mask)),
|
||||
def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))),
|
||||
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
}
|
||||
|
||||
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
|
||||
// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
|
||||
def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2,
|
||||
MOVLP_shuffle_mask)), addr:$src1),
|
||||
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2,
|
||||
MOVLP_shuffle_mask)), addr:$src1),
|
||||
def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2,
|
||||
MOVHP_shuffle_mask)), addr:$src1),
|
||||
def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2,
|
||||
MOVHP_shuffle_mask)), addr:$src1),
|
||||
def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
|
||||
def : Pat<(store (v4i32 (vector_shuffle
|
||||
(bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2,
|
||||
MOVLP_shuffle_mask)), addr:$src1),
|
||||
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
|
||||
addr:$src1),
|
||||
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2,
|
||||
MOVLP_shuffle_mask)), addr:$src1),
|
||||
def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v4i32 (vector_shuffle
|
||||
(bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2,
|
||||
MOVHP_shuffle_mask)), addr:$src1),
|
||||
def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
|
||||
addr:$src1),
|
||||
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2,
|
||||
MOVHP_shuffle_mask)), addr:$src1),
|
||||
def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
|
||||
|
||||
let AddedComplexity = 15 in {
|
||||
// Setting the lowest element in the vector.
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
MOVL_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
|
||||
(MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
MOVL_shuffle_mask)),
|
||||
def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
|
||||
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
|
||||
// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
|
||||
def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
MOVLP_shuffle_mask)),
|
||||
def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
|
||||
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||
MOVLP_shuffle_mask)),
|
||||
def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
|
||||
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
}
|
||||
|
||||
// Set lowest element and zero upper elements.
|
||||
let AddedComplexity = 15 in
|
||||
def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
|
||||
MOVL_shuffle_mask)),
|
||||
def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
|
||||
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
|
||||
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
|
||||
|
@ -1,5 +1,7 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep and
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | grep psrldq
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
|
||||
; RUN: not grep and %t
|
||||
; RUN: not grep psrldq %t
|
||||
; RUN: grep xorps %t
|
||||
|
||||
define <4 x float> @test(<4 x float>* %v1) nounwind {
|
||||
%tmp = load <4 x float>* %v1 ; <<4 x float>> [#uses=1]
|
||||
|
@ -1,9 +1,7 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \
|
||||
; RUN: grep unpcklps | count 1
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \
|
||||
; RUN: grep unpckhps | count 1
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \
|
||||
; RUN: not grep {sub.*esp}
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
|
||||
; RUN: grep unpcklps %t | count 1
|
||||
; RUN: grep pshufd %t | count 1
|
||||
; RUN: not grep {sub.*esp} %t
|
||||
|
||||
define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) {
|
||||
%tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2]
|
||||
|
@ -1,8 +1,10 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 | grep shufps | count 4
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | grep mov | count 2
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 4
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep shufps
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t -f
|
||||
; RUN: grep shufps %t | count 4
|
||||
; RUN: grep movaps %t | count 2
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
|
||||
; RUN: grep pshufd %t | count 4
|
||||
; RUN: not grep shufps %t
|
||||
; RUN: not grep mov %t
|
||||
|
||||
define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind {
|
||||
%tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
|
@ -1,8 +1,7 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -disable-mmx -o %t -f
|
||||
; RUN: grep pshufhw %t | grep 161 | count 1
|
||||
; RUN: grep pslldq %t | count 1
|
||||
|
||||
|
||||
; RUN: grep shufps %t | count 1
|
||||
; RUN: not grep pslldq %t
|
||||
|
||||
; Test case when creating pshufhw, we incorrectly set the higher order bit
|
||||
; for an undef,
|
||||
@ -20,4 +19,4 @@ entry:
|
||||
%0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2>
|
||||
store <4 x i32> %0, <4 x i32>* %dest
|
||||
ret void
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
|
||||
; RUN: grep pextrw %t | count 1
|
||||
; RUN: grep punpcklqdq %t | count 1
|
||||
; RUN: grep movlhps %t | count 1
|
||||
; RUN: grep pshufhw %t | count 1
|
||||
; RUN: grep pinsrw %t | count 1
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f
|
||||
|
@ -194,10 +194,6 @@ SDTypeConstraint::SDTypeConstraint(Record *R) {
|
||||
ConstraintType = SDTCisOpSmallerThanOp;
|
||||
x.SDTCisOpSmallerThanOp_Info.BigOperandNum =
|
||||
R->getValueAsInt("BigOperandNum");
|
||||
} else if (R->isSubClassOf("SDTCisIntVectorOfSameSize")) {
|
||||
ConstraintType = SDTCisIntVectorOfSameSize;
|
||||
x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum =
|
||||
R->getValueAsInt("OtherOpNum");
|
||||
} else if (R->isSubClassOf("SDTCisEltOfVec")) {
|
||||
ConstraintType = SDTCisEltOfVec;
|
||||
x.SDTCisEltOfVec_Info.OtherOperandNum =
|
||||
@ -365,23 +361,9 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
|
||||
}
|
||||
return MadeChange;
|
||||
}
|
||||
case SDTCisIntVectorOfSameSize: {
|
||||
TreePatternNode *OtherOperand =
|
||||
getOperandNum(x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum,
|
||||
N, NumResults);
|
||||
if (OtherOperand->hasTypeSet()) {
|
||||
if (!isVector(OtherOperand->getTypeNum(0)))
|
||||
TP.error(N->getOperator()->getName() + " VT operand must be a vector!");
|
||||
MVT IVT = OtherOperand->getTypeNum(0);
|
||||
unsigned NumElements = IVT.getVectorNumElements();
|
||||
IVT = MVT::getIntVectorWithNumElements(NumElements);
|
||||
return NodeToApply->UpdateNodeType(IVT.getSimpleVT(), TP);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case SDTCisEltOfVec: {
|
||||
TreePatternNode *OtherOperand =
|
||||
getOperandNum(x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum,
|
||||
getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum,
|
||||
N, NumResults);
|
||||
if (OtherOperand->hasTypeSet()) {
|
||||
if (!isVector(OtherOperand->getTypeNum(0)))
|
||||
@ -925,25 +907,6 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
|
||||
if (NI.getNumResults() == 0)
|
||||
MadeChange |= UpdateNodeType(MVT::isVoid, TP);
|
||||
|
||||
// If this is a vector_shuffle operation, apply types to the build_vector
|
||||
// operation. The types of the integers don't matter, but this ensures they
|
||||
// won't get checked.
|
||||
if (getOperator()->getName() == "vector_shuffle" &&
|
||||
getChild(2)->getOperator()->getName() == "build_vector") {
|
||||
TreePatternNode *BV = getChild(2);
|
||||
const std::vector<MVT::SimpleValueType> &LegalVTs
|
||||
= CDP.getTargetInfo().getLegalValueTypes();
|
||||
MVT::SimpleValueType LegalIntVT = MVT::Other;
|
||||
for (unsigned i = 0, e = LegalVTs.size(); i != e; ++i)
|
||||
if (isInteger(LegalVTs[i]) && !isVector(LegalVTs[i])) {
|
||||
LegalIntVT = LegalVTs[i];
|
||||
break;
|
||||
}
|
||||
assert(LegalIntVT != MVT::Other && "No legal integer VT?");
|
||||
|
||||
for (unsigned i = 0, e = BV->getNumChildren(); i != e; ++i)
|
||||
MadeChange |= BV->getChild(i)->UpdateNodeType(LegalIntVT, TP);
|
||||
}
|
||||
return MadeChange;
|
||||
} else if (getOperator()->isSubClassOf("Instruction")) {
|
||||
const DAGInstruction &Inst = CDP.getInstruction(getOperator());
|
||||
@ -2086,6 +2049,9 @@ void CodeGenDAGPatterns::ParsePatterns() {
|
||||
IterateInference |= Result->getTree(0)->
|
||||
UpdateNodeType(Pattern->getTree(0)->getExtTypes(), *Result);
|
||||
} while (IterateInference);
|
||||
|
||||
// Blah?
|
||||
Result->getTree(0)->setTransformFn(Pattern->getTree(0)->getTransformFn());
|
||||
|
||||
// Verify that we inferred enough types that we can do something with the
|
||||
// pattern and result. If these fire the user has to add type casts.
|
||||
|
@ -62,8 +62,7 @@ struct SDTypeConstraint {
|
||||
unsigned OperandNo; // The operand # this constraint applies to.
|
||||
enum {
|
||||
SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisSameAs,
|
||||
SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisIntVectorOfSameSize,
|
||||
SDTCisEltOfVec
|
||||
SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisEltOfVec
|
||||
} ConstraintType;
|
||||
|
||||
union { // The discriminated union.
|
||||
@ -79,9 +78,6 @@ struct SDTypeConstraint {
|
||||
struct {
|
||||
unsigned BigOperandNum;
|
||||
} SDTCisOpSmallerThanOp_Info;
|
||||
struct {
|
||||
unsigned OtherOperandNum;
|
||||
} SDTCisIntVectorOfSameSize_Info;
|
||||
struct {
|
||||
unsigned OtherOperandNum;
|
||||
} SDTCisEltOfVec_Info;
|
||||
|
Loading…
x
Reference in New Issue
Block a user