Remove code that prevented lowering shuffles if they are used by load and themselves used by a extract_vector_elt. This was done to allow the DAG combiner to collapse to a single element load. Unfortunately, sometimes the extract_vector_elt would disappear before DAG combine could do the transformation leaving a vector_shuffle that isel couldn't handle. New code lets the shuffle be converted to a target specific node, but then adds a combine routine that can convert target specific nodes back to vector_shuffles if the folding criteria are met.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153080 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2012-03-20 07:17:59 +00:00
parent a1ffc681ed
commit 89f4e6639d

View File

@ -4346,11 +4346,13 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
/// target specific opcode. Returns true if the Mask could be calculated.
/// Sets IsUnary to true if only uses one source.
static bool getTargetShuffleMask(SDNode *N, EVT VT,
SmallVectorImpl<int> &Mask) {
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
IsUnary = false;
switch(N->getOpcode()) {
case X86ISD::SHUFP:
ImmN = N->getOperand(N->getNumOperands()-1);
@ -4372,14 +4374,17 @@ static bool getTargetShuffleMask(SDNode *N, EVT VT,
case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFHW:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::MOVSS:
case X86ISD::MOVSD: {
@ -4440,8 +4445,9 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
SDValue ImmN;
bool IsUnary;
if (!getTargetShuffleMask(N, VT, ShuffleMask))
if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary))
return SDValue();
Index = ShuffleMask[Index];
@ -6093,88 +6099,6 @@ static bool RelaxedMayFoldVectorLoad(SDValue V) {
return false;
}
/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
/// a vector extract, and if both can be later optimized into a single load.
/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
/// here because otherwise a target specific shuffle node is going to be
/// emitted for this shuffle, and the optimization not done.
/// FIXME: This is probably not the best approach, but fix the problem
/// until the right path is decided.
static
bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
const TargetLowering &TLI) {
EVT VT = V.getValueType();
ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
// Be sure that the vector shuffle is present in a pattern like this:
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
if (!V.hasOneUse())
return false;
SDNode *N = *V.getNode()->use_begin();
if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return false;
SDValue EltNo = N->getOperand(1);
if (!isa<ConstantSDNode>(EltNo))
return false;
// If the bit convert changed the number of elements, it is unsafe
// to examine the mask.
bool HasShuffleIntoBitcast = false;
if (V.getOpcode() == ISD::BITCAST) {
EVT SrcVT = V.getOperand(0).getValueType();
if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
return false;
V = V.getOperand(0);
HasShuffleIntoBitcast = true;
}
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = VT.getVectorNumElements();
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
// If we are accessing the upper part of a YMM register
// then the EXTRACT_VECTOR_ELT is likely to be legalized to a sequence of
// EXTRACT_SUBVECTOR + EXTRACT_VECTOR_ELT, which are not detected at this point
// because the legalization of N did not happen yet.
if (Idx >= (int)NumElems/2 && VT.getSizeInBits() == 256)
return false;
// Skip one more bit_convert if necessary
if (V.getOpcode() == ISD::BITCAST) {
if (!V.hasOneUse())
return false;
V = V.getOperand(0);
}
if (!ISD::isNormalLoad(V.getNode()))
return false;
// Is the original load suitable?
LoadSDNode *LN0 = cast<LoadSDNode>(V);
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
return false;
if (!HasShuffleIntoBitcast)
return true;
// If there's a bitcast before the shuffle, check if the load type and
// alignment is valid.
unsigned Align = LN0->getAlignment();
unsigned NewAlign =
TLI.getTargetData()->getABITypeAlignment(
VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
return false;
return true;
}
static
SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
@ -6295,12 +6219,6 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
if (SVOp->isSplat()) {
unsigned NumElem = VT.getVectorNumElements();
int Size = VT.getSizeInBits();
// Special case, this is the only place now where it's allowed to return
// a vector_shuffle operation without using a target specific node, because
// *hopefully* it will be optimized away by the dag combiner. FIXME: should
// this be moved to DAGCombine instead?
if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
return Op;
// Use vbroadcast whenever the splat comes from a foldable load
SDValue LD = isVectorBroadcast(Op, Subtarget);
@ -13018,11 +12936,109 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
/// specific shuffle of a load can be folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
/// shuffles have been customed lowered so we need to handle those here.
static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue InVec = N->getOperand(0);
SDValue EltNo = N->getOperand(1);
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
EVT VT = InVec.getValueType();
bool HasShuffleIntoBitcast = false;
if (InVec.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
EVT BCVT = InVec.getOperand(0).getValueType();
if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
return SDValue();
InVec = InVec.getOperand(0);
HasShuffleIntoBitcast = true;
}
if (!isTargetShuffle(InVec.getOpcode()))
return SDValue();
// Don't duplicate a load with other uses.
if (!InVec.hasOneUse())
return SDValue();
SmallVector<int, 16> ShuffleMask;
bool UnaryShuffle;
if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle))
return SDValue();
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = VT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
: InVec.getOperand(1);
// If inputs to shuffle are the same for both ops, then allow 2 uses
unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
if (LdNode.getOpcode() == ISD::BITCAST) {
// Don't duplicate a load with other uses.
if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
return SDValue();
AllowedUses = 1; // only allow 1 load use if we have a bitcast
LdNode = LdNode.getOperand(0);
}
if (!ISD::isNormalLoad(LdNode.getNode()))
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
if (HasShuffleIntoBitcast) {
// If there's a bitcast before the shuffle, check if the load type and
// alignment is valid.
unsigned Align = LN0->getAlignment();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NewAlign = TLI.getTargetData()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
return SDValue();
}
// All checks match so transform back to vector_shuffle so that DAG combiner
// can finish the job
DebugLoc dl = N->getDebugLoc();
// Create shuffle node taking into account the case that its a unary shuffle
SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1);
Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl,
InVec.getOperand(0), Shuffle,
&ShuffleMask[0]);
Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
EltNo);
}
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
TargetLowering::DAGCombinerInfo &DCI) {
SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
if (NewOp.getNode())
return NewOp;
SDValue InputVector = N->getOperand(0);
// Only operate on vectors of 4 elements, where the alternative shuffling
@ -13083,6 +13099,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
unsigned EltSize =
InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
@ -13106,6 +13123,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
DebugLoc DL = N->getDebugLoc();
SDValue Cond = N->getOperand(0);
// Get the LHS/RHS of the select.
@ -14910,7 +14929,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);