mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-01 00:33:09 +00:00
Slightly generalize the code that handles shuffles of consecutive loads
on x86 to handle more cases. Fix a bug in said code that would cause it to read past the end of an object. Rewrite the code in SelectionDAGLegalize::ExpandBUILD_VECTOR to be a bit more general. Remove PerformBuildVectorCombine, which is no longer necessary with these changes. In addition to simplifying the code, with this change, we can now catch a few more cases of consecutive loads. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73012 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
dcef849ab0
commit
7a5e55509b
@ -1785,48 +1785,41 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
|
||||
/// support the operation, but do support the resultant vector type.
|
||||
SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
|
||||
unsigned NumElems = Node->getNumOperands();
|
||||
SDValue SplatValue = Node->getOperand(0);
|
||||
SDValue Value1, Value2;
|
||||
DebugLoc dl = Node->getDebugLoc();
|
||||
MVT VT = Node->getValueType(0);
|
||||
MVT OpVT = SplatValue.getValueType();
|
||||
MVT OpVT = Node->getOperand(0).getValueType();
|
||||
MVT EltVT = VT.getVectorElementType();
|
||||
|
||||
// If the only non-undef value is the low element, turn this into a
|
||||
// SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
|
||||
bool isOnlyLowElement = true;
|
||||
|
||||
// FIXME: it would be far nicer to change this into map<SDValue,uint64_t>
|
||||
// and use a bitmask instead of a list of elements.
|
||||
// FIXME: this doesn't treat <0, u, 0, u> for example, as a splat.
|
||||
std::map<SDValue, std::vector<unsigned> > Values;
|
||||
Values[SplatValue].push_back(0);
|
||||
bool MoreThanTwoValues = false;
|
||||
bool isConstant = true;
|
||||
if (!isa<ConstantFPSDNode>(SplatValue) && !isa<ConstantSDNode>(SplatValue) &&
|
||||
SplatValue.getOpcode() != ISD::UNDEF)
|
||||
isConstant = false;
|
||||
|
||||
for (unsigned i = 1; i < NumElems; ++i) {
|
||||
for (unsigned i = 0; i < NumElems; ++i) {
|
||||
SDValue V = Node->getOperand(i);
|
||||
Values[V].push_back(i);
|
||||
if (V.getOpcode() != ISD::UNDEF)
|
||||
if (V.getOpcode() == ISD::UNDEF)
|
||||
continue;
|
||||
if (i > 0)
|
||||
isOnlyLowElement = false;
|
||||
if (SplatValue != V)
|
||||
SplatValue = SDValue(0, 0);
|
||||
|
||||
// If this isn't a constant element or an undef, we can't use a constant
|
||||
// pool load.
|
||||
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V) &&
|
||||
V.getOpcode() != ISD::UNDEF)
|
||||
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
|
||||
isConstant = false;
|
||||
|
||||
if (!Value1.getNode()) {
|
||||
Value1 = V;
|
||||
} else if (!Value2.getNode()) {
|
||||
if (V != Value1)
|
||||
Value2 = V;
|
||||
} else if (V != Value1 && V != Value2) {
|
||||
MoreThanTwoValues = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (isOnlyLowElement) {
|
||||
// If the low element is an undef too, then this whole thing is an undef.
|
||||
if (Node->getOperand(0).getOpcode() == ISD::UNDEF)
|
||||
return DAG.getUNDEF(VT);
|
||||
// Otherwise, turn this into a scalar_to_vector node.
|
||||
if (!Value1.getNode())
|
||||
return DAG.getUNDEF(VT);
|
||||
|
||||
if (isOnlyLowElement)
|
||||
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
|
||||
}
|
||||
|
||||
// If all elements are constants, create a load from the constant pool.
|
||||
if (isConstant) {
|
||||
@ -1852,59 +1845,25 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
|
||||
false, Alignment);
|
||||
}
|
||||
|
||||
if (SplatValue.getNode()) { // Splat of one value?
|
||||
// Build the shuffle constant vector: <0, 0, 0, 0>
|
||||
SmallVector<int, 8> ZeroVec(NumElems, 0);
|
||||
|
||||
// If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
|
||||
if (TLI.isShuffleMaskLegal(ZeroVec, Node->getValueType(0))) {
|
||||
if (!MoreThanTwoValues) {
|
||||
SmallVector<int, 8> ShuffleVec(NumElems, -1);
|
||||
for (unsigned i = 0; i < NumElems; ++i) {
|
||||
SDValue V = Node->getOperand(i);
|
||||
if (V.getOpcode() == ISD::UNDEF)
|
||||
continue;
|
||||
ShuffleVec[i] = V == Value1 ? 0 : NumElems;
|
||||
}
|
||||
if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
|
||||
// Get the splatted value into the low element of a vector register.
|
||||
SDValue LowValVec =
|
||||
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue);
|
||||
SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
|
||||
SDValue Vec2;
|
||||
if (Value2.getNode())
|
||||
Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
|
||||
else
|
||||
Vec2 = DAG.getUNDEF(VT);
|
||||
|
||||
// Return shuffle(LowValVec, undef, <0,0,0,0>)
|
||||
return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT),
|
||||
&ZeroVec[0]);
|
||||
}
|
||||
}
|
||||
|
||||
// If there are only two unique elements, we may be able to turn this into a
|
||||
// vector shuffle.
|
||||
if (Values.size() == 2) {
|
||||
// Get the two values in deterministic order.
|
||||
SDValue Val1 = Node->getOperand(1);
|
||||
SDValue Val2;
|
||||
std::map<SDValue, std::vector<unsigned> >::iterator MI = Values.begin();
|
||||
if (MI->first != Val1)
|
||||
Val2 = MI->first;
|
||||
else
|
||||
Val2 = (++MI)->first;
|
||||
|
||||
// If Val1 is an undef, make sure it ends up as Val2, to ensure that our
|
||||
// vector shuffle has the undef vector on the RHS.
|
||||
if (Val1.getOpcode() == ISD::UNDEF)
|
||||
std::swap(Val1, Val2);
|
||||
|
||||
// Build the shuffle constant vector: e.g. <0, 4, 0, 4>
|
||||
SmallVector<int, 8> ShuffleMask(NumElems, -1);
|
||||
|
||||
// Set elements of the shuffle mask for Val1.
|
||||
std::vector<unsigned> &Val1Elts = Values[Val1];
|
||||
for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i)
|
||||
ShuffleMask[Val1Elts[i]] = 0;
|
||||
|
||||
// Set elements of the shuffle mask for Val2.
|
||||
std::vector<unsigned> &Val2Elts = Values[Val2];
|
||||
for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i)
|
||||
if (Val2.getOpcode() != ISD::UNDEF)
|
||||
ShuffleMask[Val2Elts[i]] = NumElems;
|
||||
|
||||
// If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it.
|
||||
if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) &&
|
||||
TLI.isShuffleMaskLegal(ShuffleMask, VT)) {
|
||||
Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1);
|
||||
Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2);
|
||||
return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]);
|
||||
return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7691,13 +7691,15 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
|
||||
}
|
||||
|
||||
static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
|
||||
MVT EVT, SDNode *&Base,
|
||||
MVT EVT, LoadSDNode *&LDBase,
|
||||
unsigned &LastLoadedElt,
|
||||
SelectionDAG &DAG, MachineFrameInfo *MFI,
|
||||
const TargetLowering &TLI) {
|
||||
Base = NULL;
|
||||
LDBase = NULL;
|
||||
LastLoadedElt = -1;
|
||||
for (unsigned i = 0; i < NumElems; ++i) {
|
||||
if (N->getMaskElt(i) < 0) {
|
||||
if (!Base)
|
||||
if (!LDBase)
|
||||
return false;
|
||||
continue;
|
||||
}
|
||||
@ -7706,19 +7708,20 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
|
||||
if (!Elt.getNode() ||
|
||||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
|
||||
return false;
|
||||
if (!Base) {
|
||||
Base = Elt.getNode();
|
||||
if (Base->getOpcode() == ISD::UNDEF)
|
||||
if (!LDBase) {
|
||||
if (Elt.getNode()->getOpcode() == ISD::UNDEF)
|
||||
return false;
|
||||
LDBase = cast<LoadSDNode>(Elt.getNode());
|
||||
LastLoadedElt = i;
|
||||
continue;
|
||||
}
|
||||
if (Elt.getOpcode() == ISD::UNDEF)
|
||||
continue;
|
||||
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Elt);
|
||||
LoadSDNode *LDBase = cast<LoadSDNode>(Base);
|
||||
if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
|
||||
return false;
|
||||
LastLoadedElt = i;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -7737,6 +7740,9 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
|
||||
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
|
||||
if (VT.getSizeInBits() != 128)
|
||||
return SDValue();
|
||||
|
||||
// For x86-32 machines, if we see an insert and then a shuffle in a v2i64
|
||||
// where the upper half is 0, it is advantageous to rewrite it as a build
|
||||
// vector of (0, val) so it can use movq.
|
||||
@ -7764,107 +7770,24 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
// Try to combine a vector_shuffle into a 128-bit load.
|
||||
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
SDNode *Base = NULL;
|
||||
if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, Base, DAG, MFI, TLI))
|
||||
LoadSDNode *LD = NULL;
|
||||
unsigned LastLoadedElt;
|
||||
if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, LD, LastLoadedElt, DAG,
|
||||
MFI, TLI))
|
||||
return SDValue();
|
||||
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Base);
|
||||
if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI))
|
||||
if (LastLoadedElt == NumElems - 1) {
|
||||
if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI))
|
||||
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
|
||||
LD->getSrcValue(), LD->getSrcValueOffset(),
|
||||
LD->isVolatile());
|
||||
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
|
||||
LD->getSrcValue(), LD->getSrcValueOffset(),
|
||||
LD->isVolatile());
|
||||
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
|
||||
LD->getSrcValue(), LD->getSrcValueOffset(),
|
||||
LD->isVolatile(), LD->getAlignment());
|
||||
}
|
||||
|
||||
/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
|
||||
static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget *Subtarget,
|
||||
const TargetLowering &TLI) {
|
||||
unsigned NumOps = N->getNumOperands();
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
// Ignore single operand BUILD_VECTOR.
|
||||
if (NumOps == 1)
|
||||
return SDValue();
|
||||
|
||||
MVT VT = N->getValueType(0);
|
||||
MVT EVT = VT.getVectorElementType();
|
||||
|
||||
// Before or during type legalization, we want to try and convert a
|
||||
// build_vector of an i64 load and a zero value into vzext_movl before the
|
||||
// legalizer can break it up.
|
||||
// FIXME: does the case below remove the need to do this?
|
||||
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
|
||||
if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
|
||||
return SDValue();
|
||||
|
||||
// This must be an insertion into a zero vector.
|
||||
SDValue HighElt = N->getOperand(1);
|
||||
if (!isZeroNode(HighElt))
|
||||
return SDValue();
|
||||
|
||||
// Value must be a load.
|
||||
SDNode *Base = N->getOperand(0).getNode();
|
||||
if (!isa<LoadSDNode>(Base)) {
|
||||
if (Base->getOpcode() != ISD::BIT_CONVERT)
|
||||
return SDValue();
|
||||
Base = Base->getOperand(0).getNode();
|
||||
if (!isa<LoadSDNode>(Base))
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Transform it into VZEXT_LOAD addr.
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Base);
|
||||
|
||||
// Load must not be an extload.
|
||||
if (LD->getExtensionType() != ISD::NON_EXTLOAD)
|
||||
return SDValue();
|
||||
|
||||
// Load type should be a legal type so we don't have to legalize it.
|
||||
if (!TLI.isTypeLegal(VT))
|
||||
return SDValue();
|
||||
|
||||
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
|
||||
LD->isVolatile(), LD->getAlignment());
|
||||
} else if (NumElems == 4 && LastLoadedElt == 1) {
|
||||
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
|
||||
SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
|
||||
SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
|
||||
TargetLowering::TargetLoweringOpt TLO(DAG);
|
||||
TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
|
||||
DCI.CommitTargetLoweringOpt(TLO);
|
||||
return ResNode;
|
||||
}
|
||||
|
||||
// The type legalizer will have broken apart v2i64 build_vector created during
|
||||
// widening before the code which handles that case is run. Look for build
|
||||
// vector (load, load + 4, 0/undef, 0/undef)
|
||||
if (VT == MVT::v4i32 || VT == MVT::v4f32) {
|
||||
LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
|
||||
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
|
||||
if (!LD0 || !LD1)
|
||||
return SDValue();
|
||||
if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
|
||||
LD1->getExtensionType() != ISD::NON_EXTLOAD)
|
||||
return SDValue();
|
||||
// Make sure the second elt is a consecutive load.
|
||||
if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
|
||||
DAG.getMachineFunction().getFrameInfo()))
|
||||
return SDValue();
|
||||
|
||||
SDValue N2 = N->getOperand(2);
|
||||
SDValue N3 = N->getOperand(3);
|
||||
if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
|
||||
return SDValue();
|
||||
if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
|
||||
return SDValue();
|
||||
|
||||
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
|
||||
SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
|
||||
SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
|
||||
TargetLowering::TargetLoweringOpt TLO(DAG);
|
||||
TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
|
||||
DCI.CommitTargetLoweringOpt(TLO);
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
|
||||
}
|
||||
return SDValue();
|
||||
@ -8466,14 +8389,25 @@ static SDValue PerformBTCombine(SDNode *N,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// PerformVZEXT_MOVLCombine - DAG combine for X86ISD::VZEXT_MOVL nodes.
/// If the node's operand (looking through at most one BIT_CONVERT) is an
/// X86ISD::VZEXT_LOAD whose vector element width in bits equals that of the
/// result type, the node is replaced by a BIT_CONVERT of that operand to the
/// result type. Returns an empty SDValue when the pattern does not match.
/// NOTE(review): presumably this is valid because VZEXT_LOAD already zeroes
/// the upper elements, making the VZEXT_MOVL redundant -- confirm against the
/// node definitions.
static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
SDValue Op = N->getOperand(0);
|
||||
// Peel off a single BIT_CONVERT so the check below sees the underlying node.
if (Op.getOpcode() == ISD::BIT_CONVERT)
|
||||
Op = Op.getOperand(0);
|
||||
MVT VT = N->getValueType(0), OpVT = Op.getValueType();
|
||||
// Only fold when the source is a VZEXT_LOAD and the element widths of the
// result type and the (possibly peeled) operand type are identical.
if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
|
||||
VT.getVectorElementType().getSizeInBits() ==
|
||||
OpVT.getVectorElementType().getSizeInBits()) {
|
||||
return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
|
||||
}
|
||||
// No match: an empty SDValue is returned.
return SDValue();
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
switch (N->getOpcode()) {
|
||||
default: break;
|
||||
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
|
||||
case ISD::BUILD_VECTOR:
|
||||
return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
|
||||
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
|
||||
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
|
||||
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
|
||||
@ -8485,6 +8419,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case X86ISD::FOR: return PerformFORCombine(N, DAG);
|
||||
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
|
||||
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
|
||||
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
12
test/CodeGen/X86/vec_loadsingles.ll
Normal file
12
test/CodeGen/X86/vec_loadsingles.ll
Normal file
@ -0,0 +1,12 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
|
||||
|
||||
define <4 x float> @a(<4 x float> %a, float* nocapture %p) nounwind readonly {
|
||||
entry:
|
||||
%tmp1 = load float* %p
|
||||
%vecins = insertelement <4 x float> undef, float %tmp1, i32 0
|
||||
%add.ptr = getelementptr float* %p, i32 1
|
||||
%tmp5 = load float* %add.ptr
|
||||
%vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1
|
||||
ret <4 x float> %vecins7
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
|
||||
; RUN: grep movlhps %t | count 1
|
||||
; RUN: grep movq %t | count 1
|
||||
; RUN: grep movsd %t | count 1
|
||||
; RUN: grep movq %t | count 2
|
||||
|
||||
define <4 x float> @test1(float %a, float %b) nounwind {
|
||||
%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0 ; <<4 x float>> [#uses=1]
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
|
||||
; RUN: grep movss %t | count 1
|
||||
; RUN: grep movups %t | count 1
|
||||
; RUN: grep movq %t | count 1
|
||||
; RUN: grep shufps %t | count 1
|
||||
|
||||
define <4 x float> @test(float %a, float %b, float %c) nounwind {
|
||||
|
Loading…
Reference in New Issue
Block a user