Mirror of https://github.com/c64scene-ar/llvm-6502.git (last synced 2025-08-13 08:26:02 +00:00)
Improve codegen for BUILD_VECTORs on ARM.
If we have a BUILD_VECTOR that is mostly a constant splat, it is often better to splat that constant and then insertelement the non-constant lanes, instead of insertelementing every lane from an undef base.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163304 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -4161,10 +4161,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Scan through the operands to see if only one value is used.
|
// Scan through the operands to see if only one value is used.
|
||||||
|
//
|
||||||
|
// As an optimisation, even if more than one value is used it may be more
|
||||||
|
// profitable to splat with one value then change some lanes.
|
||||||
|
//
|
||||||
|
// Heuristically we decide to do this if the vector has a "dominant" value,
|
||||||
|
// defined as splatted to more than half of the lanes.
|
||||||
unsigned NumElts = VT.getVectorNumElements();
|
unsigned NumElts = VT.getVectorNumElements();
|
||||||
bool isOnlyLowElement = true;
|
bool isOnlyLowElement = true;
|
||||||
bool usesOnlyOneValue = true;
|
bool usesOnlyOneValue = true;
|
||||||
|
bool hasDominantValue = false;
|
||||||
bool isConstant = true;
|
bool isConstant = true;
|
||||||
|
|
||||||
|
// Map of the number of times a particular SDValue appears in the
|
||||||
|
// element list.
|
||||||
|
DenseMap<SDValue, int> ValueCounts;
|
||||||
SDValue Value;
|
SDValue Value;
|
||||||
for (unsigned i = 0; i < NumElts; ++i) {
|
for (unsigned i = 0; i < NumElts; ++i) {
|
||||||
SDValue V = Op.getOperand(i);
|
SDValue V = Op.getOperand(i);
|
||||||
@@ -4175,13 +4186,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
|||||||
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
|
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
|
||||||
isConstant = false;
|
isConstant = false;
|
||||||
|
|
||||||
if (!Value.getNode())
|
ValueCounts.insert(std::make_pair(V, 0));
|
||||||
|
int &Count = ValueCounts[V];
|
||||||
|
|
||||||
|
// Is this value dominant? (takes up more than half of the lanes)
|
||||||
|
if (++Count > (NumElts / 2)) {
|
||||||
|
hasDominantValue = true;
|
||||||
Value = V;
|
Value = V;
|
||||||
else if (V != Value)
|
}
|
||||||
usesOnlyOneValue = false;
|
|
||||||
}
|
}
|
||||||
|
if (ValueCounts.size() != 1)
|
||||||
|
usesOnlyOneValue = false;
|
||||||
|
if (!Value.getNode() && ValueCounts.size() > 0)
|
||||||
|
Value = ValueCounts.begin()->first;
|
||||||
|
|
||||||
if (!Value.getNode())
|
if (ValueCounts.size() == 0)
|
||||||
return DAG.getUNDEF(VT);
|
return DAG.getUNDEF(VT);
|
||||||
|
|
||||||
if (isOnlyLowElement)
|
if (isOnlyLowElement)
|
||||||
@@ -4191,9 +4210,34 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
|||||||
|
|
||||||
// Use VDUP for non-constant splats. For f32 constant splats, reduce to
|
// Use VDUP for non-constant splats. For f32 constant splats, reduce to
|
||||||
// i32 and try again.
|
// i32 and try again.
|
||||||
if (usesOnlyOneValue && EltSize <= 32) {
|
if (hasDominantValue && EltSize <= 32) {
|
||||||
if (!isConstant)
|
if (!isConstant) {
|
||||||
return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
|
SDValue N;
|
||||||
|
|
||||||
|
// If we are VDUPing a value that comes directly from a vector, that will
|
||||||
|
// cause an unnecessary move to and from a GPR, where instead we could
|
||||||
|
// just use VDUPLANE.
|
||||||
|
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
|
||||||
|
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
|
||||||
|
Value->getOperand(0), Value->getOperand(1));
|
||||||
|
else
|
||||||
|
N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
|
||||||
|
|
||||||
|
if (!usesOnlyOneValue) {
|
||||||
|
// The dominant value was splatted as 'N', but we now have to insert
|
||||||
|
// all differing elements.
|
||||||
|
for (unsigned I = 0; I < NumElts; ++I) {
|
||||||
|
if (Op.getOperand(I) == Value)
|
||||||
|
continue;
|
||||||
|
SmallVector<SDValue, 3> Ops;
|
||||||
|
Ops.push_back(N);
|
||||||
|
Ops.push_back(Op.getOperand(I));
|
||||||
|
Ops.push_back(DAG.getConstant(I, MVT::i32));
|
||||||
|
N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return N;
|
||||||
|
}
|
||||||
if (VT.getVectorElementType().isFloatingPoint()) {
|
if (VT.getVectorElementType().isFloatingPoint()) {
|
||||||
SmallVector<SDValue, 8> Ops;
|
SmallVector<SDValue, 8> Ops;
|
||||||
for (unsigned i = 0; i < NumElts; ++i)
|
for (unsigned i = 0; i < NumElts; ++i)
|
||||||
@@ -4205,9 +4249,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
|||||||
if (Val.getNode())
|
if (Val.getNode())
|
||||||
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
|
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
|
||||||
}
|
}
|
||||||
SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
|
if (usesOnlyOneValue) {
|
||||||
if (Val.getNode())
|
SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
|
||||||
return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
|
if (isConstant && Val.getNode())
|
||||||
|
return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If all elements are constants and the case above didn't get hit, fall back
|
// If all elements are constants and the case above didn't get hit, fall back
|
||||||
|
@@ -261,3 +261,37 @@ define void @redundantVdup(<8 x i8>* %ptr) nounwind {
|
|||||||
store <8 x i8> %2, <8 x i8>* %ptr, align 8
|
store <8 x i8> %2, <8 x i8>* %ptr, align 8
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <4 x i32> @tdupi(i32 %x, i32 %y) {
|
||||||
|
;CHECK: tdupi
|
||||||
|
;CHECK: vdup.32
|
||||||
|
%1 = insertelement <4 x i32> undef, i32 %x, i32 0
|
||||||
|
%2 = insertelement <4 x i32> %1, i32 %x, i32 1
|
||||||
|
%3 = insertelement <4 x i32> %2, i32 %x, i32 2
|
||||||
|
%4 = insertelement <4 x i32> %3, i32 %y, i32 3
|
||||||
|
ret <4 x i32> %4
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x float> @tdupf(float %x, float %y) {
|
||||||
|
;CHECK: tdupf
|
||||||
|
;CHECK: vdup.32
|
||||||
|
%1 = insertelement <4 x float> undef, float %x, i32 0
|
||||||
|
%2 = insertelement <4 x float> %1, float %x, i32 1
|
||||||
|
%3 = insertelement <4 x float> %2, float %x, i32 2
|
||||||
|
%4 = insertelement <4 x float> %3, float %y, i32 3
|
||||||
|
ret <4 x float> %4
|
||||||
|
}
|
||||||
|
|
||||||
|
; This test checks that when splatting an element from a vector into another,
|
||||||
|
; the value isn't moved out to GPRs first.
|
||||||
|
define <4 x i32> @tduplane(<4 x i32> %invec) {
|
||||||
|
;CHECK: tduplane
|
||||||
|
;CHECK-NOT: vmov {{.*}}, d16[1]
|
||||||
|
;CHECK: vdup.32 {{.*}}, d16[1]
|
||||||
|
%in = extractelement <4 x i32> %invec, i32 1
|
||||||
|
%1 = insertelement <4 x i32> undef, i32 %in, i32 0
|
||||||
|
%2 = insertelement <4 x i32> %1, i32 %in, i32 1
|
||||||
|
%3 = insertelement <4 x i32> %2, i32 %in, i32 2
|
||||||
|
%4 = insertelement <4 x i32> %3, i32 255, i32 3
|
||||||
|
ret <4 x i32> %4
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user