Fixed PR13938: the ARM backend was crashing because it couldn't select a VDUPLANE node with the vector input size different from the output size. This was bacause the BUILD_VECTOR lowering code didn't check that the size of the input vector was correct for using VDUPLANE.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165929 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Silviu Baranga 2012-10-15 09:41:32 +00:00
parent 81ff90db44
commit bb1078ea13
2 changed files with 55 additions and 2 deletions

View File

@ -4230,9 +4230,26 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// If we are VDUPing a value that comes directly from a vector, that will
// cause an unnecessary move to and from a GPR, where instead we could
// just use VDUPLANE.
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
// We need to create a new undef vector to use for the VDUPLANE if the
// size of the vector from which we get the value is different than the
// size of the vector that we need to create. We will insert the element
// such that the register coalescer will remove unnecessary copies.
if (VT != Value->getOperand(0).getValueType()) {
ConstantSDNode *constIndex;
constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
assert(constIndex && "The index is not a constant!");
unsigned index = constIndex->getAPIntValue().getLimitedValue() %
VT.getVectorNumElements();
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
Value, DAG.getConstant(index, MVT::i32)),
DAG.getConstant(index, MVT::i32));
} else {
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
Value->getOperand(0), Value->getOperand(1));
}
}
else
N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);

View File

@ -295,3 +295,39 @@ define <4 x i32> @tduplane(<4 x i32> %invec) {
%4 = insertelement <4 x i32> %3, i32 255, i32 3
ret <4 x i32> %4
}
define <2 x float> @check_f32(<4 x float> %v) nounwind {
;CHECK: check_f32:
;CHECK: vdup.32 {{.*}}, d{{..}}[1]
%x = extractelement <4 x float> %v, i32 3
%1 = insertelement <2 x float> undef, float %x, i32 0
%2 = insertelement <2 x float> %1, float %x, i32 1
ret <2 x float> %2
}
define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
;CHECK: check_i32:
;CHECK: vdup.32 {{.*}}, d{{..}}[1]
%x = extractelement <4 x i32> %v, i32 3
%1 = insertelement <2 x i32> undef, i32 %x, i32 0
%2 = insertelement <2 x i32> %1, i32 %x, i32 1
ret <2 x i32> %2
}
define <4 x i16> @check_i16(<8 x i16> %v) nounwind {
;CHECK: check_i16:
;CHECK: vdup.16 {{.*}}, d{{..}}[3]
%x = extractelement <8 x i16> %v, i32 3
%1 = insertelement <4 x i16> undef, i16 %x, i32 0
%2 = insertelement <4 x i16> %1, i16 %x, i32 1
ret <4 x i16> %2
}
define <8 x i8> @check_i8(<16 x i8> %v) nounwind {
;CHECK: check_i8:
;CHECK: vdup.8 {{.*}}, d{{..}}[3]
%x = extractelement <16 x i8> %v, i32 3
%1 = insertelement <8 x i8> undef, i8 %x, i32 0
%2 = insertelement <8 x i8> %1, i8 %x, i32 1
ret <8 x i8> %2
}