Since vectors with all ones can't be created with a 256-bit instruction,

avoid returning early for v8i32 types, which would only be valid for
vector with all zeros. Also split the handling of zeros and ones into separate
checking logic since they are handled differently. This fixes PR10547

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@136642 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2011-08-01 19:51:53 +00:00
parent 8ead80db20
commit 531f19f767
2 changed files with 22 additions and 11 deletions

View File

@ -4673,24 +4673,26 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
EVT ExtVT = VT.getVectorElementType();
unsigned NumElems = Op.getNumOperands();
// All zero's:
// - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
// All one's:
// - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
ISD::isBuildVectorAllOnes(Op.getNode())) {
// Canonicalize this to <4 x i32> or <8 x 32> (SSE) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
// Vectors containing all zeros can be matched by pxor and xorps later
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
// Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
// and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
if (Op.getValueType() == MVT::v4i32 ||
Op.getValueType() == MVT::v8i32)
return Op;
if (ISD::isBuildVectorAllOnes(Op.getNode()))
return getOnesVector(Op.getValueType(), DAG, dl);
return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
}
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors.
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
if (Op.getValueType() == MVT::v4i32)
return Op;
return getOnesVector(Op.getValueType(), DAG, dl);
}
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;

View File

@ -24,3 +24,12 @@ allocas:
float>* %ptr2vec615, align 32
ret void
}
; CHECK: vpcmpeqd
; CHECK: vinsertf128 $1
define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind {
allocas:
%ptr2vec615 = bitcast [0 x i32]* %RET to <8 x i32>*
store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32>* %ptr2vec615, align 32
ret void
}