Reapplied D7816 & rL230177 & rL230278 - with an additional fix toensure that the smallest build vector input scalar type is always used. Additional (crash) test cases already committed.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230388 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Simon Pilgrim 2015-02-24 22:08:56 +00:00
parent 0115755020
commit 41cda40157
2 changed files with 75 additions and 66 deletions

View File

@ -11430,36 +11430,56 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
}
// Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
// We have already tested above for an UNDEF only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
if (N->getNumOperands() == 2 &&
N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
auto IsBuildVectorOrUndef = [](const SDValue &Op) {
return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
};
bool AllBuildVectorsOrUndefs =
std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
if (AllBuildVectorsOrUndefs) {
SmallVector<SDValue, 8> Opnds;
unsigned BuildVecNumElts = N0.getNumOperands();
EVT SVT = VT.getScalarType();
EVT SclTy0 = N0.getOperand(0)->getValueType(0);
EVT SclTy1 = N1.getOperand(0)->getValueType(0);
if (SclTy0.isFloatingPoint()) {
for (unsigned i = 0; i != BuildVecNumElts; ++i)
Opnds.push_back(N0.getOperand(i));
for (unsigned i = 0; i != BuildVecNumElts; ++i)
Opnds.push_back(N1.getOperand(i));
} else {
EVT MinVT = SVT;
if (!SVT.isFloatingPoint()) {
// If BUILD_VECTOR are from built from integer, they may have different
// operand types. Get the smaller type and truncate all operands to it.
EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
for (unsigned i = 0; i != BuildVecNumElts; ++i)
Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
N0.getOperand(i)));
for (unsigned i = 0; i != BuildVecNumElts; ++i)
Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
N1.getOperand(i)));
// operand types. Get the smallest type and truncate all operands to it.
bool FoundMinVT = false;
for (const SDValue &Op : N->ops())
if (ISD::BUILD_VECTOR == Op.getOpcode()) {
EVT OpSVT = Op.getOperand(0)->getValueType(0);
MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
FoundMinVT = true;
}
assert(FoundMinVT && "Concat vector type mismatch");
}
for (const SDValue &Op : N->ops()) {
EVT OpVT = Op.getValueType();
unsigned NumElts = OpVT.getVectorNumElements();
if (ISD::UNDEF == Op.getOpcode())
for (unsigned i = 0; i != NumElts; ++i)
Opnds.push_back(DAG.getUNDEF(MinVT));
if (ISD::BUILD_VECTOR == Op.getOpcode()) {
if (SVT.isFloatingPoint()) {
assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
for (unsigned i = 0; i != NumElts; ++i)
Opnds.push_back(Op.getOperand(i));
} else {
for (unsigned i = 0; i != NumElts; ++i)
Opnds.push_back(
DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
}
}
}
assert(VT.getVectorNumElements() == Opnds.size() &&
"Concat vector type mismatch");
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}

View File

@ -359,21 +359,15 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX2-NEXT: vpunpcklwd{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
entry:
%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
@ -410,22 +404,17 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
entry:
%B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>