mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-07-21 18:29:45 +00:00
[DagCombiner] Generalized BuildVector Vector Concatenation
The CONCAT_VECTORS combiner pass can transform the concat of two BUILD_VECTOR nodes into a single BUILD_VECTOR node. This patch generalises this to support any number of BUILD_VECTOR nodes, and also permits UNDEF nodes to be included as well. This was noticed as AVX vec128 -> vec256 canonicalization sometimes creates a CONCAT_VECTORS with a real vec128 lower and a vec128 UNDEF upper. Differential Revision: http://reviews.llvm.org/D7816 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230177 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0e8abfa6ed
commit
66c960350c
@ -11430,36 +11430,51 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
|
||||||
|
// We have already tested above for an UNDEF only concatenation.
|
||||||
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
|
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
|
||||||
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
|
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
|
||||||
if (N->getNumOperands() == 2 &&
|
auto IsBuildVectorOrUndef = [](const SDValue &Op) {
|
||||||
N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
|
return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
|
||||||
N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
|
};
|
||||||
EVT VT = N->getValueType(0);
|
bool AllBuildVectorsOrUndefs =
|
||||||
SDValue N0 = N->getOperand(0);
|
std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
|
||||||
SDValue N1 = N->getOperand(1);
|
if (AllBuildVectorsOrUndefs) {
|
||||||
SmallVector<SDValue, 8> Opnds;
|
SmallVector<SDValue, 8> Opnds;
|
||||||
unsigned BuildVecNumElts = N0.getNumOperands();
|
EVT SVT = VT.getScalarType();
|
||||||
|
|
||||||
EVT SclTy0 = N0.getOperand(0)->getValueType(0);
|
EVT MinVT = SVT;
|
||||||
EVT SclTy1 = N1.getOperand(0)->getValueType(0);
|
if (!SVT.isFloatingPoint())
|
||||||
if (SclTy0.isFloatingPoint()) {
|
|
||||||
for (unsigned i = 0; i != BuildVecNumElts; ++i)
|
|
||||||
Opnds.push_back(N0.getOperand(i));
|
|
||||||
for (unsigned i = 0; i != BuildVecNumElts; ++i)
|
|
||||||
Opnds.push_back(N1.getOperand(i));
|
|
||||||
} else {
|
|
||||||
// If BUILD_VECTOR are from built from integer, they may have different
|
// If BUILD_VECTOR are from built from integer, they may have different
|
||||||
// operand types. Get the smaller type and truncate all operands to it.
|
// operand types. Get the smaller type and truncate all operands to it.
|
||||||
EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
|
for (const SDValue &Op : N->ops()) {
|
||||||
for (unsigned i = 0; i != BuildVecNumElts; ++i)
|
EVT OpSVT = Op.getValueType().getScalarType();
|
||||||
Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
|
MinVT = MinVT.bitsLE(OpSVT) ? MinVT : OpSVT;
|
||||||
N0.getOperand(i)));
|
}
|
||||||
for (unsigned i = 0; i != BuildVecNumElts; ++i)
|
|
||||||
Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
|
for (const SDValue &Op : N->ops()) {
|
||||||
N1.getOperand(i)));
|
EVT OpVT = Op.getValueType();
|
||||||
|
unsigned NumElts = OpVT.getVectorNumElements();
|
||||||
|
|
||||||
|
if (ISD::UNDEF == Op.getOpcode())
|
||||||
|
for (unsigned i = 0; i != NumElts; ++i)
|
||||||
|
Opnds.push_back(DAG.getUNDEF(MinVT));
|
||||||
|
|
||||||
|
if (ISD::BUILD_VECTOR == Op.getOpcode()) {
|
||||||
|
if (SVT.isFloatingPoint()) {
|
||||||
|
assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
|
||||||
|
for (unsigned i = 0; i != NumElts; ++i)
|
||||||
|
Opnds.push_back(Op.getOperand(i));
|
||||||
|
} else {
|
||||||
|
for (unsigned i = 0; i != NumElts; ++i)
|
||||||
|
Opnds.push_back(
|
||||||
|
DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(VT.getVectorNumElements() == Opnds.size() &&
|
||||||
|
"Concat vector type mismatch");
|
||||||
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
|
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -358,22 +358,16 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
|
|||||||
;
|
;
|
||||||
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
|
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
|
||||||
; AVX1: # BB#0: # %entry
|
; AVX1: # BB#0: # %entry
|
||||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
|
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||||
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
|
||||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
|
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
|
||||||
; AVX2: # BB#0: # %entry
|
; AVX2: # BB#0: # %entry
|
||||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX2-NEXT: # kill
|
||||||
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
|
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
|
||||||
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
|
|
||||||
; AVX2-NEXT: vpunpcklwd{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
|
||||||
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
|
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
|
%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
|
||||||
@ -410,22 +404,17 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
|
|||||||
;
|
;
|
||||||
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
|
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
|
||||||
; AVX1: # BB#0: # %entry
|
; AVX1: # BB#0: # %entry
|
||||||
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
|
||||||
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,0]
|
; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||||
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
|
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
|
||||||
; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
|
||||||
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
|
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
|
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
|
||||||
; AVX2: # BB#0: # %entry
|
; AVX2: # BB#0: # %entry
|
||||||
; AVX2-NEXT: # kill
|
; AVX2-NEXT: # kill
|
||||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||||
; AVX2-NEXT: xorl %eax, %eax
|
|
||||||
; AVX2-NEXT: vmovd %eax, %xmm1
|
|
||||||
; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
|
|
||||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
%B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
|
%B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
|
||||||
|
Loading…
Reference in New Issue
Block a user