mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-01 00:11:00 +00:00
[CodeGen] Re-apply r234809 (concat of scalars), with an x86_mmx fix.
The only type that isn't an integer, isn't floating point, and isn't a vector; ladies and gentlemen, the gift that keeps on giving: x86_mmx! Fixes PR23246. Original message (reverted in r235062): [CodeGen] Combine concat_vectors of scalars into build_vector. Combine something like: (v8i8 concat_vectors (v2i8 bitcast (i16)) x4) into: (v8i8 (bitcast (v4i16 BUILD_VECTOR (i16) x4))) If any of the scalars are floating point, use that throughout. Differential Revision: http://reviews.llvm.org/D8948 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235072 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ce6a39f31f
commit
363a2799ff
@ -11499,6 +11499,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fold a CONCAT_VECTORS whose operands are all either UNDEF or a BITCAST of
/// a non-vector value into a BITCAST of one BUILD_VECTOR of those scalars,
/// for example:
///   (v8i8 concat_vectors (v2i8 bitcast (i16)) x4)
/// becomes:
///   (v8i8 (bitcast (v4i16 BUILD_VECTOR (i16) x4)))
///
/// \param N    the CONCAT_VECTORS node to examine.
/// \param DAG  the DAG used to query type legality and to create nodes.
/// \returns the replacement value, or an empty SDValue when the operand type
///          is already legal, an operand is neither UNDEF nor a bitcast
///          scalar, or a scalar is neither integer nor floating point.
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &Lowering = DAG.getTargetLoweringInfo();
  EVT PieceVT = N->getOperand(0).getValueType();

  // Operands that are already legal vectors are left untouched.
  if (Lowering.isTypeLegal(PieceVT))
    return SDValue();

  SDLoc DL(N);
  EVT ResultVT = N->getValueType(0);

  // Start with an integer scalar as wide as one concat operand; UNDEF
  // operands are represented by an UNDEF of that scalar type.
  EVT ScalarVT = EVT::getIntegerVT(*DAG.getContext(), PieceVT.getSizeInBits());
  SDValue UndefScalar = DAG.getNode(ISD::UNDEF, DL, ScalarVT);

  SmallVector<SDValue, 8> Scalars;
  bool SawInteger = false;
  bool SawFP = false;
  for (const SDValue &Piece : N->ops()) {
    switch (Piece.getOpcode()) {
    case ISD::BITCAST:
      // Only a bitcast whose source is not a vector is of interest here.
      if (Piece.getOperand(0).getValueType().isVector())
        return SDValue();
      Scalars.push_back(Piece.getOperand(0));
      break;
    case ISD::UNDEF:
      Scalars.push_back(UndefScalar);
      break;
    default:
      return SDValue();
    }

    // Record the class of the scalar just collected. Anything that is
    // neither floating point nor integer (it could be something weird like
    // x86mmx) makes us give up.
    EVT CollectedVT = Scalars.back().getValueType();
    if (CollectedVT.isFloatingPoint()) {
      SawFP = true;
      continue;
    }
    if (CollectedVT.isInteger()) {
      SawInteger = true;
      continue;
    }
    return SDValue();
  }

  // As soon as one scalar is floating point, the whole BUILD_VECTOR uses a
  // floating point element type: every element of a different type is
  // bitcast, and UNDEF elements are swapped for an UNDEF of the final type.
  if (SawFP) {
    ScalarVT = EVT::getFloatingPointVT(PieceVT.getSizeInBits());
    UndefScalar = DAG.getNode(ISD::UNDEF, DL, ScalarVT);
    if (SawInteger)
      for (SDValue &Scalar : Scalars)
        if (Scalar.getValueType() != ScalarVT)
          Scalar = Scalar.getOpcode() == ISD::UNDEF
                       ? UndefScalar
                       : DAG.getNode(ISD::BITCAST, DL, ScalarVT, Scalar);
  }

  // Build the vector of scalars, then reinterpret it as the original type.
  EVT BuildVT =
      EVT::getVectorVT(*DAG.getContext(), ScalarVT,
                       ResultVT.getSizeInBits() / ScalarVT.getSizeInBits());
  SDValue Build = DAG.getNode(ISD::BUILD_VECTOR, DL, BuildVT, Scalars);
  return DAG.getNode(ISD::BITCAST, DL, ResultVT, Build);
}
|
||||||
|
|
||||||
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
|
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
|
||||||
// TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
|
// TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
|
||||||
// EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
|
// EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
|
||||||
@ -11601,6 +11663,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
|
|||||||
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
|
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
|
||||||
|
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
|
||||||
|
return V;
|
||||||
|
|
||||||
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
|
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
|
||||||
// nodes often generate nop CONCAT_VECTOR nodes.
|
// nodes often generate nop CONCAT_VECTOR nodes.
|
||||||
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
|
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
|
||||||
|
125
test/CodeGen/AArch64/concat_vector-scalar-combine.ll
Normal file
125
test/CodeGen/AArch64/concat_vector-scalar-combine.ll
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||||
|
|
||||||
|
; Test the (concat_vectors (bitcast (scalar)), ..) pattern.
|
||||||
|
|
||||||
|
; An i16 (truncated from %x) viewed as <2 x i8> is repeated four times to fill
; an <8 x i8>; the expected output is a single dup.4h of w0.
define <8 x i8> @test_concat_scalar_v2i8_to_v8i8_dup(i32 %x) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test_concat_scalar_v2i8_to_v8i8_dup:
|
||||||
|
; CHECK-NEXT: dup.4h v0, w0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%t = trunc i32 %x to i16
|
||||||
|
%0 = bitcast i16 %t to <2 x i8>
|
||||||
|
%1 = shufflevector <2 x i8> %0, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
|
||||||
|
ret <8 x i8> %1
|
||||||
|
}
|
||||||
|
|
||||||
|
; An i32 viewed as <4 x i8> is repeated twice to fill an <8 x i8>; the
; expected output is a single dup.2s of w0.
define <8 x i8> @test_concat_scalar_v4i8_to_v8i8_dup(i32 %x) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test_concat_scalar_v4i8_to_v8i8_dup:
|
||||||
|
; CHECK-NEXT: dup.2s v0, w0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%0 = bitcast i32 %x to <4 x i8>
|
||||||
|
%1 = shufflevector <4 x i8> %0, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||||
|
ret <8 x i8> %1
|
||||||
|
}
|
||||||
|
|
||||||
|
; An i32 viewed as <2 x i16> is shuffled into an <8 x i16>. Mask index 2
; selects from the undef second operand, so lanes 2-3 are undef while the
; other pairs are copies of %x; the expected output is a single dup.4s of w0.
define <8 x i16> @test_concat_scalar_v2i16_to_v8i16_dup(i32 %x) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test_concat_scalar_v2i16_to_v8i16_dup:
|
||||||
|
; CHECK-NEXT: dup.4s v0, w0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%0 = bitcast i32 %x to <2 x i16>
|
||||||
|
%1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
|
||||||
|
ret <8 x i16> %1
|
||||||
|
}
|
||||||
|
|
||||||
|
; Two i16 scalars (truncated from %x and %y), each viewed as <2 x i8>, are
; shuffled into an <8 x i8> laid out as x, y, undef, y. The expected output
; inserts only halfword lanes 0, 1 and 3; lane 2 is never written.
define <8 x i8> @test_concat_scalars_2x_v2i8_to_v8i8(i32 %x, i32 %y) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test_concat_scalars_2x_v2i8_to_v8i8:
|
||||||
|
; CHECK-NEXT: ins.h v0[0], w0
|
||||||
|
; CHECK-NEXT: ins.h v0[1], w1
|
||||||
|
; CHECK-NEXT: ins.h v0[3], w1
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%tx = trunc i32 %x to i16
|
||||||
|
%ty = trunc i32 %y to i16
|
||||||
|
%bx = bitcast i16 %tx to <2 x i8>
|
||||||
|
%by = bitcast i16 %ty to <2 x i8>
|
||||||
|
%r = shufflevector <2 x i8> %bx, <2 x i8> %by, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 2, i32 3>
|
||||||
|
ret <8 x i8> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; Two i32 scalars, each viewed as <4 x i8>, are shuffled into an <8 x i8>
; with %y in the low half and %x in the high half. The expected output moves
; w1 into s0 and then inserts w0 into word lane 1.
define <8 x i8> @test_concat_scalars_2x_v4i8_to_v8i8_dup(i32 %x, i32 %y) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test_concat_scalars_2x_v4i8_to_v8i8_dup:
|
||||||
|
; CHECK-NEXT: fmov s0, w1
|
||||||
|
; CHECK-NEXT: ins.s v0[1], w0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%bx = bitcast i32 %x to <4 x i8>
|
||||||
|
%by = bitcast i32 %y to <4 x i8>
|
||||||
|
%r = shufflevector <4 x i8> %bx, <4 x i8> %by, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
|
||||||
|
ret <8 x i8> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; Two i32 scalars, each viewed as <2 x i16>, are shuffled into an <8 x i16>
; laid out as x, y, y, x. The expected output moves w0 into s0 and then
; inserts w1, w1 and w0 into word lanes 1, 2 and 3.
define <8 x i16> @test_concat_scalars_2x_v2i16_to_v8i16_dup(i32 %x, i32 %y) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test_concat_scalars_2x_v2i16_to_v8i16_dup:
|
||||||
|
; CHECK-NEXT: fmov s0, w0
|
||||||
|
; CHECK-NEXT: ins.s v0[1], w1
|
||||||
|
; CHECK-NEXT: ins.s v0[2], w1
|
||||||
|
; CHECK-NEXT: ins.s v0[3], w0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%bx = bitcast i32 %x to <2 x i16>
|
||||||
|
%by = bitcast i32 %y to <2 x i16>
|
||||||
|
%r = shufflevector <2 x i16> %bx, <2 x i16> %by, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1>
|
||||||
|
ret <8 x i16> %r
|
||||||
|
}
|
||||||
|
|
||||||
|
; Also make sure we minimize bitcasts.
|
||||||
|
|
||||||
|
; This is a pretty artificial testcase: make sure we bitcast to floating-point
|
||||||
|
; if any of the scalars is floating-point.
|
||||||
|
; Mixes an integer scalar (i16 truncated from %x) with a floating-point scalar
; (half %y), each viewed as <2 x i8>, laid out as x, y, x, y. The expected
; output first moves w0 into an FP/SIMD register and then performs four
; halfword lane inserts, alternating between that register and v1.
; NOTE(review): %dummy presumably only occupies the first FP argument
; register so that %y arrives in v1 -- confirm against the AArch64 ABI.
define <8 x i8> @test_concat_scalars_mixed_2x_v2i8_to_v8i8(float %dummy, i32 %x, half %y) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test_concat_scalars_mixed_2x_v2i8_to_v8i8:
|
||||||
|
; CHECK-NEXT: fmov s[[X:[0-9]+]], w0
|
||||||
|
; CHECK-NEXT: ins.h v0[0], v[[X]][0]
|
||||||
|
; CHECK-NEXT: ins.h v0[1], v1[0]
|
||||||
|
; CHECK-NEXT: ins.h v0[2], v[[X]][0]
|
||||||
|
; CHECK-NEXT: ins.h v0[3], v1[0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%t = trunc i32 %x to i16
|
||||||
|
%0 = bitcast i16 %t to <2 x i8>
|
||||||
|
%y0 = bitcast half %y to <2 x i8>
|
||||||
|
%1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||||
|
ret <8 x i8> %1
|
||||||
|
}
|
||||||
|
|
||||||
|
; Both scalars are floating point: half %x and half %y, each viewed as
; <2 x i8>, are laid out as x, y, x, y and the result is returned as
; <2 x float>. The expected output is four halfword lane inserts taken
; alternately from v1 and v2.
define <2 x float> @test_concat_scalars_fp_2x_v2i8_to_v8i8(float %dummy, half %x, half %y) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test_concat_scalars_fp_2x_v2i8_to_v8i8:
|
||||||
|
; CHECK-NEXT: ins.h v0[0], v1[0]
|
||||||
|
; CHECK-NEXT: ins.h v0[1], v2[0]
|
||||||
|
; CHECK-NEXT: ins.h v0[2], v1[0]
|
||||||
|
; CHECK-NEXT: ins.h v0[3], v2[0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%0 = bitcast half %x to <2 x i8>
|
||||||
|
%y0 = bitcast half %y to <2 x i8>
|
||||||
|
%1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||||
|
%2 = bitcast <8 x i8> %1 to <2 x float>
|
||||||
|
ret <2 x float> %2
|
||||||
|
}
|
||||||
|
|
||||||
|
; A float viewed as <2 x i16> is shuffled into an <8 x i16> (mask index 2
; selects from the undef second operand) and returned as <4 x float>; the
; expected output is a single dup.4s of lane 0 of v0.
define <4 x float> @test_concat_scalar_fp_v2i16_to_v16i8_dup(float %x) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: test_concat_scalar_fp_v2i16_to_v16i8_dup:
|
||||||
|
; CHECK-NEXT: dup.4s v0, v0[0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
%0 = bitcast float %x to <2 x i16>
|
||||||
|
%1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
|
||||||
|
%2 = bitcast <8 x i16> %1 to <4 x float>
|
||||||
|
ret <4 x float> %2
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
19
test/CodeGen/X86/pr23246.ll
Normal file
19
test/CodeGen/X86/pr23246.ll
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
; RUN: llc < %s -mtriple x86_64-unknown-unknown | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
|
|
||||||
|
; PR23246
|
||||||
|
; We're really only interested in doing something sane with the shuffle.
|
||||||
|
|
||||||
|
; CHECK-LABEL: test:
|
||||||
|
; CHECK: movq2dq %mm0, %xmm0
|
||||||
|
; CHECK-NEXT: pshufd {{.*}} xmm0 = xmm0[0,1,0,1]
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
; Reproducer for PR23246: an x86_mmx value is bitcast to <1 x i64> and then
; shuffled into a <2 x i64> whose lane 0 is undef and whose lane 1 is the
; original element.
define <2 x i64> @test(x86_mmx %a) #0 {
|
||||||
|
entry:
|
||||||
|
%b = bitcast x86_mmx %a to <1 x i64>
|
||||||
|
%s = shufflevector <1 x i64> %b, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
|
||||||
|
ret <2 x i64> %s
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue
Block a user