mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-05 17:39:16 +00:00
ARM: Improve codegen for generic vselect.
Fall back to by-element insert rather than building it up on the stack. rdar://14351991 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185846 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
66f464ee26
commit
dc2d418dd2
@@ -4734,6 +4734,24 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
|
||||
}
|
||||
|
||||
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
|
||||
// know the default expansion would otherwise fall back on something even
|
||||
// worse. For a vector with one or two non-undef values, that's
|
||||
// scalar_to_vector for the elements followed by a shuffle (provided the
|
||||
// shuffle is valid for the target) and materialization element by element
|
||||
// on the stack followed by a load for everything else.
|
||||
if (!isConstant && !usesOnlyOneValue) {
|
||||
SDValue Vec = DAG.getUNDEF(VT);
|
||||
for (unsigned i = 0 ; i < NumElts; ++i) {
|
||||
SDValue V = Op.getOperand(i);
|
||||
if (V.getOpcode() == ISD::UNDEF)
|
||||
continue;
|
||||
SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
|
||||
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
|
||||
}
|
||||
return Vec;
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@@ -136,20 +136,26 @@ define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
|
||||
|
||||
; We should ignore a build_vector with more than two sources.
|
||||
; Use illegal <32 x i16> type to produce such a shuffle after legalizing types.
|
||||
; Try to look for fallback to stack expansion.
|
||||
; Try to look for fallback to by-element inserts.
|
||||
define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind {
|
||||
;CHECK: test_multisource:
|
||||
;CHECK: vst1.16
|
||||
;CHECK: vmov.16 [[REG:d[0-9]+]][0]
|
||||
;CHECK: vmov.16 [[REG]][1]
|
||||
;CHECK: vmov.16 [[REG]][2]
|
||||
;CHECK: vmov.16 [[REG]][3]
|
||||
%tmp1 = load <32 x i16>* %B
|
||||
%tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
|
||||
ret <4 x i16> %tmp2
|
||||
}
|
||||
|
||||
; We don't handle shuffles using more than half of a 128-bit vector.
|
||||
; Again, test for fallback to stack expansion
|
||||
; Again, test for fallback to by-element inserts.
|
||||
define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
|
||||
;CHECK: test_largespan:
|
||||
;CHECK: vst1.16
|
||||
;CHECK: vmov.16 [[REG:d[0-9]+]][0]
|
||||
;CHECK: vmov.16 [[REG]][1]
|
||||
;CHECK: vmov.16 [[REG]][2]
|
||||
;CHECK: vmov.16 [[REG]][3]
|
||||
%tmp1 = load <8 x i16>* %B
|
||||
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
ret <4 x i16> %tmp2
|
||||
@@ -160,7 +166,14 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
|
||||
; lowering loop can result otherwise).
|
||||
define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
|
||||
;CHECK: test_illegal:
|
||||
;CHECK: vst1.16
|
||||
;CHECK: vmov.16 [[REG:d[0-9]+]][0]
|
||||
;CHECK: vmov.16 [[REG]][1]
|
||||
;CHECK: vmov.16 [[REG]][2]
|
||||
;CHECK: vmov.16 [[REG]][3]
|
||||
;CHECK: vmov.16 [[REG2:d[0-9]+]][0]
|
||||
;CHECK: vmov.16 [[REG2]][1]
|
||||
;CHECK: vmov.16 [[REG2]][2]
|
||||
;CHECK: vmov.16 [[REG2]][3]
|
||||
%tmp1 = load <8 x i16>* %A
|
||||
%tmp2 = load <8 x i16>* %B
|
||||
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9>
|
||||
|
@@ -1,3 +1,4 @@
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
|
||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
|
||||
; Make sure that ARM backend with NEON handles vselect.
|
||||
|
||||
@@ -20,11 +21,8 @@ define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
|
||||
%v0 = load %T0_10* %loadaddr
|
||||
%v1 = load %T0_10* %loadaddr2
|
||||
%c = icmp slt %T0_10 %v0, %v1
|
||||
; CHECK: vst1
|
||||
; CHECK: vst1
|
||||
; CHECK: vst1
|
||||
; CHECK: vst1
|
||||
; CHECK: vld
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
; COST: func_blend10
|
||||
; COST: cost of 40 {{.*}} select
|
||||
%r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
|
||||
@@ -39,10 +37,8 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
|
||||
%v0 = load %T0_14* %loadaddr
|
||||
%v1 = load %T0_14* %loadaddr2
|
||||
%c = icmp slt %T0_14 %v0, %v1
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
; COST: func_blend14
|
||||
; COST: cost of 41 {{.*}} select
|
||||
%r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
|
||||
@@ -54,13 +50,11 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
|
||||
; CHECK: func_blend15:
|
||||
define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
|
||||
%T1_15* %blend, %T0_15* %storeaddr) {
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
%v0 = load %T0_15* %loadaddr
|
||||
%v1 = load %T0_15* %loadaddr2
|
||||
%c = icmp slt %T0_15 %v0, %v1
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; COST: func_blend15
|
||||
; COST: cost of 82 {{.*}} select
|
||||
%r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
|
||||
@@ -72,13 +66,11 @@ define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
|
||||
; CHECK: func_blend18:
|
||||
define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
|
||||
%T1_18* %blend, %T0_18* %storeaddr) {
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
%v0 = load %T0_18* %loadaddr
|
||||
%v1 = load %T0_18* %loadaddr2
|
||||
%c = icmp slt %T0_18 %v0, %v1
|
||||
; CHECK: strh
|
||||
; CHECK: strh
|
||||
; CHECK: strh
|
||||
; CHECK: strh
|
||||
; COST: func_blend18
|
||||
; COST: cost of 19 {{.*}} select
|
||||
%r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
|
||||
@@ -90,13 +82,13 @@ define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
|
||||
; CHECK: func_blend19:
|
||||
define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
|
||||
%T1_19* %blend, %T0_19* %storeaddr) {
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
%v0 = load %T0_19* %loadaddr
|
||||
%v1 = load %T0_19* %loadaddr2
|
||||
%c = icmp slt %T0_19 %v0, %v1
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; COST: func_blend19
|
||||
; COST: cost of 50 {{.*}} select
|
||||
%r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
|
||||
@@ -108,13 +100,17 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
|
||||
; CHECK: func_blend20:
|
||||
define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
|
||||
%T1_20* %blend, %T0_20* %storeaddr) {
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
; CHECK: vbsl
|
||||
%v0 = load %T0_20* %loadaddr
|
||||
%v1 = load %T0_20* %loadaddr2
|
||||
%c = icmp slt %T0_20 %v0, %v1
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; CHECK: strb
|
||||
; COST: func_blend20
|
||||
; COST: cost of 100 {{.*}} select
|
||||
%r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
|
||||
|
Loading…
x
Reference in New Issue
Block a user