[AArch64] Fix the pattern match failure for v1i8/v1i16/v1i32 types.

Currently v1i8, v1i16 and v1i32 are legal vector types. The DAG combiner may generate DAG nodes with these types, but we have no patterns to match them.
E.g. a load of i32 followed by a bitcast from i32 to v1i32 is combined into a load of v1i32:
     bitcast (load i32) to v1i32 -> load v1i32.
This patch fixes such problems for the load/dup instructions.
If v1i8/v1i16/v1i32 stop being legal, the code in this patch can be deleted, so I have also added FIXME comments marking it.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197361 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hao Liu 2013-12-16 02:51:28 +00:00
parent 10f818b5dd
commit 00a5490f87
3 changed files with 61 additions and 1 deletions

View File

@ -4053,8 +4053,12 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// just use DUPLANE. We can only do this if the lane being extracted
// is at a constant index, as the DUP from lane instructions only have
// constant-index forms.
// FIXME: v1i8, v1i16 and v1i32 are currently legal vector types. Once they
// are no longer legal, the check that the source vector is at least 64 bits
// wide can be removed.
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(Value->getOperand(1))) {
isa<ConstantSDNode>(Value->getOperand(1)) &&
Value->getOperand(0).getValueType().getSizeInBits() >= 64) {
N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT,
Value->getOperand(0), Value->getOperand(1));
} else

View File

@ -3252,6 +3252,21 @@ def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
(ST1_8B GPR64xsp:$addr, VPR64:$value)>;
// Select loads and stores of the single-element vector types v1i8/v1i16/v1i32
// to the FPR8/FPR16/FPR32 load/store instructions (LSFP*_LDR / LSFP*_STR).
// FIXME: v1i8, v1i16 and v1i32 are currently legal types; if they become
// illegal, these patterns are no longer needed.
// NOTE(review): the trailing 0 operand is presumably the immediate offset of
// the LSFP* instructions -- confirm against their operand lists.
def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;
def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
(LSFP8_STR $value, $addr, 0)>;
def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
(LSFP16_STR $value, $addr, 0)>;
def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
(LSFP32_STR $value, $addr, 0)>;
// End of vector load/store multiple N-element structure(class SIMD lselem)
// The followings are post-index vector load/store multiple N-element

View File

@ -662,4 +662,45 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
; CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}}
%b = insertelement <2 x i64> undef, i64 %a, i32 0
ret <2 x i64> %b
}
; Splat the only lane of a <1 x i8> across all eight lanes of an <8 x i8>.
; The element is extracted from an 8-bit-wide vector; the splat is expected
; to be emitted as a DUP from a general-purpose register.
define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
; CHECK-LABEL: testDUP.v1i8:
; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
  %elt = extractelement <1 x i8> %a, i32 0
  %lane0 = insertelement <8 x i8> undef, i8 %elt, i32 0
  %lane1 = insertelement <8 x i8> %lane0, i8 %elt, i32 1
  %lane2 = insertelement <8 x i8> %lane1, i8 %elt, i32 2
  %lane3 = insertelement <8 x i8> %lane2, i8 %elt, i32 3
  %lane4 = insertelement <8 x i8> %lane3, i8 %elt, i32 4
  %lane5 = insertelement <8 x i8> %lane4, i8 %elt, i32 5
  %lane6 = insertelement <8 x i8> %lane5, i8 %elt, i32 6
  %lane7 = insertelement <8 x i8> %lane6, i8 %elt, i32 7
  ret <8 x i8> %lane7
}
; Splat the only lane of a <1 x i16> across all eight lanes of an <8 x i16>.
; The element is extracted from a 16-bit-wide vector; the splat is expected
; to be emitted as a DUP from a general-purpose register.
define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
  %elt = extractelement <1 x i16> %a, i32 0
  %lane0 = insertelement <8 x i16> undef, i16 %elt, i32 0
  %lane1 = insertelement <8 x i16> %lane0, i16 %elt, i32 1
  %lane2 = insertelement <8 x i16> %lane1, i16 %elt, i32 2
  %lane3 = insertelement <8 x i16> %lane2, i16 %elt, i32 3
  %lane4 = insertelement <8 x i16> %lane3, i16 %elt, i32 4
  %lane5 = insertelement <8 x i16> %lane4, i16 %elt, i32 5
  %lane6 = insertelement <8 x i16> %lane5, i16 %elt, i32 6
  %lane7 = insertelement <8 x i16> %lane6, i16 %elt, i32 7
  ret <8 x i16> %lane7
}
; Splat the only lane of a <1 x i32> across all four lanes of a <4 x i32>.
; The element is extracted from a 32-bit-wide vector; the splat is expected
; to be emitted as a DUP from a general-purpose register.
define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-LABEL: testDUP.v1i32:
; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
  %elt = extractelement <1 x i32> %a, i32 0
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %lane1 = insertelement <4 x i32> %lane0, i32 %elt, i32 1
  %lane2 = insertelement <4 x i32> %lane1, i32 %elt, i32 2
  %lane3 = insertelement <4 x i32> %lane2, i32 %elt, i32 3
  ret <4 x i32> %lane3
}