diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 626a912a67b..3266fc2dcc3 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4053,8 +4053,12 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       // just use DUPLANE. We can only do this if the lane being extracted
       // is at a constant index, as the DUP from lane instructions only have
       // constant-index forms.
+      // FIXME: v1i8, v1i16 and v1i32 are currently legal vector types, so only
+      // use DUPLANE when the source vector is at least 64 bits wide. Once
+      // those types are no longer legal, this size check can be removed.
       if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-          isa<ConstantSDNode>(Value->getOperand(1))) {
+          isa<ConstantSDNode>(Value->getOperand(1)) &&
+          Value->getOperand(0).getValueType().getSizeInBits() >= 64) {
         N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT,
                         Value->getOperand(0), Value->getOperand(1));
       } else
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index e9b1298a24c..dd87b92df25 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -3252,6 +3252,21 @@ def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
 def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
           (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
 
+// Match loads/stores of v1i8/v1i16/v1i32 to the FPR8/FPR16/FPR32 load/store
+// instructions. FIXME: v1i8, v1i16 and v1i32 are currently legal types; if
+// they become illegal, these patterns are no longer needed.
+def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
+def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
+def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;
+
+def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
+          (LSFP8_STR $value, $addr, 0)>;
+def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
+          (LSFP16_STR $value, $addr, 0)>;
+def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
+          (LSFP32_STR $value, $addr, 0)>;
+
+
 // End of vector load/store multiple N-element structure(class SIMD lselem)
 
 // The followings are post-index vector load/store multiple N-element
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
index 016ccb98575..c783c00c714 100644
--- a/test/CodeGen/AArch64/neon-copy.ll
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -662,4 +662,45 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
 ; CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}}
   %b = insertelement <2 x i64> undef, i64 %a, i32 0
   ret <2 x i64> %b
+}
+
+define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
+; CHECK-LABEL: testDUP.v1i8:
+; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+  %b = extractelement <1 x i8> %a, i32 0
+  %c = insertelement <8 x i8> undef, i8 %b, i32 0
+  %d = insertelement <8 x i8> %c, i8 %b, i32 1
+  %e = insertelement <8 x i8> %d, i8 %b, i32 2
+  %f = insertelement <8 x i8> %e, i8 %b, i32 3
+  %g = insertelement <8 x i8> %f, i8 %b, i32 4
+  %h = insertelement <8 x i8> %g, i8 %b, i32 5
+  %i = insertelement <8 x i8> %h, i8 %b, i32 6
+  %j = insertelement <8 x i8> %i, i8 %b, i32 7
+  ret <8 x i8> %j
+}
+
+define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
+; CHECK-LABEL: testDUP.v1i16:
+; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+  %b = extractelement <1 x i16> %a, i32 0
+  %c = insertelement <8 x i16> undef, i16 %b, i32 0
+  %d = insertelement <8 x i16> %c, i16 %b, i32 1
+  %e = insertelement <8 x i16> %d, i16 %b, i32 2
+  %f = insertelement <8 x i16> %e, i16 %b, i32 3
+  %g = insertelement <8 x i16> %f, i16 %b, i32 4
+  %h = insertelement <8 x i16> %g, i16 %b, i32 5
+  %i = insertelement <8 x i16> %h, i16 %b, i32 6
+  %j = insertelement <8 x i16> %i, i16 %b, i32 7
+  ret <8 x i16> %j
+}
+
+define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
+; CHECK-LABEL: testDUP.v1i32:
+; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+  %b = extractelement <1 x i32> %a, i32 0
+  %c = insertelement <4 x i32> undef, i32 %b, i32 0
+  %d = insertelement <4 x i32> %c, i32 %b, i32 1
+  %e = insertelement <4 x i32> %d, i32 %b, i32 2
+  %f = insertelement <4 x i32> %e, i32 %b, i32 3
+  ret <4 x i32> %f
 }
\ No newline at end of file