diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 3c446d5860d..badd9e0f402 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -7074,10 +7074,18 @@ defm : Concat_Vector_Pattern;
 defm : Concat_Vector_Pattern;
 defm : Concat_Vector_Pattern;
 
+def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), undef)),
+          (v2i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32))>;
+def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+          (EXTRACT_SUBREG
+            (v4i32 (INSELs
+              (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)),
+              (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
+              (i64 1),
+              (i64 0))),
+            sub_64)>;
 def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rn))),
           (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>;
-def : Pat<(v2i32 (concat_vectors undef, (v1i32 FPR32:$Rn))),
-          (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>;
 
 //patterns for EXTRACT_SUBVECTOR
 def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll
index 2a8d97414b8..bda56564449 100644
--- a/test/CodeGen/AArch64/neon-copy.ll
+++ b/test/CodeGen/AArch64/neon-copy.ll
@@ -948,25 +948,6 @@ entry:
   ret <2 x i32> %vecinit1.i
 }
 
-define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) {
-; CHECK-LABEL: test_concat_undef_v1i32:
-; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
-entry:
-  %0 = extractelement <1 x i32> %a, i32 0
-  %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
-define <2 x i32> @test_concat_v1i32_v1i32(<1 x i32> %a) {
-; CHECK-LABEL: test_concat_v1i32_v1i32:
-; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
-entry:
-  %0 = extractelement <1 x i32> %a, i32 0
-  %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
-  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
-  ret <2 x i32> %vecinit1.i
-}
-
 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
@@ -1002,6 +983,52 @@ entry:
   ret <16 x i8> %vecinit30
 }
 
+define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) {
+; CHECK-LABEL: test_concat_undef_v1i32:
+; CHECK: ins v{{[0-9]+}}.s[1], v{{[0-9]+}}.s[0]
+entry:
+  %0 = extractelement <1 x i32> %a, i32 0
+  %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
+  ret <2 x i32> %vecinit1.i
+}
+
+declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>) #4
+
+define <2 x i32> @test_concat_v1i32_undef(<1 x i32> %a) {
+; CHECK-LABEL: test_concat_v1i32_undef:
+; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-NEXT: ret
+entry:
+  %b = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a)
+  %0 = extractelement <1 x i32> %b, i32 0
+  %vecinit.i432 = insertelement <2 x i32> undef, i32 %0, i32 0
+  ret <2 x i32> %vecinit.i432
+}
+
+define <2 x i32> @test_concat_same_v1i32_v1i32(<1 x i32> %a) {
+; CHECK-LABEL: test_concat_same_v1i32_v1i32:
+; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
+entry:
+  %0 = extractelement <1 x i32> %a, i32 0
+  %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
+  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
+  ret <2 x i32> %vecinit1.i
+}
+
+define <2 x i32> @test_concat_diff_v1i32_v1i32(<1 x i32> %a, <1 x i32> %b) {
+; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
+; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-NEXT: sqabs s{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-NEXT: ins v0.s[1], v1.s[0]
+entry:
+  %c = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a)
+  %d = extractelement <1 x i32> %c, i32 0
+  %e = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %b)
+  %f = extractelement <1 x i32> %e, i32 0
+  %h = shufflevector <1 x i32> %c, <1 x i32> %e, <2 x i32> <i32 0, i32 1>
+  ret <2 x i32> %h
+}
+
 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]