From 555f57f67b4a6dd1738f42cdf8c8499461edec7c Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Fri, 17 Jan 2014 05:44:46 +0000 Subject: [PATCH] [AArch64] Fix failure to select concat_vectors of two v1i32 types. Also fix failure to select scalar_to_vector from f32 to v2f32/v4f32. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199461 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 4 +-- lib/Target/AArch64/AArch64InstrNEON.td | 18 ++++++------ test/CodeGen/AArch64/neon-copy.ll | 32 ++++++++++++++++++++++ 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 581c8935c1f..00a4ac6a8f9 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -324,13 +324,11 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal); diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 81371be0666..251fee2af2f 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -6888,15 +6888,10 @@ def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)), def : Pat<(v2i64 
(scalar_to_vector GPR64:$Rn)), (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>; -def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)), - (v2i32 (EXTRACT_SUBREG (v16i8 - (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; - -def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)), - (v2i32 (EXTRACT_SUBREG (v16i8 - (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; +def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), + (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>; +def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), + (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>; def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))), (v1f64 FPR64:$Rn)>; @@ -7063,6 +7058,11 @@ defm : Concat_Vector_Pattern; defm : Concat_Vector_Pattern; defm : Concat_Vector_Pattern; +def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rn))), + (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>; +def : Pat<(v2i32 (concat_vectors undef, (v1i32 FPR32:$Rn))), + (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>; + //patterns for EXTRACT_SUBVECTOR def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))), (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll index 95ff25099d8..7faf2a2a8cc 100644 --- a/test/CodeGen/AArch64/neon-copy.ll +++ b/test/CodeGen/AArch64/neon-copy.ll @@ -948,3 +948,35 @@ entry: ret <2 x i32> %vecinit1.i } +define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) { +; CHECK-LABEL: test_concat_undef_v1i32: +; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] +entry: + %0 = extractelement <1 x i32> %a, i32 0 + %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 + ret <2 x i32> %vecinit1.i +} + +define <2 x i32> @test_concat_v1i32_v1i32(<1 x i32> %a) { +; CHECK-LABEL: test_concat_v1i32_v1i32: +; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] +entry: + %0 = extractelement <1 x i32> %a, i32 0 + %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 + %vecinit1.i = insertelement 
<2 x i32> %vecinit.i, i32 %0, i32 1 + ret <2 x i32> %vecinit1.i +} + +define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<1 x float> %a) { +entry: + %0 = extractelement <1 x float> %a, i32 0 + %vecinit1.i = insertelement <2 x float> undef, float %0, i32 0 + ret <2 x float> %vecinit1.i +} + +define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<1 x float> %a) { +entry: + %0 = extractelement <1 x float> %a, i32 0 + %vecinit1.i = insertelement <4 x float> undef, float %0, i32 0 + ret <4 x float> %vecinit1.i +} \ No newline at end of file