[AArch64] Refactor the Neon vector/scalar floating-point convert implementation.

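Specifically, reuse the ARM NEON intrinsics (int_arm_neon_vcvt{n,p,m,a}{s,u}) where possible and remove the duplicate AArch64-specific int_aarch64_neon_fcvt{n,p,m,a}{s,u} intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196926 91177308-0d34-0410-b5e6-96231b3b80d8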
2024-12-14 11:32:34 +00:00 · 2013-12-10 15:35:33 +00:00 · 2013-12-10 15:35:33 +00:00 · 72800f3a06
commit 72800f3a06
parent 4b3fcc21ec
4 changed files with 160 additions and 152 deletions
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -38,22 +38,6 @@ def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
 def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
 def int_aarch64_neon_fcvtxn :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
-def int_aarch64_neon_fcvtns : 
-  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
-def int_aarch64_neon_fcvtnu :
-  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
-def int_aarch64_neon_fcvtps :
-  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
-def int_aarch64_neon_fcvtpu :
-  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
-def int_aarch64_neon_fcvtms :
-  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
-def int_aarch64_neon_fcvtmu :
-  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
-def int_aarch64_neon_fcvtas :
-  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
-def int_aarch64_neon_fcvtau :
-  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
 def int_aarch64_neon_fcvtzs :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
 def int_aarch64_neon_fcvtzu :
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -4982,35 +4982,35 @@ def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
                                                  FCVTXN>;

 defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtns,
                                                  FCVTNSss, FCVTNSdd>;

 defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtnu,
                                                  FCVTNUss, FCVTNUdd>;

 defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtms,
                                                  FCVTMSss, FCVTMSdd>;

 defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtmu,
                                                  FCVTMUss, FCVTMUdd>;

 defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtas,
                                                  FCVTASss, FCVTASdd>;

 defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtau,
                                                  FCVTAUss, FCVTAUdd>;

 defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtps,
                                                  FCVTPSss, FCVTPSdd>;

 defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtpu,
                                                  FCVTPUss, FCVTPUdd>;

 defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
@@ -8392,23 +8392,23 @@ multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
 }

 defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
-                                     int_aarch64_neon_fcvtns>;
+                                     int_arm_neon_vcvtns>;
 defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
-                                     int_aarch64_neon_fcvtnu>;
+                                     int_arm_neon_vcvtnu>;
 defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
-                                     int_aarch64_neon_fcvtps>;
+                                     int_arm_neon_vcvtps>;
 defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
-                                     int_aarch64_neon_fcvtpu>;
+                                     int_arm_neon_vcvtpu>;
 defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
-                                     int_aarch64_neon_fcvtms>;
+                                     int_arm_neon_vcvtms>;
 defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
-                                     int_aarch64_neon_fcvtmu>;
+                                     int_arm_neon_vcvtmu>;
 defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
 defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
 defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
-                                     int_aarch64_neon_fcvtas>;
+                                     int_arm_neon_vcvtas>;
 defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
-                                     int_aarch64_neon_fcvtau>;
+                                     int_arm_neon_vcvtau>;

 multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
--- a/test/CodeGen/AArch64/neon-misc.ll
+++ b/test/CodeGen/AArch64/neon-misc.ll
@@ -1080,147 +1080,171 @@ define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 {
  ret <2 x i64> %vcvt.i
 }

-define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 {
+define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vcvtn_s32_f32
 ; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtns_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %a) #4
+  %vcvtns_f321.i = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a)
  ret <2 x i32> %vcvtns_f321.i
 }

-define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 {
+define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vcvtnq_s32_f32
 ; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtns_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %a) #4
+  %vcvtns_f321.i = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a)
  ret <4 x i32> %vcvtns_f321.i
 }

-define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) #0 {
+define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vcvtnq_s64_f64
 ; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtns_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %a) #4
+  %vcvtns_f641.i = call <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double> %a)
  ret <2 x i64> %vcvtns_f641.i
 }

-define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 {
+define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vcvtn_u32_f32
 ; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtnu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a) #4
+  %vcvtnu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a)
  ret <2 x i32> %vcvtnu_f321.i
 }

-define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 {
+define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vcvtnq_u32_f32
 ; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtnu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a) #4
+  %vcvtnu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a)
  ret <4 x i32> %vcvtnu_f321.i
 }

-define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) #0 {
+define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vcvtnq_u64_f64
 ; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtnu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a) #4
+  %vcvtnu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double> %a)
  ret <2 x i64> %vcvtnu_f641.i
 }

-define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 {
+define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vcvtp_s32_f32
 ; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtps_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %a) #4
+  %vcvtps_f321.i = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a)
  ret <2 x i32> %vcvtps_f321.i
 }

-define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 {
+define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vcvtpq_s32_f32
 ; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtps_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %a) #4
+  %vcvtps_f321.i = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a)
  ret <4 x i32> %vcvtps_f321.i
 }

-define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) #0 {
+define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vcvtpq_s64_f64
 ; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtps_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %a) #4
+  %vcvtps_f641.i = call <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double> %a)
  ret <2 x i64> %vcvtps_f641.i
 }

-define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 {
+define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vcvtp_u32_f32
 ; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtpu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a) #4
+  %vcvtpu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a)
  ret <2 x i32> %vcvtpu_f321.i
 }

-define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 {
+define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vcvtpq_u32_f32
 ; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtpu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a) #4
+  %vcvtpu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a)
  ret <4 x i32> %vcvtpu_f321.i
 }

-define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) #0 {
+define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vcvtpq_u64_f64
 ; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtpu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a) #4
+  %vcvtpu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double> %a)
  ret <2 x i64> %vcvtpu_f641.i
 }

-define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 {
+define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vcvtm_s32_f32
 ; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtms_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %a) #4
+  %vcvtms_f321.i = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a)
  ret <2 x i32> %vcvtms_f321.i
 }

-define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 {
+define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vcvtmq_s32_f32
 ; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtms_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %a) #4
+  %vcvtms_f321.i = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a)
  ret <4 x i32> %vcvtms_f321.i
 }

-define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) #0 {
+define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vcvtmq_s64_f64
 ; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtms_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %a) #4
+  %vcvtms_f641.i = call <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double> %a)
  ret <2 x i64> %vcvtms_f641.i
 }

-define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 {
+define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vcvtm_u32_f32
 ; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtmu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a) #4
+  %vcvtmu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a)
  ret <2 x i32> %vcvtmu_f321.i
 }

-define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 {
+define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vcvtmq_u32_f32
 ; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtmu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a) #4
+  %vcvtmu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a)
  ret <4 x i32> %vcvtmu_f321.i
 }

-define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) #0 {
+define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vcvtmq_u64_f64
 ; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtmu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a) #4
+  %vcvtmu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double> %a)
  ret <2 x i64> %vcvtmu_f641.i
 }

-define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 {
+define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vcvta_s32_f32
 ; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtas_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %a) #4
+  %vcvtas_f321.i = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a)
  ret <2 x i32> %vcvtas_f321.i
 }

-define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 {
+define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vcvtaq_s32_f32
 ; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtas_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %a) #4
+  %vcvtas_f321.i = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a)
  ret <4 x i32> %vcvtas_f321.i
 }

-define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) #0 {
+define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vcvtaq_s64_f64
 ; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtas_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %a) #4
+  %vcvtas_f641.i = call <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double> %a)
  ret <2 x i64> %vcvtas_f641.i
 }

-define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 {
+define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vcvta_u32_f32
 ; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vcvtau_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %a) #4
+  %vcvtau_f321.i = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a)
  ret <2 x i32> %vcvtau_f321.i
 }

-define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 {
+define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vcvtaq_u32_f32
 ; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vcvtau_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %a) #4
+  %vcvtau_f321.i = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a)
  ret <4 x i32> %vcvtau_f321.i
 }

-define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) #0 {
+define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vcvtaq_u64_f64
 ; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vcvtau_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %a) #4
+  %vcvtau_f641.i = call <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double> %a)
  ret <2 x i64> %vcvtau_f641.i
 }

@@ -1348,53 +1372,53 @@ declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) #2

 declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) #2

-declare <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double>) #2
+declare <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double>)

-declare <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float>) #2
+declare <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float>)

-declare <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float>) #2
+declare <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float>)

-declare <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double>) #2
+declare <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double>)

-declare <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float>) #2
+declare <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float>)

-declare <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float>) #2
+declare <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float>)

-declare <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double>) #2
+declare <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double>)

-declare <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float>) #2
+declare <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float>)

-declare <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float>) #2
+declare <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float>)

-declare <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double>) #2
+declare <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double>)

-declare <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float>) #2
+declare <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float>)

-declare <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float>) #2
+declare <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float>)

-declare <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double>) #2
+declare <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double>)

-declare <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>) #2
+declare <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float>)

-declare <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float>) #2
+declare <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float>)

-declare <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double>) #2
+declare <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double>)

-declare <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float>) #2
+declare <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float>)

-declare <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float>) #2
+declare <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float>)

-declare <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double>) #2
+declare <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double>)

-declare <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float>) #2
+declare <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float>)

-declare <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float>) #2
+declare <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float>)

-declare <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double>) #2
+declare <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double>)

-declare <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float>) #2
+declare <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float>)

-declare <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float>) #2
+declare <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float>)

 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3

@@ -1624,56 +1648,56 @@ define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) {
 define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) {
 ; CHECK-LABEL: test_vcvtn_s64_f64
 ; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
+  %1 = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
 }

 define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) {
 ; CHECK-LABEL: test_vcvtn_u64_f64
 ; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
+  %1 = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
 }

 define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) {
 ; CHECK-LABEL: test_vcvtp_s64_f64
 ; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
+  %1 = call <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
 }

 define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) {
 ; CHECK-LABEL: test_vcvtp_u64_f64
 ; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
+  %1 = call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
 }

 define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) {
 ; CHECK-LABEL: test_vcvtm_s64_f64
 ; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
+  %1 = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
 }

 define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) {
 ; CHECK-LABEL: test_vcvtm_u64_f64
 ; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
+  %1 = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
 }

 define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) {
 ; CHECK-LABEL: test_vcvta_s64_f64
 ; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
+  %1 = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
 }

 define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) {
 ; CHECK-LABEL: test_vcvta_u64_f64
 ; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}}
-  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
+  %1 = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
 }

@@ -1691,14 +1715,14 @@ define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) {
  ret <1 x double> %1
 }

-declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>)
-declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>)

 define <1 x double> @test_vrndn_f64(<1 x double> %a) {
 ; CHECK-LABEL: test_vrndn_f64
--- a/test/CodeGen/AArch64/neon-scalar-fcvt.ll
+++ b/test/CodeGen/AArch64/neon-scalar-fcvt.ll
@@ -19,192 +19,192 @@ define i32 @test_vcvtass(float %a) {
 ; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}}
 entry:
  %vcvtas.i = insertelement <1 x float> undef, float %a, i32 0
-  %vcvtas1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float> %vcvtas.i)
+  %vcvtas1.i = call <1 x i32> @llvm.arm.neon.vcvtas.v1i32.v1f32(<1 x float> %vcvtas.i)
  %0 = extractelement <1 x i32> %vcvtas1.i, i32 0
  ret i32 %0
 }

-declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float>)
+declare <1 x i32> @llvm.arm.neon.vcvtas.v1i32.v1f32(<1 x float>)

 define i64 @test_test_vcvtasd(double %a) {
 ; CHECK: test_test_vcvtasd
 ; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}}
 entry:
  %vcvtas.i = insertelement <1 x double> undef, double %a, i32 0
-  %vcvtas1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %vcvtas.i)
+  %vcvtas1.i = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %vcvtas.i)
  %0 = extractelement <1 x i64> %vcvtas1.i, i32 0
  ret i64 %0
 }

-declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>)

 define i32 @test_vcvtaus(float %a) {
 ; CHECK: test_vcvtaus
 ; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}}
 entry:
  %vcvtau.i = insertelement <1 x float> undef, float %a, i32 0
-  %vcvtau1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float> %vcvtau.i)
+  %vcvtau1.i = call <1 x i32> @llvm.arm.neon.vcvtau.v1i32.v1f32(<1 x float> %vcvtau.i)
  %0 = extractelement <1 x i32> %vcvtau1.i, i32 0
  ret i32 %0
 }

-declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float>)
+declare <1 x i32> @llvm.arm.neon.vcvtau.v1i32.v1f32(<1 x float>)

 define i64 @test_vcvtaud(double %a) {
 ; CHECK: test_vcvtaud
 ; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}}
 entry:
  %vcvtau.i = insertelement <1 x double> undef, double %a, i32 0
-  %vcvtau1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %vcvtau.i)
+  %vcvtau1.i = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %vcvtau.i)
  %0 = extractelement <1 x i64> %vcvtau1.i, i32 0
  ret i64 %0
 }

-declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>) 
+declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>) 

 define i32 @test_vcvtmss(float %a) {
 ; CHECK: test_vcvtmss
 ; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}}
 entry:
  %vcvtms.i = insertelement <1 x float> undef, float %a, i32 0
-  %vcvtms1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float> %vcvtms.i)
+  %vcvtms1.i = call <1 x i32> @llvm.arm.neon.vcvtms.v1i32.v1f32(<1 x float> %vcvtms.i)
  %0 = extractelement <1 x i32> %vcvtms1.i, i32 0
  ret i32 %0
 }

-declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float>)
+declare <1 x i32> @llvm.arm.neon.vcvtms.v1i32.v1f32(<1 x float>)

 define i64 @test_vcvtmd_s64_f64(double %a) {
 ; CHECK: test_vcvtmd_s64_f64
 ; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}}
 entry:
  %vcvtms.i = insertelement <1 x double> undef, double %a, i32 0
-  %vcvtms1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %vcvtms.i)
+  %vcvtms1.i = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %vcvtms.i)
  %0 = extractelement <1 x i64> %vcvtms1.i, i32 0
  ret i64 %0
 }

-declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>)

 define i32 @test_vcvtmus(float %a) {
 ; CHECK: test_vcvtmus
 ; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}}
 entry:
  %vcvtmu.i = insertelement <1 x float> undef, float %a, i32 0
-  %vcvtmu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float> %vcvtmu.i)
+  %vcvtmu1.i = call <1 x i32> @llvm.arm.neon.vcvtmu.v1i32.v1f32(<1 x float> %vcvtmu.i)
  %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0
  ret i32 %0
 }

-declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float>)
+declare <1 x i32> @llvm.arm.neon.vcvtmu.v1i32.v1f32(<1 x float>)

 define i64 @test_vcvtmud(double %a) {
 ; CHECK: test_vcvtmud
 ; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}}
 entry:
  %vcvtmu.i = insertelement <1 x double> undef, double %a, i32 0
-  %vcvtmu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %vcvtmu.i)
+  %vcvtmu1.i = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %vcvtmu.i)
  %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0
  ret i64 %0
 }

-declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>)

 define i32 @test_vcvtnss(float %a) {
 ; CHECK: test_vcvtnss
 ; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}}
 entry:
  %vcvtns.i = insertelement <1 x float> undef, float %a, i32 0
-  %vcvtns1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float> %vcvtns.i)
+  %vcvtns1.i = call <1 x i32> @llvm.arm.neon.vcvtns.v1i32.v1f32(<1 x float> %vcvtns.i)
  %0 = extractelement <1 x i32> %vcvtns1.i, i32 0
  ret i32 %0
 }

-declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float>)
+declare <1 x i32> @llvm.arm.neon.vcvtns.v1i32.v1f32(<1 x float>)

 define i64 @test_vcvtnd_s64_f64(double %a) {
 ; CHECK: test_vcvtnd_s64_f64
 ; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}}
 entry:
  %vcvtns.i = insertelement <1 x double> undef, double %a, i32 0
-  %vcvtns1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %vcvtns.i)
+  %vcvtns1.i = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %vcvtns.i)
  %0 = extractelement <1 x i64> %vcvtns1.i, i32 0
  ret i64 %0
 }

-declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>)

 define i32 @test_vcvtnus(float %a) {
 ; CHECK: test_vcvtnus
 ; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}}
 entry:
  %vcvtnu.i = insertelement <1 x float> undef, float %a, i32 0
-  %vcvtnu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float> %vcvtnu.i)
+  %vcvtnu1.i = call <1 x i32> @llvm.arm.neon.vcvtnu.v1i32.v1f32(<1 x float> %vcvtnu.i)
  %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0
  ret i32 %0
 }

-declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float>)
+declare <1 x i32> @llvm.arm.neon.vcvtnu.v1i32.v1f32(<1 x float>)

 define i64 @test_vcvtnud(double %a) {
 ; CHECK: test_vcvtnud
 ; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}}
 entry:
  %vcvtnu.i = insertelement <1 x double> undef, double %a, i32 0
-  %vcvtnu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %vcvtnu.i)
+  %vcvtnu1.i = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %vcvtnu.i)
  %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0
  ret i64 %0
 }

-declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>)

 define i32 @test_vcvtpss(float %a) {
 ; CHECK: test_vcvtpss
 ; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}}
 entry:
  %vcvtps.i = insertelement <1 x float> undef, float %a, i32 0
-  %vcvtps1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float> %vcvtps.i)
+  %vcvtps1.i = call <1 x i32> @llvm.arm.neon.vcvtps.v1i32.v1f32(<1 x float> %vcvtps.i)
  %0 = extractelement <1 x i32> %vcvtps1.i, i32 0
  ret i32 %0
 }

-declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float>)
+declare <1 x i32> @llvm.arm.neon.vcvtps.v1i32.v1f32(<1 x float>)

 define i64 @test_vcvtpd_s64_f64(double %a) {
 ; CHECK: test_vcvtpd_s64_f64
 ; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}}
 entry:
  %vcvtps.i = insertelement <1 x double> undef, double %a, i32 0
-  %vcvtps1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %vcvtps.i)
+  %vcvtps1.i = call <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %vcvtps.i)
  %0 = extractelement <1 x i64> %vcvtps1.i, i32 0
  ret i64 %0
 }

-declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>)

 define i32 @test_vcvtpus(float %a) {
 ; CHECK: test_vcvtpus
 ; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}}
 entry:
  %vcvtpu.i = insertelement <1 x float> undef, float %a, i32 0
-  %vcvtpu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float> %vcvtpu.i)
+  %vcvtpu1.i = call <1 x i32> @llvm.arm.neon.vcvtpu.v1i32.v1f32(<1 x float> %vcvtpu.i)
  %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0
  ret i32 %0
 }

-declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float>)
+declare <1 x i32> @llvm.arm.neon.vcvtpu.v1i32.v1f32(<1 x float>)

 define i64 @test_vcvtpud(double %a) {
 ; CHECK: test_vcvtpud
 ; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}}
 entry:
   %vcvtpu.i = insertelement <1 x double> undef, double %a, i32 0
-  %vcvtpu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %vcvtpu.i)
+  %vcvtpu1.i = call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %vcvtpu.i)
   %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0
   ret i64 %0
 }

-declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>)

 define i32 @test_vcvtss(float %a) {
 ; CHECK: test_vcvtss