diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index c4039631c9b..172a5695684 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -196,9 +196,6 @@ def int_aarch64_neon_vaddds :
 def int_aarch64_neon_vadddu :
   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
 
-// Scalar Saturating Add (Signed, Unsigned)
-def int_aarch64_neon_vqadds : Neon_2Arg_Intrinsic;
-def int_aarch64_neon_vqaddu : Neon_2Arg_Intrinsic;
 
 // Scalar Sub
 def int_aarch64_neon_vsubds :
@@ -206,9 +203,6 @@ def int_aarch64_neon_vsubds :
 def int_aarch64_neon_vsubdu :
   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
 
-// Scalar Saturating Sub (Signed, Unsigned)
-def int_aarch64_neon_vqsubs : Neon_2Arg_Intrinsic;
-def int_aarch64_neon_vqsubu : Neon_2Arg_Intrinsic;
 
 // Scalar Shift
 // Scalar Shift Left
@@ -324,9 +318,6 @@ def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;
 // Signed Saturating Doubling Multiply-Subtract Long
 def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;
 
-// Signed Saturating Doubling Multiply Long
-def int_aarch64_neon_vqdmull : Neon_2Arg_Long_Intrinsic;
-
 class Neon_2Arg_ShiftImm_Intrinsic
   : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index bcd59bd2e72..5b6168eb081 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -4958,22 +4958,16 @@ defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
 
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
 
 // Patterns to match llvm.aarch64.* intrinsic for
 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
                                            SQADDhhh, SQADDsss, SQADDddd>;
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
                                            UQADDhhh, UQADDsss, UQADDddd>;
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
                                            SQSUBhhh, SQSUBsss, SQSUBddd>;
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
                                            UQSUBhhh, UQSUBsss, UQSUBddd>;
 
 // Scalar Integer Saturating Doubling Multiply Half High
@@ -5093,7 +5087,7 @@ defm : Neon_Scalar3Diff_ml_HS_size_patterns;
 
-defm : Neon_Scalar3Diff_HS_size_patterns<int_aarch64_neon_vqdmull,
+defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
                                          SQDMULLshh, SQDMULLdss>;
 
 // Scalar Signed Integer Convert To Floating-point
@@ -5564,7 +5558,8 @@ multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
                             OpNImm:$Imm))>;
 }
 
-// Scalar Floating Point fused multiply-add and multiply-subtract (scalar, by element)
+// Scalar Floating Point fused multiply-add and
+// multiply-subtract (scalar, by element)
 defm : Neon_ScalarXIndexedElem_FMA_Patterns;
 defm : Neon_ScalarXIndexedElem_FMA_Patterns;
@@ ... @@
+// Scalar Signed saturating doubling multiply long (scalar, by element)
+def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
+  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+  let Inst{11} = 0b0; // h
+  let Inst{21} = Imm{1}; // l
+  let Inst{20} = Imm{0}; // m
+  let Inst{19-16} = MRm{3-0};
+}
+def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
+  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+  let Inst{11} = Imm{2}; // h
+  let Inst{21} = Imm{1}; // l
+  let Inst{20} = Imm{0}; // m
+  let Inst{19-16} = MRm{3-0};
+}
+def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
+  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+  let Inst{11} = 0b0; // h
+  let Inst{21} = Imm{0}; // l
+  let Inst{20-16} = MRm;
+}
+def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
+  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+  let Inst{11} = Imm{1}; // h
+  let Inst{21} = Imm{0}; // l
+  let Inst{20-16} = MRm;
+}
+
+multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
+  SDPatternOperator opnode,
+  Instruction INST,
+  ValueType ResTy, RegisterClass FPRC,
+  ValueType OpVTy, ValueType OpTy,
+  ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {
+
+  def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
+              (OpVTy (scalar_to_vector
+                (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
+            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
+
+  //swapped operands
+  def : Pat<(ResTy (opnode
+              (OpVTy (scalar_to_vector
+                (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
+              (OpVTy FPRC:$Rn))),
+            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
+}
+
+
+// Patterns for Scalar Signed saturating doubling
+// multiply long (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+
 // Scalar Signed saturating doubling multiply-add long (scalar, by element)
 def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
   0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
@@ -5629,34 +5688,64 @@ def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
   let Inst{20-16} = MRm;
 }
 
-// Scalar Signed saturating doubling multiply long (scalar, by element)
-def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
-  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
-  let Inst{11} = 0b0; // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
-  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
-  let Inst{11} = Imm{2}; // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
-  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
-  let Inst{11} = 0b0; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
-  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{11} = Imm{1}; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
+multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
+  SDPatternOperator opnode,
+  SDPatternOperator coreopnode,
+  Instruction INST,
+  ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
+  ValueType OpTy,
+  ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {
+
+  def : Pat<(ResTy (opnode
+              (ResTy ResFPRC:$Ra),
+              (ResTy (coreopnode (OpTy FPRC:$Rn),
+                (OpTy (scalar_to_vector
+                  (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
+            (ResTy (INST (ResTy ResFPRC:$Ra),
+              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
+
+  // swapped operands
+  def : Pat<(ResTy (opnode
+              (ResTy ResFPRC:$Ra),
+              (ResTy (coreopnode
+                (OpTy (scalar_to_vector
+                  (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
+                (OpTy FPRC:$Rn))))),
+            (ResTy (INST (ResTy ResFPRC:$Ra),
+              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
 }
 
+// Patterns for Scalar Signed saturating
+// doubling multiply-add long (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
+
+// Patterns for Scalar Signed saturating
+// doubling multiply-sub long (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
+
 
 // Scalar Signed saturating doubling multiply returning
 // high half (scalar, by element)
 def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
@@ -5686,6 +5775,21 @@ def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
   let Inst{20-16} = MRm;
 }
 
+// Patterns for Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+
 // Scalar Signed saturating rounding doubling multiply
 // returning high half (scalar, by element)
 def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
@@ -5715,6 +5819,18 @@ def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
   let Inst{20-16} = MRm;
 }
 
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns;
 
 // Scalar Copy - DUP element to scalar
 class NeonI_Scalar_DUP
   %vqdmull.i = insertelement <1 x i16> undef, i16 %a, i32 0
   %vqdmull1.i = insertelement <1 x i16> undef, i16 %b, i32 0
-  %vqdmull2.i = call <1 x i32> @llvm.aarch64.neon.vqdmull.v1i32(<1 x i16> %vqdmull.i, <1 x i16> %vqdmull1.i)
+  %vqdmull2.i = call <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16> %vqdmull.i, <1 x i16> %vqdmull1.i)
   %0 = extractelement <1 x i32> %vqdmull2.i, i32 0
   ret i32 %0
 }
@@ -134,10 +134,10 @@ define i64 @test_vqdmulls_s32(i32 %a, i32 %b) {
 entry:
   %vqdmull.i = insertelement <1 x i32> undef, i32 %a, i32 0
   %vqdmull1.i = insertelement <1 x i32> undef, i32 %b, i32 0
-  %vqdmull2.i = call <1 x i64> @llvm.aarch64.neon.vqdmull.v1i64(<1 x i32> %vqdmull.i, <1 x i32> %vqdmull1.i)
+  %vqdmull2.i = call <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32> %vqdmull.i, <1 x i32> %vqdmull1.i)
   %0 = extractelement <1 x i64> %vqdmull2.i, i32 0
   ret i64 %0
 }
 
-declare <1 x i32> @llvm.aarch64.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>)
-declare <1 x i64> @llvm.aarch64.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>)
+declare <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>)
diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
index 5f035652588..bd66f80cebb 100644
--- a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
+++ b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
@@ -1,171 +1,138 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
-declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqadd_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqadd d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqadd_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqadd d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqsub_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqsub d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqsub_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqsub d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-declare <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
-declare <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)
 
 define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
 ; CHECK: test_uqadd_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+  %tmp1 = call <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
 ;CHECK: uqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
   ret <1 x i8> %tmp1
 }
 
 define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
 ; CHECK: test_sqadd_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+  %tmp1 = call <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
 ;CHECK: sqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
   ret <1 x i8> %tmp1
 }
 
-declare <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
-declare <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
+declare <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)
 
 define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
 ; CHECK: test_uqsub_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+  %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
 ;CHECK: uqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
   ret <1 x i8> %tmp1
 }
 
 define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
 ; CHECK: test_sqsub_v1i8_aarch64:
-  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
+  %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
 ;CHECK: sqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}}
   ret <1 x i8> %tmp1
 }
 
-declare <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)
 
 define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
 ; CHECK: test_uqadd_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+  %tmp1 = call <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
 ;CHECK: uqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
   ret <1 x i16> %tmp1
 }
 
 define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
 ; CHECK: test_sqadd_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+  %tmp1 = call <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
 ;CHECK: sqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
   ret <1 x i16> %tmp1
 }
 
-declare <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
-declare <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)
 
 define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
 ; CHECK: test_uqsub_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+  %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
 ;CHECK: uqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
   ret <1 x i16> %tmp1
 }
 
 define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
 ; CHECK: test_sqsub_v1i16_aarch64:
-  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
+  %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
 ;CHECK: sqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}}
   ret <1 x i16> %tmp1
 }
 
-declare <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)
 
 define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
 ; CHECK: test_uqadd_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+  %tmp1 = call <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
 ;CHECK: uqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
   ret <1 x i32> %tmp1
 }
 
 define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
 ; CHECK: test_sqadd_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+  %tmp1 = call <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
 ;CHECK: sqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
   ret <1 x i32> %tmp1
 }
 
-declare <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
-declare <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
+declare <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)
 
 define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
 ; CHECK: test_uqsub_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+  %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
 ;CHECK: uqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
   ret <1 x i32> %tmp1
 }
+
 define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
 ; CHECK: test_sqsub_v1i32_aarch64:
-  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
+  %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
 ;CHECK: sqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}}
   ret <1 x i32> %tmp1
 }
 
-declare <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
 
 define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
 ; CHECK: test_uqadd_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
 ;CHECK: uqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
   ret <1 x i64> %tmp1
 }
 
 define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
 ; CHECK: test_sqadd_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
 ;CHECK: sqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
   ret <1 x i64> %tmp1
 }
 
-declare <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
 
 define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
 ; CHECK: test_uqsub_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
 ;CHECK: uqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
   ret <1 x i64> %tmp1
 }
 
 define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
 ; CHECK: test_sqsub_v1i64_aarch64:
-  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
 ;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
   ret <1 x i64> %tmp1
 }
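For a quick sanity check of the unified intrinsic naming, here is a minimal standalone IR test in the same style as the files above; it is a sketch, not part of the patch, and the function name sqadd_d_example is illustrative. After this change, the scalar saturating ops on AArch64 are reached through the llvm.arm.neon.* spelling, so a <1 x i64> saturating signed add should select the scalar SQADD:

; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @sqadd_d_example(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ; Saturating signed add on a one-element i64 vector lowers to the
  ; d-register form of SQADD (hypothetical test, mirrors the suite above).
  %res = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
  ret <1 x i64> %res
}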