# Patch summary (free-form preamble placed before the first "diff --git" header).
#
# Files touched:
#   include/llvm/IR/IntrinsicsARM64.td
#   lib/Target/ARM64/ARM64InstrInfo.td
#   test/CodeGen/ARM64/vsqrt.ll
#
# include/llvm/IR/IntrinsicsARM64.td
#   * Adds int_arm64_neon_frecpx ("Reciprocal Exponent"), defined with
#     AdvSIMD_1FloatArg_Intrinsic.
#   * Switches int_arm64_neon_frecpe and int_arm64_neon_frsqrte from
#     AdvSIMD_1VectorArg_Intrinsic to AdvSIMD_1FloatArg_Intrinsic.
#     The integer siblings (urecpe, ursqrte) are left on the vector-arg class.
#   * NOTE(review): presumably AdvSIMD_1FloatArg_Intrinsic accepts scalar as
#     well as vector floating-point types -- the test file keeps its existing
#     .v2f32/.v4f32/.v2f64 declarations next to the new .f32/.f64 ones --
#     but the class definition is not part of this patch; confirm.
#
# lib/Target/ARM64/ARM64InstrInfo.td
#   * Adds selection patterns for the f32 and f64 forms of the intrinsics:
#       frecpe  : f32 -> FRECPEv1i32  (FPR32),  f64 -> FRECPEv1i64  (FPR64)
#       frecpx  : f32 -> FRECPXv1i32  (FPR32),  f64 -> FRECPXv1i64  (FPR64)
#       frsqrte : f32 -> FRSQRTEv1i32 (FPR32),  f64 -> FRSQRTEv1i64 (FPR64)
#   * The existing v1f64 patterns for frecpe and frsqrte appear only as
#     unchanged context lines.
#
# test/CodeGen/ARM64/vsqrt.ll
#   * Adds frecpe_s/frecpe_d, frecpx_s/frecpx_d and frsqrte_s/frsqrte_d.
#     Each loads a float or double, calls the matching .f32/.f64 intrinsic,
#     and CHECKs for the mnemonic writing s0 or d0 from an s- or d-register.
#   * Adds the corresponding .f32/.f64 intrinsic declarations
#     (nounwind readnone).
#
# NOTE(review): in this copy of the patch the original line breaks appear to
# have been collapsed into spaces (hunk headers and file headers share a line
# with content). The patch text below is reproduced unchanged; the line
# structure must be restored from the upstream commit before it can be applied.
diff --git a/include/llvm/IR/IntrinsicsARM64.td b/include/llvm/IR/IntrinsicsARM64.td index 56f1b1de3d2..b280d005d0c 100644 --- a/include/llvm/IR/IntrinsicsARM64.td +++ b/include/llvm/IR/IntrinsicsARM64.td @@ -268,6 +268,9 @@ let Properties = [IntrNoMem] in { def int_arm64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic; def int_arm64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic; + // Reciprocal Exponent + def int_arm64_neon_frecpx : AdvSIMD_1FloatArg_Intrinsic; + // Vector Saturating Shift Left def int_arm64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic; def int_arm64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic; @@ -339,11 +342,11 @@ let Properties = [IntrNoMem] in { // Vector Reciprocal Estimate def int_arm64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic; - def int_arm64_neon_frecpe : AdvSIMD_1VectorArg_Intrinsic; + def int_arm64_neon_frecpe : AdvSIMD_1FloatArg_Intrinsic; // Vector Square Root Estimate def int_arm64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic; - def int_arm64_neon_frsqrte : AdvSIMD_1VectorArg_Intrinsic; + def int_arm64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic; // Vector Bitwise Reverse def int_arm64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic; diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td index 73fc046977c..4804f9af638 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/ARM64/ARM64InstrInfo.td @@ -2575,8 +2575,23 @@ def : Pat<(v1i64 (int_arm64_neon_fcvtps (v1f64 FPR64:$Rn))), (FCVTPSv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_arm64_neon_fcvtpu (v1f64 FPR64:$Rn))), (FCVTPUv1i64 FPR64:$Rn)>; + +def : Pat<(f32 (int_arm64_neon_frecpe (f32 FPR32:$Rn))), + (FRECPEv1i32 FPR32:$Rn)>; +def : Pat<(f64 (int_arm64_neon_frecpe (f64 FPR64:$Rn))), + (FRECPEv1i64 FPR64:$Rn)>; def : Pat<(v1f64 (int_arm64_neon_frecpe (v1f64 FPR64:$Rn))), (FRECPEv1i64 FPR64:$Rn)>; + +def : Pat<(f32 (int_arm64_neon_frecpx (f32 FPR32:$Rn))), + (FRECPXv1i32 FPR32:$Rn)>; +def : Pat<(f64 (int_arm64_neon_frecpx (f64 FPR64:$Rn))), + (FRECPXv1i64 FPR64:$Rn)>; + +def 
: Pat<(f32 (int_arm64_neon_frsqrte (f32 FPR32:$Rn))), + (FRSQRTEv1i32 FPR32:$Rn)>; +def : Pat<(f64 (int_arm64_neon_frsqrte (f64 FPR64:$Rn))), + (FRSQRTEv1i64 FPR64:$Rn)>; def : Pat<(v1f64 (int_arm64_neon_frsqrte (v1f64 FPR64:$Rn))), (FRSQRTEv1i64 FPR64:$Rn)>; diff --git a/test/CodeGen/ARM64/vsqrt.ll b/test/CodeGen/ARM64/vsqrt.ll index f4f56f4b30d..094d7042a4d 100644 --- a/test/CodeGen/ARM64/vsqrt.ll +++ b/test/CodeGen/ARM64/vsqrt.ll @@ -87,9 +87,46 @@ define <2 x double> @frecpe_2d(<2 x double>* %A) nounwind { ret <2 x double> %tmp3 } +define float @frecpe_s(float* %A) nounwind { +;CHECK-LABEL: frecpe_s: +;CHECK: frecpe s0, {{s[0-9]+}} + %tmp1 = load float* %A + %tmp3 = call float @llvm.arm64.neon.frecpe.f32(float %tmp1) + ret float %tmp3 +} + +define double @frecpe_d(double* %A) nounwind { +;CHECK-LABEL: frecpe_d: +;CHECK: frecpe d0, {{d[0-9]+}} + %tmp1 = load double* %A + %tmp3 = call double @llvm.arm64.neon.frecpe.f64(double %tmp1) + ret double %tmp3 +} + declare <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float>) nounwind readnone declare <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float>) nounwind readnone declare <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double>) nounwind readnone +declare float @llvm.arm64.neon.frecpe.f32(float) nounwind readnone +declare double @llvm.arm64.neon.frecpe.f64(double) nounwind readnone + +define float @frecpx_s(float* %A) nounwind { +;CHECK-LABEL: frecpx_s: +;CHECK: frecpx s0, {{s[0-9]+}} + %tmp1 = load float* %A + %tmp3 = call float @llvm.arm64.neon.frecpx.f32(float %tmp1) + ret float %tmp3 +} + +define double @frecpx_d(double* %A) nounwind { +;CHECK-LABEL: frecpx_d: +;CHECK: frecpx d0, {{d[0-9]+}} + %tmp1 = load double* %A + %tmp3 = call double @llvm.arm64.neon.frecpx.f64(double %tmp1) + ret double %tmp3 +} + +declare float @llvm.arm64.neon.frecpx.f32(float) nounwind readnone +declare double @llvm.arm64.neon.frecpx.f64(double) nounwind readnone define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind { 
;CHECK-LABEL: frsqrte_2s: @@ -115,9 +152,27 @@ define <2 x double> @frsqrte_2d(<2 x double>* %A) nounwind { ret <2 x double> %tmp3 } +define float @frsqrte_s(float* %A) nounwind { +;CHECK-LABEL: frsqrte_s: +;CHECK: frsqrte s0, {{s[0-9]+}} + %tmp1 = load float* %A + %tmp3 = call float @llvm.arm64.neon.frsqrte.f32(float %tmp1) + ret float %tmp3 +} + +define double @frsqrte_d(double* %A) nounwind { +;CHECK-LABEL: frsqrte_d: +;CHECK: frsqrte d0, {{d[0-9]+}} + %tmp1 = load double* %A + %tmp3 = call double @llvm.arm64.neon.frsqrte.f64(double %tmp1) + ret double %tmp3 +} + declare <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone declare <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone declare <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone +declare float @llvm.arm64.neon.frsqrte.f32(float) nounwind readnone +declare double @llvm.arm64.neon.frsqrte.f64(double) nounwind readnone define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind { ;CHECK-LABEL: urecpe_2s: