From 53e4471adcf34cac253d2486e6b29c331e2d973e Mon Sep 17 00:00:00 2001
From: David Goodwin
Date: Tue, 4 Aug 2009 20:39:05 +0000
Subject: [PATCH] Add NEON single-precision FP support for fabs and fneg.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78101 91177308-0d34-0410-b5e6-96231b3b80d8
---
NOTE(review): this copy was rebuilt from an extract in which all line breaks
were collapsed and every "<identifier...>" span had been stripped. TableGen
template argument lists (bits<8>, list<dag>, list<Predicate>, the ASuI
forwarding list, the N2VDInts arguments) and the "<type>" annotations in the
.ll comments were re-inserted from context. The N2VDInts parameter types
(SDNode OpNode, NeonI Inst) and all intra-line indentation are reconstructed
and should be confirmed against upstream r78101; apply with
`git apply --ignore-whitespace`. The author's e-mail address was lost in the
extract and has intentionally not been re-created.

 lib/Target/ARM/ARMInstrFormats.td |  8 ++++++++
 lib/Target/ARM/ARMInstrNEON.td    |  8 ++++++++
 lib/Target/ARM/ARMInstrVFP.td     | 12 ++++++------
 test/CodeGen/ARM/fabss.ll         | 13 +++++++++++++
 test/CodeGen/ARM/fnegs.ll         | 23 +++++++++++++++++++++++
 5 files changed, 58 insertions(+), 6 deletions(-)
 create mode 100644 test/CodeGen/ARM/fabss.ll
 create mode 100644 test/CodeGen/ARM/fnegs.ll

diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index de2bb78bb41..ce39a3f7376 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1071,6 +1071,14 @@ class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
   let Inst{7-4} = opcod3;
 }
 
+// Single precision, unary if no NEON
+// Same as ASuI except not available if NEON is enabled
+class ASuIn<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
+            string opc, string asm, list<dag> pattern>
+  : ASuI<opcod1, opcod2, opcod3, oops, iops, opc, asm, pattern> {
+  list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+}
+
 // Single precision, binary
 class ASbI<bits<8> opcod, dag oops, dag iops, string opc,
            string asm, list<dag> pattern>
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 147490c7843..f36e3269b9b 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -246,6 +246,12 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
         (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
         [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
 
+// Basic 2-register operations, scalar single-precision
+class N2VDInts<SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$a)),
+       (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
+        arm_ssubreg_0)>;
+
 // Narrow 2-register intrinsics.
 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -1338,6 +1344,7 @@ def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
                        v2f32, v2f32, int_arm_neon_vabsf>;
 def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
                        v4f32, v4f32, int_arm_neon_vabsf>;
+def : N2VDInts<fabs, VABSfd>;
 
 // VQABS : Vector Saturating Absolute Value
 defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s",
@@ -1372,6 +1379,7 @@ def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
 def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                     (outs QPR:$dst), (ins QPR:$src), "vneg.f32\t$dst, $src", "",
                     [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
+def : N2VDInts<fneg, VNEGf32d>;
 
 def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
 def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 923606d9772..20aff3704da 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -168,9 +168,9 @@ def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
                  "fabsd", " $dst, $a",
                  [(set DPR:$dst, (fabs DPR:$a))]>;
 
-def FABSS : ASuI<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
-                 "fabss", " $dst, $a",
-                 [(set SPR:$dst, (fabs SPR:$a))]>;
+def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+                  "fabss", " $dst, $a",
+                  [(set SPR:$dst, (fabs SPR:$a))]>;
 
 let Defs = [FPSCR] in {
 def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
@@ -208,9 +208,9 @@ def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
                  "fnegd", " $dst, $a",
                  [(set DPR:$dst, (fneg DPR:$a))]>;
 
-def FNEGS : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
-                 "fnegs", " $dst, $a",
-                 [(set SPR:$dst, (fneg SPR:$a))]>;
+def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+                  "fnegs", " $dst, $a",
+                  [(set SPR:$dst, (fneg SPR:$a))]>;
 
 def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
                  "fsqrtd", " $dst, $a",
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
new file mode 100644
index 00000000000..4b5bd13481d
--- /dev/null
+++ b/test/CodeGen/ARM/fabss.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1
+
+define float @test(float %a, float %b) {
+entry:
+        %dum = fadd float %a, %b
+        %0 = tail call float @fabsf(float %dum)
+        %dum1 = fadd float %0, %b
+        ret float %dum1
+}
+
+declare float @fabsf(float)
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
new file mode 100644
index 00000000000..ff171e19299
--- /dev/null
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,+neonfp | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2
+; RUN: llvm-as < %s | llc -march=arm -mattr=+neon,-neonfp | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2
+
+define float @test1(float* %a) {
+entry:
+        %0 = load float* %a, align 4            ; <float> [#uses=2]
+        %1 = fsub float -0.000000e+00, %0       ; <float> [#uses=2]
+        %2 = fpext float %1 to double           ; <double> [#uses=1]
+        %3 = fcmp olt double %2, 1.234000e+00   ; <i1> [#uses=1]
+        %retval = select i1 %3, float %1, float %0      ; <float> [#uses=1]
+        ret float %retval
+}
+
+define float @test2(float* %a) {
+entry:
+        %0 = load float* %a, align 4            ; <float> [#uses=2]
+        %1 = fmul float -1.000000e+00, %0       ; <float> [#uses=2]
+        %2 = fpext float %1 to double           ; <double> [#uses=1]
+        %3 = fcmp olt double %2, 1.234000e+00   ; <i1> [#uses=1]
+        %retval = select i1 %3, float %1, float %0      ; <float> [#uses=1]
+        ret float %retval
+}