Distribute (A + B) * C to (A * C) + (B * C) to make use of NEON multiplier

accumulator forwarding: vadd d3, d0, d1 vmul d3, d3, d2 => vmul d3, d0, d2 vmla d3, d1, d2 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128665 91177308-0d34-0410-b5e6-96231b3b80d8
2025-08-05 13:26:55 +00:00 · 2011-03-31 19:38:48 +00:00
parent a52d7da1d8
commit 463d358f1d
4 changed files with 80 additions and 4 deletions
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -61,6 +61,10 @@ protected:
  /// whether the FP VML[AS] instructions are slow (if so, don't use them).
  bool SlowFPVMLx;

+  /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
+  /// forwarding to allow mul + mla being issued back to back.
+  bool HasVMLxForwarding;
+
  /// SlowFPBrcc - True if floating point compare + branch is slow.
  bool SlowFPBrcc;

@@ -182,6 +186,7 @@ protected:
  bool hasT2ExtractPack() const { return HasT2ExtractPack; }
  bool hasDataBarrier() const { return HasDataBarrier; }
  bool useFPVMLx() const { return !SlowFPVMLx; }
+  bool hasVMLxForwarding() const { return HasVMLxForwarding; }
  bool isFPBrccSlow() const { return SlowFPBrcc; }
  bool isFPOnlySP() const { return FPOnlySP; }
  bool prefers32BitThumb() const { return Pref32BitThumb; }