From 7a1abfba24fe02ca4a1795b7456ca2c87cf1a4e3 Mon Sep 17 00:00:00 2001
From: Karol Stasiak <karol.m.stasiak@gmail.com>
Date: Sun, 30 Dec 2018 00:52:40 +0100
Subject: [PATCH] Optimization improvements

---
 CHANGELOG.md                                  |  2 ++
 .../mos/opt/AlwaysGoodOptimizations.scala     | 16 +++++++--------
 .../assembly/mos/opt/LaterOptimizations.scala |  4 ++--
 .../assembly/mos/opt/LoopUnrolling.scala      |  2 +-
 .../opt/RuleBasedAssemblyOptimization.scala   | 20 ++++++++++++++++++-
 .../z80/opt/AlwaysGoodI80Optimizations.scala  | 12 ++++++++++-
 .../opt/RuleBasedAssemblyOptimization.scala   | 18 +++++++++++++++++
 7 files changed, 61 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f1397a1c..721bb222 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,8 @@
 
 * Fixed `#pragma` not respecting `#if`
 
+* Optimization improvements.
+
 ## 0.3.2
 
 * Almost complete support for the Zilog Z80, Intel 8080 and Sharp LR35902 microprocessors.
diff --git a/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala b/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala
index 4643a6bf..e0f0d778 100644
--- a/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala
+++ b/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala
@@ -505,7 +505,7 @@ object AlwaysGoodOptimizations {
 
   private def operationPairBuilder3(op1: Opcode.Value, op1extra: AssemblyLinePattern, op2: Opcode.Value, middle: AssemblyLinePattern, discardToRemove: Option[Opcode.Value]) = {
     (HasOpcode(op1) & Elidable & op1extra) ~
-      middle.*.capture(1) ~
+      (middle & IsNotALabelUsedManyTimes).*.capture(1) ~
       Where(_.isExternallyLinearBlock(1)) ~
       (HasOpcode(op2) & Elidable) ~~> { (_, ctx) =>
       ctx.get[List[AssemblyLine]](1).filter(l => !discardToRemove.contains(l.opcode))
@@ -514,7 +514,7 @@ object AlwaysGoodOptimizations {
 
   private def operationPairBuilder4(op1: Opcode.Value, op1extra: AssemblyLinePattern, middle: AssemblyLinePattern, op2: Opcode.Value, op2extra: AssemblyLinePattern) = {
     (HasOpcode(op1) & op1extra  & Elidable & HasAddrModeIn(Absolute, ZeroPage, LongAbsolute) & MatchParameter(3)) ~
-      middle.*.capture(1) ~
+      (middle & IsNotALabelUsedManyTimes).*.capture(1) ~
       Where(_.isExternallyLinearBlock(1)) ~
       (HasOpcode(op2) & op2extra & Elidable & HasAddrModeIn(Absolute, ZeroPage, LongAbsolute) & MatchParameter(3)) ~~> { (_, ctx) =>
       ctx.get[List[AssemblyLine]](1)
@@ -640,7 +640,7 @@ object AlwaysGoodOptimizations {
     },
     (Elidable & HasOpcode(LDA) & MatchAddrMode(0) & MatchParameter(1)) ~
       (Elidable & HasOpcode(PHA)) ~
-      (Not(ConcernsStack) & DoesntChangeIndexingInAddrMode(0) & DoesntChangeMemoryAt(0, 1)).*.capture(2) ~
+      (IsNotALabelUsedManyTimes & Not(ConcernsStack) & DoesntChangeIndexingInAddrMode(0) & DoesntChangeMemoryAt(0, 1)).*.capture(2) ~
       Where(ctx => ctx.isExternallyLinearBlock(2))~
       (Elidable & HasOpcode(PLA)) ~~> { code =>
       code.head :: (code.drop(2).init :+ code.head)
@@ -2161,13 +2161,13 @@ object AlwaysGoodOptimizations {
       code.tail.init :+ code.head
     },
     (Elidable & HasOpcodeIn(DEX, INX) & DoesntMatterWhatItDoesWith(State.N, State.Z)) ~
-      (Not(ConcernsX)).*.capture(1) ~
+      (IsNotALabelUsedManyTimes & Not(ConcernsX)).*.capture(1) ~
       Where(ctx => ctx.isExternallyLinearBlock(1)) ~
       (Elidable & (HasOpcode(TXA) & DoesntMatterWhatItDoesWith(State.A) | HasOpcode(CPX) & HasImmediate(0) & DoesntMatterWhatItDoesWith(State.C, State.V))) ~~> { code =>
       code.tail.init :+ code.head
     },
     (Elidable & HasOpcodeIn(DEY, INY) & DoesntMatterWhatItDoesWith(State.N, State.Z)) ~
-      (Not(ConcernsY)).*.capture(1) ~
+      (IsNotALabelUsedManyTimes & Not(ConcernsY)).*.capture(1) ~
       Where(ctx => ctx.isExternallyLinearBlock(1)) ~
       (Elidable & (HasOpcode(TYA) & DoesntMatterWhatItDoesWith(State.A) | HasOpcode(CPY) & HasImmediate(0) & DoesntMatterWhatItDoesWith(State.C, State.V))) ~~> { code =>
       code.tail.init :+ code.head
@@ -2307,9 +2307,9 @@ object AlwaysGoodOptimizations {
   val PointlessSignCheck: RuleBasedAssemblyOptimization = {
     def loadOldSignedVariable: AssemblyPattern = (
       (HasOpcodeIn(AND, ANC) & HasImmediateWhere(i => (i & 0x80) == 0)) ~
-      (HasOpcode(STA) & HasAddrModeIn(Absolute, ZeroPage) & MatchAddrMode(0) & MatchParameter(1)) ~
-      DoesNotConcernMemoryAt(0, 1).* ~
-      (HasOpcode(LDA) & HasAddrModeIn(Absolute, ZeroPage) & MatchParameter(1))
+        (HasOpcode(STA) & HasAddrModeIn(Absolute, ZeroPage) & MatchAddrMode(0) & MatchParameter(1)) ~
+        (IsNotALabelUsedManyTimes & DoesNotConcernMemoryAt(0, 1)).* ~
+        (HasOpcode(LDA) & HasAddrModeIn(Absolute, ZeroPage) & MatchParameter(1))
       ).capture(10) ~ Where(_.isExternallyLinearBlock(10))
 
     val isNonnegative: Int => Boolean = i => (i & 0x80) == 0
diff --git a/src/main/scala/millfork/assembly/mos/opt/LaterOptimizations.scala b/src/main/scala/millfork/assembly/mos/opt/LaterOptimizations.scala
index a0b9f236..be47e061 100644
--- a/src/main/scala/millfork/assembly/mos/opt/LaterOptimizations.scala
+++ b/src/main/scala/millfork/assembly/mos/opt/LaterOptimizations.scala
@@ -241,7 +241,7 @@ object LaterOptimizations {
   val UseXInsteadOfStack = new RuleBasedAssemblyOptimization("Using X instead of stack",
     needsFlowInfo = FlowInfoRequirement.BackwardFlow,
     (Elidable & HasOpcode(PHA) & DoesntMatterWhatItDoesWith(State.X)) ~
-      (Not(ConcernsStack) & Not(ConcernsX)).capture(1) ~
+      (IsNotALabelUsedManyTimes & Not(ConcernsStack) & Not(ConcernsX)).capture(1) ~
       Where(_.isExternallyLinearBlock(1)) ~
       (Elidable & HasOpcode(PLA)) ~~> (c =>
       AssemblyLine.implied(TAX) :: (c.tail.init :+ AssemblyLine.implied(TXA))
@@ -251,7 +251,7 @@ object LaterOptimizations {
   val UseYInsteadOfStack = new RuleBasedAssemblyOptimization("Using Y instead of stack",
     needsFlowInfo = FlowInfoRequirement.BackwardFlow,
     (Elidable & HasOpcode(PHA) & DoesntMatterWhatItDoesWith(State.Y)) ~
-      (Not(ConcernsStack) & Not(ConcernsY)).capture(1) ~
+      (IsNotALabelUsedManyTimes & Not(ConcernsStack) & Not(ConcernsY)).capture(1) ~
       Where(_.isExternallyLinearBlock(1)) ~
       (Elidable & HasOpcode(PLA)) ~~> (c =>
       AssemblyLine.implied(TAY) :: (c.tail.init :+ AssemblyLine.implied(TYA))
diff --git a/src/main/scala/millfork/assembly/mos/opt/LoopUnrolling.scala b/src/main/scala/millfork/assembly/mos/opt/LoopUnrolling.scala
index 6c4d206d..0bc2e485 100644
--- a/src/main/scala/millfork/assembly/mos/opt/LoopUnrolling.scala
+++ b/src/main/scala/millfork/assembly/mos/opt/LoopUnrolling.scala
@@ -87,7 +87,7 @@ object LoopUnrolling {
     (Elidable & HasOpcode(LDX) & MatchNumericImmediate(Start) & Not(HasImmediate(0))).capture(Initialization) ~
       (Elidable & HasOpcode(BEQ) & MatchParameter(Skip)) ~
       (Elidable & HasOpcode(LABEL) & MatchParameter(Back)) ~
-      ((Elidable & Not(HasOpcodeIn(RTS, JSR, RTI, RTL)) & Not(ChangesX)).*.capture(Body) ~
+      ((IsNotALabelUsedManyTimes & Elidable & Not(HasOpcodeIn(RTS, JSR, RTI, RTL)) & Not(ChangesX)).*.capture(Body) ~
         (Elidable & HasOpcodeIn(DEX, INX)).capture(Step)
         ).capture(BodyWithStep) ~
       (Elidable & HasOpcode(CPX) & MatchNumericImmediate(End)).? ~
diff --git a/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala b/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala
index a2e5f7e2..e444c25b 100644
--- a/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala
+++ b/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala
@@ -28,6 +28,11 @@ object FlowInfoRequirement extends Enumeration {
     case BothFlows | BackwardFlow => ()
     case NoRequirement | JustLabels | ForwardFlow => FatalErrorReporting.reportFlyingPig("Backward flow info required")
   }
+
+  def assertLabels(x: FlowInfoRequirement.Value): Unit = x match { // guard used by patterns that read label info
+    case NoRequirement => FatalErrorReporting.reportFlyingPig("Label info required") // was a copy of the backward-flow message
+    case _ => () // every requirement level other than NoRequirement is accepted
+  }
 }
 
 trait AssemblyRuleSet{
@@ -1365,4 +1370,17 @@ case object IsZeroPage extends AssemblyLinePattern {
       case _ => false
     }
   }
-}
\ No newline at end of file
+}
+
+case object IsNotALabelUsedManyTimes extends AssemblyLinePattern {
+  // Delegates to FlowInfoRequirement.assertLabels, which rejects rules declared with NoRequirement.
+  override def validate(needsFlowInfo: FlowInfoRequirement.Value): Unit = FlowInfoRequirement.assertLabels(needsFlowInfo)
+
+  // Non-label lines always match; a label line matches only when its labelUseCount is at most 1.
+  override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: AssemblyLine): Boolean =
+    if (line.opcode != Opcode.LABEL) true
+    else line.parameter match {
+      case MemoryAddressConstant(Label(label)) => flowInfo.labelUseCount(label) <= 1
+      case _ => false // a label whose parameter has any other shape is rejected
+    }
+}
diff --git a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala
index e2ce4b46..048c8b2d 100644
--- a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala
+++ b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala
@@ -1,7 +1,7 @@
 package millfork.assembly.z80.opt
 
 import millfork.assembly.AssemblyOptimization
-import millfork.assembly.z80._
+import millfork.assembly.z80.{opt, _}
 import millfork.assembly.z80.ZOpcode._
 import millfork.env.{CompoundConstant, Constant, MathOperator, NumericConstant}
 import millfork.node.ZRegister
@@ -821,6 +821,14 @@ object AlwaysGoodI80Optimizations {
       List(ZLine.ldImm16(ZRegister.BC, (ctx.get[Constant](0) + ctx.get[Constant](1).asl(8)).quickSimplify))
     },
 
+    (Elidable & Is8BitLoad(A, MEM_ABS_8) & MatchParameter(1)) ~
+      (Not(Concerns(ZRegister.HL)) & IsNotALabelUsedManyTimes).*.capture(5) ~
+      Where(ctx => ctx.isExternallyLinearBlock(5)) ~
+      (Elidable & HasOpcode(LD_16) & opt.HasRegisters(TwoRegisters(HL, IMM_16)) & MatchParameter(1)) ~~> (code =>
+      code.last ::
+        code.head.copy(registers = TwoRegisters(A, MEM_HL), parameter = Constant.Zero) ::
+        code.tail.init),
+
     // TODO: this is a bit controversial
     // 41 cycles 6 bytes → 24 cycles 8 bytes
     MultipleAssemblyRules(Seq(BC, DE).map{ reg =>
@@ -1263,6 +1271,8 @@ object AlwaysGoodI80Optimizations {
     (Elidable & HasOpcode(SCF) & DoesntMatterWhatItDoesWithFlags) ~~> (_ => Nil),
     (Elidable & HasOpcode(CCF) & DoesntMatterWhatItDoesWithFlags) ~~> (_ => Nil),
     (Elidable & HasOpcodeIn(Set(OR, AND)) & HasRegisterParam(ZRegister.A) & DoesntMatterWhatItDoesWithFlags) ~~> (_ => Nil),
+    HasOpcodeIn(Set(OR, AND, XOR)) ~
+      (Elidable & HasOpcodeIn(Set(OR, AND)) & HasRegisterParam(ZRegister.A)) ~~> (_.init),
   )
 
   val All: List[AssemblyOptimization[ZLine]] = List[AssemblyOptimization[ZLine]](
diff --git a/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala b/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala
index 3a43b45b..527f8824 100644
--- a/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala
+++ b/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala
@@ -27,6 +27,11 @@ object FlowInfoRequirement extends Enumeration {
     case BothFlows | BackwardFlow => ()
     case NoRequirement | JustLabels | ForwardFlow => FatalErrorReporting.reportFlyingPig("Backward flow info required")
   }
+
+  def assertLabels(x: FlowInfoRequirement.Value): Unit = x match { // guard used by patterns that read label info
+    case NoRequirement => FatalErrorReporting.reportFlyingPig("Label info required") // was a copy of the backward-flow message
+    case _ => () // every requirement level other than NoRequirement is accepted
+  }
 }
 
 trait AssemblyRuleSet{
@@ -1036,4 +1041,17 @@ case class MatchElidableCopyOf(i: Int, firstLinePattern: AssemblyLinePattern, la
     }
     Some(after)
   }
+}
+
+case object IsNotALabelUsedManyTimes extends AssemblyLinePattern {
+  // Delegates to FlowInfoRequirement.assertLabels, which rejects rules declared with NoRequirement.
+  override def validate(needsFlowInfo: FlowInfoRequirement.Value): Unit = FlowInfoRequirement.assertLabels(needsFlowInfo)
+
+  // Non-label lines always match; a label line matches only when its labelUseCount is at most 1.
+  override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: ZLine): Boolean =
+    if (line.opcode != ZOpcode.LABEL) true
+    else line.parameter match {
+      case MemoryAddressConstant(Label(label)) => flowInfo.labelUseCount(label) <= 1
+      case _ => false // a label whose parameter has any other shape is rejected
+    }
 }
\ No newline at end of file