From 11337f4975fe9efc963d52ccacdefad9f8d5e04e Mon Sep 17 00:00:00 2001
From: Karol Stasiak
Date: Tue, 6 Mar 2018 23:43:09 +0100
Subject: [PATCH] Optimizing predictable reads from local variables

---
 .../scala/millfork/OptimizationPresets.scala  |   1 +
 .../opt/LocalVariableReadOptimization.scala   | 171 ++++++++++++++++++
 .../assembly/opt/SuperOptimizer.scala         |   1 +
 .../test/AssemblyOptimizationSuite.scala      |  22 +++
 4 files changed, 195 insertions(+)
 create mode 100644 src/main/scala/millfork/assembly/opt/LocalVariableReadOptimization.scala

diff --git a/src/main/scala/millfork/OptimizationPresets.scala b/src/main/scala/millfork/OptimizationPresets.scala
index dfcd286a..cda3db0e 100644
--- a/src/main/scala/millfork/OptimizationPresets.scala
+++ b/src/main/scala/millfork/OptimizationPresets.scala
@@ -124,6 +124,7 @@ object OptimizationPresets {
     LaterOptimizations.LoadingBranchesOptimization,
     LaterOptimizations.IncreaseWithLimit,
     SingleAssignmentVariableOptimization,
+    LocalVariableReadOptimization,
   )
 
   val Good: List[AssemblyOptimization] = List[AssemblyOptimization](
diff --git a/src/main/scala/millfork/assembly/opt/LocalVariableReadOptimization.scala b/src/main/scala/millfork/assembly/opt/LocalVariableReadOptimization.scala
new file mode 100644
index 00000000..caa85799
--- /dev/null
+++ b/src/main/scala/millfork/assembly/opt/LocalVariableReadOptimization.scala
@@ -0,0 +1,171 @@
+package millfork.assembly.opt
+
+import millfork.{CompilationFlag, CompilationOptions, NonOverlappingIntervals}
+import millfork.assembly.{AddrMode, AssemblyLine, OpcodeClasses}
+import millfork.assembly.Opcode._
+import millfork.assembly.AddrMode._
+import millfork.env._
+import millfork.error.ErrorReporting
+
+import scala.annotation.tailrec
+import scala.collection.mutable.ListBuffer
+
+/**
+  * Replaces reads of one-byte local variables with immediate operands
+  * wherever the value last stored in the variable is a known constant.
+  *
+  * @author Karol Stasiak
+  */
+object LocalVariableReadOptimization extends AssemblyOptimization {
+
+  override def name: String = "Local variable read optimization"
+
+  /**
+    * Selects the eligible variables and rewrites predictable reads from them.
+    *
+    * @param f       the function being optimized; passed on to the flow analyzer
+    * @param code    the assembly code of the function
+    * @param options the compilation options; passed on to the flow analyzer
+    * @return the rewritten code, or `code` itself if no read was replaced
+    */
+  override def optimize(f: NormalFunction, code: List[AssemblyLine], options: CompilationOptions): List[AssemblyLine] = {
+
+    // one-byte variables with Auto or Register allocation that are referenced directly
+    val stillUsedVariables = code.flatMap {
+      case AssemblyLine(_, _, MemoryAddressConstant(th: MemoryVariable), _) => th match {
+        case MemoryVariable(name, typ, VariableAllocationMethod.Auto | VariableAllocationMethod.Register)
+          if typ.size == 1 => Some(name)
+        case _ => None
+      }
+      case _ => None
+    }.toSet
+    // variables referenced through a half-word or subbyte constant are left alone
+    val variablesWithAddressesTaken = code.flatMap {
+      case AssemblyLine(_, _, HalfWordConstant(MemoryAddressConstant(th), _), _) => Some(th.name)
+      case AssemblyLine(_, _, SubbyteConstant(MemoryAddressConstant(th), _), _) => Some(th.name)
+      case _ => None
+    }.toSet
+    val eligibleVariables = (stillUsedVariables -- variablesWithAddressesTaken).filterNot(_.startsWith("__"))
+
+    if (eligibleVariables.isEmpty) {
+      return code
+    }
+
+    val statuses = CoarseFlowAnalyzer.analyze(f, code, options)
+    val (optimized, result) = optimizeImpl(code.zip(statuses), eligibleVariables, Map())
+    if (optimized) {
+      ErrorReporting.debug("Optimized local variable reads")
+      reportOptimizedBlock(code, result)
+      result
+    } else {
+      code
+    }
+  }
+
+  // allows prepending a line to the (flag, lines) result without touching the flag
+  private implicit class TupleOps(val tuple: (Boolean, List[AssemblyLine])) {
+    def ::(head: AssemblyLine): (Boolean, List[AssemblyLine]) = (tuple._1, head :: tuple._2)
+  }
+
+  /**
+    * Walks the code while tracking the constant values known to be stored in the variables.
+    *
+    * @param code      the remaining lines, each paired with the CPU status the flow analysis produced for it
+    * @param variables names of the variables eligible for the optimization
+    * @param map       constant values currently known to be stored in the variables
+    * @return a flag telling whether any read was replaced, and the resulting code
+    */
+  def optimizeImpl(code: List[(AssemblyLine, CpuStatus)], variables: Set[String], map: Map[String, Int]): (Boolean, List[AssemblyLine]) = code match {
+
+    // a read of a variable with a known value is replaced with an immediate operand
+    case (AssemblyLine(op@(
+      LDA | LDX | LDY | LDZ |
+      ADC | ORA | EOR | AND | SBC |
+      CMP | CPX | CPY | CPZ), Absolute | ZeroPage, MemoryAddressConstant(th), true), _) :: xs
+      if variables(th.name) && map.contains(th.name) =>
+      true -> (AssemblyLine.immediate(op, map(th.name)) :: optimizeImpl(xs, variables, map)._2)
+
+    // a store makes the value known only if the stored register is a known constant
+    case (x@AssemblyLine(STA, Absolute | ZeroPage, MemoryAddressConstant(th), _), status) :: xs
+      if variables(th.name) =>
+      val newMap = status.a match {
+        case SingleStatus(n) => map + (th.name -> n)
+        case _ => map - th.name
+      }
+      x :: optimizeImpl(xs, variables, newMap)
+
+    case (x@AssemblyLine(STX, Absolute | ZeroPage, MemoryAddressConstant(th), _), status) :: xs
+      if variables(th.name) =>
+      val newMap = status.x match {
+        case SingleStatus(n) => map + (th.name -> n)
+        case _ => map - th.name
+      }
+      x :: optimizeImpl(xs, variables, newMap)
+
+    case (x@AssemblyLine(STY, Absolute | ZeroPage, MemoryAddressConstant(th: ThingInMemory), _), status) :: xs
+      if variables(th.name) =>
+      val newMap = status.y match {
+        case SingleStatus(n) => map + (th.name -> n)
+        case _ => map - th.name
+      }
+      x :: optimizeImpl(xs, variables, newMap)
+
+    case (x@AssemblyLine(STZ, Absolute | ZeroPage, MemoryAddressConstant(th), _), status) :: xs
+      if variables(th.name) =>
+      val newMap = status.iz match {
+        case SingleStatus(n) => map + (th.name -> n)
+        case _ => map - th.name
+      }
+      x :: optimizeImpl(xs, variables, newMap)
+
+    case (x@AssemblyLine(SAX, Absolute | ZeroPage, MemoryAddressConstant(th), _), status) :: xs
+      if variables(th.name) =>
+      val newMap = (status.a, status.x) match {
+        case (SingleStatus(m), SingleStatus(n)) => map + (th.name -> (m & n))
+        case (_, SingleStatus(0)) => map + (th.name -> 0)
+        case (SingleStatus(0), _) => map + (th.name -> 0)
+        case _ => map - th.name
+      }
+      x :: optimizeImpl(xs, variables, newMap)
+
+    // read-modify-write instructions with a predictable result update the known value
+    case (x@AssemblyLine(INC | ISC, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs
+      if map.contains(th.name) =>
+      x :: optimizeImpl(xs, variables, map + (th.name -> map(th.name).+(1).&(0xff)))
+
+    case (x@AssemblyLine(DEC | DCP, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs
+      if map.contains(th.name) =>
+      x :: optimizeImpl(xs, variables, map + (th.name -> map(th.name).-(1).&(0xff)))
+
+    case (x@AssemblyLine(ASL | SLO, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs
+      if map.contains(th.name) =>
+      x :: optimizeImpl(xs, variables, map + (th.name -> map(th.name).<<(1).&(0xff)))
+
+    case (x@AssemblyLine(LSR | SRE, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs
+      if map.contains(th.name) =>
+      x :: optimizeImpl(xs, variables, map + (th.name -> map(th.name).&(0xff).>>(1)))
+
+    // ROL and ROR shift the carry flag into the variable; the carry is not tracked here,
+    // so the result is unknown and the stale value must be forgotten
+    case (x@AssemblyLine(ROL | ROR, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs
+      if map.contains(th.name) =>
+      x :: optimizeImpl(xs, variables, map - th.name)
+
+    // TODO: consider handling some more opcodes
+    case (x@AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs
+      if OpcodeClasses.ChangesMemoryAlways(op) && map.contains(th.name) =>
+      x :: optimizeImpl(xs, variables, map - th.name)
+
+    // all known values are dropped at a label
+    case (x@AssemblyLine(LABEL, _, _, _), _) :: xs => x :: optimizeImpl(xs, variables, Map())
+    case (x, _) :: xs => x :: optimizeImpl(xs, variables, map)
+    case Nil => (false, Nil)
+  }
+
+  // traces the code before and after the optimization
+  def reportOptimizedBlock(oldCode: List[AssemblyLine], newCode: List[AssemblyLine]): Unit = {
+    oldCode.foreach(l => ErrorReporting.trace(l.toString))
+    ErrorReporting.trace("     ↓")
+    newCode.foreach(l => ErrorReporting.trace(l.toString))
+  }
+}
diff --git a/src/main/scala/millfork/assembly/opt/SuperOptimizer.scala b/src/main/scala/millfork/assembly/opt/SuperOptimizer.scala
index 6f0dafe1..3aaad326 100644
--- a/src/main/scala/millfork/assembly/opt/SuperOptimizer.scala
+++ b/src/main/scala/millfork/assembly/opt/SuperOptimizer.scala
@@ -40,6 +40,7 @@ object SuperOptimizer extends AssemblyOptimization {
       allOptimizers ++= ZeropageRegisterOptimizations.All
     }
     allOptimizers ++= List(
+      LocalVariableReadOptimization,
       ChangeIndexRegisterOptimizationPreferringX2Y,
       ChangeIndexRegisterOptimizationPreferringY2X)
     val seenSoFar = mutable.Set[CodeView]()
diff --git a/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala b/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala
index f95765c8..0ef54247 100644
--- a/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala
+++ b/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala
@@ -466,4 +466,26 @@ class AssemblyOptimizationSuite extends FunSuite with Matchers {
       m.readByte(0xc000) should equal(33)
     }
   }
+
+  test("Low bit 3") {
+    EmuBenchmarkRun(
+      """
+        | byte output @$c000
+        | void main() {
+        |   g(1)
+        | }
+        | void g(byte x) {
+        |   if f() << 1 == x {
+        |     output = 5
+        |   }
+        | }
+        | noinline byte f () {
+        |   output = 33
+        |   return 3
+        | }
+      """.stripMargin
+    ){m =>
+      m.readByte(0xc000) should equal(33)
+    }
+  }
 }