diff --git a/src/main/scala/millfork/OptimizationPresets.scala b/src/main/scala/millfork/OptimizationPresets.scala
index 9d66b33c..bcfebc7d 100644
--- a/src/main/scala/millfork/OptimizationPresets.scala
+++ b/src/main/scala/millfork/OptimizationPresets.scala
@@ -44,6 +44,8 @@ object OptimizationPresets {
     TwoVariablesToIndexRegistersOptimization,
     AlwaysGoodOptimizations.RearrangableLoadFromTheSameLocation,
     AlwaysGoodOptimizations.PoinlessLoadBeforeAnotherLoad,
+    RepeatedIndexCalculationOptimization(true),
+    RepeatedIndexCalculationOptimization(false),
     AlwaysGoodOptimizations.CommonIndexSubexpressionElimination,
     AlwaysGoodOptimizations.PointlessOperationPairRemoval,
     AlwaysGoodOptimizations.PointlessOperationPairRemoval2,
@@ -245,6 +247,8 @@ object OptimizationPresets {
     AlwaysGoodOptimizations.RearrangableLoadFromTheSameLocation,
     AlwaysGoodOptimizations.RearrangeMath,
     AlwaysGoodOptimizations.RemoveNops,
+    RepeatedIndexCalculationOptimization(true),
+    RepeatedIndexCalculationOptimization(false),
     AlwaysGoodOptimizations.ReplacingArithmeticsWithBitOps,
     AlwaysGoodOptimizations.ReuseIndex,
     AlwaysGoodOptimizations.ReverseFlowAnalysis,
@@ -274,6 +278,8 @@ object OptimizationPresets {
     AlwaysGoodOptimizations.BranchInPlaceRemoval,
     AlwaysGoodOptimizations.CommonBranchBodyOptimization,
     AlwaysGoodOptimizations.CommonExpressionInConditional,
+    RepeatedIndexCalculationOptimization(true),
+    RepeatedIndexCalculationOptimization(false),
     AlwaysGoodOptimizations.CommonIndexSubexpressionElimination,
     AlwaysGoodOptimizations.IndexSequenceOptimization,
     AlwaysGoodOptimizations.PoinlessStoreBeforeStore,
diff --git a/src/main/scala/millfork/assembly/mos/opt/RepeatedIndexCalculationOptimization.scala b/src/main/scala/millfork/assembly/mos/opt/RepeatedIndexCalculationOptimization.scala
new file mode 100644
index 00000000..366f517f
--- /dev/null
+++ b/src/main/scala/millfork/assembly/mos/opt/RepeatedIndexCalculationOptimization.scala
@@ -0,0 +1,287 @@
+package millfork.assembly.mos.opt
+
+import millfork.assembly.{AssemblyOptimization, Elidability, OptimizationContext}
+import millfork.assembly.mos.{AddrMode, AssemblyLine, AssemblyLine0, Opcode, OpcodeClasses}
+import Opcode._
+import AddrMode._
+import millfork.env.{MemoryAddressConstant, NormalFunction, NumericConstant}
+import millfork.node.{MosNiceFunctionProperty, NiceFunctionProperty}
+
+import scala.collection.mutable.ListBuffer
+import scala.util.control.TailCalls.TailRec
+import scala.util.control.TailCalls.done
+import scala.util.control.TailCalls.tailcall
+
+/**
+  * Replaces a repeated calculation of an index register value with a short
+  * sequence of increments or decrements of that register.
+  *
+  * A "run" is a sequence of elidable instructions that loads A, optionally
+  * transforms it (CLC/SEC + ADC, AND/EOR/ORA, ASL/LSR) and ends by transferring
+  * A to the index register. When two consecutive runs are the same calculation,
+  * differing at most in the constant added right before the transfer, and the
+  * code between them only moves the register by a known constant, the second run
+  * is replaced with INX/DEX (or INY/DEY), followed by TXA/TYA if the value of A
+  * or the N/Z flags are still needed afterwards.
+  *
+  * @param forX true to handle calculations into X, false to handle Y
+  * @author Karol Stasiak
+  */
+case class RepeatedIndexCalculationOptimization(forX: Boolean) extends AssemblyOptimization[AssemblyLine] {
+  override def name: String = "Repeated index calculation into " + (if (forX) "X" else "Y")
+
+  /**
+    * Finds all index calculations in `code` and replaces each one that merely
+    * repeats the calculation directly preceding it.
+    *
+    * @return the rewritten code, or `code` itself if nothing was replaced
+    */
+  override def optimize(f: NormalFunction, code: List[AssemblyLine], context: OptimizationContext): List[AssemblyLine] = {
+    val log = context.log
+    val allRuns = findAllRuns(code, 0, None).result
+    if (log.traceEnabled) {
+      log.trace("All index calculations found: " + allRuns)
+      for ((line, ix) <- code.zipWithIndex) {
+        val inRun = allRuns.indexWhere { case (from, to) => ix >= from && ix < to }
+        if (inRun < 0) {
+          log.trace(s"\t$line")
+        } else {
+          log.trace(s"$inRun\t$line")
+        }
+      }
+    }
+    if (allRuns.size <= 1) return code
+    lazy val reverseFlow = ReverseFlowAnalyzer.analyze(code, context.niceFunctionProperties)
+    val flow = CoarseFlowAnalyzer.analyze(f, code, context)
+    // index of a replaceable run -> by how much the register exceeds the value that run computes
+    var replacements: Map[Int, Int] = Map()
+    for (i <- 1 until allRuns.size) {
+      val (r1From, r1To) = allRuns(i - 1)
+      val (r2From, r2To) = allRuns(i)
+      val codeBetween = code.slice(r1To, r2From)
+      val deltaOpt = if (forX) findConstantDeltaX(context, codeBetween) else findConstantDeltaY(context, codeBetween)
+      log.trace(s"Delta between ${i - 1} and $i is $deltaOpt")
+      deltaOpt match {
+        case Some(delta) if delta >= -2 && delta <= 2 =>
+          val code1 = code.slice(r1From, r1To)
+          val code2 = code.slice(r2From, r2To)
+          // the decimal flag has to be either not read by the runs or known to be clear before both
+          val dIsUnimportant = !code1.exists(l => OpcodeClasses.ReadsD(l.opcode)) && !code2.exists(l => OpcodeClasses.ReadsD(l.opcode))
+          val dIsClear = flow(r1From).d.contains(false) && flow(r2From).d.contains(false)
+          // the carry flag has to be either unimportant after the second run or not changed by it
+          val cIsUnimportantAfter = reverseFlow(r2To - 1).c == Unimportant
+          val cIsUnmodified = !code2.exists(l => OpcodeClasses.ChangesC(l.opcode))
+          if ((dIsUnimportant || dIsClear) && (cIsUnimportantAfter || cIsUnmodified)) {
+            getExtraDelta(code1, code2) match {
+              case Some(xdelta) =>
+                log.trace(s"Runs are identical, extra delta is $xdelta")
+                val variablesToPreserve = getAccessedVariables(code2)
+                log.trace(s"variablesToPreserve=$variablesToPreserve")
+                val variablesNotPreserved = getModifiedVariables(codeBetween)
+                log.trace(s"variablesNotPreserved=$variablesNotPreserved")
+                val totalDelta = delta + xdelta
+                // Never emit more INX/DEX (INY/DEY) than the number of instructions being replaced;
+                // otherwise a big difference between the added constants would yield a long chain of them.
+                val isShortEnough = math.abs(totalDelta) <= code2.size
+                // NOTE(review): "?" presumably marks a parameter without a known root variable - confirm
+                if (isShortEnough && !variablesToPreserve("?") && (variablesToPreserve & variablesNotPreserved).isEmpty) {
+                  replacements += (i -> totalDelta)
+                }
+              case _ =>
+            }
+          }
+        case _ =>
+      }
+    }
+    if (replacements.isEmpty) return code
+    val result = ListBuffer[AssemblyLine]()
+    var processedSoFar = 0
+    val IN_ = if (forX) INX else INY
+    val DE_ = if (forX) DEX else DEY
+    val T_A = if (forX) TXA else TYA
+    var previousFrom = 0
+    for (((from, to), ix) <- allRuns.zipWithIndex) {
+      result ++= code.slice(processedSoFar, from)
+      replacements.get(ix) match {
+        case Some(delta) =>
+          val newPos = code(to - 1).source
+          val importanceAfter = reverseFlow(to - 1)
+          // a positive delta is undone with decrements, a negative one with increments
+          val adjustment =
+            if (delta > 0) List.fill(delta)(AssemblyLine.implied(DE_).pos(newPos))
+            else List.fill(-delta)(AssemblyLine.implied(IN_).pos(newPos))
+          val needsA = importanceAfter.a != Unimportant
+          // when no increment or decrement is emitted, a transfer is still needed if N or Z are read later
+          val needsFlags = delta == 0 && (importanceAfter.n != Unimportant || importanceAfter.z != Unimportant)
+          val replacement: Seq[AssemblyLine] =
+            if (needsA || needsFlags) adjustment :+ AssemblyLine.implied(T_A).pos(newPos)
+            else adjustment
+          result ++= replacement
+          log.debug(s"Applied $name for run $ix ($from-$to) with delta $delta")
+          if (log.traceEnabled) {
+            code.slice(previousFrom, to).filter(_.isPrintable).foreach(l => log.trace(l.toString))
+            log.trace(" ↓")
+            (code.slice(previousFrom, from) ++ replacement).filter(_.isPrintable).foreach(l => log.trace(l.toString))
+          }
+        case _ =>
+          result ++= code.slice(from, to)
+      }
+      processedSoFar = to
+      previousFrom = from
+    }
+    result ++= code.drop(processedSoFar)
+    result.toList
+  }
+
+  /**
+    * Calculates by how much `code` changes the X register.
+    *
+    * @return Some(number of INX minus number of DEX), or None if `code` contains an
+    *         instruction that is not linear (per `OpcodeClasses.AllLinear`), a CHANGED_MEM,
+    *         a call to a function not known to preserve both X and memory,
+    *         or any other instruction that changes X
+    */
+  def findConstantDeltaX(ctx: OptimizationContext, code: List[AssemblyLine]): Option[Int] = {
+    var delta = 0
+    for (line <- code) {
+      line match {
+        case AssemblyLine0(JSR, Absolute | LongAbsolute, MemoryAddressConstant(th)) =>
+          if (!ctx.niceFunctionProperties(MosNiceFunctionProperty.DoesntChangeX -> th.name)) return None
+          if (!ctx.niceFunctionProperties(NiceFunctionProperty.DoesntWriteMemory -> th.name)) return None
+        case AssemblyLine0(INX, _, _) => delta += 1
+        case AssemblyLine0(DEX, _, _) => delta -= 1
+        case AssemblyLine0(CHANGED_MEM | JSR, _, _) => return None
+        case _ =>
+          if (!OpcodeClasses.AllLinear(line.opcode)) return None
+          if (OpcodeClasses.ChangesX(line.opcode)) return None
+      }
+    }
+    Some(delta)
+  }
+
+  /**
+    * Calculates by how much `code` changes the Y register.
+    *
+    * @return Some(number of INY minus number of DEY), or None if `code` contains an
+    *         instruction that is not linear (per `OpcodeClasses.AllLinear`), a CHANGED_MEM,
+    *         a call to a function not known to preserve both Y and memory,
+    *         or any other instruction that changes Y
+    */
+  def findConstantDeltaY(ctx: OptimizationContext, code: List[AssemblyLine]): Option[Int] = {
+    var delta = 0
+    for (line <- code) {
+      line match {
+        case AssemblyLine0(JSR, Absolute | LongAbsolute, MemoryAddressConstant(th)) =>
+          if (!ctx.niceFunctionProperties(MosNiceFunctionProperty.DoesntChangeY -> th.name)) return None
+          if (!ctx.niceFunctionProperties(NiceFunctionProperty.DoesntWriteMemory -> th.name)) return None
+        case AssemblyLine0(INY, _, _) => delta += 1
+        case AssemblyLine0(DEY, _, _) => delta -= 1
+        case AssemblyLine0(CHANGED_MEM | JSR, _, _) => return None
+        case _ =>
+          if (!OpcodeClasses.AllLinear(line.opcode)) return None
+          if (OpcodeClasses.ChangesY(line.opcode)) return None
+      }
+    }
+    Some(delta)
+  }
+
+  /**
+    * Compares two runs and calculates how much bigger the value computed by `code1`
+    * is than the value computed by `code2`.
+    *
+    * The runs have to be identical, except for the immediate operand of an ADC placed
+    * directly before the final instruction. If one run is two instructions shorter,
+    * it is compared as if it had `CLC` and `ADC #0` before its final instruction.
+    *
+    * @return Some(difference between the added constants), or None if the runs differ in any other way
+    */
+  def getExtraDelta(code1: List[AssemblyLine], code2: List[AssemblyLine]): Option[Int] = {
+    var delta = 0
+    if (code1.size != code2.size) {
+      // a run that lacks the final addition is treated as one that adds zero
+      if (code1.size == code2.size + 2) {
+        return getExtraDelta(code1, code2.init ++ List(AssemblyLine.implied(CLC), AssemblyLine.immediate(ADC, 0), code2.last))
+      }
+      if (code1.size + 2 == code2.size) {
+        return getExtraDelta(code1.init ++ List(AssemblyLine.implied(CLC), AssemblyLine.immediate(ADC, 0), code1.last), code2)
+      }
+      return None
+    }
+    for (i <- code1.indices) {
+      val l1 = code1(i)
+      val l2 = code2(i)
+      if (l1.opcode != l2.opcode) return None
+      if (l1.addrMode != l2.addrMode) return None
+      if (l1.parameter != l2.parameter) {
+        // only the operand of an immediate ADC right before the final instruction may differ
+        if (i != code2.size - 2) return None
+        if (l2.opcode != ADC) return None
+        if (l2.addrMode != Immediate) return None
+        (l1.parameter, l2.parameter) match {
+          case (NumericConstant(n1, _), NumericConstant(n2, _)) =>
+            delta += (n1 - n2).toInt
+          case _ => return None
+        }
+      }
+    }
+    Some(delta)
+  }
+
+  /** Collects the non-empty root names (`rootThingName`) of the parameters of all instructions in `code`. */
+  def getAccessedVariables(code: List[AssemblyLine]): Set[String] = {
+    code.map(_.parameter.rootThingName)
+      .filter(_ != "")
+      .toSet
+  }
+
+  /**
+    * Collects the non-empty root names (`rootThingName`) of the parameters of those instructions
+    * in `code` whose opcode is in `ChangesMemoryAlways` or `ChangesMemoryIfNotImplied`.
+    */
+  def getModifiedVariables(code: List[AssemblyLine]): Set[String] = {
+    code.filter(l => OpcodeClasses.ChangesMemoryAlways(l.opcode) || OpcodeClasses.ChangesMemoryIfNotImplied(l.opcode))
+      .map(_.parameter.rootThingName)
+      .filter(_ != "")
+      .toSet
+  }
+
+  /**
+    * Finds all runs, i.e. candidate index calculations, in the code.
+    *
+    * A run starts with an LDA, continues through any number of CLC/SEC + ADC pairs,
+    * AND/EOR/ORA and implied-mode ASL/LSR, and ends with TAX (TAY if `forX` is false).
+    * LDA, ADC, AND, EOR and ORA are accepted only in the immediate, zero page, absolute
+    * and long absolute addressing modes. Every instruction of a run has to be elidable.
+    *
+    * @param xs          the code that remains to be scanned
+    * @param offset      index of the head of `xs` within the whole code
+    * @param latestStart index of the LDA that started the currently open run, if any
+    * @return (from, to) index ranges of all runs, with `to` exclusive; the ranges never overlap
+    */
+  def findAllRuns(xs: List[AssemblyLine], offset: Int, latestStart: Option[Int]): TailRec[List[(Int, Int)]] = {
+    if (xs.isEmpty) return done(Nil)
+    if (xs.head.elidability != Elidability.Elidable) return tailcall(findAllRuns(xs.tail, offset + 1, None))
+    val TA_ = if (forX) TAX else TAY
+    xs match {
+      case AssemblyLine0(LDA, Immediate | ZeroPage | Absolute | LongAbsolute, _) :: tail =>
+        tailcall(findAllRuns(tail, offset + 1, Some(offset)))
+      case AssemblyLine0(CLC | SEC, _, _) :: (l2@AssemblyLine0(ADC, Immediate | ZeroPage | Absolute | LongAbsolute, _)) :: tail
+        if l2.elidability == Elidability.Elidable =>
+        tailcall(findAllRuns(tail, offset + 2, latestStart))
+      case AssemblyLine0(AND | EOR | ORA, Immediate | ZeroPage | Absolute | LongAbsolute, _) :: tail =>
+        tailcall(findAllRuns(tail, offset + 1, latestStart))
+      case AssemblyLine0(ASL | LSR, Implied, _) :: tail =>
+        tailcall(findAllRuns(tail, offset + 1, latestStart))
+      case AssemblyLine0(TA_, Implied, _) :: tail =>
+        // The transfer closes the run. The start is forgotten here, so that the instructions
+        // that follow cannot extend it into a second run overlapping this one.
+        val rest = tailcall(findAllRuns(tail, offset + 1, None))
+        latestStart match {
+          case Some(start) => rest.map((start, offset + 1) :: _)
+          case None => rest
+        }
+      case _ :: tail => tailcall(findAllRuns(tail, offset + 1, None))
+      case Nil => done(Nil)
+    }
+  }
+}
diff --git a/src/main/scala/millfork/compiler/mos/MosExpressionCompiler.scala b/src/main/scala/millfork/compiler/mos/MosExpressionCompiler.scala
index e8f5fc26..be193048 100644
--- a/src/main/scala/millfork/compiler/mos/MosExpressionCompiler.scala
+++ b/src/main/scala/millfork/compiler/mos/MosExpressionCompiler.scala
@@ -536,6 +536,13 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
     compile(ctx, expr, Some(b -> RegisterVariable(MosRegister.A, b)), BranchSpec.None)
   }
 
+  /** Compiles `expr` as a byte-sized expression with the Y register as the target. */
+  def compileToY(ctx: CompilationContext, expr: Expression): List[AssemblyLine] = {
+    val env = ctx.env
+    val b = env.get[Type]("byte")
+    compile(ctx, expr, Some(b -> RegisterVariable(MosRegister.Y, b)), BranchSpec.None)
+  }
+
   def compileToAX(ctx: CompilationContext, expr: Expression): List[AssemblyLine] = {
     val env = ctx.env
     val w = env.get[Type]("word")
@@ -584,7 +591,6 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
         ctx.env.eval(base).map { baseConst =>
           baseConst -> { (i: Int) =>
-            val b = ctx.env.get[Type]("byte")
-            compile(ctx, index #+# i, Some(b -> RegisterVariable(MosRegister.Y, b)), BranchSpec.None)
+            compileToY(ctx, index #+# i)
           }
         }
       }
@@ -2137,10 +2143,20 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
               AssemblyLine.immediate(LDA, constant.subbyte(i)),
               AssemblyLine.absolute(STA, addr + offset + i)))
           case _ =>
-            prepare ++ (0 until targetType.size).flatMap(i => List(
-              if (i == 0) AssemblyLine.immediate(LDY, offset) else AssemblyLine.implied(INY),
-              AssemblyLine.immediate(LDA, constant.subbyte(i)),
-              AssemblyLine(STA, am, addr)))
+            fastTarget match {
+              case Some((constAddr, initializeY)) =>
+                initializeY(offset) ++ (0 until targetType.size).flatMap { i =>
+                  val load = List(AssemblyLine.immediate(LDA, constant.subbyte(i)))
+                  load ++ (if (i == 0) List(AssemblyLine.absoluteY(STA, constAddr)) else List(
+                    AssemblyLine.implied(INY),
+                    AssemblyLine.absoluteY(STA, constAddr)))
+                }
+              case _ =>
+                prepare ++ (0 until targetType.size).flatMap(i => List(
+                  if (i == 0) AssemblyLine.immediate(LDY, offset) else AssemblyLine.implied(INY),
+                  AssemblyLine.immediate(LDA, constant.subbyte(i)),
+                  AssemblyLine(STA, am, addr)))
+            }
         }
       case None =>
         source match {
@@ -2151,7 +2167,7 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
                 prepare ++
                   AssemblyLine.variable(ctx, LDA, variable) :+
                   AssemblyLine.absolute(STA, addr + offset)
-              case (1, _) =>
+              case (1, _) if fastTarget.isEmpty =>
                 prepare ++
                   AssemblyLine.variable(ctx, LDA, variable) ++ List(
                   AssemblyLine.immediate(LDY, offset),
@@ -2163,11 +2179,21 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
                     AssemblyLine.absolute(STA, addr + offset + i))
                 }
               case (_, _) =>
-                prepare ++ (0 until targetType.size).flatMap { i =>
-                  val load = if (i >= sourceType.size) List(AssemblyLine.immediate(LDA, 0)) else AssemblyLine.variable(ctx, LDA, variable, i)
-                  load ++ List(
-                    if (i == 0) AssemblyLine.immediate(LDY, offset) else AssemblyLine.implied(INY),
-                    AssemblyLine(STA, am, addr))
+                fastTarget match {
+                  case Some((constAddr, initializeY)) =>
+                    initializeY(offset) ++ (0 until targetType.size).flatMap { i =>
+                      val load = if (i >= sourceType.size) List(AssemblyLine.immediate(LDA, 0)) else AssemblyLine.variable(ctx, LDA, variable, i)
+                      load ++ (if (i == 0) List(AssemblyLine.absoluteY(STA, constAddr)) else List(
+                        AssemblyLine.implied(INY),
+                        AssemblyLine.absoluteY(STA, constAddr)))
+                    }
+                  case _ =>
+                    prepare ++ (0 until targetType.size).flatMap { i =>
+                      val load = if (i >= sourceType.size) List(AssemblyLine.immediate(LDA, 0)) else AssemblyLine.variable(ctx, LDA, variable, i)
+                      load ++ List(
+                        if (i == 0) AssemblyLine.immediate(LDY, offset) else AssemblyLine.implied(INY),
+                        AssemblyLine(STA, am, addr))
+                    }
                 }
             case _ =>
               ctx.log.error("Cannot assign to a large object indirectly", target.position)
@@ -2303,24 +2329,35 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
               }
             case (2, _) =>
               val someTuple = Some(targetType, RegisterVariable(MosRegister.AX, targetType))
-              if (prepare.isEmpty) {
-                compile(ctx, source, someTuple, BranchSpec.None) ++ List(
-                  AssemblyLine.immediate(LDY, offset),
-                  AssemblyLine.indexedY(STA, addr),
-                  AssemblyLine.implied(TXA),
-                  AssemblyLine.implied(INY),
-                  AssemblyLine.indexedY(STA, addr))
-              } else {
-                compile(ctx, source, someTuple, BranchSpec.None) ++ List(
-                  AssemblyLine.implied(PHA),
-                  AssemblyLine.implied(TXA),
-                  AssemblyLine.implied(PHA)) ++ prepare ++ List(
-                  AssemblyLine.immediate(LDY, offset+1),
-                  AssemblyLine.implied(PLA),
-                  AssemblyLine.indexedY(STA, addr),
-                  AssemblyLine.implied(PLA),
-                  AssemblyLine.implied(DEY),
-                  AssemblyLine.indexedY(STA, addr))
+              fastTarget match {
+                case Some((constAddr, initializeY)) =>
+                  compile(ctx, source, someTuple, BranchSpec.None) ++
+                    preserveRegisterIfNeeded(ctx, MosRegister.AX, initializeY(offset)) ++
+                    List(
+                      AssemblyLine.absoluteY(STA, constAddr),
+                      AssemblyLine.implied(TXA),
+                      AssemblyLine.implied(INY),
+                      AssemblyLine.absoluteY(STA, constAddr))
+                case _ =>
+                  if (prepare.isEmpty) {
+                    compile(ctx, source, someTuple, BranchSpec.None) ++ List(
+                      AssemblyLine.immediate(LDY, offset),
+                      AssemblyLine.indexedY(STA, addr),
+                      AssemblyLine.implied(TXA),
+                      AssemblyLine.implied(INY),
+                      AssemblyLine.indexedY(STA, addr))
+                  } else {
+                    compile(ctx, source, someTuple, BranchSpec.None) ++ List(
+                      AssemblyLine.implied(PHA),
+                      AssemblyLine.implied(TXA),
+                      AssemblyLine.implied(PHA)) ++ prepare ++ List(
+                      AssemblyLine.immediate(LDY, offset+1),
+                      AssemblyLine.implied(PLA),
+                      AssemblyLine.indexedY(STA, addr),
+                      AssemblyLine.implied(PLA),
+                      AssemblyLine.implied(DEY),
+                      AssemblyLine.indexedY(STA, addr))
+                  }
               }
             case _ =>
               ctx.log.error("Cannot assign to a large object indirectly", target.position)