mirror of
https://github.com/KarolS/millfork.git
synced 2025-02-06 17:30:05 +00:00
6502: Optimize array accesses for small arrays and also for repeated indices (#50)
This commit is contained in:
parent
d0bf683657
commit
63aab64204
@ -44,6 +44,8 @@ object OptimizationPresets {
|
||||
TwoVariablesToIndexRegistersOptimization,
|
||||
AlwaysGoodOptimizations.RearrangableLoadFromTheSameLocation,
|
||||
AlwaysGoodOptimizations.PoinlessLoadBeforeAnotherLoad,
|
||||
RepeatedIndexCalculationOptimization(true),
|
||||
RepeatedIndexCalculationOptimization(false),
|
||||
AlwaysGoodOptimizations.CommonIndexSubexpressionElimination,
|
||||
AlwaysGoodOptimizations.PointlessOperationPairRemoval,
|
||||
AlwaysGoodOptimizations.PointlessOperationPairRemoval2,
|
||||
@ -245,6 +247,8 @@ object OptimizationPresets {
|
||||
AlwaysGoodOptimizations.RearrangableLoadFromTheSameLocation,
|
||||
AlwaysGoodOptimizations.RearrangeMath,
|
||||
AlwaysGoodOptimizations.RemoveNops,
|
||||
RepeatedIndexCalculationOptimization(true),
|
||||
RepeatedIndexCalculationOptimization(false),
|
||||
AlwaysGoodOptimizations.ReplacingArithmeticsWithBitOps,
|
||||
AlwaysGoodOptimizations.ReuseIndex,
|
||||
AlwaysGoodOptimizations.ReverseFlowAnalysis,
|
||||
@ -274,6 +278,8 @@ object OptimizationPresets {
|
||||
AlwaysGoodOptimizations.BranchInPlaceRemoval,
|
||||
AlwaysGoodOptimizations.CommonBranchBodyOptimization,
|
||||
AlwaysGoodOptimizations.CommonExpressionInConditional,
|
||||
RepeatedIndexCalculationOptimization(true),
|
||||
RepeatedIndexCalculationOptimization(false),
|
||||
AlwaysGoodOptimizations.CommonIndexSubexpressionElimination,
|
||||
AlwaysGoodOptimizations.IndexSequenceOptimization,
|
||||
AlwaysGoodOptimizations.PoinlessStoreBeforeStore,
|
||||
|
@ -0,0 +1,224 @@
|
||||
package millfork.assembly.mos.opt
|
||||
|
||||
import millfork.assembly.{AssemblyOptimization, Elidability, OptimizationContext}
|
||||
import millfork.assembly.mos.{AddrMode, AssemblyLine, AssemblyLine0, Opcode, OpcodeClasses}
|
||||
import Opcode._
|
||||
import AddrMode._
|
||||
import millfork.env.{MemoryAddressConstant, NormalFunction, NumericConstant}
|
||||
import millfork.node.{MosNiceFunctionProperty, NiceFunctionProperty}
|
||||
|
||||
import scala.collection.mutable.ListBuffer
|
||||
import scala.util.control.TailCalls.TailRec
|
||||
import scala.util.control.TailCalls.done
|
||||
import scala.util.control.TailCalls.tailcall
|
||||
|
||||
/**
|
||||
* @author Karol Stasiak
|
||||
*/
|
||||
case class RepeatedIndexCalculationOptimization(forX: Boolean) extends AssemblyOptimization[AssemblyLine] {
|
||||
/** Display name of this optimization; it ends with the index register selected by `forX`. */
override def name: String = {
  val register = if (forX) "X" else "Y"
  "Repeated index calculation into " + register
}
|
||||
|
||||
/**
 * Replaces a repeated index calculation with a cheap adjustment of the index register.
 *
 * A "run" is a range reported by `findAllRuns`: a sequence that loads a value into A,
 * optionally transforms it, and transfers it to the index register selected by `forX`.
 * For every pair of consecutive runs, the second run is replaced when all of the following hold:
 *
 *  - the code between the runs changes the register by a constant in the range -2..2
 *    (`findConstantDeltaX` / `findConstantDeltaY`),
 *  - both runs are identical up to the final immediate addend (`getExtraDelta`),
 *  - the decimal flag is either not read by any of the two runs or known to be clear at the start of both,
 *  - the carry flag is either unimportant after the second run or not changed by it,
 *  - no variable accessed by the second run is modified between the runs,
 *  - the required adjustment needs no more INX/DEX/INY/DEY instructions than the second run
 *    has instructions, so the replacement cannot be an arbitrarily long chain.
 *
 * The replacement is a sequence of register increments/decrements, followed by TXA/TYA when
 * the code after the run still needs A (or, for a zero adjustment, the N or Z flags).
 *
 * @param f       the function being optimized, passed on to the flow analysis
 * @param code    the function's code
 * @param context optimization context providing the logger and nice function properties
 * @return the optimized code, or `code` itself when nothing could be replaced
 */
override def optimize(f: NormalFunction, code: List[AssemblyLine], context: OptimizationContext): List[AssemblyLine] = {
  val log = context.log
  val allRuns = findAllRuns(code, 0, None).result
  if (log.traceEnabled) {
    log.trace("All index calculations found: " + allRuns)
    for ((line, ix) <- code.zipWithIndex) {
      val inRun = allRuns.indexWhere { case (from, to) => ix >= from && ix < to }
      if (inRun < 0) {
        log.trace(s"\t$line")
      } else {
        log.trace(s"$inRun\t$line")
      }
    }
  }
  // A run can only be replaced relative to a previous one, so at least two are needed.
  if (allRuns.size <= 1) return code
  lazy val reverseFlow = ReverseFlowAnalyzer.analyze(code, context.niceFunctionProperties)
  val flow = CoarseFlowAnalyzer.analyze(f, code, context)
  // Indexed copy, so that looking up neighbouring runs is not linear in the run number.
  val runs = allRuns.toIndexedSeq
  // Maps the number of a run to the amount by which the register has to be decreased instead.
  var replacements: Map[Int, Int] = Map()
  for (i <- 1 until runs.size) {
    val (r1From, r1To) = runs(i - 1)
    val (r2From, r2To) = runs(i)
    val codeBetween = code.slice(r1To, r2From)
    val deltaOpt = if (forX) findConstantDeltaX(context, codeBetween) else findConstantDeltaY(context, codeBetween)
    log.trace(s"Delta between ${i - 1} and $i is $deltaOpt")
    deltaOpt match {
      case Some(delta) if delta >= -2 && delta <= 2 =>
        val code1 = code.slice(r1From, r1To)
        val code2 = code.slice(r2From, r2To)
        val dIsUnimportant = !code1.exists(l => OpcodeClasses.ReadsD(l.opcode)) && !code2.exists(l => OpcodeClasses.ReadsD(l.opcode))
        val dIsClear = flow(r1From).d.contains(false) && flow(r2From).d.contains(false)
        val cIsUnimportantAfter = reverseFlow(r2To - 1).c == Unimportant
        val cIsUnmodified = !code2.exists(l => OpcodeClasses.ChangesC(l.opcode))
        if ((dIsUnimportant || dIsClear) && (cIsUnimportantAfter || cIsUnmodified)) {
          getExtraDelta(code1, code2) match {
            case Some(xdelta) =>
              log.trace(s"Runs are identical, extra delta is $xdelta")
              val variablesToPreserve = getAccessedVariables(code2)
              log.trace(s"variablesToPreserve=$variablesToPreserve")
              val variablesNotPreserved = getModifiedVariables(codeBetween)
              log.trace(s"variablesNotPreserved=$variablesNotPreserved")
              val totalDelta = delta + xdelta
              // The difference between the immediate addends is not limited by the -2..2 check above.
              // Every unit of it costs one single-byte instruction, so never emit more of them
              // than the number of instructions being removed.
              val isShortEnough = math.abs(totalDelta) <= code2.size
              if (!isShortEnough) {
                log.trace(s"Run $i not replaced: adjusting the register by $totalDelta would not be shorter")
              }
              if (isShortEnough && !variablesToPreserve("?") && (variablesToPreserve & variablesNotPreserved).isEmpty) {
                replacements += (i -> totalDelta)
              }
            case _ =>
          }
        }
      case _ =>
    }
  }
  if (replacements.isEmpty) return code
  val result = ListBuffer[AssemblyLine]()
  var processedSoFar = 0
  val IN_ = if (forX) INX else INY
  val DE_ = if (forX) DEX else DEY
  val T_A = if (forX) TXA else TYA
  var previousFrom = 0
  for (((from, to), ix) <- allRuns.zipWithIndex) {
    // Copy everything between the previous run and this one unchanged.
    result ++= code.slice(processedSoFar, from)
    replacements.get(ix) match {
      case Some(delta) =>
        val newPos = code(to - 1).source
        val importanceAfter = reverseFlow(to - 1)
        // A positive delta means the register has to be decremented, a negative one incremented;
        // for a zero delta both branches yield an empty list.
        val adjustment: List[AssemblyLine] =
          if (delta > 0) List.fill(delta)(AssemblyLine.implied(DE_).pos(newPos))
          else List.fill(-delta)(AssemblyLine.implied(IN_).pos(newPos))
        // The removed run left the index value in A. Restore it when A is needed afterwards,
        // or when nothing else is emitted and the N or Z flags are needed.
        val needsTransferToA =
          importanceAfter.a != Unimportant ||
            (delta == 0 && (importanceAfter.n != Unimportant || importanceAfter.z != Unimportant))
        val replacement: Seq[AssemblyLine] =
          if (needsTransferToA) adjustment :+ AssemblyLine.implied(T_A).pos(newPos)
          else adjustment
        result ++= replacement
        log.debug(s"Applied $name for run $ix ($from-$to) with delta $delta")
        if (log.traceEnabled) {
          code.slice(previousFrom, to).filter(_.isPrintable).foreach(l => log.trace(l.toString))
          log.trace("     ↓")
          (code.slice(previousFrom, from) ++ replacement).filter(_.isPrintable).foreach(l => log.trace(l.toString))
        }
      case _ =>
        result ++= code.slice(from, to)
    }
    processedSoFar = to
    previousFrom = from
  }
  result ++= code.drop(processedSoFar)
  result.toList
}
|
||||
|
||||
/**
 * Computes by how much the given code changes the X register, if that is a known constant.
 *
 * @param ctx  optimization context providing the nice function properties of called functions
 * @param code straight-line code to inspect
 * @return the net number of INX minus DEX instructions, or None if the code is not linear,
 *         changes X in any other way, is marked as changing memory, or calls a function that
 *         is not known to leave both X and memory untouched
 */
def findConstantDeltaX(ctx: OptimizationContext, code: List[AssemblyLine]): Option[Int] =
  findConstantDelta(ctx, code, trackX = true)

/**
 * Computes by how much the given code changes the Y register, if that is a known constant.
 *
 * @param ctx  optimization context providing the nice function properties of called functions
 * @param code straight-line code to inspect
 * @return the net number of INY minus DEY instructions, or None if the code is not linear,
 *         changes Y in any other way, is marked as changing memory, or calls a function that
 *         is not known to leave both Y and memory untouched
 */
def findConstantDeltaY(ctx: OptimizationContext, code: List[AssemblyLine]): Option[Int] =
  findConstantDelta(ctx, code, trackX = false)

/**
 * Shared implementation of findConstantDeltaX and findConstantDeltaY.
 *
 * @param trackX true to track the X register, false to track the Y register
 */
private def findConstantDelta(ctx: OptimizationContext, code: List[AssemblyLine], trackX: Boolean): Option[Int] = {
  // Capitalized so that they can be used as stable identifiers in the patterns below.
  val IN_ = if (trackX) INX else INY
  val DE_ = if (trackX) DEX else DEY

  // A call is harmless only if the callee neither changes the tracked register nor writes memory.
  def isHarmlessCall(functionName: String): Boolean = {
    val preservesRegister =
      if (trackX) ctx.niceFunctionProperties(MosNiceFunctionProperty.DoesntChangeX -> functionName)
      else ctx.niceFunctionProperties(MosNiceFunctionProperty.DoesntChangeY -> functionName)
    preservesRegister && ctx.niceFunctionProperties(NiceFunctionProperty.DoesntWriteMemory -> functionName)
  }

  def changesRegister(line: AssemblyLine): Boolean =
    if (trackX) OpcodeClasses.ChangesX(line.opcode) else OpcodeClasses.ChangesY(line.opcode)

  @scala.annotation.tailrec
  def loop(remaining: List[AssemblyLine], delta: Int): Option[Int] = remaining match {
    case Nil =>
      Some(delta)
    case AssemblyLine0(JSR, Absolute | LongAbsolute, MemoryAddressConstant(th)) :: tail =>
      if (isHarmlessCall(th.name)) loop(tail, delta) else None
    case AssemblyLine0(IN_, _, _) :: tail =>
      loop(tail, delta + 1)
    case AssemblyLine0(DE_, _, _) :: tail =>
      loop(tail, delta - 1)
    case AssemblyLine0(CHANGED_MEM | JSR, _, _) :: _ =>
      // Either an explicit memory-change marker or a call whose target is not a plain address constant.
      None
    case line :: tail =>
      if (OpcodeClasses.AllLinear(line.opcode) && !changesRegister(line)) loop(tail, delta) else None
  }

  loop(code, 0)
}
|
||||
|
||||
/**
 * Compares two index calculations and reports the difference between their immediate addends.
 *
 * The calculations have to consist of the same opcodes with the same addressing modes.
 * Parameters may differ only on the second-to-last line, and only if that line is an
 * ADC with an immediate numeric operand. If one calculation is exactly two lines longer,
 * the shorter one is first padded with `CLC` and `ADC #0` before its last line.
 *
 * @param code1 the first calculation
 * @param code2 the second calculation
 * @return the addend of `code1` minus the addend of `code2` (0 if all parameters are equal),
 *         or None if the calculations do not match
 */
def getExtraDelta(code1: List[AssemblyLine], code2: List[AssemblyLine]): Option[Int] = {
  // Inserts an addition of zero right before the last line of a calculation.
  def withZeroAddend(run: List[AssemblyLine]): List[AssemblyLine] =
    run.init ++ List(AssemblyLine.implied(CLC), AssemblyLine.immediate(ADC, 0), run.last)

  code1.size - code2.size match {
    case 2 =>
      getExtraDelta(code1, withZeroAddend(code2))
    case -2 =>
      getExtraDelta(withZeroAddend(code1), code2)
    case 0 =>
      // The only position at which the parameters are allowed to differ.
      val addendPosition = code2.size - 2
      code1.zip(code2).zipWithIndex.foldLeft(Option(0)) {
        case (None, _) =>
          None
        case (Some(soFar), ((first, second), position)) =>
          if (first.opcode != second.opcode || first.addrMode != second.addrMode) {
            None
          } else if (first.parameter == second.parameter) {
            Some(soFar)
          } else if (position != addendPosition || second.opcode != ADC || second.addrMode != Immediate) {
            None
          } else {
            (first.parameter, second.parameter) match {
              case (NumericConstant(n1, _), NumericConstant(n2, _)) => Some(soFar + (n1 - n2).toInt)
              case _ => None
            }
          }
      }
    case _ =>
      None
  }
}
|
||||
|
||||
/**
 * Collects the root thing names of the parameters of all the given lines.
 *
 * @param code the lines to inspect
 * @return the set of non-empty root thing names
 */
def getAccessedVariables(code: List[AssemblyLine]): Set[String] = {
  val rootNames = for {
    line <- code
    rootName = line.parameter.rootThingName
    if rootName != ""
  } yield rootName
  rootNames.toSet
}
|
||||
|
||||
/**
 * Collects the root thing names of the parameters of all lines whose opcode may write memory.
 *
 * A line is taken into account if its opcode belongs to `OpcodeClasses.ChangesMemoryAlways`
 * or `OpcodeClasses.ChangesMemoryIfNotImplied`; the addressing mode is not examined.
 *
 * @param code the lines to inspect
 * @return the set of non-empty root thing names
 */
def getModifiedVariables(code: List[AssemblyLine]): Set[String] = {
  def mayWriteMemory(line: AssemblyLine): Boolean =
    OpcodeClasses.ChangesMemoryAlways(line.opcode) || OpcodeClasses.ChangesMemoryIfNotImplied(line.opcode)

  code
    .collect { case line if mayWriteMemory(line) => line.parameter.rootThingName }
    .filterNot(_ == "")
    .toSet
}
|
||||
|
||||
/**
 * Finds all index calculations ("runs") in the code.
 *
 * A run starts with an elidable LDA (immediate, zero page, absolute or long absolute),
 * continues with any number of elidable CLC/SEC + ADC pairs, AND/EOR/ORA instructions and
 * implied ASL/LSR instructions, and ends with a transfer of A to the index register
 * selected by `forX`. Any other or non-elidable line discards the run in progress.
 *
 * Closing a run also forgets its start, so every run begins with its own LDA and the
 * returned ranges never overlap; `optimize` rebuilds the code range by range and relies on that.
 *
 * @param xs          the code that remains to be scanned
 * @param offset      the index of the head of `xs` within the whole code
 * @param latestStart the index of the LDA that started the run in progress, if any
 * @return the runs as (start inclusive, end exclusive) index pairs, in ascending order,
 *         wrapped in a trampoline so that long code does not overflow the stack
 */
def findAllRuns(xs: List[AssemblyLine], offset: Int, latestStart: Option[Int]): TailRec[List[(Int, Int)]] = {
  val TA_ = if (forX) TAX else TAY
  xs match {
    case Nil =>
      done(Nil)
    case head :: tail if head.elidability != Elidability.Elidable =>
      tailcall(findAllRuns(tail, offset + 1, None))
    case AssemblyLine0(LDA, Immediate | ZeroPage | Absolute | LongAbsolute, _) :: tail =>
      // A new load always starts a new run, abandoning the one in progress.
      tailcall(findAllRuns(tail, offset + 1, Some(offset)))
    case AssemblyLine0(CLC | SEC, _, _) :: (l2@AssemblyLine0(ADC, Immediate | ZeroPage | Absolute | LongAbsolute, _)) :: tail
      if l2.elidability == Elidability.Elidable =>
      tailcall(findAllRuns(tail, offset + 2, latestStart))
    case AssemblyLine0(AND | EOR | ORA, Immediate | ZeroPage | Absolute | LongAbsolute, _) :: tail =>
      tailcall(findAllRuns(tail, offset + 1, latestStart))
    case AssemblyLine0(ASL | LSR, Implied, _) :: tail =>
      tailcall(findAllRuns(tail, offset + 1, latestStart))
    case AssemblyLine0(TA_, Implied, _) :: tail =>
      // The run in progress (if any) ends here; the scan continues without an open run.
      val remainingRuns = tailcall(findAllRuns(tail, offset + 1, None))
      latestStart match {
        case Some(start) => remainingRuns.map((start, offset + 1) :: _)
        case None => remainingRuns
      }
    case _ :: tail =>
      tailcall(findAllRuns(tail, offset + 1, None))
  }
}
|
||||
}
|
@ -536,6 +536,12 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
|
||||
compile(ctx, expr, Some(b -> RegisterVariable(MosRegister.A, b)), BranchSpec.None)
|
||||
}
|
||||
|
||||
/**
 * Compiles an expression with the Y register, typed as `byte`, as the compilation target.
 *
 * @param ctx  the compilation context; its environment is used to look up the `byte` type
 * @param expr the expression to compile
 * @return the code produced by `compile` for that target, with no branching requested
 */
def compileToY(ctx: CompilationContext, expr: Expression): List[AssemblyLine] = {
  val byteType = ctx.env.get[Type]("byte")
  val registerY = RegisterVariable(MosRegister.Y, byteType)
  compile(ctx, expr, Some(byteType -> registerY), BranchSpec.None)
}
|
||||
|
||||
def compileToAX(ctx: CompilationContext, expr: Expression): List[AssemblyLine] = {
|
||||
val env = ctx.env
|
||||
val w = env.get[Type]("word")
|
||||
@ -584,7 +590,7 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
|
||||
ctx.env.eval(base).map { baseConst =>
|
||||
baseConst -> { (i: Int) =>
|
||||
val b = ctx.env.get[Type]("byte")
|
||||
compile(ctx, index #+# i, Some(b -> RegisterVariable(MosRegister.Y, b)), BranchSpec.None)
|
||||
compileToY(ctx, index #+# i)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2137,10 +2143,20 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
|
||||
AssemblyLine.immediate(LDA, constant.subbyte(i)),
|
||||
AssemblyLine.absolute(STA, addr + offset + i)))
|
||||
case _ =>
|
||||
prepare ++ (0 until targetType.size).flatMap(i => List(
|
||||
if (i == 0) AssemblyLine.immediate(LDY, offset) else AssemblyLine.implied(INY),
|
||||
AssemblyLine.immediate(LDA, constant.subbyte(i)),
|
||||
AssemblyLine(STA, am, addr)))
|
||||
fastTarget match {
|
||||
case Some((constAddr, initializeY)) =>
|
||||
initializeY(offset) ++ (0 until targetType.size).flatMap { i =>
|
||||
val load = List(AssemblyLine.immediate(LDA, constant.subbyte(i)))
|
||||
load ++ (if (i == 0) List(AssemblyLine.absoluteY(STA, constAddr)) else List(
|
||||
AssemblyLine.implied(INY),
|
||||
AssemblyLine.absoluteY(STA, constAddr)))
|
||||
}
|
||||
case _ =>
|
||||
prepare ++ (0 until targetType.size).flatMap(i => List(
|
||||
if (i == 0) AssemblyLine.immediate(LDY, offset) else AssemblyLine.implied(INY),
|
||||
AssemblyLine.immediate(LDA, constant.subbyte(i)),
|
||||
AssemblyLine(STA, am, addr)))
|
||||
}
|
||||
}
|
||||
case None =>
|
||||
source match {
|
||||
@ -2151,7 +2167,7 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
|
||||
prepare ++
|
||||
AssemblyLine.variable(ctx, LDA, variable) :+
|
||||
AssemblyLine.absolute(STA, addr + offset)
|
||||
case (1, _) =>
|
||||
case (1, _) if fastTarget.isEmpty =>
|
||||
prepare ++
|
||||
AssemblyLine.variable(ctx, LDA, variable) ++ List(
|
||||
AssemblyLine.immediate(LDY, offset),
|
||||
@ -2163,11 +2179,21 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
|
||||
AssemblyLine.absolute(STA, addr + offset + i))
|
||||
}
|
||||
case (_, _) =>
|
||||
prepare ++ (0 until targetType.size).flatMap { i =>
|
||||
val load = if (i >= sourceType.size) List(AssemblyLine.immediate(LDA, 0)) else AssemblyLine.variable(ctx, LDA, variable, i)
|
||||
load ++ List(
|
||||
if (i == 0) AssemblyLine.immediate(LDY, offset) else AssemblyLine.implied(INY),
|
||||
AssemblyLine(STA, am, addr))
|
||||
fastTarget match {
|
||||
case Some((constAddr, initializeY)) =>
|
||||
initializeY(offset) ++ (0 until targetType.size).flatMap { i =>
|
||||
val load = if (i >= sourceType.size) List(AssemblyLine.immediate(LDA, 0)) else AssemblyLine.variable(ctx, LDA, variable, i)
|
||||
load ++ (if (i == 0) List(AssemblyLine.absoluteY(STA, constAddr)) else List(
|
||||
AssemblyLine.implied(INY),
|
||||
AssemblyLine.absoluteY(STA, constAddr)))
|
||||
}
|
||||
case _ =>
|
||||
prepare ++ (0 until targetType.size).flatMap { i =>
|
||||
val load = if (i >= sourceType.size) List(AssemblyLine.immediate(LDA, 0)) else AssemblyLine.variable(ctx, LDA, variable, i)
|
||||
load ++ List(
|
||||
if (i == 0) AssemblyLine.immediate(LDY, offset) else AssemblyLine.implied(INY),
|
||||
AssemblyLine(STA, am, addr))
|
||||
}
|
||||
}
|
||||
case _ =>
|
||||
ctx.log.error("Cannot assign to a large object indirectly", target.position)
|
||||
@ -2303,24 +2329,35 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
|
||||
}
|
||||
case (2, _) =>
|
||||
val someTuple = Some(targetType, RegisterVariable(MosRegister.AX, targetType))
|
||||
if (prepare.isEmpty) {
|
||||
compile(ctx, source, someTuple, BranchSpec.None) ++ List(
|
||||
AssemblyLine.immediate(LDY, offset),
|
||||
AssemblyLine.indexedY(STA, addr),
|
||||
AssemblyLine.implied(TXA),
|
||||
AssemblyLine.implied(INY),
|
||||
AssemblyLine.indexedY(STA, addr))
|
||||
} else {
|
||||
compile(ctx, source, someTuple, BranchSpec.None) ++ List(
|
||||
AssemblyLine.implied(PHA),
|
||||
AssemblyLine.implied(TXA),
|
||||
AssemblyLine.implied(PHA)) ++ prepare ++ List(
|
||||
AssemblyLine.immediate(LDY, offset+1),
|
||||
AssemblyLine.implied(PLA),
|
||||
AssemblyLine.indexedY(STA, addr),
|
||||
AssemblyLine.implied(PLA),
|
||||
AssemblyLine.implied(DEY),
|
||||
AssemblyLine.indexedY(STA, addr))
|
||||
fastTarget match {
|
||||
case Some((baseOffset, initializeY)) =>
|
||||
compile(ctx, source, someTuple, BranchSpec.None) ++
|
||||
preserveRegisterIfNeeded(ctx, MosRegister.AX, initializeY(offset)) ++
|
||||
List(
|
||||
AssemblyLine.absoluteY(STA, baseOffset),
|
||||
AssemblyLine.implied(TXA),
|
||||
AssemblyLine.implied(INY),
|
||||
AssemblyLine.absoluteY(STA, baseOffset))
|
||||
case _ =>
|
||||
if (prepare.isEmpty) {
|
||||
compile(ctx, source, someTuple, BranchSpec.None) ++ List(
|
||||
AssemblyLine.immediate(LDY, offset),
|
||||
AssemblyLine.indexedY(STA, addr),
|
||||
AssemblyLine.implied(TXA),
|
||||
AssemblyLine.implied(INY),
|
||||
AssemblyLine.indexedY(STA, addr))
|
||||
} else {
|
||||
compile(ctx, source, someTuple, BranchSpec.None) ++ List(
|
||||
AssemblyLine.implied(PHA),
|
||||
AssemblyLine.implied(TXA),
|
||||
AssemblyLine.implied(PHA)) ++ prepare ++ List(
|
||||
AssemblyLine.immediate(LDY, offset+1),
|
||||
AssemblyLine.implied(PLA),
|
||||
AssemblyLine.indexedY(STA, addr),
|
||||
AssemblyLine.implied(PLA),
|
||||
AssemblyLine.implied(DEY),
|
||||
AssemblyLine.indexedY(STA, addr))
|
||||
}
|
||||
}
|
||||
case _ =>
|
||||
ctx.log.error("Cannot assign to a large object indirectly", target.position)
|
||||
|
Loading…
x
Reference in New Issue
Block a user