
Various optimization improvements and bugfixes

Karol Stasiak 2018-03-14 18:50:58 +01:00
parent 38f3923d4d
commit 9e8a125487
10 changed files with 189 additions and 68 deletions

View File

@@ -30,10 +30,17 @@ object EmptyMemoryStoreRemoval extends AssemblyOptimization {
case AssemblyLine(_, _, CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), _) => Some(th.name)
case _ => None
}.toSet
val variablesWithAddressesTaken = code.flatMap {
case AssemblyLine(_, Immediate, SubbyteConstant(MemoryAddressConstant(th), _), _) =>
Some(th.name)
case AssemblyLine(_, Immediate, HalfWordConstant(MemoryAddressConstant(th), _), _) =>
Some(th.name)
case _ => None
}.toSet
val allLocalVariables = f.environment.getAllLocalVariables
val localVariables = allLocalVariables.filter {
case MemoryVariable(name, typ, VariableAllocationMethod.Auto | VariableAllocationMethod.Zeropage) =>
typ.size > 0 && !paramVariables(name) && stillUsedVariables(name)
typ.size > 0 && !paramVariables(name) && stillUsedVariables(name) && !variablesWithAddressesTaken(name)
case _ => false
}

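The new variablesWithAddressesTaken guard covers stores that stay live even though no later instruction names the variable directly: once a variable's address leaks into a SubbyteConstant or HalfWordConstant immediate, its memory can be reached through a pointer. A hedged Millfork-style illustration, written in the test suites' format (hypothetical program, not from this commit):

    val addressEscapeExample =
      """
        | byte output @$c000
        | void main() {
        |   byte b
        |   pointer p
        |   p = b.addr   // b's address escapes into an immediate
        |   b = 5        // must now survive dead-store elimination
        |   output = p[0]
        | }
      """.stripMargin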
View File

@@ -111,25 +111,27 @@ object FlowAnalyzerForImmediate {
},
AND -> {(nn, currentStatus) =>
val n = nn & 0xff
val newA = currentStatus.a.map(_ & n)
val newA = if (n == 0) Status.SingleZero else currentStatus.a.map(_ & n)
val newN = if (n <= 0x7f) Status.SingleFalse else newA.n()
currentStatus.copy(
n = newA.n(),
n = newN,
z = newA.z(),
a = newA,
a7 = if ((nn & 0x80) != 0) currentStatus.a7 else Status.SingleFalse,
a0 = if ((nn & 1) != 0) currentStatus.a0 else Status.SingleFalse,
a7 = if ((nn & 0x80) != 0) newA.bit7 else Status.SingleFalse,
a0 = if ((nn & 1) != 0) newA.bit0 else Status.SingleFalse,
src = SourceOfNZ.A)
},
ANC -> {(nn, currentStatus) =>
val n = nn & 0xff
val newA = currentStatus.a.map(_ & n)
val newA = if (n == 0) Status.SingleZero else currentStatus.a.map(_ & n)
val newNC = if (n <= 0x7f) Status.SingleFalse else newA.n()
currentStatus.copy(
n = newA.n(),
c = newA.n(),
n = newNC,
c = newNC,
z = newA.z(),
a = newA,
a7 = if ((nn & 0x80) != 0) currentStatus.a7 else Status.SingleFalse,
a0 = if ((nn & 1) != 0) currentStatus.a0 else Status.SingleFalse,
a7 = if ((nn & 0x80) != 0) newA.bit7 else Status.SingleFalse,
a0 = if ((nn & 1) != 0) newA.bit0 else Status.SingleFalse,
src = SourceOfNZ.A)
},
ORA -> {(nn, currentStatus) =>

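Both rewrites above encode the same two facts: AND #$00 forces the accumulator to zero no matter what it held before, and ANDing with a mask of at most $7F clears bit 7, so N is statically false. A minimal standalone sketch of that reasoning (a simplified model with Option[Int] standing in for the tracked accumulator, not the real Status type):

    def andImmediate(a: Option[Int], imm: Int): (Option[Int], Option[Boolean]) = {
      val n = imm & 0xff
      val newA = if (n == 0) Some(0) else a.map(_ & n)  // zero mask: result known even if A is not
      val newN = if (n <= 0x7f) Some(false) else newA.map(v => (v & 0x80) != 0)
      (newA, newN)                                      // ANC would additionally copy newN into C
    }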
View File

@@ -184,6 +184,11 @@ object ReverseFlowAnalyzer {
if ((n & 0x40) != 0) currentImportance = currentImportance.copy(v = Unimportant)
if ((n & 0x80) != 0) currentImportance = currentImportance.copy(n = Unimportant)
case AssemblyLine(ANC, _, NumericConstant(0, _), _) =>
currentImportance = currentImportance.copy(c = Unimportant, n = Unimportant, z = Unimportant, a = Unimportant)
case AssemblyLine(AND, _, NumericConstant(0, _), _) =>
currentImportance = currentImportance.copy(n = Unimportant, z = Unimportant, a = Unimportant)
case AssemblyLine(opcode, addrMode, _, _) =>
val reallyIgnoreC =
currentImportance.c == Unimportant &&

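Since ReverseFlowAnalyzer walks the code backwards, the two new cases say: on reaching AND #0 (or ANC #0), whatever A, N and Z held beforehand cannot matter, because the instruction overwrites all of them unconditionally, and ANC also forces C. In miniature, with hypothetical simplified types:

    case class Importance(a: Boolean, c: Boolean, n: Boolean, z: Boolean)
    def stepBackwards(insn: String, imp: Importance): Importance = insn match {
      case "ANC #0" => imp.copy(a = false, c = false, n = false, z = false)
      case "AND #0" => imp.copy(a = false, n = false, z = false)
      case _        => imp   // everything else falls through to the general case
    }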
View File

@@ -19,6 +19,7 @@ object UndocumentedOptimizations {
private val LdxAddrModes = Set(ZeroPage, Absolute, Immediate, AbsoluteY, ZeroPageY)
private val LaxAddrModeRestriction = Not(HasAddrModeIn(Set(AbsoluteX, ZeroPageX, IndexedX, Immediate)))
private val SaxAddrModeRestriction = HasAddrModeIn(Set(IndexedX, ZeroPage, Absolute, AbsoluteY))
//noinspection ScalaUnnecessaryParentheses
val UseLax = new RuleBasedAssemblyOptimization("Using undocumented instruction LAX",
@@ -158,6 +159,10 @@ object UndocumentedOptimizations {
(Elidable & HasOpcode(LSR) & HasAddrMode(Implied)) ~~> { code =>
List(AssemblyLine.immediate(ALR, code.head.parameter))
},
(Elidable & HasOpcode(LSR) & HasAddrMode(Implied)) ~
(Elidable & HasOpcode(AND) & HasAddrMode(Immediate)) ~~> { code =>
List(AssemblyLine.immediate(ALR, code.last.parameter.asl(1).loByte))
},
(Elidable & HasOpcode(LSR) & HasAddrMode(Implied)) ~
(Elidable & HasOpcode(CLC)) ~~> { _ =>
List(AssemblyLine.immediate(ALR, 0xFE))
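The LSR-then-AND rule added above relies on a bit identity: shifting right and then masking with k gives the same byte as masking with (k << 1) and then shifting right, which is exactly what ALR #(2k) computes (ALR = AND immediate, then LSR). Note the final carry can differ (the rewritten form always clears it, while the original kept bit 0 of A), so the rewrite presumably depends on the carry being dead afterwards. A quick plain-Scala spot check of the identity, separate from the optimizer:

    val identityHolds = (0 to 255).forall(a =>
      (0 to 255).forall(k => ((a >> 1) & k) == ((a & ((k << 1) & 0xff)) >> 1)))
    assert(identityHolds)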
@@ -363,6 +368,43 @@ object UndocumentedOptimizations {
(Elidable & HasOpcode(TAX)) ~~> { (code, ctx) =>
List(code.head.copy(opcode = LAX), AssemblyLine(SBX, Immediate, ctx.get[Constant](2)))
},
(Elidable & HasOpcode(LDA) & LaxAddrModeRestriction & MatchAddrMode(0) & MatchParameter(1)) ~
(Not(ReadsX) & HasOpcodeIn(Set(ANC, ALR, ARR, ADC, AND, EOR, ORA, ADC, SBC, SEC, CLC, STA, LDY, STY)) |
HasAddrMode(Implied) & HasOpcodeIn(Set(ASL, LSR, ROL, ROR, TAY, TYA))).* ~
(Elidable & HasOpcode(LDA) & MatchAddrMode(0) & MatchParameter(1)) ~
HasOpcode(LDY).? ~
(Elidable & HasOpcode(AND)) ~
HasOpcode(LDY).? ~
(Elidable & HasOpcode(STA) & SaxAddrModeRestriction & DoesntMatterWhatItDoesWith(State.X)) ~
(Elidable & (HasOpcode(TAX) | HasOpcodeIn(Set(LDA, LDX, LAX)) & MatchAddrMode(0) & MatchParameter(1))).? ~~> { (code, ctx) =>
var rest = code
var result = List[AssemblyLine]()
rest.last.opcode match {
case STA => ()
case TAX | LDX => rest = rest.init
case LDA | LAX =>
rest = rest.init
result = List(AssemblyLine.implied(TXA))
}
result = rest.last.copy(opcode = SAX) :: result
rest = rest.init
rest.last.opcode match {
case LDY =>
result = rest.last :: result
rest = rest.init
case AND => ()
}
result = rest.last.copy(opcode = LDA) :: result
rest = rest.init
rest.last.opcode match {
case LDY =>
result = rest.last :: result
rest = rest.init
case LDA => ()
}
rest = rest.init
rest.head.copy(opcode = LAX) :: (rest.tail ++ result)
},
)
private def idempotent(illegal: Opcode.Value, pointless: Opcode.Value) =

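The large new rule above targets the shape LDA var / … / LDA var / AND #mask / STA dest, with optional LDY lines in between and an optional reload at the end. It works because LAX loads A and X together and SAX stores A & X without touching any flags. A sketch of the transformation, reconstructed from the rule body (illustrative only):

    LDA var            LAX var
    ...          ->    ...            (whitelisted ops; none of them writes X)
    LDA var            (dropped: X still holds var)
    AND #mask          LDA #mask
    STA dest           SAX dest       (dest := A & X == var & mask)

The Not(ReadsX) condition on the middle section is needed because the first load now also sets X, and the trailing TAX/LDA/LDX/LAX case re-materializes whichever register the original tail expected.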
View File

@@ -7,7 +7,6 @@ import millfork.assembly.AddrMode._
import millfork.env._
import millfork.error.ErrorReporting
import scala.annotation.tailrec
import scala.collection.mutable.ListBuffer
/**
@@ -15,6 +14,13 @@ import scala.collection.mutable.ListBuffer
*/
object VariableToRegisterOptimization extends AssemblyOptimization {
object CyclesAndBytes {
val Zero = CyclesAndBytes(0, 0)
}
case class CyclesAndBytes(bytes: Int, cycles: Int) {
def +(that: CyclesAndBytes) = CyclesAndBytes(this.bytes + that.bytes, this.cycles + that.cycles)
}
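CyclesAndBytes lets every rewrite report its saving in both dimensions at once, with the choice of metric deferred to the costFunction defined below. A small usage sketch (illustrative values; optimizeForSpeed is a stand-in for the real compilation flag):

    val saving = CyclesAndBytes(bytes = 2, cycles = 2) + CyclesAndBytes(bytes = 1, cycles = 2)
    val costFunction: CyclesAndBytes => Int =
      if (optimizeForSpeed) _.cycles else _.bytes
    costFunction(saving)   // 4 when optimizing for speed, 3 when optimizing for size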
case class Features(
blastProcessing: Boolean,
izIsAlwaysZero: Boolean,
@@ -128,15 +134,16 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
v.name -> VariableLifetime.apply(v.name, code)
)
val costFunction: CyclesAndBytes => Int = if (options.flag(CompilationFlag.OptimizeForSpeed)) _.cycles else _.bytes
val importances = ReverseFlowAnalyzer.analyze(f, code)
val blastProcessing = options.flag(CompilationFlag.OptimizeForSonicSpeed)
val identityArray = f.environment.maybeGet[ThingInMemory]("identity$").map(MemoryAddressConstant).getOrElse(Constant.Zero)
val izIsAlwaysZero = !options.flag(CompilationFlag.Emit65CE02Opcodes)
val features = Features(
blastProcessing =options.flag(CompilationFlag.OptimizeForSonicSpeed),
izIsAlwaysZero = !options.flag(CompilationFlag.Emit65CE02Opcodes),
blastProcessing = blastProcessing,
izIsAlwaysZero = izIsAlwaysZero,
indexRegisterTransfers = options.flag(CompilationFlag.EmitEmulation65816Opcodes),
identityArray = f.environment.maybeGet[ThingInMemory]("identity$").map(MemoryAddressConstant).getOrElse(Constant.Zero)
identityArray = identityArray
)
val xCandidates = variablesWithLifetimes.filter {
@@ -145,7 +152,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
}.flatMap {
case (vName, range) =>
canBeInlined(Some(vName), None, None, features, code.zip(importances).slice(range.start, range.end)).map { score =>
(vName, range, if (variablesWithRegisterHint(vName)) score + 16 else score)
(vName, range, if (variablesWithRegisterHint(vName)) score + CyclesAndBytes(16, 16) else score)
}
}
@@ -155,7 +162,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
}.flatMap {
case (vName, range) =>
canBeInlined(None, Some(vName), None, features, code.zip(importances).slice(range.start, range.end)).map { score =>
(vName, range, if (variablesWithRegisterHint(vName)) score + 16 else score)
(vName, range, if (variablesWithRegisterHint(vName)) score + CyclesAndBytes(16, 16) else score)
}
}
@@ -165,7 +172,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
}.flatMap {
case (vName, range) =>
canBeInlined(None, None, Some(vName), features, code.zip(importances).slice(range.start, range.end)).map { score =>
(vName, range, if (variablesWithRegisterHint(vName)) score + 16 else score)
(vName, range, if (variablesWithRegisterHint(vName)) score + CyclesAndBytes(16, 16) else score)
}
}
@@ -180,7 +187,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
synced = false,
vName,
code.zip(importances).slice(range.start, range.end)).map { score =>
(vName, range, if (variablesWithRegisterHint(vName)) score + 16 else score)
(vName, range, if (variablesWithRegisterHint(vName)) score + CyclesAndBytes(16, 16) else score)
}
}
// println(s"X: $xCandidates")
@@ -188,10 +195,10 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
// println(s"Z: $zCandidates")
// println(s"A: $aCandidates")
val xCandidateSets = NonOverlappingIntervals.apply[(String, Range, Int)](xCandidates, _._2.start, _._2.end)
val yCandidateSets = NonOverlappingIntervals.apply[(String, Range, Int)](yCandidates, _._2.start, _._2.end)
val zCandidateSets = NonOverlappingIntervals.apply[(String, Range, Int)](zCandidates, _._2.start, _._2.end)
val aCandidateSets = NonOverlappingIntervals.apply[(String, Range, Int)](aCandidates, _._2.start, _._2.end)
val xCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](xCandidates, _._2.start, _._2.end)
val yCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](yCandidates, _._2.start, _._2.end)
val zCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](zCandidates, _._2.start, _._2.end)
val aCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](aCandidates, _._2.start, _._2.end)
val variants = for {
vx <- if (options.flag(CompilationFlag.SingleThreaded)) xCandidateSets else xCandidateSets.par
@@ -212,7 +219,10 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
if (nx & na).isEmpty
if (ny & na).isEmpty
score = vx.toSeq.map(_._3).sum + vy.toSeq.map(_._3).sum + va.toSeq.map(_._3).sum + vz.toSeq.map(_._3).sum
score = vx.toSeq.map(x => costFunction(x._3)).sum +
vy.toSeq.map(x => costFunction(x._3)).sum +
va.toSeq.map(x => costFunction(x._3)).sum +
vz.toSeq.map(x => costFunction(x._3)).sum
} yield (score, vx, vy, vz, va)
if (variants.isEmpty) {
@@ -296,7 +306,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
}
// TODO: STA has different flag behaviour than TAX, keep it in mind!
def canBeInlined(xCandidate: Option[String], yCandidate: Option[String], zCandidate: Option[String], features: Features, lines: List[(AssemblyLine, CpuImportance)]): Option[Int] = {
def canBeInlined(xCandidate: Option[String], yCandidate: Option[String], zCandidate: Option[String], features: Features, lines: List[(AssemblyLine, CpuImportance)]): Option[CyclesAndBytes] = {
val vx = xCandidate.getOrElse("-")
val vy = yCandidate.getOrElse("-")
val vz = zCandidate.getOrElse("-")
@@ -330,13 +340,23 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
case (AssemblyLine(SEP | REP, _, _, _), _) :: xs => None
case (AssemblyLine(STY | LDY, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs if th.name == vx =>
if (features.indexRegisterTransfers) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
if (features.indexRegisterTransfers) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
else None
case (AssemblyLine(STX | LDX, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs if th.name == vy =>
if (features.indexRegisterTransfers) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
if (features.indexRegisterTransfers) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
else None
case (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable),_) :: xs
if opcodesIdentityTable(op) && features.blastProcessing =>
if (th.name == vx || th.name == vy) {
if (elidable) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 0, cycles = -1))
else None
} else {
if (th.name == vz) None
else canBeInlined(xCandidate, yCandidate, zCandidate, features, xs)
}
case (AssemblyLine(opcode, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs
if th.name == vx && (opcode == LDY || opcodesThatCannotBeUsedWithIndexRegistersAsParameters(opcode)) =>
// if a variable is used by some opcodes, then it cannot be assigned to a register
@@ -358,9 +378,9 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
// removing LDX saves 3 cycles
if (elidable && th.name == vx) {
if (imp.z == Unimportant && imp.n == Unimportant) {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
} else {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 1)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 1, cycles = 2))
}
} else {
None
@@ -371,7 +391,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
// LAX = LDX-LDA, and since LDX simplifies to nothing and LDA simplifies to TXA,
// LAX simplifies to TXA, saving two bytes
if (elidable && th.name == vx) {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
} else {
None
}
@@ -382,9 +402,9 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
// sometimes that LDX has to be converted into CPX#0
if (elidable && th.name == vy) {
if (imp.z == Unimportant && imp.n == Unimportant) {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
} else {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 1)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 1, cycles = 2))
}
} else {
None
@@ -393,9 +413,9 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
case (AssemblyLine(LDZ, Absolute | ZeroPage, MemoryAddressConstant(th), elidable), imp) :: xs if zCandidate.isDefined =>
if (elidable && th.name == vz) {
if (imp.z == Unimportant && imp.n == Unimportant) {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
} else {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 1)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 1, cycles = 2))
}
} else {
None
@@ -413,20 +433,11 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
// if a register is populated with something other than a variable, then no variable can be assigned to that register
None
case (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable),_) :: xs
if opcodesIdentityTable(op) =>
if (th.name == vx || th.name == vy) {
if (elidable) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs)
else None
} else {
if (th.name == vz) None
else canBeInlined(xCandidate, yCandidate, zCandidate, features, xs)
}
case (AssemblyLine(LDA, _, _, elidable),_) :: (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable2),_) :: xs
if opcodesCommutative(op) =>
// LDAw/ANDx -> TXA/ANDw
if (th.name == vx || th.name == vy) {
if (elidable && elidable2) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
if (elidable && elidable2) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
else None
} else {
if (th.name == vz) None
@@ -436,7 +447,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
case (AssemblyLine(LDA, _, _, elidable),_) :: (AssemblyLine(CLC, _, _, _),_) :: (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable2),_) :: xs
if opcodesCommutative(op) =>
if (th.name == vx || th.name == vy) {
if (elidable && elidable2) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
if (elidable && elidable2) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
else None
} else {
if (th.name == vz) None
@@ -448,7 +459,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
// a variable cannot be inlined if there is TAX not after LDA of that variable
// but LDA-TAX can be simplified to TXA
if (elidable && elidable2 && th.name == vx) {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
} else {
None
}
@@ -458,7 +469,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
// a variable cannot be inlined if there is TAY not after LDA of that variable
// but LDA-TAY can be simplified to TYA
if (elidable && elidable2 && th.name == vy) {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
} else {
None
}
@@ -468,16 +479,28 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
// a variable cannot be inlined if there is TAZ not after LDA of that variable
// but LDA-TAZ can be simplified to TZA
if (elidable && elidable2 && th.name == vy) {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
} else {
None
}
case (AssemblyLine(LDA | STA | INC | DEC, Absolute | ZeroPage, MemoryAddressConstant(th), elidable), _) :: xs =>
case (AssemblyLine(LDA | STA, Absolute | ZeroPage, MemoryAddressConstant(th), elidable), _) :: xs =>
// changing LDA->TXA, STA->TAX saves 2 bytes and 2 cycles
if (th.name == vy || th.name == vx || th.name == vz) {
if (elidable) {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
} else {
None
}
} else {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs)
}
case (AssemblyLine(INC | DEC, Absolute | ZeroPage, MemoryAddressConstant(th), elidable), _) :: xs =>
// changing INC->INX, DEC->DEX saves 2 bytes and 4 cycles
if (th.name == vy || th.name == vx || th.name == vz) {
if (elidable) {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 4))
} else {
None
}
@@ -486,10 +509,10 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
}
case (AssemblyLine(STZ, Absolute | ZeroPage, MemoryAddressConstant(th), elidable), _) :: xs =>
// changing STZ->LDX saves 2 bytes
// changing STZ->LDX saves 1 byte
if (th.name == vy || th.name == vx) {
if (elidable && features.izIsAlwaysZero) {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 1, cycles = 2))
} else {
None
}
@@ -524,17 +547,17 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
canBeInlined(xCandidate, yCandidate, zCandidate, features, xs)
}
case Nil => Some(0)
case Nil => Some(CyclesAndBytes.Zero)
}
}
def canBeInlinedToAccumulator(options: CompilationOptions, start: Boolean, synced: Boolean, candidate: String, lines: List[(AssemblyLine, CpuImportance)]): Option[Int] = {
def canBeInlinedToAccumulator(options: CompilationOptions, start: Boolean, synced: Boolean, candidate: String, lines: List[(AssemblyLine, CpuImportance)]): Option[CyclesAndBytes] = {
val cmos = options.flags(CompilationFlag.EmitCmosOpcodes)
lines match {
case (AssemblyLine(STA, Absolute | ZeroPage, MemoryAddressConstant(th), true),_) :: xs
if th.name == candidate && start || synced =>
canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 3)
canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 4))
case (AssemblyLine(op, _, _, _),_) :: xs if opcodesThatAlwaysPrecludeAAllocation(op) =>
None
@@ -560,14 +583,14 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
None
case (AssemblyLine(SEP | REP, Immediate, NumericConstant(nn, _), _), _) :: xs =>
if ((nn & 0x20) == 0) canBeInlinedToAccumulator(options, start = false, synced = synced, candidate, xs).map(_ + 3)
if ((nn & 0x20) == 0) canBeInlinedToAccumulator(options, start = false, synced = synced, candidate, xs)
else None
case (AssemblyLine(SEP | REP, _, _, _), _) :: xs => None
case (AssemblyLine(STA, _, MemoryAddressConstant(th), elidable) ,_):: xs if th.name == candidate =>
if (synced && elidable) {
canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 3)
canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
} else {
None
}
@@ -608,31 +631,31 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
case (AssemblyLine(LDA, _, _, elidable),_) :: (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable2),_) :: xs
if opcodesCommutative(op) =>
if (th.name == candidate) {
if (elidable && elidable2) canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 3)
if (elidable && elidable2) canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
else None
} else canBeInlinedToAccumulator(options, start = false, synced = synced, candidate, xs)
case (AssemblyLine(LDA, _, _, elidable),_) :: (AssemblyLine(CLC, _, _, _),_) :: (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable2),_) :: xs
if opcodesCommutative(op) =>
if (th.name == candidate) {
if (elidable && elidable2) canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 3)
if (elidable && elidable2) canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
else None
} else canBeInlinedToAccumulator(options, start = false, synced = synced, candidate, xs)
case (AssemblyLine(LDA, Absolute | ZeroPage, MemoryAddressConstant(th), true), imp) :: xs
if th.name == candidate =>
// removing LDA saves 3 cycles
// removing LDA saves 3 bytes
if (imp.z == Unimportant && imp.n == Unimportant) {
canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 3)
canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
} else {
canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 1)
canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 1, cycles = 2))
}
case (AssemblyLine(LDX | LDY | LAX, Absolute | ZeroPage, MemoryAddressConstant(th), elidable),_) :: xs
if th.name == candidate =>
// converting a load into a transfer saves 2 bytes
if (elidable) {
canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 2)
canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
} else {
None
}
@@ -644,7 +667,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
case (AssemblyLine(ASL | LSR | ROR | ROL, Absolute | ZeroPage, MemoryAddressConstant(th), elidable),_) :: xs
if th.name == candidate =>
if (elidable) {
canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ + 2)
canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 4))
} else {
None
}
@@ -652,7 +675,7 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
case (AssemblyLine(INC | DEC, Absolute | ZeroPage, MemoryAddressConstant(th), elidable),_) :: xs
if th.name == candidate =>
if (cmos && elidable) {
canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ + 2)
canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 4))
} else {
None
}
@@ -660,14 +683,14 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
case (AssemblyLine(TXA | TYA, _, _, elidable), imp) :: xs =>
if (imp.a == Unimportant && imp.c == Unimportant && imp.v == Unimportant && elidable) {
// TYA/TXA has to be converted to CPY#0/CPX#0
canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ - 1)
canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ + CyclesAndBytes(bytes = -1, cycles = 0))
} else {
None
}
case (x, _) :: xs => canBeInlinedToAccumulator(options, start = false, synced = synced && OpcodeClasses.AllLinear(x.opcode), candidate, xs)
case Nil => Some(0)
case Nil => Some(CyclesAndBytes.Zero)
}
}

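All the cases above, in both canBeInlined and canBeInlinedToAccumulator, follow one protocol: None vetoes the candidate variable outright, Some accumulates the saving of each local rewrite, and the empty tail is worth CyclesAndBytes.Zero. The skeleton, in miniature (toy input type; the real code matches on AssemblyLine and CpuImportance):

    def scan(lines: List[String]): Option[CyclesAndBytes] = lines match {
      case "veto" :: _  => None                                    // pattern that forbids inlining
      case "save" :: xs => scan(xs).map(_ + CyclesAndBytes(2, 2))  // rewrite saving 2 bytes, 2 cycles
      case _ :: xs      => scan(xs)                                // neutral line
      case Nil          => Some(CyclesAndBytes.Zero)
    }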
View File

@@ -118,6 +118,9 @@ class Environment(val parent: Option[Environment], val prefix: String) {
)
}
case VariableAllocationMethod.Auto | VariableAllocationMethod.Register | VariableAllocationMethod.Static =>
if (m.alloc == VariableAllocationMethod.Register) {
ErrorReporting.warn(s"Failed to inline variable `${m.name}` into a register", options, None)
}
m.sizeInBytes match {
case 0 => Nil
case 2 =>

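A hedged example of source that could now trigger the warning, assuming the register keyword is what requests VariableAllocationMethod.Register (hypothetical program, in the test suites' format):

    val registerHintExample =
      """
        | byte output @$c000
        | void main() {
        |   // `register` asks for X/Y/A allocation; when the optimizer cannot
        |   // honour it, the variable now falls back to memory with a warning
        |   register byte i
        |   i = 5
        |   output = i
        | }
      """.stripMargin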
View File

@@ -488,4 +488,22 @@ class AssemblyOptimizationSuite extends FunSuite with Matchers {
m.readByte(0xc000) should equal(33)
}
}
test("Identity page") {
EmuUltraBenchmarkRun(
"""
| byte output @$c000
| void main() {
| byte b
| b = f()
| output = (b ^ $40) + b
| }
| noinline byte f () {
| return 3
| }
""".stripMargin
){m =>
m.readByte(0xc000) should equal(0x46)
}
}
}

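For reference, the expected value in the new test: f() returns 3, so output = (3 ^ $40) + 3 = $43 + 3 = $46, which is exactly what the benchmark asserts.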
View File

@@ -222,4 +222,24 @@ class IllegalSuite extends FunSuite with Matchers {
""".stripMargin)
m.readWord(0xc000) should equal(0x105)
}
test("SAX test 2") {
val m = EmuUndocumentedRun("""
| byte output @$c000
| void main () {
| byte a
| byte b
| byte c
| b = five(a)
| five(a)
| a = 44 ^ b
| output = b & $41
| five(a)
| }
| noinline byte five (byte ignored) {
| return 5
| }
""".stripMargin)
m.readLong(0xc000) should equal(1)
}
}

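For reference, the expected value in SAX test 2: five() returns 5 and b is never written again, so output = b & $41 = %00000101 & %01000001 = 1, matching the assertion.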
View File

@@ -16,7 +16,7 @@ object EmuOptimizedInlinedRun extends EmuRun(
ZeropageRegisterOptimizations.All ++
OptimizationPresets.Good) {
override def inline: Boolean = true
override def blastProcessing: Boolean = false
override def blastProcessing: Boolean = true
}

View File

@@ -105,6 +105,7 @@ class EmuRun(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization],
CompilationFlag.EmitEmulation65816Opcodes -> (platform.cpu == millfork.Cpu.Sixteen),
CompilationFlag.Emit65CE02Opcodes -> (platform.cpu == millfork.Cpu.CE02),
CompilationFlag.EmitHudsonOpcodes -> (platform.cpu == millfork.Cpu.HuC6280),
CompilationFlag.OptimizeForSpeed -> blastProcessing,
CompilationFlag.OptimizeForSonicSpeed -> blastProcessing
// CompilationFlag.CheckIndexOutOfBounds -> true,
))