diff --git a/src/main/scala/millfork/assembly/opt/EmptyMemoryStoreRemoval.scala b/src/main/scala/millfork/assembly/opt/EmptyMemoryStoreRemoval.scala
index 375bc078..033600ba 100644
--- a/src/main/scala/millfork/assembly/opt/EmptyMemoryStoreRemoval.scala
+++ b/src/main/scala/millfork/assembly/opt/EmptyMemoryStoreRemoval.scala
@@ -30,10 +30,17 @@ object EmptyMemoryStoreRemoval extends AssemblyOptimization {
       case AssemblyLine(_, _, CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), _) =>
         Some(th.name)
       case _ => None
     }.toSet
+    val variablesWithAddressesTaken = code.flatMap {
+      case AssemblyLine(_, Immediate, SubbyteConstant(MemoryAddressConstant(th), _), _) =>
+        Some(th.name)
+      case AssemblyLine(_, Immediate, HalfWordConstant(MemoryAddressConstant(th), _), _) =>
+        Some(th.name)
+      case _ => None
+    }.toSet
     val allLocalVariables = f.environment.getAllLocalVariables
     val localVariables = allLocalVariables.filter {
       case MemoryVariable(name, typ, VariableAllocationMethod.Auto | VariableAllocationMethod.Zeropage) =>
-        typ.size > 0 && !paramVariables(name) && stillUsedVariables(name)
+        typ.size > 0 && !paramVariables(name) && stillUsedVariables(name) && !variablesWithAddressesTaken(name)
       case _ => false
     }
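
The scan added above is what keeps the dead-store elimination safe in the presence of pointers: an immediate operand that embeds a byte of a variable's address (a SubbyteConstant or HalfWordConstant wrapping a MemoryAddressConstant) means the program manufactures a pointer to that variable, so its stores may be read later through the pointer and must be kept. A minimal, self-contained sketch of the same idea — the types here are simplified stand-ins, not millfork's actual AssemblyLine/Constant ADTs:

    // Simplified stand-ins for the compiler's ADTs.
    sealed trait Const
    case class MemAddr(name: String) extends Const
    case class LoByte(of: Const) extends Const // plays the role of SubbyteConstant
    case class HiByte(of: Const) extends Const
    case class Line(opcode: String, immediate: Option[Const])

    // Collect every variable whose address (or a byte of it) appears as an
    // immediate operand, e.g. LDA #<var / LDX #>var building a pointer.
    def addressesTaken(code: Seq[Line]): Set[String] = code.flatMap {
      case Line(_, Some(LoByte(MemAddr(name)))) => Some(name)
      case Line(_, Some(HiByte(MemAddr(name)))) => Some(name)
      case _ => None
    }.toSet

Stores into any variable in that set are then excluded from removal, exactly like the new `!variablesWithAddressesTaken(name)` filter above.
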
diff --git a/src/main/scala/millfork/assembly/opt/FlowAnalyzerForImmediate.scala b/src/main/scala/millfork/assembly/opt/FlowAnalyzerForImmediate.scala
index d2b19052..c3ac668c 100644
--- a/src/main/scala/millfork/assembly/opt/FlowAnalyzerForImmediate.scala
+++ b/src/main/scala/millfork/assembly/opt/FlowAnalyzerForImmediate.scala
@@ -111,25 +111,27 @@ object FlowAnalyzerForImmediate {
     },
     AND -> {(nn, currentStatus) =>
       val n = nn & 0xff
-      val newA = currentStatus.a.map(_ & n)
+      val newA = if (n == 0) Status.SingleZero else currentStatus.a.map(_ & n)
+      val newN = if (n <= 0x7f) Status.SingleFalse else newA.n()
       currentStatus.copy(
-        n = newA.n(),
+        n = newN,
         z = newA.z(),
         a = newA,
-        a7 = if ((nn & 0x80) != 0) currentStatus.a7 else Status.SingleFalse,
-        a0 = if ((nn & 1) != 0) currentStatus.a0 else Status.SingleFalse,
+        a7 = if ((nn & 0x80) != 0) newA.bit7 else Status.SingleFalse,
+        a0 = if ((nn & 1) != 0) newA.bit0 else Status.SingleFalse,
         src = SourceOfNZ.A)
     },
     ANC -> {(nn, currentStatus) =>
       val n = nn & 0xff
-      val newA = currentStatus.a.map(_ & n)
+      val newA = if (n == 0) Status.SingleZero else currentStatus.a.map(_ & n)
+      val newNC = if (n <= 0x7f) Status.SingleFalse else newA.n()
       currentStatus.copy(
-        n = newA.n(),
-        c = newA.n(),
+        n = newNC,
+        c = newNC,
         z = newA.z(),
         a = newA,
-        a7 = if ((nn & 0x80) != 0) currentStatus.a7 else Status.SingleFalse,
-        a0 = if ((nn & 1) != 0) currentStatus.a0 else Status.SingleFalse,
+        a7 = if ((nn & 0x80) != 0) newA.bit7 else Status.SingleFalse,
+        a0 = if ((nn & 1) != 0) newA.bit0 else Status.SingleFalse,
         src = SourceOfNZ.A)
     },
     ORA -> {(nn, currentStatus) =>
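
Both rewrites exploit the fact that AND with a constant mask yields knowledge even from an unknown accumulator: a mask of zero forces A to a known 0 (hence `Status.SingleZero`), and any mask with bit 7 clear forces N clear, which is why `newN`/`newNC` short-circuit to `Status.SingleFalse` for masks up to $7F. A toy version of the bit-7 rule over a three-valued truth type (the real code uses millfork's Status lattice; these names are local stand-ins):

    sealed trait Tri
    case object KnownTrue extends Tri
    case object KnownFalse extends Tri
    case object Unknown extends Tri

    // N after AND #imm is bit 7 of the result; a mask bit of 0 forces it to 0
    // regardless of what A held before.
    def nAfterAndImm(imm: Int, oldBit7: Tri): Tri =
      if ((imm & 0x80) == 0) KnownFalse else oldBit7

    assert(nAfterAndImm(0x3f, Unknown) == KnownFalse) // mask clears bit 7
    assert(nAfterAndImm(0x80, Unknown) == Unknown)    // bit 7 passes through

The same reasoning gives the `a7`/`a0` updates: after the AND, those bits come from the result (`newA.bit7`/`newA.bit0`) rather than from the old accumulator.
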
diff --git a/src/main/scala/millfork/assembly/opt/ReverseFlowAnalyzer.scala b/src/main/scala/millfork/assembly/opt/ReverseFlowAnalyzer.scala
index 3a4ce681..a0619c5b 100644
--- a/src/main/scala/millfork/assembly/opt/ReverseFlowAnalyzer.scala
+++ b/src/main/scala/millfork/assembly/opt/ReverseFlowAnalyzer.scala
@@ -184,6 +184,11 @@ object ReverseFlowAnalyzer {
           if ((n & 0x40) != 0) currentImportance = currentImportance.copy(v = Unimportant)
           if ((n & 0x80) != 0) currentImportance = currentImportance.copy(n = Unimportant)
 
+        case AssemblyLine(ANC, _, NumericConstant(0, _), _) =>
+          currentImportance = currentImportance.copy(c = Unimportant, n = Unimportant, z = Unimportant, a = Unimportant)
+        case AssemblyLine(AND, _, NumericConstant(0, _), _) =>
+          currentImportance = currentImportance.copy(n = Unimportant, z = Unimportant, a = Unimportant)
+
         case AssemblyLine(opcode, addrMode, _, _) =>
           val reallyIgnoreC =
             currentImportance.c == Unimportant &&
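
These two cases teach the reverse pass the mirror image of the same fact: scanning code from the end, an `AND #0` is about to overwrite A, N and Z regardless of what they held, so whatever computed them earlier is dead; `ANC #0` also overwrites C (it copies bit 7 of the zero result into the carry). A toy backward-liveness update shaped like the added cases:

    // Which machine resources still matter at this point, walking backwards.
    case class Importance(a: Boolean, n: Boolean, z: Boolean, c: Boolean)

    def beforeAndZero(after: Importance): Importance =
      // AND #0 forces A = 0 and N/Z from that zero: prior values never matter.
      after.copy(a = false, n = false, z = false)

    def beforeAncZero(after: Importance): Importance =
      // ANC #0 additionally loads C from bit 7 of the (zero) result.
      beforeAndZero(after).copy(c = false)
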
diff --git a/src/main/scala/millfork/assembly/opt/UndocumentedOptimizations.scala b/src/main/scala/millfork/assembly/opt/UndocumentedOptimizations.scala
index 05e283b5..a4ce1e0c 100644
--- a/src/main/scala/millfork/assembly/opt/UndocumentedOptimizations.scala
+++ b/src/main/scala/millfork/assembly/opt/UndocumentedOptimizations.scala
@@ -19,6 +19,7 @@ object UndocumentedOptimizations {
 
   private val LdxAddrModes = Set(ZeroPage, Absolute, Immediate, AbsoluteY, ZeroPageY)
   private val LaxAddrModeRestriction = Not(HasAddrModeIn(Set(AbsoluteX, ZeroPageX, IndexedX, Immediate)))
+  private val SaxAddrModeRestriction = HasAddrModeIn(Set(IndexedX, ZeroPage, Absolute, AbsoluteY))
 
   //noinspection ScalaUnnecessaryParentheses
   val UseLax = new RuleBasedAssemblyOptimization("Using undocumented instruction LAX",
@@ -158,6 +159,10 @@ object UndocumentedOptimizations {
     (Elidable & HasOpcode(LSR) & HasAddrMode(Implied)) ~~> { code =>
       List(AssemblyLine.immediate(ALR, code.head.parameter))
     },
+    (Elidable & HasOpcode(LSR) & HasAddrMode(Implied)) ~
+      (Elidable & HasOpcode(AND) & HasAddrMode(Immediate)) ~~> { code =>
+      List(AssemblyLine.immediate(ALR, code.last.parameter.asl(1).loByte))
+    },
     (Elidable & HasOpcode(LSR) & HasAddrMode(Implied)) ~
       (Elidable & HasOpcode(CLC)) ~~> { _ =>
       List(AssemblyLine.immediate(ALR, 0xFE))
@@ -363,6 +368,43 @@ object UndocumentedOptimizations {
     (Elidable & HasOpcode(TAX)) ~~> { (code, ctx) =>
       List(code.head.copy(opcode = LAX), AssemblyLine(SBX, Immediate, ctx.get[Constant](2)))
     },
+    (Elidable & HasOpcode(LDA) & LaxAddrModeRestriction & MatchAddrMode(0) & MatchParameter(1)) ~
+      (Not(ReadsX) & HasOpcodeIn(Set(ANC, ALR, ARR, ADC, AND, EOR, ORA, ADC, SBC, SEC, CLC, STA, LDY, STY)) |
+        HasAddrMode(Implied) & HasOpcodeIn(Set(ASL, LSR, ROL, ROR, TAY, TYA))).* ~
+      (Elidable & HasOpcode(LDA) & MatchAddrMode(0) & MatchParameter(1)) ~
+      HasOpcode(LDY).? ~
+      (Elidable & HasOpcode(AND)) ~
+      HasOpcode(LDY).? ~
+      (Elidable & HasOpcode(STA) & SaxAddrModeRestriction & DoesntMatterWhatItDoesWith(State.X)) ~
+      (Elidable & (HasOpcode(TAX) | HasOpcodeIn(Set(LDA, LDX, LAX)) & MatchAddrMode(0) & MatchParameter(1))).? ~~> { (code, ctx) =>
+      var rest = code
+      var result = List[AssemblyLine]()
+      rest.last.opcode match {
+        case STA => ()
+        case TAX | LDX => rest = rest.init
+        case LDA | LAX =>
+          rest = rest.init
+          result = List(AssemblyLine.implied(TXA))
+      }
+      result = rest.last.copy(opcode = SAX) :: result
+      rest = rest.init
+      rest.last.opcode match {
+        case LDY =>
+          result = rest.last :: result
+          rest = rest.init
+        case AND => ()
+      }
+      result = rest.last.copy(opcode = LDA) :: result
+      rest = rest.init
+      rest.last.opcode match {
+        case LDY =>
+          result = rest.last :: result
+          rest = rest.init
+        case LDA => ()
+      }
+      rest = rest.init
+      rest.head.copy(opcode = LAX) :: (rest.tail ++ result)
+    },
   )
 
   private def idempotent(illegal: Opcode.Value, pointless: Opcode.Value) =
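
The new `LSR` + `AND #imm` rule relies on a small shift/mask identity: ALR #k computes `(A & k) >> 1`, and for byte values `(a & k) >> 1 == (a >> 1) & (k >> 1)`, so choosing `k = (imm << 1) & $FF` — the `asl(1).loByte` in the rule — reproduces the accumulator result of shifting first and masking second. A brute-force check of that identity (accumulator value only; the carry comes out differently after ALR, so flag liveness has to be justified separately):

    def lsrThenAnd(a: Int, m: Int): Int = (a >> 1) & m & 0xff
    def alr(a: Int, k: Int): Int = ((a & k) & 0xff) >> 1

    for (a <- 0 to 255; m <- 0 to 255)
      assert(lsrThenAnd(a, m) == alr(a, (m << 1) & 0xff))

The long SAX pattern below it works the same way at a larger scale: it recognizes a reload/AND/store sequence over a value that is also available in X, turns the first load into LAX, the AND operand into a plain LDA, and the store into SAX (which writes A & X), re-materializing A with TXA only when a later instruction still needs it.
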
diff --git a/src/main/scala/millfork/assembly/opt/VariableToRegisterOptimization.scala b/src/main/scala/millfork/assembly/opt/VariableToRegisterOptimization.scala
index 4b81dadd..e54e407e 100644
--- a/src/main/scala/millfork/assembly/opt/VariableToRegisterOptimization.scala
+++ b/src/main/scala/millfork/assembly/opt/VariableToRegisterOptimization.scala
@@ -7,7 +7,6 @@ import millfork.assembly.AddrMode._
 import millfork.env._
 import millfork.error.ErrorReporting
 
-import scala.annotation.tailrec
 import scala.collection.mutable.ListBuffer
 
 /**
@@ -15,6 +14,13 @@ import scala.collection.mutable.ListBuffer
   */
 object VariableToRegisterOptimization extends AssemblyOptimization {
 
+  object CyclesAndBytes {
+    val Zero = CyclesAndBytes(0, 0)
+  }
+  case class CyclesAndBytes(bytes: Int, cycles: Int) {
+    def +(that: CyclesAndBytes) = CyclesAndBytes(this.bytes + that.bytes, this.cycles + that.cycles)
+  }
+
   case class Features(
     blastProcessing: Boolean,
     izIsAlwaysZero: Boolean,
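
`CyclesAndBytes` replaces the single-integer scores used so far, so every rewrite in this file can record its byte saving and its cycle saving separately; the hunks that follow thread the pair through all the candidate-scoring call sites and let a `costFunction` project it onto whichever axis the compilation options favour. A rough stand-alone usage sketch:

    case class CyclesAndBytes(bytes: Int, cycles: Int) {
      def +(that: CyclesAndBytes) = CyclesAndBytes(bytes + that.bytes, cycles + that.cycles)
    }

    val savings = List(CyclesAndBytes(bytes = 2, cycles = 2), CyclesAndBytes(bytes = 1, cycles = 2))
    val total = savings.foldLeft(CyclesAndBytes(0, 0))(_ + _) // CyclesAndBytes(3, 4)

    val optimizeForSpeed = true
    val costFunction: CyclesAndBytes => Int = if (optimizeForSpeed) _.cycles else _.bytes
    costFunction(total) // 4 when optimizing for speed, 3 when optimizing for size
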
@@ -128,15 +134,16 @@ object VariableToRegisterOptimization extends AssemblyOptimization {
       v.name -> VariableLifetime.apply(v.name, code)
     )
 
+    val costFunction: CyclesAndBytes => Int = if (options.flag(CompilationFlag.OptimizeForSpeed)) _.cycles else _.bytes
     val importances = ReverseFlowAnalyzer.analyze(f, code)
     val blastProcessing = options.flag(CompilationFlag.OptimizeForSonicSpeed)
     val identityArray = f.environment.maybeGet[ThingInMemory]("identity$").map(MemoryAddressConstant).getOrElse(Constant.Zero)
     val izIsAlwaysZero = !options.flag(CompilationFlag.Emit65CE02Opcodes)
     val features = Features(
-      blastProcessing =options.flag(CompilationFlag.OptimizeForSonicSpeed),
-      izIsAlwaysZero = !options.flag(CompilationFlag.Emit65CE02Opcodes),
+      blastProcessing = blastProcessing,
+      izIsAlwaysZero = izIsAlwaysZero,
       indexRegisterTransfers = options.flag(CompilationFlag.EmitEmulation65816Opcodes),
-      identityArray = f.environment.maybeGet[ThingInMemory]("identity$").map(MemoryAddressConstant).getOrElse(Constant.Zero)
+      identityArray = identityArray
     )
 
     val xCandidates = variablesWithLifetimes.filter {
       case (vName, range) =>
     }.flatMap {
       case (vName, range) =>
         canBeInlined(Some(vName), None, None, features, code.zip(importances).slice(range.start, range.end)).map { score =>
-          (vName, range, if (variablesWithRegisterHint(vName)) score + 16 else score)
+          (vName, range, if (variablesWithRegisterHint(vName)) score + CyclesAndBytes(16, 16) else score)
         }
     }
@@ -155,7 +162,7 @@
     }.flatMap {
       case (vName, range) =>
         canBeInlined(None, Some(vName), None, features, code.zip(importances).slice(range.start, range.end)).map { score =>
-          (vName, range, if (variablesWithRegisterHint(vName)) score + 16 else score)
+          (vName, range, if (variablesWithRegisterHint(vName)) score + CyclesAndBytes(16, 16) else score)
         }
     }
@@ -165,7 +172,7 @@
     }.flatMap {
       case (vName, range) =>
         canBeInlined(None, None, Some(vName), features, code.zip(importances).slice(range.start, range.end)).map { score =>
-          (vName, range, if (variablesWithRegisterHint(vName)) score + 16 else score)
+          (vName, range, if (variablesWithRegisterHint(vName)) score + CyclesAndBytes(16, 16) else score)
         }
     }
@@ -180,7 +187,7 @@
           synced = false,
           vName,
           code.zip(importances).slice(range.start, range.end)).map { score =>
-          (vName, range, if (variablesWithRegisterHint(vName)) score + 16 else score)
+          (vName, range, if (variablesWithRegisterHint(vName)) score + CyclesAndBytes(16, 16) else score)
         }
     }
@@ -188,10 +195,10 @@
 //    println(s"X: $xCandidates")
 //    println(s"Y: $yCandidates")
 //    println(s"Z: $zCandidates")
 //    println(s"A: $aCandidates")
 
-    val xCandidateSets = NonOverlappingIntervals.apply[(String, Range, Int)](xCandidates, _._2.start, _._2.end)
-    val yCandidateSets = NonOverlappingIntervals.apply[(String, Range, Int)](yCandidates, _._2.start, _._2.end)
-    val zCandidateSets = NonOverlappingIntervals.apply[(String, Range, Int)](zCandidates, _._2.start, _._2.end)
-    val aCandidateSets = NonOverlappingIntervals.apply[(String, Range, Int)](aCandidates, _._2.start, _._2.end)
+    val xCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](xCandidates, _._2.start, _._2.end)
+    val yCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](yCandidates, _._2.start, _._2.end)
+    val zCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](zCandidates, _._2.start, _._2.end)
+    val aCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](aCandidates, _._2.start, _._2.end)
 
     val variants = for {
       vx <- if (options.flag(CompilationFlag.SingleThreaded)) xCandidateSets else xCandidateSets.par
@@ -212,7 +219,10 @@
       if (nx & na).isEmpty
       if (ny & na).isEmpty
 
-      score = vx.toSeq.map(_._3).sum + vy.toSeq.map(_._3).sum + va.toSeq.map(_._3).sum + vz.toSeq.map(_._3).sum
+      score = vx.toSeq.map(x => costFunction(x._3)).sum +
+        vy.toSeq.map(x => costFunction(x._3)).sum +
+        va.toSeq.map(x => costFunction(x._3)).sum +
+        vz.toSeq.map(x => costFunction(x._3)).sum
     } yield (score, vx, vy, vz, va)
 
     if (variants.isEmpty) {
@@ -296,7 +306,7 @@
   }
 
   // TODO: STA has different flag behaviour than TAX, keep it in mind!
-  def canBeInlined(xCandidate: Option[String], yCandidate: Option[String], zCandidate: Option[String], features: Features, lines: List[(AssemblyLine, CpuImportance)]): Option[Int] = {
+  def canBeInlined(xCandidate: Option[String], yCandidate: Option[String], zCandidate: Option[String], features: Features, lines: List[(AssemblyLine, CpuImportance)]): Option[CyclesAndBytes] = {
     val vx = xCandidate.getOrElse("-")
     val vy = yCandidate.getOrElse("-")
     val vz = zCandidate.getOrElse("-")
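
The signature change keeps the old encoding — `None` still means the variable cannot be kept in the register over this range — while `Some` now carries the two-axis saving instead of a bare int. The recursion pattern below is unchanged; simplified, it looks like this (reusing the CyclesAndBytes sketch above):

    def walk(lines: List[String]): Option[CyclesAndBytes] = lines match {
      case "forbidden" :: _   => None                                   // inlining impossible
      case "rewritable" :: xs => walk(xs).map(_ + CyclesAndBytes(2, 2)) // rewrite saves 2 bytes / 2 cycles
      case _ :: xs            => walk(xs)                               // neutral instruction
      case Nil                => Some(CyclesAndBytes(0, 0))             // reached the end: feasible
    }
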
@@ -330,13 +340,23 @@
       case (AssemblyLine(SEP | REP, _, _, _), _) :: xs => None
 
       case (AssemblyLine(STY | LDY, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs if th.name == vx =>
-        if (features.indexRegisterTransfers) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
+        if (features.indexRegisterTransfers) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
         else None
 
       case (AssemblyLine(STX | LDX, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs if th.name == vy =>
-        if (features.indexRegisterTransfers) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
+        if (features.indexRegisterTransfers) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
         else None
 
+      case (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable),_) :: xs
+        if opcodesIdentityTable(op) && features.blastProcessing =>
+        if (th.name == vx || th.name == vy) {
+          if (elidable) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 0, cycles = -1))
+          else None
+        } else {
+          if (th.name == vz) None
+          else canBeInlined(xCandidate, yCandidate, zCandidate, features, xs)
+        }
+
       case (AssemblyLine(opcode, Absolute | ZeroPage, MemoryAddressConstant(th), _), _) :: xs
         if th.name == vx && (opcode == LDY || opcodesThatCannotBeUsedWithIndexRegistersAsParameters(opcode)) =>
         // if a variable is used by some opcodes, then it cannot be assigned to a register
@@ -358,9 +378,9 @@
         // removing LDX saves 3 cycles
         if (elidable && th.name == vx) {
           if (imp.z == Unimportant && imp.n == Unimportant) {
-            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
+            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
           } else {
-            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 1)
+            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 1, cycles = 2))
           }
         } else {
           None
@@ -371,7 +391,7 @@
         // LAX = LDX-LDA, and since LDX simplifies to nothing and LDA simplifies to TXA,
         // LAX simplifies to TXA, saving two bytes
         if (elidable && th.name == vx) {
-          canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
+          canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
         } else {
           None
         }
@@ -382,9 +402,9 @@
         // sometimes that LDX has to be converted into CPX#0
         if (elidable && th.name == vy) {
           if (imp.z == Unimportant && imp.n == Unimportant) {
-            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
+            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
           } else {
-            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 1)
+            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 1, cycles = 2))
           }
         } else {
           None
@@ -393,9 +413,9 @@
       case (AssemblyLine(LDZ, Absolute | ZeroPage, MemoryAddressConstant(th), elidable), imp) :: xs if zCandidate.isDefined =>
         if (elidable && th.name == vz) {
           if (imp.z == Unimportant && imp.n == Unimportant) {
-            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
+            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
           } else {
-            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 1)
+            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 1, cycles = 2))
           }
         } else {
           None
@@ -413,20 +433,11 @@
         // if a register is populated with something else than a variable, then no variable cannot be assigned to that register
         None
 
-      case (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable),_) :: xs
-        if opcodesIdentityTable(op) =>
-        if (th.name == vx || th.name == vy) {
-          if (elidable) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs)
-          else None
-        } else {
-          if (th.name == vz) None
-          else canBeInlined(xCandidate, yCandidate, zCandidate, features, xs)
-        }
-
       case (AssemblyLine(LDA, _, _, elidable),_) :: (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable2),_) :: xs if opcodesCommutative(op) =>
+        // LDAw/ANDx -> TXA/ANDw
         if (th.name == vx || th.name == vy) {
-          if (elidable && elidable2) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
+          if (elidable && elidable2) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
           else None
         } else {
           if (th.name == vz) None
@@ -436,7 +447,7 @@
       case (AssemblyLine(LDA, _, _, elidable),_) :: (AssemblyLine(CLC, _, _, _),_) :: (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable2),_) :: xs if opcodesCommutative(op) =>
         if (th.name == vx || th.name == vy) {
-          if (elidable && elidable2) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
+          if (elidable && elidable2) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
           else None
         } else {
           if (th.name == vz) None
@@ -448,7 +459,7 @@
         // a variable cannot be inlined if there is TAX not after LDA of that variable
         // but LDA-TAX can be simplified to TXA
         if (elidable && elidable2 && th.name == vx) {
-          canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
+          canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
         } else {
           None
         }
@@ -458,7 +469,7 @@
         // a variable cannot be inlined if there is TAY not after LDA of that variable
         // but LDA-TAY can be simplified to TYA
         if (elidable && elidable2 && th.name == vy) {
-          canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
+          canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
         } else {
           None
         }
@@ -468,16 +479,28 @@
         // a variable cannot be inlined if there is TAZ not after LDA of that variable
         // but LDA-TAZ can be simplified to TZA
         if (elidable && elidable2 && th.name == vy) {
-          canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 3)
+          canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
         } else {
           None
         }
 
-      case (AssemblyLine(LDA | STA | INC | DEC, Absolute | ZeroPage, MemoryAddressConstant(th), elidable), _) :: xs =>
+      case (AssemblyLine(LDA | STA, Absolute | ZeroPage, MemoryAddressConstant(th), elidable), _) :: xs =>
         // changing LDA->TXA, STA->TAX, INC->INX, DEC->DEX saves 2 bytes
         if (th.name == vy || th.name == vx || th.name == vz) {
           if (elidable) {
-            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
+            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
+          } else {
+            None
+          }
+        } else {
+          canBeInlined(xCandidate, yCandidate, zCandidate, features, xs)
+        }
+
+      case (AssemblyLine(INC | DEC, Absolute | ZeroPage, MemoryAddressConstant(th), elidable), _) :: xs =>
+        // changing LDA->TXA, STA->TAX, INC->INX, DEC->DEX saves 2 bytes
+        if (th.name == vy || th.name == vx || th.name == vz) {
+          if (elidable) {
+            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 4))
           } else {
             None
           }
@@ -486,10 +509,10 @@
         }
 
       case (AssemblyLine(STZ, Absolute | ZeroPage, MemoryAddressConstant(th), elidable), _) :: xs =>
-        // changing STZ->LDX saves 2 bytes
+        // changing STZ->LDX saves 1 byte
        if (th.name == vy || th.name == vx) {
          if (elidable && features.izIsAlwaysZero) {
-            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + 2)
+            canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 1, cycles = 2))
          } else {
            None
          }
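
The new `opcodesIdentityTable` case (moved up from later in the function, where the unconditional version is removed, and now gated on `features.blastProcessing`) covers the identity-page trick: with a 256-byte table `identity$` in which entry i holds i, an instruction such as `ORA var` can become `ORA identity$,x` when the variable lives in X, reading the register's value without ever storing it — the same size for an absolute operand but one cycle slower, hence the recorded `CyclesAndBytes(bytes = 0, cycles = -1)`. A tiny model of why the table works:

    // identity(i) == i, so indexing the table with a register recovers the register.
    val identity: Array[Int] = Array.tabulate(256)(i => i)
    def oraViaIdentityTable(a: Int, x: Int): Int = a | identity(x) // == a | x
    assert(oraViaIdentityTable(0x40, 0x06) == 0x46)
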
@@ -524,17 +547,17 @@
           canBeInlined(xCandidate, yCandidate, zCandidate, features, xs)
         }
 
-      case Nil => Some(0)
+      case Nil => Some(CyclesAndBytes.Zero)
     }
   }
 
-  def canBeInlinedToAccumulator(options: CompilationOptions, start: Boolean, synced: Boolean, candidate: String, lines: List[(AssemblyLine, CpuImportance)]): Option[Int] = {
+  def canBeInlinedToAccumulator(options: CompilationOptions, start: Boolean, synced: Boolean, candidate: String, lines: List[(AssemblyLine, CpuImportance)]): Option[CyclesAndBytes] = {
     val cmos = options.flags(CompilationFlag.EmitCmosOpcodes)
     lines match {
       case (AssemblyLine(STA, Absolute | ZeroPage, MemoryAddressConstant(th), true),_) :: xs if th.name == candidate && start || synced =>
-        canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 3)
+        canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 4))
 
       case (AssemblyLine(op, _, _, _),_) :: xs if opcodesThatAlwaysPrecludeAAllocation(op) =>
         None
@@ -560,14 +583,14 @@
         None
 
       case (AssemblyLine(SEP | REP, Immediate, NumericConstant(nn, _), _), _) :: xs =>
-        if ((nn & 0x20) == 0) canBeInlinedToAccumulator(options, start = false, synced = synced, candidate, xs).map(_ + 3)
+        if ((nn & 0x20) == 0) canBeInlinedToAccumulator(options, start = false, synced = synced, candidate, xs)
         else None
 
       case (AssemblyLine(SEP | REP, _, _, _), _) :: xs => None
 
       case (AssemblyLine(STA, _, MemoryAddressConstant(th), elidable) ,_):: xs if th.name == candidate =>
         if (synced && elidable) {
-          canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 3)
+          canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
         } else {
           None
         }
@@ -608,31 +631,31 @@
       case (AssemblyLine(LDA, _, _, elidable),_) :: (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable2),_) :: xs if opcodesCommutative(op) =>
         if (th.name == candidate) {
-          if (elidable && elidable2) canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 3)
+          if (elidable && elidable2) canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
           else None
         } else canBeInlinedToAccumulator(options, start = false, synced = synced, candidate, xs)
 
       case (AssemblyLine(LDA, _, _, elidable),_) :: (AssemblyLine(CLC, _, _, _),_) :: (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidable2),_) :: xs if opcodesCommutative(op) =>
         if (th.name == candidate) {
-          if (elidable && elidable2) canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 3)
+          if (elidable && elidable2) canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
           else None
         } else canBeInlinedToAccumulator(options, start = false, synced = synced, candidate, xs)
 
       case (AssemblyLine(LDA, Absolute | ZeroPage, MemoryAddressConstant(th), true), imp) :: xs if th.name == candidate =>
-        // removing LDA saves 3 cycles
+        // removing LDA saves 3 bytes
         if (imp.z == Unimportant && imp.n == Unimportant) {
-          canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 3)
+          canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 3, cycles = 4))
         } else {
-          canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 1)
+          canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 1, cycles = 2))
         }
 
       case (AssemblyLine(LDX | LDY | LAX, Absolute | ZeroPage, MemoryAddressConstant(th), elidable),_) :: xs if th.name == candidate =>
         // converting a load into a transfer saves 2 bytes
         if (elidable) {
-          canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + 2)
+          canBeInlinedToAccumulator(options, start = false, synced = true, candidate, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2))
         } else {
           None
         }
@@ -644,7 +667,7 @@
       case (AssemblyLine(ASL | LSR | ROR | ROL, Absolute | ZeroPage, MemoryAddressConstant(th), elidable),_) :: xs if th.name == candidate =>
         if (elidable) {
-          canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ + 2)
+          canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 4))
         } else {
           None
         }
@@ -652,7 +675,7 @@
       case (AssemblyLine(INC | DEC, Absolute | ZeroPage, MemoryAddressConstant(th), elidable),_) :: xs if th.name == candidate =>
         if (cmos && elidable) {
-          canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ + 2)
+          canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 4))
         } else {
           None
         }
@@ -660,14 +683,14 @@
       case (AssemblyLine(TXA | TYA, _, _, elidable), imp) :: xs =>
         if (imp.a == Unimportant && imp.c == Unimportant && imp.v == Unimportant && elidable) {
           // TYA/TXA has to be converted to CPY#0/CPX#0
-          canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ - 1)
+          canBeInlinedToAccumulator(options, start = false, synced = false, candidate, xs).map(_ + CyclesAndBytes(bytes = -1, cycles = 0))
         } else {
           None
         }
 
       case (x, _) :: xs => canBeInlinedToAccumulator(options, start = false, synced = synced && OpcodeClasses.AllLinear(x.opcode), candidate, xs)
 
-      case Nil => Some(0)
+      case Nil => Some(CyclesAndBytes.Zero)
     }
   }
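
Several of the revised constants encode flag-sensitive savings that the old scalar scores blurred: removing a reload like `LDA var` is only free when no later instruction reads N or Z; when the flags do matter, the load must be replaced by an immediate instruction that re-derives them (a two-byte, two-cycle compare or similar), which still saves one byte and two cycles but not the whole instruction. The shape of that decision, as in the LDA case above (Imp is an assumed stand-in for millfork's CpuImportance, again reusing the CyclesAndBytes sketch):

    case class Imp(n: Boolean, z: Boolean)

    def reloadSaving(imp: Imp): CyclesAndBytes =
      if (!imp.n && !imp.z) CyclesAndBytes(bytes = 3, cycles = 4) // LDA abs disappears entirely
      else CyclesAndBytes(bytes = 1, cycles = 2)                  // flags needed: a 2-byte, 2-cycle substitute remains
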
diff --git a/src/main/scala/millfork/env/Environment.scala b/src/main/scala/millfork/env/Environment.scala
index fe712bd5..1d833813 100644
--- a/src/main/scala/millfork/env/Environment.scala
+++ b/src/main/scala/millfork/env/Environment.scala
@@ -118,6 +118,9 @@ class Environment(val parent: Option[Environment], val prefix: String) {
             )
           }
         case VariableAllocationMethod.Auto | VariableAllocationMethod.Register | VariableAllocationMethod.Static =>
+          if (m.alloc == VariableAllocationMethod.Register) {
+            ErrorReporting.warn(s"Failed to inline variable `${m.name}` into a register", options, None)
+          }
           m.sizeInBytes match {
             case 0 => Nil
             case 2 =>
diff --git a/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala b/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala
index 0ef54247..f38ef1a1 100644
--- a/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala
+++ b/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala
@@ -488,4 +488,22 @@ class AssemblyOptimizationSuite extends FunSuite with Matchers {
       m.readByte(0xc000) should equal(33)
     }
   }
+
+  test("Identity page") {
+    EmuUltraBenchmarkRun(
+      """
+        | byte output @$c000
+        | void main() {
+        |   byte b
+        |   b = f()
+        |   output = (b ^ $40) + b
+        | }
+        | noinline byte f () {
+        |   return 3
+        | }
+      """.stripMargin
+    ){m =>
+      m.readByte(0xc000) should equal(0x46)
+    }
+  }
 }
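
The expected value just traces the source: `f()` returns 3, so `b = 3`, `b ^ $40 = $43`, and `$43 + 3 = $46`. Using `b` twice in one expression while it lives in a register is exactly the situation the identity-page rewrite targets, which is presumably what the test's name refers to.

    assert(((3 ^ 0x40) + 3) == 0x46) // the byte the test expects at $c000
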
diff --git a/src/test/scala/millfork/test/IllegalSuite.scala b/src/test/scala/millfork/test/IllegalSuite.scala
index 6036f9f5..df095e36 100644
--- a/src/test/scala/millfork/test/IllegalSuite.scala
+++ b/src/test/scala/millfork/test/IllegalSuite.scala
@@ -222,4 +222,24 @@ class IllegalSuite extends FunSuite with Matchers {
       """.stripMargin)
     m.readWord(0xc000) should equal(0x105)
   }
+
+  test("SAX test 2") {
+    val m = EmuUndocumentedRun("""
+      | byte output @$c000
+      | void main () {
+      |   byte a
+      |   byte b
+      |   byte c
+      |   b = five(a)
+      |   five(a)
+      |   a = 44 ^ b
+      |   output = b & $41
+      |   five(a)
+      | }
+      | noinline byte five (byte ignored) {
+      |   return 5
+      | }
+      """.stripMargin)
+    m.readLong(0xc000) should equal(1)
+  }
 }
diff --git a/src/test/scala/millfork/test/emu/EmuOptimizedInlinedRun.scala b/src/test/scala/millfork/test/emu/EmuOptimizedInlinedRun.scala
index cb3c9c50..e48b5a73 100644
--- a/src/test/scala/millfork/test/emu/EmuOptimizedInlinedRun.scala
+++ b/src/test/scala/millfork/test/emu/EmuOptimizedInlinedRun.scala
@@ -16,7 +16,7 @@ object EmuOptimizedInlinedRun extends EmuRun(
     ZeropageRegisterOptimizations.All ++
     OptimizationPresets.Good) {
   override def inline: Boolean = true
-  override def blastProcessing: Boolean = false
+  override def blastProcessing: Boolean = true
 }
diff --git a/src/test/scala/millfork/test/emu/EmuRun.scala b/src/test/scala/millfork/test/emu/EmuRun.scala
index e427a261..c018e51d 100644
--- a/src/test/scala/millfork/test/emu/EmuRun.scala
+++ b/src/test/scala/millfork/test/emu/EmuRun.scala
@@ -105,6 +105,7 @@ class EmuRun(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization],
       CompilationFlag.EmitEmulation65816Opcodes -> (platform.cpu == millfork.Cpu.Sixteen),
       CompilationFlag.Emit65CE02Opcodes -> (platform.cpu == millfork.Cpu.CE02),
       CompilationFlag.EmitHudsonOpcodes -> (platform.cpu == millfork.Cpu.HuC6280),
+      CompilationFlag.OptimizeForSpeed -> blastProcessing,
       CompilationFlag.OptimizeForSonicSpeed -> blastProcessing
 //      CompilationFlag.CheckIndexOutOfBounds -> true,
     ))