From 5f1448450316c44a2088ac085fc302692da8dc0f Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Mon, 16 Jul 2018 23:10:55 +0200 Subject: [PATCH] More Z80 optimizations. Finally almost as fast as C. --- .../z80/opt/AlwaysGoodZ80Optimizations.scala | 167 +++++++- .../z80/opt/EmptyMemoryStoreRemoval.scala | 58 +-- .../z80/opt/EmptyParameterStoreRemoval.scala | 72 ++++ .../z80/opt/ReverseFlowAnalyzer.scala | 63 ++- .../opt/RuleBasedAssemblyOptimization.scala | 16 + .../assembly/z80/opt/VariableStatus.scala | 91 ++++ .../WordVariableToRegisterOptimization.scala | 398 ++++++++++++++++++ .../z80/opt/Z80OptimizationPresets.scala | 13 +- 8 files changed, 810 insertions(+), 68 deletions(-) create mode 100644 src/main/scala/millfork/assembly/z80/opt/EmptyParameterStoreRemoval.scala create mode 100644 src/main/scala/millfork/assembly/z80/opt/VariableStatus.scala create mode 100644 src/main/scala/millfork/assembly/z80/opt/WordVariableToRegisterOptimization.scala diff --git a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodZ80Optimizations.scala b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodZ80Optimizations.scala index 18900748..1c68a87b 100644 --- a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodZ80Optimizations.scala +++ b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodZ80Optimizations.scala @@ -169,24 +169,82 @@ object AlwaysGoodZ80Optimizations { (Elidable & Is8BitLoad(register, register)) ~~> (_ => Nil) ), // 42-48 - for7Registers(register => - (Elidable & Is8BitLoadTo(register) & MatchSourceRegisterAndOffset(0)) ~ + for6Registers(register => + (Elidable & Is8BitLoadTo(register) & MatchSourceRegisterAndOffset(0) & MatchParameterOrNothing(1)) ~ (Linear & Not(Concerns(register)) & DoesntChangeMatchedRegisterAndOffset(0)).* ~ - (Elidable & HasOpcodeIn(Set(ADD, ADC, XOR, OR, AND, CP, SUB, SBC)) & HasRegisters(OneRegister(register)) & DoesntMatterWhatItDoesWith(register)) ~~> ((code,ctx) => - code.tail.init :+ code.last.copy(registers = 
ctx.get[RegisterAndOffset](0).toOneRegister) - ) + (Elidable & HasOpcodeIn(Set(ADD, ADC, XOR, OR, AND, CP, SUB, SBC)) & + HasRegisters(OneRegister(register)) & DoesntMatterWhatItDoesWith(register)) ~~> { (code, ctx) => + code.tail.init :+ code.last.copy(registers = ctx.get[RegisterAndOffset](0).toOneRegister, parameter = ctx.get[Constant](1)) + } ), + // 49-54 + MultipleAssemblyRules { + import ZRegister._ + val regs = Seq((BC, B, C), (DE, D, E), (HL, H, L)) + for { + (t, th, tl) <- regs + (s, sh, sl) <- regs + if t != HL + if t != s + } yield { + // TODO: make it a bit more universal + (Elidable & Is8BitLoad(th, sh)) ~ + (Elidable & Is8BitLoad(tl, sl)) ~ + (HasOpcode(OR) & HasRegisterParam(A)).?.capture(1) ~ + (Elidable & HasOpcodeIn(Set(ADD_16, ADC_16, SBC_16)) & HasRegisters(TwoRegisters(HL, t)) & DoesntMatterWhatItDoesWith(t)) ~~> { + (code, ctx) => + ctx.get[List[ZLine]](1) :+ code.last.copy(registers = TwoRegisters(HL, s)) + } + } + }, + ) val PointlessStackStashing = new RuleBasedAssemblyOptimization("Pointless stack stashing", - needsFlowInfo = FlowInfoRequirement.NoRequirement, + needsFlowInfo = FlowInfoRequirement.BackwardFlow, // 0-4 for5LargeRegisters(register => { (Elidable & HasOpcode(PUSH) & HasRegisterParam(register)) ~ (Linear & Not(HasOpcode(POP)) & Not(Changes(register))).* ~ (Elidable & HasOpcode(POP) & HasRegisterParam(register)) ~~> (_.tail.init) }), + // 5 + (Elidable & HasOpcode(PUSH) & HasRegisterParam(ZRegister.DE) & DoesntMatterWhatItDoesWith(ZRegister.D)) ~ + (Linear & Not(HasOpcode(POP)) & Not(Changes(ZRegister.D))).* ~ + (Elidable & HasOpcode(POP) & HasRegisterParam(ZRegister.DE)) ~~> {code => + ZLine.ld8(ZRegister.D, ZRegister.E) :: (code.tail.init :+ ZLine.ld8(ZRegister.E, ZRegister.D)) + }, + // 6 + (Elidable & HasOpcode(PUSH) & HasRegisterParam(ZRegister.DE) & DoesntMatterWhatItDoesWith(ZRegister.E)) ~ + (Linear & Not(HasOpcode(POP)) & Not(Changes(ZRegister.E))).* ~ + (Elidable & HasOpcode(POP) & HasRegisterParam(ZRegister.DE)) ~~> 
{ code => + ZLine.ld8(ZRegister.E, ZRegister.D) :: (code.tail.init :+ ZLine.ld8(ZRegister.D, ZRegister.E)) + }, + // 7 + (Elidable & HasOpcode(PUSH) & HasRegisterParam(ZRegister.BC) & DoesntMatterWhatItDoesWith(ZRegister.B)) ~ + (Linear & Not(HasOpcode(POP)) & Not(Changes(ZRegister.B))).* ~ + (Elidable & HasOpcode(POP) & HasRegisterParam(ZRegister.BC)) ~~> (code => + ZLine.ld8(ZRegister.B, ZRegister.C) :: (code.tail.init :+ ZLine.ld8(ZRegister.C, ZRegister.B)) + ), + // 8 + (Elidable & HasOpcode(PUSH) & HasRegisterParam(ZRegister.BC) & DoesntMatterWhatItDoesWith(ZRegister.C)) ~ + (Linear & Not(HasOpcode(POP)) & Not(Changes(ZRegister.C))).* ~ + (Elidable & HasOpcode(POP) & HasRegisterParam(ZRegister.BC)) ~~> { code => + ZLine.ld8(ZRegister.C, ZRegister.B) :: (code.tail.init :+ ZLine.ld8(ZRegister.B, ZRegister.C)) + }, + // 9 + (Elidable & HasOpcode(PUSH) & HasRegisterParam(ZRegister.HL) & DoesntMatterWhatItDoesWith(ZRegister.H)) ~ + (Linear & Not(HasOpcode(POP)) & Not(Changes(ZRegister.H))).* ~ + (Elidable & HasOpcode(POP) & HasRegisterParam(ZRegister.HL)) ~~> { code => + ZLine.ld8(ZRegister.H, ZRegister.L) :: (code.tail.init :+ ZLine.ld8(ZRegister.L, ZRegister.H)) + }, + // 10 + (Elidable & HasOpcode(PUSH) & HasRegisterParam(ZRegister.HL) & DoesntMatterWhatItDoesWith(ZRegister.L)) ~ + (Linear & Not(HasOpcode(POP)) & Not(Changes(ZRegister.L))).* ~ + (Elidable & HasOpcode(POP) & HasRegisterParam(ZRegister.HL)) ~~> { code => + ZLine.ld8(ZRegister.L, ZRegister.H) :: (code.tail.init :+ ZLine.ld8(ZRegister.H, ZRegister.L)) + }, ) @@ -252,6 +310,23 @@ object AlwaysGoodZ80Optimizations { (Elidable & HasOpcode(ADD_16) & HasRegisters(TwoRegisters(ZRegister.HL, ZRegister.DE)) & MatchRegister(ZRegister.DE, 0) & MatchRegister(ZRegister.HL, 1) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => List(ZLine.ldImm16(ZRegister.HL, ctx.get[Int](0) + ctx.get[Int](1))) }, + (Elidable & HasOpcode(ADD_16) & HasRegisters(TwoRegisters(ZRegister.HL, ZRegister.HL)) & 
MatchRegister(ZRegister.HL, 1) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => + List(ZLine.ldImm16(ZRegister.HL, 2 * ctx.get[Int](1) & 0xffff)) + }, + + (Elidable & HasOpcode(ADD_16) & HasRegisters(TwoRegisters(ZRegister.HL, ZRegister.BC)) & HasRegister(ZRegister.BC, 0) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => + Nil + }, + (Elidable & HasOpcode(ADD_16) & HasRegisters(TwoRegisters(ZRegister.HL, ZRegister.DE)) & HasRegister(ZRegister.DE, 0) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => + Nil + }, + + (Elidable & HasOpcode(ADD_16) & HasRegisters(TwoRegisters(ZRegister.HL, ZRegister.BC)) & HasRegister(ZRegister.BC, 1) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => + List(ZLine.register(ZOpcode.INC_16, ZRegister.HL)) + }, + (Elidable & HasOpcode(ADD_16) & HasRegisters(TwoRegisters(ZRegister.HL, ZRegister.DE)) & HasRegister(ZRegister.DE, 1) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => + List(ZLine.register(ZOpcode.INC_16, ZRegister.HL)) + }, (Elidable & HasOpcode(ADD_16) & HasRegisters(TwoRegisters(ZRegister.HL, ZRegister.BC)) & MatchRegister(ZRegister.BC, 0) & MatchConstantInHL(1) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => @@ -335,6 +410,13 @@ object AlwaysGoodZ80Optimizations { List(ZLine.ldImm8(ZRegister.A, CompoundConstant(MathOperator.DecimalMinus, NumericConstant(ctx.get[Int](0) & 0xff, 1), ctx.get[Constant](1)).quickSimplify)) }, + (Elidable & (Is8BitLoadTo(ZRegister.A) | HasOpcode(LD) & HasRegisters(TwoRegisters(ZRegister.A, ZRegister.MEM_ABS_8)))) ~ + (Elidable & HasOpcode(SUB) & Has8BitImmediate(0)) ~ + (Is8BitLoadTo(ZRegister.A) | HasOpcode(LD) & HasRegisters(TwoRegisters(ZRegister.A, ZRegister.MEM_ABS_8))) ~ + (Elidable & HasOpcode(SBC)) ~~> { code => + List(code(2), code(3).copy(opcode = SUB)) + } + ) val FreeHL = new RuleBasedAssemblyOptimization("Free HL", @@ -356,6 +438,7 @@ object AlwaysGoodZ80Optimizations { code.head.copy(opcode = LD, registers = TwoRegisters(ZRegister.A, 
ZRegister.MEM_ABS_8)), code(1).copy(registers = TwoRegisters(code(1).registers.asInstanceOf[TwoRegisters].target, ZRegister.A)), )), + (Elidable & Is16BitLoad(ZRegister.HL, ZRegister.IMM_16)) ~ (Elidable & Is8BitLoad(ZRegister.D, ZRegister.H)) ~ (Elidable & Is8BitLoad(ZRegister.E, ZRegister.L) & DoesntMatterWhatItDoesWith(ZRegister.HL)) ~~> (code => @@ -368,6 +451,77 @@ object AlwaysGoodZ80Optimizations { List( code.head.copy(registers = TwoRegisters(ZRegister.BC, ZRegister.IMM_16)) )), + + (Elidable & Is16BitLoad(ZRegister.HL, ZRegister.MEM_ABS_16)) ~ + (Elidable & Is8BitLoad(ZRegister.D, ZRegister.H)) ~ + (Elidable & Is8BitLoad(ZRegister.E, ZRegister.L) & DoesntMatterWhatItDoesWith(ZRegister.HL)) ~~> (code => + List( + code.head.copy(registers = TwoRegisters(ZRegister.DE, ZRegister.MEM_ABS_16)) + )), + (Elidable & Is16BitLoad(ZRegister.HL, ZRegister.MEM_ABS_16)) ~ + (Elidable & Is8BitLoad(ZRegister.B, ZRegister.H)) ~ + (Elidable & Is8BitLoad(ZRegister.C, ZRegister.L) & DoesntMatterWhatItDoesWith(ZRegister.HL)) ~~> (code => + List( + code.head.copy(registers = TwoRegisters(ZRegister.BC, ZRegister.MEM_ABS_16)) + )), + + MultipleAssemblyRules(Seq(ZRegister.BC, ZRegister.DE).map { registerPair => + (Elidable & HasOpcode(LD_16) & HasRegisters(TwoRegisters(registerPair, ZRegister.IMM_16)) & MatchParameter(0)) ~ + (Elidable & HasOpcode(LD_16) & HasRegisters(TwoRegisters(ZRegister.HL, ZRegister.MEM_ABS_16)) & MatchParameter(1)) ~ + (Elidable & HasOpcode(OR) & HasRegisters(OneRegister(ZRegister.A))) ~ + (Elidable & HasOpcode(SBC_16) & HasRegisters(TwoRegisters(ZRegister.HL, registerPair)) & + DoesntMatterWhatItDoesWith(ZRegister.A, ZRegister.HL, registerPair) & + DoesntMatterWhatItDoesWithFlagsExceptCarry) ~~> { (code, ctx) => + import ZRegister._ + val value = ctx.get[Constant](0) + val variable = ctx.get[Constant](1) + List( + ZLine.ldAbs8(A, variable), + ZLine.imm8(SUB, value.loByte), + ZLine.ldAbs8(A, variable + 1), + ZLine.imm8(SBC, value.hiByte)) + } + }), + + 
(Elidable & Is8BitLoad(ZRegister.H, ZRegister.B)) ~ + (Elidable & Is8BitLoad(ZRegister.L, ZRegister.C)) ~ + (Elidable & HasOpcodeIn(Set(INC_16, DEC_16, PUSH, POP)) & HasRegisterParam(ZRegister.HL)) ~ + (Elidable & Is8BitLoad(ZRegister.B, ZRegister.H)) ~ + (Elidable & Is8BitLoad(ZRegister.C, ZRegister.L) & DoesntMatterWhatItDoesWith(ZRegister.HL)) ~~> (code => + List( + code(2).copy(registers = OneRegister(ZRegister.BC)) + )), + + (Elidable & Is8BitLoad(ZRegister.H, ZRegister.D)) ~ + (Elidable & Is8BitLoad(ZRegister.L, ZRegister.E)) ~ + (Elidable & HasOpcodeIn(Set(INC_16, DEC_16, PUSH, POP)) & HasRegisterParam(ZRegister.HL)) ~ + (Elidable & Is8BitLoad(ZRegister.D, ZRegister.H)) ~ + (Elidable & Is8BitLoad(ZRegister.E, ZRegister.L) & DoesntMatterWhatItDoesWith(ZRegister.HL)) ~~> (code => + List( + code(2).copy(registers = OneRegister(ZRegister.DE)) + )), + + (Elidable & Is8BitLoad(ZRegister.H, ZRegister.D)) ~ + (Elidable & Is8BitLoad(ZRegister.L, ZRegister.E)) ~ + (Elidable & HasOpcodeIn(Set(INC_16, DEC_16, PUSH, POP)) & HasRegisterParam(ZRegister.HL)) ~ + (Elidable & Is8BitLoad(ZRegister.B, ZRegister.H)) ~ + (Elidable & Is8BitLoad(ZRegister.C, ZRegister.L) & DoesntMatterWhatItDoesWith(ZRegister.HL)) ~~> (code => + List( + ZLine.ld8(ZRegister.B, ZRegister.D), + ZLine.ld8(ZRegister.C, ZRegister.E), + code(2).copy(registers = OneRegister(ZRegister.BC)) + )), + + (Elidable & Is8BitLoad(ZRegister.H, ZRegister.B)) ~ + (Elidable & Is8BitLoad(ZRegister.L, ZRegister.C)) ~ + (Elidable & HasOpcodeIn(Set(INC_16, DEC_16, PUSH, POP)) & HasRegisterParam(ZRegister.HL)) ~ + (Elidable & Is8BitLoad(ZRegister.D, ZRegister.H)) ~ + (Elidable & Is8BitLoad(ZRegister.E, ZRegister.L) & DoesntMatterWhatItDoesWith(ZRegister.HL)) ~~> (code => + List( + ZLine.ld8(ZRegister.D, ZRegister.B), + ZLine.ld8(ZRegister.E, ZRegister.C), + code(2).copy(registers = OneRegister(ZRegister.DE)) + )), ) val UnusedCodeRemoval = new RuleBasedAssemblyOptimization("Unreachable code removal", @@ -389,7 +543,6 @@ 
object AlwaysGoodZ80Optimizations { val All: List[AssemblyOptimization[ZLine]] = List[AssemblyOptimization[ZLine]]( BranchInPlaceRemoval, - EmptyMemoryStoreRemoval, FreeHL, PointlessLoad, PointlessStackStashing, diff --git a/src/main/scala/millfork/assembly/z80/opt/EmptyMemoryStoreRemoval.scala b/src/main/scala/millfork/assembly/z80/opt/EmptyMemoryStoreRemoval.scala index 5fa836dd..c3c032d7 100644 --- a/src/main/scala/millfork/assembly/z80/opt/EmptyMemoryStoreRemoval.scala +++ b/src/main/scala/millfork/assembly/z80/opt/EmptyMemoryStoreRemoval.scala @@ -1,6 +1,5 @@ package millfork.assembly.z80.opt -import millfork.assembly.opt.SingleStatus import millfork.assembly.z80.{OneRegister, TwoRegisters, ZLine} import millfork.assembly.{AssemblyOptimization, OptimizationContext} import millfork.env._ @@ -16,61 +15,16 @@ object EmptyMemoryStoreRemoval extends AssemblyOptimization[ZLine] { override def name = "Removing pointless stores to automatic variables" override def optimize(f: NormalFunction, code: List[ZLine], optimizationContext: OptimizationContext): List[ZLine] = { - val paramVariables = f.params match { -// case NormalParamSignature(List(MemoryVariable(_, typ, _))) if typ.size == 1 => -// Set[String]() - case NormalParamSignature(ps) => - ps.map(_.name).toSet - case _ => - // assembly functions do not get this optimization - return code - } - val flow = FlowAnalyzer.analyze(f, code, optimizationContext.options, FlowInfoRequirement.BothFlows) - import millfork.node.ZRegister._ - val stillUsedVariables = code.flatMap { - case ZLine(_, TwoRegisters(MEM_ABS_8 | MEM_ABS_16, _), MemoryAddressConstant(th), _) => Some(th.name) - case ZLine(_, TwoRegisters(_, MEM_ABS_8 | MEM_ABS_16), MemoryAddressConstant(th), _) => Some(th.name) - case ZLine(_, TwoRegisters(_, IMM_16), MemoryAddressConstant(th), _) => Some(th.name) - case ZLine(_, TwoRegisters(MEM_ABS_8 | MEM_ABS_16, _), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), _) => 
Some(th.name) - case ZLine(_, TwoRegisters(_, MEM_ABS_8 | MEM_ABS_16), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), _) => Some(th.name) - case ZLine(_, TwoRegisters(_, IMM_16), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), _) => Some(th.name) - case _ => None - }.toSet - val variablesWithAddressesTaken = code.zipWithIndex.flatMap { - case (ZLine(_, _, SubbyteConstant(MemoryAddressConstant(th), _), _), _) => - Some(th.name) - case (ZLine(_, _, SubbyteConstant(CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), _), _), _) => - Some(th.name) - case (ZLine(_, - TwoRegisters(ZRegister.MEM_HL, _) | TwoRegisters(_, ZRegister.MEM_HL) | OneRegister(ZRegister.MEM_HL), - _, _), i) => - flow(i)._1.statusBefore.hl match { - case SingleStatus(MemoryAddressConstant(th)) => - if (flow(i)._1.importanceAfter.hlNumeric != Unimportant) Some(th.name) - else None - case SingleStatus(CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _))) => - if (flow(i)._1.importanceAfter.hlNumeric != Unimportant) Some(th.name) - else None - case _ => None // TODO: ??? 
- } - case _ => None - }.toSet - val allLocalVariables = f.environment.getAllLocalVariables - val localVariables = allLocalVariables.filter { - case MemoryVariable(name, typ, VariableAllocationMethod.Auto | VariableAllocationMethod.Zeropage) => - typ.size > 0 && !paramVariables(name) && stillUsedVariables(name) && !variablesWithAddressesTaken(name) - case _ => false - } - - if (localVariables.isEmpty) { + val vs = VariableStatus(f, code, optimizationContext, _ => true).getOrElse(return code) + if (vs.localVariables.isEmpty) { return code } + import ZRegister._ val toRemove = mutable.Set[Int]() val badVariables = mutable.Set[String]() - for(v <- localVariables) { - val lifetime = VariableLifetime.apply(v.name, flow) + for((v, lifetime) <- vs.variablesWithLifetimes) { val lastaccess = lifetime.last if (lastaccess >= 0) { val lastVariableAccess = code(lastaccess) @@ -79,10 +33,10 @@ object EmptyMemoryStoreRemoval extends AssemblyOptimization[ZLine] { case ZLine(LD, TwoRegisters(MEM_HL, _), _, true) => true case ZLine(LD | LD_16, TwoRegisters(MEM_ABS_8 | MEM_ABS_16, _), _, true) => true case ZLine(INC | DEC, OneRegister(MEM_HL), _, true) => - val importances = flow(lastaccess)._1.importanceAfter + val importances = vs.codeWithFlow(lastaccess)._1.importanceAfter Seq(importances.sf, importances.zf).forall(_ == Unimportant) case ZLine(SLA | SLL | SRA | SRL | RL | RR | RLC | RRC, OneRegister(MEM_HL), _, true) => - val importances = flow(lastaccess)._1.importanceAfter + val importances = vs.codeWithFlow(lastaccess)._1.importanceAfter Seq(importances.sf, importances.zf, importances.cf).forall(_ == Unimportant) case _ => false }) { diff --git a/src/main/scala/millfork/assembly/z80/opt/EmptyParameterStoreRemoval.scala b/src/main/scala/millfork/assembly/z80/opt/EmptyParameterStoreRemoval.scala new file mode 100644 index 00000000..f986bd25 --- /dev/null +++ b/src/main/scala/millfork/assembly/z80/opt/EmptyParameterStoreRemoval.scala @@ -0,0 +1,72 @@ +package 
millfork.assembly.z80.opt + +import millfork.assembly.z80.ZOpcode._ +import millfork.assembly.z80.{TwoRegisters, ZLine} +import millfork.assembly.{AssemblyOptimization, OptimizationContext} +import millfork.env._ +import millfork.error.ErrorReporting + +/** + * @author Karol Stasiak + */ +object EmptyParameterStoreRemoval extends AssemblyOptimization[ZLine] { + override def name = "Removing pointless stores to foreign variables" + + override def optimize(f: NormalFunction, code: List[ZLine], optimizationContext: OptimizationContext): List[ZLine] = { + val usedFunctions = code.flatMap { + case ZLine(CALL | JP | JR, _, MemoryAddressConstant(th), _) => Some(th.name) + case ZLine(CALL | JP | JR, _, NumericConstant(addr, _), _) => Some("$" + addr.toHexString) + case _ => None + }.toSet + val foreignVariables = f.environment.root.things.values.flatMap { + case other: NormalFunction => + val address = other.address match { + case Some(NumericConstant(addr, _)) => "$" + addr.toHexString + case _ => "" + } + if (other.name == f.name || usedFunctions(other.name) || usedFunctions(address)) { + Nil + } else { + val params = other.params match { + case NormalParamSignature(ps) => ps.map(_.name) + case _ => Nil + } + val locals = other.environment.things.values.flatMap{ + case th: MemoryVariable if th.alloc == VariableAllocationMethod.Auto => Some(th.name) + case _ => None + } + params ++ locals + } + case _ => Nil + }.toSet + import millfork.node.ZRegister._ + val stillReadOrStoredVariables = code.flatMap { + case ZLine(_, _, MemoryAddressConstant(th), _) => Some(th.name) + case ZLine(_, _, CompoundConstant(_, MemoryAddressConstant(th), _), _) => Some(th.name) + case ZLine(_, _, SubbyteConstant(MemoryAddressConstant(th), _), _) => Some(th.name) + case _ => None + }.toSet + val stillReadVariables = code.flatMap { + case ZLine(LD | LD_16, TwoRegisters(MEM_ABS_8 | MEM_ABS_16, _), MemoryAddressConstant(th), true) => Nil + case ZLine(LD | LD_16, TwoRegisters(MEM_ABS_8 | MEM_ABS_16, 
_), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), true) => Nil + case ZLine(_, _, MemoryAddressConstant(th), _) => Some(th.name) + case ZLine(_, _, CompoundConstant(_, MemoryAddressConstant(th), _), _) => Some(th.name) + case ZLine(_, _, SubbyteConstant(MemoryAddressConstant(th), _), _) => Some(th.name) + case _ => None + }.toSet + + val unusedForeignVariables = (foreignVariables & stillReadOrStoredVariables) -- stillReadVariables + if (unusedForeignVariables.isEmpty) { + return code + } + + ErrorReporting.debug(s"Removing pointless store(s) to foreign variables ${unusedForeignVariables.mkString(", ")}") + code.filterNot { + case ZLine(LD | LD_16, TwoRegisters(MEM_ABS_8 | MEM_ABS_16, _), MemoryAddressConstant(th), _) => + unusedForeignVariables(th.name) + case ZLine(LD | LD_16, TwoRegisters(MEM_ABS_8 | MEM_ABS_16, _), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), true) => + unusedForeignVariables(th.name) + case _ => false + } + } +} diff --git a/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala b/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala index 591ecf78..3d7d2ee6 100644 --- a/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala +++ b/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala @@ -51,7 +51,7 @@ case class CpuImportance(a: Importance = UnknownImportance, ) { override def toString: String = { val memRepr = if (memIx.isEmpty) "" else (0 to memIx.keys.max).map(i => memIx.getOrElse(i, UnknownImportance)).mkString("") - s"A=$a,B=$b,C=$c,D=$d,E=$e,H=$h,L=$l,IX=$ixh$ixl,Y=$iyh$iyl; Z=$zf,C=$cf,N=$nf,S=$sf,P=$pf,H=$hf; HL=$hlNumeric; M=" ++ memRepr.padTo(4, ' ') + s"A=$a,B=$b,C=$c,D=$d,E=$e,H=$h,L=$l,IX=$ixh$ixl,IY=$iyh$iyl; Z=$zf,C=$cf,N=$nf,S=$sf,P=$pf,H=$hf; HL=$hlNumeric; M=" ++ memRepr.padTo(4, ' ') } def ~(that: CpuImportance) = new CpuImportance( @@ -196,7 +196,7 @@ object ReverseFlowAnalyzer { } val 
currentLine = codeArray(i) currentLine match { - case ZLine(LABEL, _, _, _) => () + case ZLine(LABEL | EI | DI | NOP, _, _, _) => () case ZLine(DJNZ, _, MemoryAddressConstant(Label(l)), _) => val labelIndex = getLabelIndex(codeArray, l) currentImportance = if (labelIndex < 0) finalImportance else (importanceArray(labelIndex) ~ currentImportance).butReadsRegister(ZRegister.B).butReadsFlag(ZFlag.Z) @@ -206,25 +206,44 @@ object ReverseFlowAnalyzer { case ZLine(JP | JR, IfFlagClear(flag), MemoryAddressConstant(Label(l)), _) => val labelIndex = getLabelIndex(codeArray, l) currentImportance = if (labelIndex < 0) finalImportance else importanceArray(labelIndex) ~ currentImportance.butReadsFlag(flag) + case ZLine(JP | JR, NoRegisters, MemoryAddressConstant(Label(l)), _) => + val labelIndex = getLabelIndex(codeArray, l) + currentImportance = if (labelIndex < 0) finalImportance else importanceArray(labelIndex) case ZLine(DISCARD_HL, _, _, _) => - currentImportance = currentImportance.copy(h = Unimportant, l = Unimportant) + currentImportance = currentImportance.copy(h = Unimportant, l = Unimportant) case ZLine(DISCARD_BCDEIX, _, _, _) => currentImportance = currentImportance.copy(b = Unimportant, c = Unimportant, d = Unimportant, e = Unimportant, ixh = Unimportant, ixl = Unimportant) case ZLine(DISCARD_A, _, _, _) => currentImportance = currentImportance.copy(a = Unimportant) case ZLine(DISCARD_F, _, _, _) => - currentImportance = currentImportance.copy(cf = Unimportant, zf= Unimportant, sf = Unimportant , pf = Unimportant, hf = Unimportant) + currentImportance = currentImportance.copy(cf = Unimportant, zf = Unimportant, sf = Unimportant, pf = Unimportant, hf = Unimportant) case ZLine(LD, TwoRegistersOffset(t, s, o), _, _) => currentImportance = currentImportance.butWritesRegister(t, o).butReadsRegister(s, o) case ZLine(LD | LD_16, TwoRegisters(t, s), _, _) => currentImportance = currentImportance.butWritesRegister(t).butReadsRegister(s) case ZLine(ADD_16, TwoRegisters(t, 
s), _, _) => currentImportance = currentImportance.butReadsRegister(t).butReadsRegister(s) + case ZLine(ADC_16 | SBC_16, TwoRegisters(t, s), _, _) => + currentImportance = currentImportance.butReadsRegister(t).butReadsRegister(s).butReadsFlag(ZFlag.C) case ZLine(XOR, OneRegister(ZRegister.A), _, _) => - currentImportance = currentImportance.butWritesRegister(ZRegister.A) + currentImportance = currentImportance.copy( + a = Unimportant, + cf = Unimportant, + zf = Unimportant, + sf = Unimportant, + hf = Unimportant, + pf = Unimportant + ) case ZLine(OR | AND, OneRegister(ZRegister.A), _, _) => - currentImportance = currentImportance.butReadsRegister(ZRegister.A) + currentImportance = currentImportance.copy( + a = currentImportance.a ~ currentImportance.zf ~ currentImportance.sf ~ currentImportance.pf, // A is left unchanged by OR A / AND A, so a later read of A keeps it important here + cf = Unimportant, + zf = Unimportant, + sf = Unimportant, + hf = Unimportant, + pf = Unimportant + ) case ZLine(ADD | SUB | CP, OneRegister(s), _, _) => currentImportance = currentImportance.butReadsRegister(s).copy( @@ -286,6 +305,15 @@ object ReverseFlowAnalyzer { ) + case ZLine(DAA, _, _, _) => + currentImportance = currentImportance.copy( + a = Important, cf = Important, nf = Important, // DAA adjusts A based on the C, N and H flags + hf = Important + ) + case ZLine(NEG, _, _, _) => + currentImportance = currentImportance.copy( + a = Important + ) case ZLine(INC | DEC | INC_16 | DEC_16, OneRegister(s), _, _) => currentImportance = currentImportance.butReadsRegister(s) case ZLine(INC | DEC | INC_16 | DEC_16, OneRegisterOffset(s, o), _, _) => @@ -294,8 +322,27 @@ object ReverseFlowAnalyzer { currentImportance = currentImportance.butWritesRegister(r) case ZLine(PUSH, OneRegister(r), _, _) => currentImportance = currentImportance.butReadsRegister(r) - case ZLine(CALL, NoRegisters, _, _) => - currentImportance = finalImportance.copy(memIx = currentImportance.memIx) + case ZLine(CALL, NoRegisters, MemoryAddressConstant(fun: FunctionInMemory), _) => + fun.params match { + case NormalParamSignature(_) | AssemblyParamSignature(Nil) => + currentImportance = currentImportance.copy( // assign the result; a bare copy(...) is discarded and this branch would do nothing + a = 
Unimportant, + b = Unimportant, + c = Unimportant, + d = Unimportant, + e = Unimportant, + hlNumeric = Unimportant, + iyh = Unimportant, + iyl = Unimportant, + zf = Unimportant, + cf = Unimportant, + nf = Unimportant, + sf = Unimportant, + hf = Unimportant + ) + case _ => + currentImportance = finalImportance.copy(memIx = currentImportance.memIx) + } case ZLine(SLA | SRL, OneRegister(r), _, _) => currentImportance = currentImportance.butReadsRegister(r).butWritesFlag(ZFlag.C).butWritesFlag(ZFlag.Z) diff --git a/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala b/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala index 9a65e7ca..6c96b6fb 100644 --- a/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala +++ b/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala @@ -521,6 +521,11 @@ case class MatchParameter(i: Int) extends AssemblyLinePattern { } } +case class MatchParameterOrNothing(i: Int) extends AssemblyLinePattern { + override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: ZLine): Boolean = + ctx.addObject(i, line.parameter.quickSimplify) +} + case class MatchJumpTarget(i: Int) extends AssemblyLinePattern { override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: ZLine): Boolean = line.registers match { @@ -573,6 +578,17 @@ case object DoesntMatterWhatItDoesWithFlags extends AssemblyLinePattern { override def toString: String = "[¯\\_(ツ)_/¯:F]" } +case object DoesntMatterWhatItDoesWithFlagsExceptCarry extends AssemblyLinePattern { + + override def validate(needsFlowInfo: FlowInfoRequirement.Value): Unit = + FlowInfoRequirement.assertBackward(needsFlowInfo) + + override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: ZLine): Boolean = + ZFlag.values.forall(r => r == ZFlag.C || flowInfo.importanceAfter.getFlag(r) != Important) + + override def toString: String = "[¯\\_(ツ)_/¯:F\\C]" +} + case 
class HasSet(flag: ZFlag.Value) extends AssemblyLinePattern { override def validate(needsFlowInfo: FlowInfoRequirement.Value): Unit = FlowInfoRequirement.assertForward(needsFlowInfo) diff --git a/src/main/scala/millfork/assembly/z80/opt/VariableStatus.scala b/src/main/scala/millfork/assembly/z80/opt/VariableStatus.scala new file mode 100644 index 00000000..f84ce605 --- /dev/null +++ b/src/main/scala/millfork/assembly/z80/opt/VariableStatus.scala @@ -0,0 +1,91 @@ +package millfork.assembly.z80.opt + +import millfork.assembly.OptimizationContext +import millfork.assembly.opt.SingleStatus +import millfork.assembly.z80.{OneRegister, TwoRegisters, ZLine} +import millfork.env._ +import millfork.node.ZRegister + +/** + * @author Karol Stasiak + */ + +class VariableStatus(val paramVariables: Set[String], + val stillUsedVariables: Set[String], + val variablesWithAddressesTaken: Set[String], + val variablesWithRegisterHint: Set[String], + val localVariables: List[Variable], + val variablesWithLifetimes: List[(Variable, Range)], + val variablesWithLifetimesMap: Map[String, Range], + val codeWithFlow: List[(FlowInfo, ZLine)]) + +object VariableStatus { + def apply(f: NormalFunction, code: List[ZLine], optimizationContext: OptimizationContext, typFilter: Type => Boolean): Option[VariableStatus] = { + val flow = FlowAnalyzer.analyze(f, code, optimizationContext.options, FlowInfoRequirement.BothFlows) + import millfork.node.ZRegister._ + val paramVariables = f.params match { + // case NormalParamSignature(List(MemoryVariable(_, typ, _))) if typ.size == 1 => + // Set[String]() + case NormalParamSignature(ps) => + ps.map(_.name).toSet + case _ => + // assembly functions do not get this optimization + return None + } + val stillUsedVariables = code.flatMap { + case ZLine(_, TwoRegisters(MEM_ABS_8 | MEM_ABS_16, _), MemoryAddressConstant(th), _) => Some(th.name) + case ZLine(_, TwoRegisters(_, MEM_ABS_8 | MEM_ABS_16), MemoryAddressConstant(th), _) => Some(th.name) + case ZLine(_, 
TwoRegisters(_, IMM_16), MemoryAddressConstant(th), _) => Some(th.name) + case ZLine(_, TwoRegisters(MEM_ABS_8 | MEM_ABS_16, _), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), _) => Some(th.name) + case ZLine(_, TwoRegisters(_, MEM_ABS_8 | MEM_ABS_16), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), _) => Some(th.name) + case ZLine(_, TwoRegisters(_, IMM_16), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), _) => Some(th.name) + case _ => None + }.toSet + val variablesWithAddressesTaken = code.zipWithIndex.flatMap { + case (ZLine(_, _, SubbyteConstant(MemoryAddressConstant(th), _), _), _) => + Some(th.name) + case (ZLine(_, _, SubbyteConstant(CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _)), _), _), _) => + Some(th.name) + case (ZLine(_, + TwoRegisters(ZRegister.MEM_HL, _) | TwoRegisters(_, ZRegister.MEM_HL) | OneRegister(ZRegister.MEM_HL), + _, _), i) => + flow(i)._1.statusBefore.hl match { + case SingleStatus(MemoryAddressConstant(th)) => + if (flow(i)._1.importanceAfter.hlNumeric != Unimportant) Some(th.name) + else None + case SingleStatus(CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(_, _))) => + if (flow(i)._1.importanceAfter.hlNumeric != Unimportant) Some(th.name) + else None + case _ => None // TODO: ??? 
+ } + case _ => None + }.toSet + val allLocalVariables = f.environment.getAllLocalVariables + val localVariables = allLocalVariables.filter { + case MemoryVariable(name, typ, VariableAllocationMethod.Auto | VariableAllocationMethod.Zeropage) => + typFilter(typ) && !paramVariables(name) && stillUsedVariables(name) && !variablesWithAddressesTaken(name) + case _ => false + } + val variablesWithRegisterHint = f.environment.getAllLocalVariables.filter { + case MemoryVariable(name, typ, VariableAllocationMethod.Register) => + typFilter(typ) && (typ.size == 1 || typ.size == 2) && !paramVariables(name) && stillUsedVariables(name) && !variablesWithAddressesTaken(name) + case _ => false + }.map(_.name).toSet + val variablesWithLifetimes = localVariables.map(v => + v -> VariableLifetime.apply(v.name, flow) + ) + val variablesWithLifetimesMap = variablesWithLifetimes.map { + case (v, lt) => v.name -> lt + }.toMap + Some(new VariableStatus( + paramVariables, + stillUsedVariables, + variablesWithAddressesTaken, + variablesWithRegisterHint, + localVariables, + variablesWithLifetimes, + variablesWithLifetimesMap, + flow)) + } + +} \ No newline at end of file diff --git a/src/main/scala/millfork/assembly/z80/opt/WordVariableToRegisterOptimization.scala b/src/main/scala/millfork/assembly/z80/opt/WordVariableToRegisterOptimization.scala new file mode 100644 index 00000000..77099d9a --- /dev/null +++ b/src/main/scala/millfork/assembly/z80/opt/WordVariableToRegisterOptimization.scala @@ -0,0 +1,398 @@ +package millfork.assembly.z80.opt + +import millfork.{CompilationFlag, NonOverlappingIntervals} +import millfork.assembly.{AssemblyOptimization, OptimizationContext} +import millfork.assembly.z80.{TwoRegisters, ZFlag, ZLine} +import millfork.env._ +import millfork.error.ErrorReporting +import millfork.node.ZRegister + +import scala.collection.mutable.ListBuffer + +/** + * @author Karol Stasiak + */ +object WordVariableToRegisterOptimization extends AssemblyOptimization[ZLine] { + + 
override def name = "Allocating variables to register pairs" + + object CyclesAndBytes { + val Zero = CyclesAndBytes(0, 0) + } + + case class CyclesAndBytes(cycles: Int, bytes: Int) { // field order follows the positional (cycles, bytes) order used by every call site + def +(that: CyclesAndBytes) = CyclesAndBytes(this.cycles + that.cycles, this.bytes + that.bytes) + } + + override def optimize(f: NormalFunction, code: List[ZLine], optimizationContext: OptimizationContext): List[ZLine] = { + val vs = VariableStatus(f, code, optimizationContext, _.size == 2).getOrElse(return code) + val options = optimizationContext.options + val removeVariablesForReal = !options.flag(CompilationFlag.InternalCurrentlyOptimizingForMeasurement) + val costFunction: CyclesAndBytes => Int = if (options.flag(CompilationFlag.OptimizeForSpeed)) _.cycles else _.bytes + + val hlCandidates = vs.variablesWithLifetimes.filter { + case (v, range) => + val tuple = vs.codeWithFlow(range.start) + tuple._1.importanceAfter.h != Important && + tuple._1.importanceAfter.l != Important || { +// println(s"Cannot inline ${v.name} to HL because of early $tuple") + false + } + }.flatMap { + case (v, range) => + canBeInlined(v.name, synced = false, ZRegister.HL, vs.codeWithFlow.slice(range.start, range.end)).map { score => + (v.name, range, if (vs.variablesWithRegisterHint(v.name)) score + CyclesAndBytes(16, 16) else score) + } + } + + val bcCandidates = vs.variablesWithLifetimes.filter { + case (v, range) => + val tuple = vs.codeWithFlow(range.start) + tuple._1.importanceAfter.b != Important && + tuple._1.importanceAfter.c != Important || { +// println(s"Cannot inline ${v.name} to BC because of early $tuple") + false + } + }.flatMap { + case (v, range) => + canBeInlined(v.name, synced = false, ZRegister.BC, vs.codeWithFlow.slice(range.start, range.end)).map { score => + (v.name, range, if (vs.variablesWithRegisterHint(v.name)) score + CyclesAndBytes(16, 16) else score) + } + } + + val deCandidates = vs.variablesWithLifetimes.filter { + case (v, range) => + val tuple =
vs.codeWithFlow(range.start) + tuple._1.importanceAfter.d != Important && + tuple._1.importanceAfter.e != Important || { +// println(s"Cannot inline ${v.name} to DE because of early $tuple") + false + } + }.flatMap { + case (v, range) => + canBeInlined(v.name, synced = false, ZRegister.DE, vs.codeWithFlow.slice(range.start, range.end)).map { score => + (v.name, range, if (vs.variablesWithRegisterHint(v.name)) score + CyclesAndBytes(16, 16) else score) + } + } + + val hlCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](hlCandidates, _._2.start, _._2.end) + val bcCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](bcCandidates, _._2.start, _._2.end) + val deCandidateSets = NonOverlappingIntervals.apply[(String, Range, CyclesAndBytes)](deCandidates, _._2.start, _._2.end) + + val variants = for { + vhl <- if (options.flag(CompilationFlag.SingleThreaded)) hlCandidateSets else hlCandidateSets.par + nhl = vhl.map(_._1) + + vbc <- bcCandidateSets + nbc = vbc.map(_._1) + if (nhl & nbc).isEmpty + + vde <- deCandidateSets + nde = vde.map(_._1) + if (nhl & nde).isEmpty + if (nbc & nde).isEmpty // BC and DE sets must not share a variable either + + + score = vhl.toSeq.map(x => costFunction(x._3)).sum + + vbc.toSeq.map(x => costFunction(x._3)).sum + + vde.toSeq.map(x => costFunction(x._3)).sum + } yield (score, vhl, vbc, vde) + + if (variants.isEmpty) { + return code + } + + // variants.foreach(println) + + val (_, bestHLs, bestBCs, bestDEs) = variants.maxBy(_._1) + + def reportOptimizedBlock[T](oldCode: List[(T, ZLine)], newCode: List[ZLine]): Unit = { + oldCode.foreach(l => ErrorReporting.trace(l._2.toString)) + ErrorReporting.trace(" ↓") + newCode.foreach(l => ErrorReporting.trace(l.toString)) + } + + if (bestHLs.nonEmpty || bestBCs.nonEmpty || bestDEs.nonEmpty) { + val output = ListBuffer[ZLine]() + var i = 0 + while (i < code.length) { + var done = false + bestHLs.find(_._2.start == i).foreach { + case (v, range, _) => + ErrorReporting.debug(s"Inlining $v to register
HL") + val oldCode = vs.codeWithFlow.slice(range.start, range.end) + val newCode = inlineVars(v, "", "", oldCode.map(_._2)) + reportOptimizedBlock(oldCode, newCode) + output ++= newCode + i = range.end + if (removeVariablesForReal && contains(range, vs.variablesWithLifetimesMap(v))) { + f.environment.removeVariable(v) + } + done = true + } + if (!done) { + bestBCs.find(_._2.start == i).foreach { + case (v, range, _) => + ErrorReporting.debug(s"Inlining $v to register BC") + val oldCode = vs.codeWithFlow.slice(range.start, range.end) + val newCode = inlineVars("", v, "", oldCode.map(_._2)) + reportOptimizedBlock(oldCode, newCode) + output ++= newCode + i = range.end + if (removeVariablesForReal && contains(range, vs.variablesWithLifetimesMap(v))) { + f.environment.removeVariable(v) + } + done = true + } + } + if (!done) { + bestDEs.find(_._2.start == i).foreach { + case (v, range, _) => + ErrorReporting.debug(s"Inlining $v to register DE") + val oldCode = vs.codeWithFlow.slice(range.start, range.end) + val newCode = inlineVars("", "", v, oldCode.map(_._2)) + reportOptimizedBlock(oldCode, newCode) + output ++= newCode + i = range.end + if (removeVariablesForReal && contains(range, vs.variablesWithLifetimesMap(v))) { + f.environment.removeVariable(v) + } + done = true + } + } + if (!done) { + output += code(i) + i += 1 + } + } + output.toList + } else { + code + } + } + + def contains(outer: Range, inner: Range): Boolean = { + outer.contains(inner.start) && outer.contains(inner.end - 1) + } + + import millfork.assembly.z80.ZOpcode._ + import millfork.node.ZRegister._ + + def add(first: Boolean, ifTrue: CyclesAndBytes, ifFalse: CyclesAndBytes): CyclesAndBytes=>CyclesAndBytes = { c => + if (first) c + ifTrue else c + ifFalse + } + def add(value: CyclesAndBytes): CyclesAndBytes=>CyclesAndBytes = _ + value + + def canBeInlined(vname: String, synced: Boolean, target: ZRegister.Value, code: List[(FlowInfo, ZLine)]): Option[CyclesAndBytes] = { + def fail(reason: Int): 
None.type = { +// println(s"Cannot inline $vname to $target because of [[$reason]] ${code.head}") + None + } + code match { + + case (_, ZLine(LD_16, TwoRegisters(HL, MEM_ABS_16), MemoryAddressConstant(th1), true)) :: + (_, ZLine(ADD_16, TwoRegisters(HL, BC | DE), _, _)) :: + (i, ZLine(LD_16, TwoRegisters(MEM_ABS_16, HL), MemoryAddressConstant(th2), true)) :: + xs if target == HL && th1.name != vname && th2.name != vname && + i.importanceAfter.getFlag(ZFlag.Z) != Important && + i.importanceAfter.getFlag(ZFlag.H) != Important && + i.importanceAfter.getFlag(ZFlag.P) != Important && + i.importanceAfter.getRegister(A) != Important && + i.importanceAfter.getRegister(HL) != Important && + i.importanceAfter.getFlag(ZFlag.Z) != Important => + // bytes before: 3 + 1 + 3 = 7 + // cycles before: 16 + 11 + 16 = 43 + // bytes after: 3 + 1 + 3 + 3 + 1 + 3 = 14 + // cycles after: 13 + 4 + 13 + 13 + 4 + 13 = 60 + canBeInlined(vname, synced = true, target, xs).map(add(CyclesAndBytes(-17, -7))) + + case (_, ZLine(LD_16, TwoRegisters(t, _), _, true)) :: + (_, ZLine(LD_16, TwoRegisters(HL, MEM_ABS_16), MemoryAddressConstant(th), true)) :: + (i, ZLine(ADD_16, TwoRegisters(HL, t2), _, _)) :: + xs if th.name == vname && t != HL && t == t2 && i.importanceAfter.getRegister(t) == Unimportant => + // LD PP ; LD HL,(qq) ; ADD HL,PP → LD H,P ; LD L,P ; ADD HL,QQ + canBeInlined(vname, synced = true, target, xs).map(add(target == t, CyclesAndBytes(16, 3), CyclesAndBytes(8, 1))) + +// case (_, ZLine(LD_16, TwoRegisters(t, _), _, true)) :: +// (_, ZLine(LD_16, TwoRegisters(HL, MEM_ABS_16), MemoryAddressConstant(th), true)) :: +// (i, ZLine(ADD_16, TwoRegisters(HL, t2), _, _)) :: +// xs if th.name == vname && t == target && t != HL && t == t2 && i.importanceAfter.getRegister(t) == Unimportant => +// canBeInlined(vname, synced = true, target, xs).map(add(CyclesAndBytes(16, 3))) + + case (_, ZLine(LD_16, TwoRegisters(MEM_ABS_16, HL), MemoryAddressConstant(th), true)) :: + (_, ZLine(LD_16, 
TwoRegisters(t, _), _, true)) :: + (i, ZLine(ADD_16, TwoRegisters(HL, t2), _, _)) :: + xs if th.name == vname && t != HL && t == t2 && i.importanceAfter.getRegister(t) == Unimportant => // only a store to the inlined variable qualifies + canBeInlined(vname, synced = true, target, xs).map(add(target == t, CyclesAndBytes(16, 3), CyclesAndBytes(8, 1))) + + case (_, ZLine(LD_16, TwoRegisters(MEM_ABS_16, HL), MemoryAddressConstant(th), true)) :: + (_, ZLine(LD_16, TwoRegisters(t, _), _, true)) :: + (i, ZLine(ADD_16, TwoRegisters(HL, t2), _, _)) :: + xs if th.name == vname && t == target && t != HL && t == t2 && i.importanceAfter.getRegister(t) == Unimportant => + canBeInlined(vname, synced = true, target, xs).map(add(CyclesAndBytes(16, 3))) + + case (_, ZLine(LD_16, TwoRegisters(HL, MEM_ABS_16), MemoryAddressConstant(th), true)) :: xs if th.name == vname => + canBeInlined(vname, synced = true, target, xs).map(add(target == HL, CyclesAndBytes(16, 3), CyclesAndBytes(8, 1))) + case (_, ZLine(LD_16, TwoRegisters(MEM_ABS_16, HL), MemoryAddressConstant(th), true)) :: xs if th.name == vname => + canBeInlined(vname, synced = true, target, xs).map(add(target == HL, CyclesAndBytes(16, 3), CyclesAndBytes(8, 1))) + case (_, ZLine(LD_16, TwoRegisters(DE, MEM_ABS_16), MemoryAddressConstant(th), true)) :: xs if th.name == vname => + canBeInlined(vname, synced = true, target, xs).map(add(target == DE, CyclesAndBytes(16, 3), CyclesAndBytes(8, 1))) + case (_, ZLine(LD_16, TwoRegisters(MEM_ABS_16, DE), MemoryAddressConstant(th), true)) :: xs if th.name == vname => + canBeInlined(vname, synced = true, target, xs).map(add(target == DE, CyclesAndBytes(16, 3), CyclesAndBytes(8, 1))) + case (_, ZLine(LD_16, TwoRegisters(BC, MEM_ABS_16), MemoryAddressConstant(th), true)) :: xs if th.name == vname => + canBeInlined(vname, synced = true, target, xs).map(add(target == BC, CyclesAndBytes(16, 3), CyclesAndBytes(8, 1))) + case (_, ZLine(LD_16, TwoRegisters(MEM_ABS_16, BC), MemoryAddressConstant(th), true)) :: xs if th.name == vname => + canBeInlined(vname, synced = true, target,
xs).map(add(target == BC, CyclesAndBytes(16, 3), CyclesAndBytes(8, 1))) + + case (_, x) :: (_, ZLine(LD_16, TwoRegisters(MEM_ABS_16, t), MemoryAddressConstant(th), true)) :: xs + if th.name == vname && t == target && x.changesRegister(t) => + canBeInlined(vname, synced = true, target, xs).map(add(CyclesAndBytes(16, 3))) + + + case (_, ZLine(LD, TwoRegisters(A, MEM_ABS_8), MemoryAddressConstant(th), true)) :: xs if th.name == vname => + canBeInlined(vname, synced, target, xs).map(add(CyclesAndBytes(9, 2))) + case (_, ZLine(LD, TwoRegisters(MEM_ABS_8, A), MemoryAddressConstant(th), true)) :: xs if th.name == vname => + canBeInlined(vname, synced, target, xs).map(add(CyclesAndBytes(9, 2))) + + case (_, ZLine(LD, TwoRegisters(A, MEM_ABS_8), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(1,_)), true)) :: xs if th.name == vname => + canBeInlined(vname, synced, target, xs).map(add(CyclesAndBytes(9, 2))) + case (_, ZLine(LD, TwoRegisters(MEM_ABS_8, A), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(1, _)), true)) :: xs if th.name == vname => + canBeInlined(vname, synced, target, xs).map(add(CyclesAndBytes(9, 2))) + + case (_, ZLine(_, _, MemoryAddressConstant(th), _)) :: _ if th.name == vname => fail(4) + case (_, ZLine(_, _, CompoundConstant(_, MemoryAddressConstant(th), _), _)) :: _ if th.name == vname => fail(5) + case (_, ZLine(_, _, SubbyteConstant(MemoryAddressConstant(th), _), _)) :: _ if th.name == vname => fail(6) + case (_, x) :: xs if x.changesRegister(target) => fail(1) + case (_, x) :: xs if x.readsRegister(target) && !synced => fail(2) + case (_, ZLine(LABEL, _, _, _)) :: xs => canBeInlined(vname, synced = false, target, xs) + case (_, ZLine(CALL, _, _, _)) :: xs => fail(3) + case _ :: xs => canBeInlined(vname, synced, target, xs) + case _ => Some(CyclesAndBytes.Zero) + } + } + + def inlineVars(hl: String, bc: String, de: String, code: List[ZLine]): List[ZLine] = { + if (code.nonEmpty) 
ErrorReporting.trace(code.head.toString) + code match { + + case ZLine(LD_16, TwoRegisters(HL, MEM_ABS_16), a1@MemoryAddressConstant(th1), true) :: + ZLine(ADD_16, TwoRegisters(HL, reg@(DE | BC)), _, _) :: + ZLine(LD_16, TwoRegisters(MEM_ABS_16, HL), a2@MemoryAddressConstant(th2), true) :: + xs if hl != "" && th1.name != hl && th2.name != hl => + // bytes before: 3 + 1 + 3 = 7 + // cycles before: 16 + 11 + 16 = 43 + // bytes after: 3 + 1 + 3 + 3 + 1 + 3 = 14 + // cycles after: 13 + 4 + 13 + 13 + 4 + 13 = 60 + val (h,l) = reg match { + case BC => (B,C) + case DE => (D, E) + } + ZLine.ldAbs8(A, a1) :: + ZLine.register(ADD, l) :: + ZLine.ldAbs8(a2, A) :: + ZLine.ldAbs8(A, a1 + 1) :: + ZLine.register(ADC, h) :: + ZLine.ldAbs8(a2 + 1, A) :: + inlineVars(hl, bc, de, xs) + + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, HL), MemoryAddressConstant(th), _) :: + (loadConst@ZLine(LD_16, TwoRegisters(BC, constSource), _, _)) :: + (add@ZLine(ADD_16, TwoRegisters(HL, BC), _, _)) :: xs if th.name == bc => + ZLine.ld8(B, H) :: ZLine.ld8(C, L) :: + loadConst.copy(registers = TwoRegisters(HL, constSource)) :: + add.copy(registers = TwoRegisters(HL, BC)) :: inlineVars(hl, bc, de, xs) + + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, HL), MemoryAddressConstant(th), _) :: + (loadConst@ZLine(LD_16, TwoRegisters(DE, constSource), _, _)) :: + (add@ZLine(ADD_16, TwoRegisters(HL, DE), _, _)) :: xs if th.name == de => + ZLine.ld8(D, H) :: ZLine.ld8(E, L) :: + loadConst.copy(registers = TwoRegisters(HL, constSource)) :: + add.copy(registers = TwoRegisters(HL, DE)) :: inlineVars(hl, bc, de, xs) + // TODO: above with regs swapped + + case (loadConst@ZLine(LD_16, TwoRegisters(t, constSource), _, _)) :: + ZLine(LD_16, TwoRegisters(HL, MEM_ABS_16), MemoryAddressConstant(th), _) :: + (add@ZLine(ADD_16, TwoRegisters(HL, t2), _, _)) :: xs if th.name == bc && t == t2 && t != HL => + loadConst.copy(registers = TwoRegisters(HL, constSource)) :: + add.copy(registers = TwoRegisters(HL, BC)) :: inlineVars(hl, bc, de, xs) + + case
(loadConst@ZLine(LD_16, TwoRegisters(t, constSource),_,_)) :: + ZLine(LD_16, TwoRegisters(HL, MEM_ABS_16), MemoryAddressConstant(th), _) :: + (add@ZLine(ADD_16, TwoRegisters(HL, t2), _, _)) :: xs if th.name == de && t == t2 && t != HL => + loadConst.copy(registers = TwoRegisters(HL, constSource)) :: + add.copy(registers = TwoRegisters(HL, DE)) :: inlineVars(hl, bc, de, xs) + + case ZLine(LD_16, TwoRegisters(HL, MEM_ABS_16), MemoryAddressConstant(th), _) :: xs if th.name == hl => + inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, HL), MemoryAddressConstant(th), _) :: xs if th.name == hl => + inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(HL, MEM_ABS_16), MemoryAddressConstant(th), _) :: xs if th.name == bc => + ZLine.ld8(H, B) :: ZLine.ld8(L, C) :: inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, HL), MemoryAddressConstant(th), _) :: xs if th.name == bc => + ZLine.ld8(B, H) :: ZLine.ld8(C, L) :: inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(HL, MEM_ABS_16), MemoryAddressConstant(th), _) :: xs if th.name == de => + ZLine.ld8(H, D) :: ZLine.ld8(L, E) :: inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, HL), MemoryAddressConstant(th), _) :: xs if th.name == de => + ZLine.ld8(D, H) :: ZLine.ld8(E, L) :: inlineVars(hl, bc, de, xs) + + case ZLine(LD_16, TwoRegisters(DE, MEM_ABS_16), MemoryAddressConstant(th), _) :: xs if th.name == de => + inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, DE), MemoryAddressConstant(th), _) :: xs if th.name == de => + inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(DE, MEM_ABS_16), MemoryAddressConstant(th), _) :: xs if th.name == bc => + ZLine.ld8(D, B) :: ZLine.ld8(E, C) :: inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, DE), MemoryAddressConstant(th), _) :: xs if th.name == bc => + ZLine.ld8(B, D) :: ZLine.ld8(C, E) :: inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(DE, 
MEM_ABS_16), MemoryAddressConstant(th), _) :: xs if th.name == hl => + ZLine.ld8(D, H) :: ZLine.ld8(E, L) :: inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, DE), MemoryAddressConstant(th), _) :: xs if th.name == hl => + ZLine.ld8(H, D) :: ZLine.ld8(L, E) :: inlineVars(hl, bc, de, xs) + + case ZLine(LD_16, TwoRegisters(BC, MEM_ABS_16), MemoryAddressConstant(th), _) :: xs if th.name == bc => + inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, BC), MemoryAddressConstant(th), _) :: xs if th.name == bc => + inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(BC, MEM_ABS_16), MemoryAddressConstant(th), _) :: xs if th.name == hl => + ZLine.ld8(B, H) :: ZLine.ld8(C, L) :: inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, BC), MemoryAddressConstant(th), _) :: xs if th.name == hl => + ZLine.ld8(H, B) :: ZLine.ld8(L, C) :: inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(BC, MEM_ABS_16), MemoryAddressConstant(th), _) :: xs if th.name == de => + ZLine.ld8(B, D) :: ZLine.ld8(C, E) :: inlineVars(hl, bc, de, xs) + case ZLine(LD_16, TwoRegisters(MEM_ABS_16, BC), MemoryAddressConstant(th), _) :: xs if th.name == de => + ZLine.ld8(D, B) :: ZLine.ld8(E, C) :: inlineVars(hl, bc, de, xs) + + case ZLine(LD, TwoRegisters(A, MEM_ABS_8), MemoryAddressConstant(th), _) :: xs if th.name == hl => + ZLine.ld8(A, L) :: inlineVars(hl, bc, de, xs) + case ZLine(LD, TwoRegisters(MEM_ABS_8, A), MemoryAddressConstant(th), _) :: xs if th.name == hl => + ZLine.ld8(L, A) :: inlineVars(hl, bc, de, xs) + case ZLine(LD, TwoRegisters(A, MEM_ABS_8), MemoryAddressConstant(th), _) :: xs if th.name == bc => + ZLine.ld8(A, C) :: inlineVars(hl, bc, de, xs) + case ZLine(LD, TwoRegisters(MEM_ABS_8, A), MemoryAddressConstant(th), _) :: xs if th.name == bc => + ZLine.ld8(C, A) :: inlineVars(hl, bc, de, xs) + case ZLine(LD, TwoRegisters(A, MEM_ABS_8), MemoryAddressConstant(th), _) :: xs if th.name == de => + ZLine.ld8(A, E) :: 
inlineVars(hl, bc, de, xs) + case ZLine(LD, TwoRegisters(MEM_ABS_8, A), MemoryAddressConstant(th), _) :: xs if th.name == de => + ZLine.ld8(E, A) :: inlineVars(hl, bc, de, xs) + + case ZLine(LD, TwoRegisters(A, MEM_ABS_8), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(1,_)), _) :: xs if th.name == hl => + ZLine.ld8(A, H) :: inlineVars(hl, bc, de, xs) + case ZLine(LD, TwoRegisters(MEM_ABS_8, A), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(1,_)), _) :: xs if th.name == hl => + ZLine.ld8(H, A) :: inlineVars(hl, bc, de, xs) + case ZLine(LD, TwoRegisters(A, MEM_ABS_8), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(1,_)), _) :: xs if th.name == bc => + ZLine.ld8(A, B) :: inlineVars(hl, bc, de, xs) + case ZLine(LD, TwoRegisters(MEM_ABS_8, A), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(1,_)), _) :: xs if th.name == bc => + ZLine.ld8(B, A) :: inlineVars(hl, bc, de, xs) + case ZLine(LD, TwoRegisters(A, MEM_ABS_8), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(1,_)), _) :: xs if th.name == de => + ZLine.ld8(A, D) :: inlineVars(hl, bc, de, xs) + case ZLine(LD, TwoRegisters(MEM_ABS_8, A), CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(1,_)), _) :: xs if th.name == de => + ZLine.ld8(D, A) :: inlineVars(hl, bc, de, xs) + + case x :: _ if bc != "" && x.changesRegister(BC) => ??? + case x :: _ if de != "" && x.changesRegister(DE) => ??? + case x :: _ if hl != "" && x.changesRegister(HL) => ??? 
+ + case x :: xs => x :: inlineVars(hl, bc, de, xs) + case Nil => Nil + } + } +} diff --git a/src/main/scala/millfork/assembly/z80/opt/Z80OptimizationPresets.scala b/src/main/scala/millfork/assembly/z80/opt/Z80OptimizationPresets.scala index dae8a290..df0e25b6 100644 --- a/src/main/scala/millfork/assembly/z80/opt/Z80OptimizationPresets.scala +++ b/src/main/scala/millfork/assembly/z80/opt/Z80OptimizationPresets.scala @@ -7,6 +7,17 @@ import millfork.assembly.z80.ZLine * @author Karol Stasiak */ object Z80OptimizationPresets { - val Good: List[AssemblyOptimization[ZLine]] = List.tabulate(15)(_ => AlwaysGoodZ80Optimizations.All).flatten + + val Good: List[AssemblyOptimization[ZLine]] = { + List.fill(5)( + List.fill(5)( + AlwaysGoodZ80Optimizations.All ++ + List( + EmptyParameterStoreRemoval, + EmptyMemoryStoreRemoval) + ).flatten ++ + List(WordVariableToRegisterOptimization) + ).flatten + } }