From dbe8e39e4a9cbea1b7c913eefc7ac3a8cb65aabf Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Thu, 13 Dec 2018 23:18:56 +0100 Subject: [PATCH] various optimization fixes and improvements --- .../scala/millfork/OptimizationPresets.scala | 3 + .../mos/opt/AlwaysGoodOptimizations.scala | 25 +++ .../assembly/mos/opt/CoarseFlowAnalyzer.scala | 10 +- .../millfork/assembly/mos/opt/CpuStatus.scala | 2 +- .../ReverseFlowAnalyzerPerImpliedOpcode.scala | 3 +- .../opt/RuleBasedAssemblyOptimization.scala | 2 +- .../opt/ZeropageRegisterOptimizations.scala | 55 +++++- .../z80/opt/AlwaysGoodI80Optimizations.scala | 162 ++++++++++++++++-- .../assembly/z80/opt/CoarseFlowAnalyzer.scala | 12 +- .../z80/opt/ReverseFlowAnalyzer.scala | 4 +- .../opt/RuleBasedAssemblyOptimization.scala | 10 ++ src/main/scala/millfork/node/CallGraph.scala | 8 + src/main/scala/millfork/node/Node.scala | 1 + .../millfork/output/AbstractAssembler.scala | 13 +- .../output/AbstractInliningCalculator.scala | 5 +- .../scala/millfork/output/MosAssembler.scala | 9 +- .../output/MosInliningCalculator.scala | 36 +++- .../output/Z80InliningCalculator.scala | 4 +- .../scala/millfork/test/InliningSuite.scala | 47 +++++ 19 files changed, 359 insertions(+), 52 deletions(-) create mode 100644 src/test/scala/millfork/test/InliningSuite.scala diff --git a/src/main/scala/millfork/OptimizationPresets.scala b/src/main/scala/millfork/OptimizationPresets.scala index c9c95283..19825170 100644 --- a/src/main/scala/millfork/OptimizationPresets.scala +++ b/src/main/scala/millfork/OptimizationPresets.scala @@ -118,6 +118,7 @@ object OptimizationPresets { AlwaysGoodOptimizations.AlwaysTakenJumpRemoval, AlwaysGoodOptimizations.UnusedLabelRemoval, + AlwaysGoodOptimizations.ConstantInlinedShifting, LaterOptimizations.LoadingAfterShifting, AlwaysGoodOptimizations.PointlessAccumulatorShifting, EmptyMemoryStoreRemoval, @@ -130,6 +131,7 @@ object OptimizationPresets { AlwaysGoodOptimizations.UnusedCodeRemoval, AlwaysGoodOptimizations.ReverseFlowAnalysis, AlwaysGoodOptimizations.ModificationOfJustWrittenValue, + AlwaysGoodOptimizations.ConstantInlinedShifting, AlwaysGoodOptimizations.ShiftingJustWrittenValue, AlwaysGoodOptimizations.PointlessAccumulatorShifting, AlwaysGoodOptimizations.ReverseFlowAnalysis, @@ -175,6 +177,7 @@ object OptimizationPresets { AlwaysGoodOptimizations.BranchInPlaceRemoval, AlwaysGoodOptimizations.CarryFlagConversion, DangerousOptimizations.ConstantIndexOffsetPropagation, + AlwaysGoodOptimizations.ConstantInlinedShifting, AlwaysGoodOptimizations.CommonBranchBodyOptimization, AlwaysGoodOptimizations.CommonExpressionInConditional, AlwaysGoodOptimizations.CommonIndexSubexpressionElimination, diff --git a/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala b/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala index 75da9f38..8d741ad8 100644 --- a/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala +++ b/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala @@ -2483,4 +2483,29 @@ object AlwaysGoodOptimizations { AssemblyLine.immediate(ORA, 1) :: code.init }, ) + + val ConstantInlinedShifting = new RuleBasedAssemblyOptimization("Constant inlined shifting", + needsFlowInfo = FlowInfoRequirement.BothFlows, + + // TODO: set limits on the loop iteration to avoid huge unrolled code + + (Elidable & HasOpcode(LABEL) & MatchX(1) & MatchParameter(2)) ~ + (Elidable & HasOpcodeIn(ASL, LSR, ROL, ROR, DEC, INC) & Not(ConcernsX)).*.capture(5) ~ + (Elidable & HasOpcode(DEX)) ~ + 
(Elidable & HasOpcode(BNE) & MatchParameter(2)) ~~> { (code, ctx) => + val iters = ctx.get[Int](1) + val shift = ctx.get[List[AssemblyLine]](5) + List.fill(iters)(shift).flatten :+ AssemblyLine.immediate(LDX, 0) + }, + + (Elidable & HasOpcode(LABEL) & MatchY(1) & MatchParameter(2))~ + (Elidable & HasOpcodeIn(ASL, LSR, ROL, ROR, DEC, INC) & Not(ConcernsY)).*.capture(5) ~ + (Elidable & HasOpcode(DEY)) ~ + (Elidable & HasOpcode(BNE) & MatchParameter(2)) ~~> { (code, ctx) => + val iters = ctx.get[Int](1) + val shift = ctx.get[List[AssemblyLine]](5) + List.fill(iters)(shift).flatten :+ AssemblyLine.immediate(LDY, 0) + }, + + ) } diff --git a/src/main/scala/millfork/assembly/mos/opt/CoarseFlowAnalyzer.scala b/src/main/scala/millfork/assembly/mos/opt/CoarseFlowAnalyzer.scala index ac3d1423..5e19575e 100644 --- a/src/main/scala/millfork/assembly/mos/opt/CoarseFlowAnalyzer.scala +++ b/src/main/scala/millfork/assembly/mos/opt/CoarseFlowAnalyzer.scala @@ -5,7 +5,7 @@ import millfork.{CompilationFlag, CompilationOptions} import millfork.assembly.mos.AssemblyLine import millfork.assembly.mos.OpcodeClasses import millfork.assembly.opt.AnyStatus -import millfork.env.{Label, MemoryAddressConstant, NormalFunction, NumericConstant} +import millfork.env._ /** * @author Karol Stasiak @@ -72,6 +72,14 @@ object CoarseFlowAnalyzer { case AssemblyLine(op, Immediate | WordImmediate, NumericConstant(nn, _), _) if FlowAnalyzerForImmediate.hasDefinition(op) => currentStatus = FlowAnalyzerForImmediate.get(op)(nn.toInt, currentStatus) + case AssemblyLine(op, _, MemoryAddressConstant(th: Thing), _) + if th.name == "__reg" && FlowAnalyzerForTheRest.hasDefinition(op) => + currentStatus = FlowAnalyzerForTheRest.get(op)(currentStatus, Some(0)) + + case AssemblyLine(op, _, CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th: Thing), NumericConstant(n, _)), _) + if th.name == "__reg" && FlowAnalyzerForTheRest.hasDefinition(op) => + currentStatus = FlowAnalyzerForTheRest.get(op)(currentStatus, Some(n.toInt)) + case AssemblyLine(op, _, _, _) if FlowAnalyzerForTheRest.hasDefinition(op) => currentStatus = FlowAnalyzerForTheRest.get(op)(currentStatus, None) diff --git a/src/main/scala/millfork/assembly/mos/opt/CpuStatus.scala b/src/main/scala/millfork/assembly/mos/opt/CpuStatus.scala index ae59fe75..e20a5e43 100644 --- a/src/main/scala/millfork/assembly/mos/opt/CpuStatus.scala +++ b/src/main/scala/millfork/assembly/mos/opt/CpuStatus.scala @@ -193,7 +193,7 @@ case class CpuStatus(a: Status[Int] = UnknownStatus, case Some(1) => this.copy(r1 = status) case Some(2) => this.copy(r2 = status) case Some(3) => this.copy(r3 = status) - case None => this + case _ => this } } } diff --git a/src/main/scala/millfork/assembly/mos/opt/ReverseFlowAnalyzerPerImpliedOpcode.scala b/src/main/scala/millfork/assembly/mos/opt/ReverseFlowAnalyzerPerImpliedOpcode.scala index 37c5a974..9ccf977c 100644 --- a/src/main/scala/millfork/assembly/mos/opt/ReverseFlowAnalyzerPerImpliedOpcode.scala +++ b/src/main/scala/millfork/assembly/mos/opt/ReverseFlowAnalyzerPerImpliedOpcode.scala @@ -12,7 +12,8 @@ object ReverseFlowAnalyzerPerImpiedOpcode { a = Important, ah = Important, x = Important, y = Important, iz = Important, c = Important, v = Important, d = Important, z = Important, n = Important, - m = Important, w = Important) + m = Important, w = Important, + r0 = Unimportant, r1 = Unimportant, r2 = Unimportant, r3 = Unimportant) private def allAddingOutputsUnimportant(currentImportance: CpuImportance): Boolean = currentImportance.a == Unimportant && diff 
--git a/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala b/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala index 441c5c6c..c8026175 100644 --- a/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala +++ b/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala @@ -1116,7 +1116,7 @@ case class CallsAnyExcept(identifiers: Set[String]) extends TrivialAssemblyLineP (line.addrMode == AddrMode.Absolute || line.addrMode == AddrMode.LongAbsolute || line.addrMode == AddrMode.LongRelative) && (line.parameter match { - case MemoryAddressConstant(th) => !identifiers(th.name) + case MemoryAddressConstant(th) => th.name.head != '.' && !identifiers(th.name) case _ => false }) } diff --git a/src/main/scala/millfork/assembly/mos/opt/ZeropageRegisterOptimizations.scala b/src/main/scala/millfork/assembly/mos/opt/ZeropageRegisterOptimizations.scala index 63a2d9fb..a7b4b5fd 100644 --- a/src/main/scala/millfork/assembly/mos/opt/ZeropageRegisterOptimizations.scala +++ b/src/main/scala/millfork/assembly/mos/opt/ZeropageRegisterOptimizations.scala @@ -4,7 +4,6 @@ import millfork.assembly.mos.Opcode._ import millfork.assembly.mos.AddrMode._ import millfork.assembly.AssemblyOptimization import millfork.assembly.mos.{AssemblyLine, Opcode, State} -import millfork.env.{CompoundConstant, Constant, MathOperator} import millfork.DecimalUtils.asDecimal /** * @author Karol Stasiak @@ -17,6 +16,25 @@ object ZeropageRegisterOptimizations { "__sbc_decimal" -> Set(2, 3), "__sub_decimal" -> Set(2, 3)) + val ConstantInlinedMultiplication = new RuleBasedAssemblyOptimization("Constant inlined multiplication", + needsFlowInfo = FlowInfoRequirement.BothFlows, + (Elidable & HasOpcode(LDA) & HasImmediate(0) & MatchZpReg(4, 0) & MatchZpReg(5, 1)) ~ + (Elidable & HasOpcodeIn(JMP, BEQ) & MatchParameter(13)) ~ + (Elidable & HasOpcode(LABEL) & MatchParameter(11)) ~ + (Elidable & HasOpcode(CLC)) ~ + (Elidable & HasOpcode(ADC) & RefersTo("__reg", 0)) ~ + (Elidable & HasOpcode(LABEL) & MatchParameter(12)) ~ + (Elidable & HasOpcode(ASL) & RefersTo("__reg", 0)) ~ + (Elidable & HasOpcode(LABEL) & MatchParameter(13)) ~ + (Elidable & HasOpcode(LSR) & RefersTo("__reg", 1)) ~ + (Elidable & HasOpcode(BCS) & MatchParameter(11)) ~ + (Elidable & HasOpcode(BNE) & MatchParameter(12) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.V)) ~ + (Not(RefersTo("__reg")) & DoesntMatterWhatItDoesWithReg(0) & DoesntMatterWhatItDoesWithReg(1)) ~~> { (code, ctx) => + val product = ctx.get[Int](4) * ctx.get[Int](5) + List(AssemblyLine.immediate(LDA, product & 0xff), AssemblyLine.implied(CLC), code.last) + }, + ) + val ConstantMultiplication = new RuleBasedAssemblyOptimization("Constant multiplication", needsFlowInfo = FlowInfoRequirement.ForwardFlow, (HasOpcode(STA) & RefersTo("__reg", 0) & MatchAddrMode(0) & MatchParameter(1) & MatchA(4)) ~ @@ -125,27 +143,35 @@ object ZeropageRegisterOptimizations { needsFlowInfo = FlowInfoRequirement.NoRequirement, (Elidable & HasOpcode(STA) & RefersTo("__reg", 0) & MatchAddrMode(0) & MatchParameter(1)) ~ (LinearOrLabel & DoesNotConcernMemoryAt(0, 1)).* ~ - (HasOpcodeIn(Set(RTS, RTL)) | CallsAnyExcept(functionsThatUsePseudoregisterAsInput.filter(_._2.contains(0)).keySet)) ~~> (_.tail), + (HasOpcodeIn(Set(RTS, RTL)) | HasOpcodeIn(JSR, JMP) & CallsAnyExcept(functionsThatUsePseudoregisterAsInput.filter(_._2.contains(0)).keySet)) ~~> (_.tail), (Elidable & HasOpcode(STA) & RefersTo("__reg", 1) & MatchAddrMode(0) & MatchParameter(1)) ~ 
(LinearOrLabel & DoesNotConcernMemoryAt(0, 1)).* ~ - (HasOpcodeIn(Set(RTS, RTL)) | CallsAnyExcept(functionsThatUsePseudoregisterAsInput.filter(_._2.contains(1)).keySet)) ~~> (_.tail), + (HasOpcodeIn(Set(RTS, RTL)) | HasOpcodeIn(JSR, JMP) & CallsAnyExcept(functionsThatUsePseudoregisterAsInput.filter(_._2.contains(1)).keySet)) ~~> (_.tail), (Elidable & HasOpcode(STA) & RefersTo("__reg", 2) & MatchAddrMode(0) & MatchParameter(1)) ~ (LinearOrLabel & DoesNotConcernMemoryAt(0, 1)).* ~ - (HasOpcodeIn(Set(RTS, RTL)) | CallsAnyExcept(functionsThatUsePseudoregisterAsInput.filter(_._2.contains(2)).keySet)) ~~> (_.tail), + (HasOpcodeIn(Set(RTS, RTL)) | HasOpcodeIn(JSR, JMP) & CallsAnyExcept(functionsThatUsePseudoregisterAsInput.filter(_._2.contains(2)).keySet)) ~~> (_.tail), (Elidable & HasOpcode(STA) & RefersTo("__reg", 3) & MatchAddrMode(0) & MatchParameter(1)) ~ (LinearOrLabel & DoesNotConcernMemoryAt(0, 1)).* ~ - (HasOpcodeIn(Set(RTS, RTL)) | CallsAnyExcept(functionsThatUsePseudoregisterAsInput.filter(_._2.contains(3)).keySet)) ~~> (_.tail), + (HasOpcodeIn(Set(RTS, RTL)) | HasOpcodeIn(JSR, JMP) & CallsAnyExcept(functionsThatUsePseudoregisterAsInput.filter(_._2.contains(3)).keySet)) ~~> (_.tail), ) val DeadRegStoreFromFlow = new RuleBasedAssemblyOptimization("Dead zeropage register store from flow", needsFlowInfo = FlowInfoRequirement.BothFlows, - (Elidable & HasOpcode(STA) & RefersTo("__reg", 0) & DoesntMatterWhatItDoesWithReg(0)) ~~> (_.tail), - (Elidable & HasOpcode(STA) & RefersTo("__reg", 1) & DoesntMatterWhatItDoesWithReg(1)) ~~> (_.tail), - (Elidable & HasOpcode(STA) & RefersTo("__reg", 2) & DoesntMatterWhatItDoesWithReg(2)) ~~> (_.tail), - (Elidable & HasOpcode(STA) & RefersTo("__reg", 3) & DoesntMatterWhatItDoesWithReg(3)) ~~> (_.tail), + (Elidable & HasOpcodeIn(STA, STX, SAX, STY, STZ) & RefersTo("__reg", 0) & DoesntMatterWhatItDoesWithReg(0)) ~~> (_.tail), + (Elidable & HasOpcodeIn(STA, STX, SAX, STY, STZ) & RefersTo("__reg", 1) & DoesntMatterWhatItDoesWithReg(1)) ~~> (_.tail), + (Elidable & HasOpcodeIn(STA, STX, SAX, STY, STZ) & RefersTo("__reg", 2) & DoesntMatterWhatItDoesWithReg(2)) ~~> (_.tail), + (Elidable & HasOpcodeIn(STA, STX, SAX, STY, STZ) & RefersTo("__reg", 3) & DoesntMatterWhatItDoesWithReg(3)) ~~> (_.tail), + (Elidable & HasOpcodeIn(ROL, ROR, ASL, LSR) & RefersTo("__reg", 0) & DoesntMatterWhatItDoesWithReg(0) & DoesntMatterWhatItDoesWith(State.C, State.N, State.Z)) ~~> (_.tail), + (Elidable & HasOpcodeIn(ROL, ROR, ASL, LSR) & RefersTo("__reg", 1) & DoesntMatterWhatItDoesWithReg(1) & DoesntMatterWhatItDoesWith(State.C, State.N, State.Z)) ~~> (_.tail), + (Elidable & HasOpcodeIn(ROL, ROR, ASL, LSR) & RefersTo("__reg", 2) & DoesntMatterWhatItDoesWithReg(2) & DoesntMatterWhatItDoesWith(State.C, State.N, State.Z)) ~~> (_.tail), + (Elidable & HasOpcodeIn(ROL, ROR, ASL, LSR) & RefersTo("__reg", 3) & DoesntMatterWhatItDoesWithReg(3) & DoesntMatterWhatItDoesWith(State.C, State.N, State.Z)) ~~> (_.tail), + (Elidable & HasOpcodeIn(INC, DEC) & RefersTo("__reg", 0) & DoesntMatterWhatItDoesWithReg(0) & DoesntMatterWhatItDoesWith(State.N, State.Z)) ~~> (_.tail), + (Elidable & HasOpcodeIn(INC, DEC) & RefersTo("__reg", 1) & DoesntMatterWhatItDoesWithReg(1) & DoesntMatterWhatItDoesWith(State.N, State.Z)) ~~> (_.tail), + (Elidable & HasOpcodeIn(INC, DEC) & RefersTo("__reg", 2) & DoesntMatterWhatItDoesWithReg(2) & DoesntMatterWhatItDoesWith(State.N, State.Z)) ~~> (_.tail), + (Elidable & HasOpcodeIn(INC, DEC) & RefersTo("__reg", 3) & DoesntMatterWhatItDoesWithReg(3) & 
DoesntMatterWhatItDoesWith(State.N, State.Z)) ~~> (_.tail), (Elidable & HasOpcode(LDY) & RefersTo("__reg", 0)) ~ (Linear & Not(ConcernsY) & Not(RefersToOrUses("__reg", 0))).*.capture(2) ~ @@ -238,9 +264,20 @@ object ZeropageRegisterOptimizations { }) ) + val LoadingKnownValue = new RuleBasedAssemblyOptimization("Loading known value from register", + needsFlowInfo = FlowInfoRequirement.ForwardFlow, + MultipleAssemblyRules((0 to 4).map{ zregIndex => + (Elidable & HasOpcodeIn(LDA, ADC, SBC, CMP, EOR, AND, ORA, LDX, LDY, CPX, CPY) & RefersToOrUses("__reg", zregIndex) & MatchZpReg(1, zregIndex)) ~~> { (code, ctx) => + List(AssemblyLine.immediate(code.head.opcode, ctx.get[Int](1))) + } + }) + ) + val All: List[AssemblyOptimization[AssemblyLine]] = List( ConstantDecimalMath, ConstantMultiplication, + ConstantInlinedMultiplication, + LoadingKnownValue, DeadRegStore, DeadRegStoreFromFlow, PointlessLoad, diff --git a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala index 82c6be73..b401cd81 100644 --- a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala +++ b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala @@ -344,11 +344,11 @@ object AlwaysGoodI80Optimizations { val SimplifiableMaths = new RuleBasedAssemblyOptimization("Simplifiable maths", needsFlowInfo = FlowInfoRequirement.BothFlows, - for6Registers(register => + for7Registers(register => (Elidable & HasOpcode(ADD) & MatchRegister(ZRegister.A, 0) & HasRegisterParam(register) & MatchRegister(register, 1) & DoesntMatterWhatItDoesWithFlags) ~~> ((code, ctx) => List(ZLine.ldImm8(ZRegister.A, (ctx.get[Int](0) + ctx.get[Int](1)) & 0xff))), ), - for6Registers(register => + for7Registers(register => (Elidable & HasOpcode(ADD) & MatchRegister(ZRegister.A, 0) & HasRegisterParam(register) & MatchRegister(register, 1)) ~ (Elidable & HasOpcode(DAA) & DoesntMatterWhatItDoesWithFlags) ~~> {(code, ctx) => List(ZLine.ldImm8(ZRegister.A, asDecimal(ctx.get[Int](0) & 0xff, ctx.get[Int](1) & 0xff, _ + _).toInt & 0xff)) @@ -829,7 +829,7 @@ object AlwaysGoodI80Optimizations { val UnusedCodeRemoval = new RuleBasedAssemblyOptimization("Unreachable code removal", needsFlowInfo = FlowInfoRequirement.NoRequirement, - (HasOpcodeIn(Set(JP, JR)) & HasRegisters(NoRegisters)) ~ (Not(HasOpcode(LABEL)) & Elidable).+ ~~> (c => c.head :: Nil) + (HasOpcodeIn(Set(JP, JR)) & IsUnconditional) ~ (Not(HasOpcode(LABEL)) & Elidable).+ ~~> (c => c.head :: Nil) ) val UnusedLabelRemoval = new RuleBasedAssemblyOptimization("Unused label removal", @@ -1049,7 +1049,9 @@ object AlwaysGoodI80Optimizations { val ConstantMultiplication = new RuleBasedAssemblyOptimization("Constant multiplication", needsFlowInfo = FlowInfoRequirement.BothFlows, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & MatchRegister(ZRegister.A, 4) & MatchRegister(ZRegister.D, 5) & DoesntMatterWhatItDoesWithFlags @@ -1058,69 +1060,91 @@ object AlwaysGoodI80Optimizations { List(ZLine.ldImm8(ZRegister.A, product)) }, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & (HasRegister(ZRegister.D, 0) | HasRegister(ZRegister.A, 0)) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => List(ZLine.ldImm8(ZRegister.A, 0)) }, - (Elidable & 
HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & HasRegister(ZRegister.D, 1) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => Nil }, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & HasRegister(ZRegister.D, 2) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => List(ZLine.register(ADD, ZRegister.A)) }, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & HasRegister(ZRegister.D, 4) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => List(ZLine.register(ADD, ZRegister.A), ZLine.register(ADD, ZRegister.A)) }, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & HasRegister(ZRegister.D, 8) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => List(ZLine.register(ADD, ZRegister.A), ZLine.register(ADD, ZRegister.A), ZLine.register(ADD, ZRegister.A)) }, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & HasRegister(ZRegister.D, 16) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => List(ZLine.register(ADD, ZRegister.A), ZLine.register(ADD, ZRegister.A), ZLine.register(ADD, ZRegister.A), ZLine.register(ADD, ZRegister.A)) }, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & HasRegister(ZRegister.A, 1) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => List(ZLine.ld8(ZRegister.A, ZRegister.D)) }, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & HasRegister(ZRegister.A, 2) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => List(ZLine.ld8(ZRegister.A, ZRegister.D), ZLine.register(ADD, ZRegister.A)) }, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & HasRegister(ZRegister.A, 4) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => List(ZLine.ld8(ZRegister.A, ZRegister.D), ZLine.register(ADD, ZRegister.A), ZLine.register(ADD, ZRegister.A)) }, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 0) & HasRegister(ZRegister.A, 8) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => List(ZLine.ld8(ZRegister.A, ZRegister.D), ZLine.register(ADD, ZRegister.A), ZLine.register(ADD, ZRegister.A), ZLine.register(ADD, ZRegister.A)) }, - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) + & IsUnconditional + & RefersTo("__mul_u8u8u8", 
0) & HasRegister(ZRegister.A, 16) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => @@ -1129,20 +1153,126 @@ object AlwaysGoodI80Optimizations { (Elidable & Is8BitLoad(D, A)) ~ (Elidable & Is8BitLoad(A, IMM_8)) ~ - (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + (Elidable & HasOpcode(CALL) & IsUnconditional & RefersTo("__mul_u8u8u8", 0) & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => List(code(1).copy(registers = TwoRegisters(D, IMM_8)), code(2)) }, ) + val ConstantInlinedShifting = new RuleBasedAssemblyOptimization("Constant multiplication", + needsFlowInfo = FlowInfoRequirement.BothFlows, + + // TODO: set limits on the loop iteration to avoid huge unrolled code + // TODO: non-Z80 code without DJNZ + + (Elidable & IsLabelMatching(2) & MatchRegister(ZRegister.B, 1)) ~ + Where(ctx => ctx.get[Int](1) > 0) ~ + (Elidable & HasOpcodeIn(Set(ADD, SLA, SRL, SLL, RLC, RLCA, RRC, RRCA, RR, RL, RLA, RRA)) & Not(HasRegisterParam(ZRegister.B))).*.capture(5) ~ + (Elidable & HasOpcode(DJNZ) & MatchJumpTarget(2) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => + val iter = ctx.get[Int](1) + val code = ctx.get[List[ZLine]](5) + List.fill(iter)(code).flatten :+ ZLine.ldImm8(ZRegister.B, 0) + }, + + (Elidable & HasOpcodeIn(Set(JP, JR)) & MatchJumpTarget(3) & IsUnconditional & MatchRegister(ZRegister.B, 1)) ~ + DebugMatching ~ + (Elidable & IsLabelMatching(2)) ~ + (Elidable & HasOpcodeIn(Set(ADD, SLA, SRL, SLL, RLC, RLCA, RRC, RRCA, RR, RL, RLA, RRA)) & Not(HasRegisterParam(ZRegister.B))).*.capture(5) ~ + (Elidable & IsLabelMatching(3)) ~ + (Elidable & HasOpcode(DJNZ) & MatchJumpTarget(2) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => + val iter = ctx.get[Int](1).-(1).&(0xff) + val code = ctx.get[List[ZLine]](5) + List.fill(iter)(code).flatten :+ ZLine.ldImm8(ZRegister.B, 0) + }, + + ) + + val ShiftingKnownValue = new RuleBasedAssemblyOptimization("Shifting known value", + needsFlowInfo = FlowInfoRequirement.BothFlows, + + for7Registers(register => + (Elidable & HasOpcode(SLA) & HasRegisterParam(register) & MatchRegister(register, 1) & DoesntMatterWhatItDoesWithFlags) ~~> {(code,ctx) => + val value = ctx.get[Int](1) + List(ZLine.ldImm8(register, value.<<(1).&(0xff))) + } + ), + + (Elidable & HasOpcode(ADD) & HasRegisterParam(ZRegister.A) & MatchRegister(ZRegister.A, 1) & DoesntMatterWhatItDoesWithFlags) ~~> {(code,ctx) => + val value = ctx.get[Int](1) + List(ZLine.ldImm8(ZRegister.A, value.<<(1).&(0xff))) + }, + + (Elidable & HasOpcode(ADD_16) & HasRegisterParam(ZRegister.HL) & MatchRegister(ZRegister.HL, 1) & DoesntMatterWhatItDoesWithFlags) ~~> {(code,ctx) => + val value = ctx.get[Int](1) + List(ZLine.ldImm16(ZRegister.HL, value.<<(1).&(0xffff))) + }, + + for7Registers(register => + (Elidable & HasOpcode(SLA) & HasRegisterParam(register) & MatchRegister(register, 1) & DoesntMatterWhatItDoesWithFlagsExceptCarry) ~~> {(code,ctx) => + val value = ctx.get[Int](1) + if (value.&(0x80) != 0) { + List(ZLine.ldImm8(register, value.<<(1).&(0xff)), ZLine.implied(SCF)) + } else { + List(ZLine.ldImm8(register, value.<<(1).&(0xff)), ZLine.register(OR, ZRegister.A)) + } + } + ), + + for7Registers(register => + (Elidable & HasOpcode(RL) & HasRegisterParam(register) & HasSet(ZFlag.C) & MatchRegister(register, 1) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => + val value = ctx.get[Int](1) + List(ZLine.ldImm8(register, value.<<(1).&(0xff).+(1))) + } + ), + + 
for7Registers(register => + (Elidable & HasOpcode(RL) & HasRegisterParam(register) & HasClear(ZFlag.C) & MatchRegister(register, 1) & DoesntMatterWhatItDoesWithFlags) ~~> { (code, ctx) => + val value = ctx.get[Int](1) + List(ZLine.ldImm8(register, value.<<(1).&(0xff))) + } + ), + + for7Registers(register => + (Elidable & HasOpcode(RL) & HasRegisterParam(register) & HasSet(ZFlag.C) & MatchRegister(register, 1) & DoesntMatterWhatItDoesWithFlagsExceptCarry) ~~> { (code, ctx) => + val value = ctx.get[Int](1) + if (value.&(0x80) != 0) { + List(ZLine.ldImm8(register, value.<<(1).&(0xff).+(1)), ZLine.implied(SCF)) + } else { + List(ZLine.ldImm8(register, value.<<(1).&(0xff).+(1)), ZLine.register(OR, ZRegister.A)) + } + } + ), + + for7Registers(register => + (Elidable & HasOpcode(RL) & HasRegisterParam(register) & HasClear(ZFlag.C) & MatchRegister(register, 1) & DoesntMatterWhatItDoesWithFlagsExceptCarry) ~~> { (code, ctx) => + val value = ctx.get[Int](1) + if (value.&(0x80) != 0) { + List(ZLine.ldImm8(register, value.<<(1).&(0xff)), ZLine.implied(SCF)) + } else { + List(ZLine.ldImm8(register, value.<<(1).&(0xff)), ZLine.register(OR, ZRegister.A)) + } + } + ), + ) + + val PointlessFlagChange = new RuleBasedAssemblyOptimization("Pointless flag change", + needsFlowInfo = FlowInfoRequirement.BackwardFlow, + (Elidable & HasOpcode(SCF) & DoesntMatterWhatItDoesWithFlags) ~~> (_ => Nil), + (Elidable & HasOpcode(CCF) & DoesntMatterWhatItDoesWithFlags) ~~> (_ => Nil), + (Elidable & HasOpcodeIn(Set(OR, AND)) & HasRegisterParam(ZRegister.A) & DoesntMatterWhatItDoesWithFlags) ~~> (_ => Nil), + ) + val All: List[AssemblyOptimization[ZLine]] = List[AssemblyOptimization[ZLine]]( BranchInPlaceRemoval, ConstantMultiplication, + ConstantInlinedShifting, FreeHL, PointlessArithmetic, + PointlessFlagChange, PointlessLoad, PointlessStackStashing, ReloadingKnownValueFromMemory, + ShiftingKnownValue, SimplifiableMaths, SimplifiableShifting, UnusedCodeRemoval, diff --git a/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala b/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala index a61d9e9e..240f0fbd 100644 --- a/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala +++ b/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala @@ -86,17 +86,17 @@ object CoarseFlowAnalyzer { nf = Status.SingleTrue, cf = AnyStatus, zf = AnyStatus, sf = AnyStatus, pf = AnyStatus, hf = AnyStatus) case ZLine(AND, OneRegister(s), _, _) => currentStatus = currentStatus.copy(a = (currentStatus.a <*> currentStatus.getRegister(s)) ((m, n) => (m & n) & 0xff), - nf = Status.SingleFalse, cf = AnyStatus, zf = AnyStatus, sf = AnyStatus, pf = AnyStatus, hf = AnyStatus) + nf = Status.SingleFalse, cf = Status.SingleFalse, zf = AnyStatus, sf = AnyStatus, pf = AnyStatus, hf = AnyStatus) case ZLine(OR, OneRegister(ZRegister.A), _, _) => currentStatus = currentStatus.copy(nf = Status.SingleFalse, cf = Status.SingleFalse, zf = AnyStatus, sf = AnyStatus, pf = AnyStatus, hf = AnyStatus) case ZLine(XOR, OneRegister(ZRegister.A), _, _) => currentStatus = currentStatus.copy(a = Status.SingleZero, nf = Status.SingleFalse, cf = Status.SingleFalse, zf = Status.SingleTrue, sf = Status.SingleFalse, pf = AnyStatus, hf = AnyStatus) case ZLine(OR, OneRegister(s), _, _) => currentStatus = currentStatus.copy(a = (currentStatus.a <*> currentStatus.getRegister(s)) ((m, n) => (m | n) & 0xff), - nf = Status.SingleFalse, cf = AnyStatus, zf = AnyStatus, sf = AnyStatus, pf = AnyStatus, hf = AnyStatus) + nf = Status.SingleFalse, cf = 
Status.SingleFalse, zf = AnyStatus, sf = AnyStatus, pf = AnyStatus, hf = AnyStatus) case ZLine(XOR, OneRegister(s), _, _) => currentStatus = currentStatus.copy(a = (currentStatus.a <*> currentStatus.getRegister(s)) ((m, n) => (m ^ n) & 0xff), - nf = Status.SingleFalse, cf = AnyStatus, zf = AnyStatus, sf = AnyStatus, pf = AnyStatus, hf = AnyStatus) + nf = Status.SingleFalse, cf = Status.SingleFalse, zf = AnyStatus, sf = AnyStatus, pf = AnyStatus, hf = AnyStatus) case ZLine(INC, OneRegister(r), _, _) => currentStatus = currentStatus. @@ -162,8 +162,10 @@ object CoarseFlowAnalyzer { zf = AnyStatus, pf = AnyStatus, hf = Status.SingleFalse) - case ZLine(SCF, _, _, _) => currentStatus.copy(cf = Status.SingleTrue, hf = Status.SingleFalse, nf = Status.SingleFalse) - case ZLine(CCF, _, _, _) => currentStatus.copy(cf = currentStatus.cf.negate, hf = AnyStatus, nf = AnyStatus) + case ZLine(SCF, _, _, _) => + currentStatus = currentStatus.copy(cf = Status.SingleTrue, hf = Status.SingleFalse, nf = Status.SingleFalse) + case ZLine(CCF, _, _, _) => + currentStatus = currentStatus.copy(cf = currentStatus.cf.negate, hf = AnyStatus, nf = AnyStatus) case ZLine(opcode, registers, _, _) => currentStatus = currentStatus.copy(cf = AnyStatus, zf = AnyStatus, sf = AnyStatus, pf = AnyStatus, hf = AnyStatus) diff --git a/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala b/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala index a7d75b9d..628c9622 100644 --- a/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala +++ b/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala @@ -231,7 +231,7 @@ object ReverseFlowAnalyzer { case ZLine(DISCARD_A, _, _, _) => currentImportance = currentImportance.copy(a = Unimportant) case ZLine(DISCARD_F, _, _, _) => - currentImportance = currentImportance.copy(cf = Unimportant, zf = Unimportant, sf = Unimportant, pf = Unimportant, hf = Unimportant) + currentImportance = currentImportance.copy(cf = Unimportant, zf = Unimportant, sf = Unimportant, pf = Unimportant, hf = Unimportant, nf = Unimportant) case ZLine(LD, TwoRegistersOffset(t, s, o), _, _) => currentImportance = currentImportance.butWritesRegister(t, o).butReadsRegister(s, o) case ZLine(LD | LD_16, TwoRegisters(t, s), _, _) => @@ -448,6 +448,8 @@ object ReverseFlowAnalyzer { currentImportance = currentImportance.butReadsRegister(r).copy(cf = Unimportant, zf = Unimportant, hf = Unimportant, nf = Unimportant, pf = Unimportant) case ZLine(RLA | RRA | RLCA | RRCA, _, _, _) => currentImportance = currentImportance.butReadsRegister(ZRegister.A).copy(cf = Important, hf = Unimportant, nf = Unimportant) + case ZLine(SCF, _, _, _) => + currentImportance = currentImportance.copy(cf = Unimportant, hf = Unimportant, nf = Unimportant) case _ => currentImportance = finalImportance // TODO } diff --git a/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala b/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala index 22b77dd7..9cfc1a0f 100644 --- a/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala +++ b/src/main/scala/millfork/assembly/z80/opt/RuleBasedAssemblyOptimization.scala @@ -553,6 +553,11 @@ case class MatchParameter(i: Int) extends AssemblyLinePattern { } } +case class IsLabelMatching(i: Int) extends AssemblyLinePattern { + override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: ZLine): Boolean = + line.opcode == ZOpcode.LABEL && ctx.addObject(i, line.parameter.quickSimplify) +} + 
case class MatchParameterOrNothing(i: Int) extends AssemblyLinePattern { override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: ZLine): Boolean = ctx.addObject(i, line.parameter.quickSimplify) @@ -566,6 +571,11 @@ case class MatchJumpTarget(i: Int) extends AssemblyLinePattern { } } +case object IsUnconditional extends AssemblyLinePattern { + override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: ZLine): Boolean = + line.registers == NoRegisters +} + case class MatchConstantInHL(i: Int) extends AssemblyLinePattern { override def validate(needsFlowInfo: FlowInfoRequirement.Value): Unit = FlowInfoRequirement.assertForward(needsFlowInfo) diff --git a/src/main/scala/millfork/node/CallGraph.scala b/src/main/scala/millfork/node/CallGraph.scala index 4e6815db..c4fc9b37 100644 --- a/src/main/scala/millfork/node/CallGraph.scala +++ b/src/main/scala/millfork/node/CallGraph.scala @@ -100,6 +100,14 @@ abstract class CallGraph(program: Program, log: Logger) { everCalledFunctions ++= entryPoints callEdges.filter(e => entryPoints.contains(e._1)).foreach(e => everCalledFunctions += e._2) multiaccessibleFunctions ++= callEdges.filter(e => entryPoints.contains(e._1)).map(_._2).groupBy(identity).filter(p => p._2.size > 1).keys + for { + operator <- everCalledFunctions + if operator.nonEmpty && operator.head != '_' && !operator.head.isLetterOrDigit + internal <- allFunctions + if internal.startsWith("__") + } { + callEdges += operator -> internal + } if (log.traceEnabled) { log.trace("Call edges:") diff --git a/src/main/scala/millfork/node/Node.scala b/src/main/scala/millfork/node/Node.scala index 42058c0d..ffe940a3 100644 --- a/src/main/scala/millfork/node/Node.scala +++ b/src/main/scala/millfork/node/Node.scala @@ -118,6 +118,7 @@ sealed class NiceFunctionProperty(override val toString: String) object NiceFunctionProperty { case object DoesntReadMemory extends NiceFunctionProperty("MR") case object DoesntWriteMemory extends NiceFunctionProperty("MW") + case object IsLeaf extends NiceFunctionProperty("LEAF") } object MosNiceFunctionProperty { diff --git a/src/main/scala/millfork/output/AbstractAssembler.scala b/src/main/scala/millfork/output/AbstractAssembler.scala index faeee935..c8b48a32 100644 --- a/src/main/scala/millfork/output/AbstractAssembler.scala +++ b/src/main/scala/millfork/output/AbstractAssembler.scala @@ -10,6 +10,7 @@ import millfork.assembly.z80.ZLine import scala.collection.mutable import DecimalUtils._ +import millfork.node.NiceFunctionProperty.IsLeaf /** * @author Karol Stasiak @@ -186,7 +187,7 @@ abstract class AbstractAssembler[T <: AbstractCode](private val program: Program else 1.2) val potentiallyInlineable: Map[String, Int] = inliningResult.potentiallyInlineableFunctions - var nonInlineableFunctions: Set[String] = inliningResult.nonInlineableFunctions + var functionsThatCanBeCalledFromInlinedFunctions: Set[String] = inliningResult.nonInlineableFunctions env.allocateVariables(None, mem, callGraph, variableAllocators, options, labelMap.put, 1, forZpOnly = true) env.allocateVariables(None, mem, callGraph, variableAllocators, options, labelMap.put, 2, forZpOnly = true) @@ -202,15 +203,21 @@ abstract class AbstractAssembler[T <: AbstractCode](private val program: Program val strippedCodeForInlining = for { limit <- potentiallyInlineable.get(f) if code.map(_.sizeInBytes).sum <= limit - s <- inliningCalculator.codeForInlining(f, nonInlineableFunctions, code) + s <- inliningCalculator.codeForInlining(f, 
functionsThatCanBeCalledFromInlinedFunctions, code) } yield s strippedCodeForInlining match { case Some(c) => log.debug("Inlining " + f, function.position) inlinedFunctions += f -> c + val tmp = mutable.Set[(NiceFunctionProperty, String)]() + gatherNiceFunctionProperties(tmp, f, c) + if (tmp.exists(_._1 == IsLeaf)) { + functionsThatCanBeCalledFromInlinedFunctions += function.name + } compiledFunctions(f) = NonexistentFunction() case None => - nonInlineableFunctions += function.name + log.trace("Not inlining " + f, function.position) + functionsThatCanBeCalledFromInlinedFunctions += function.name compiledFunctions(f) = NormalCompiledFunction(function.declaredBank.getOrElse(platform.defaultCodeBank), code, function.address.isDefined, function.alignment) optimizedCodeSize += code.map(_.sizeInBytes).sum if (options.flag(CompilationFlag.InterproceduralOptimization)) { diff --git a/src/main/scala/millfork/output/AbstractInliningCalculator.scala b/src/main/scala/millfork/output/AbstractInliningCalculator.scala index e68bea65..1e9daf3f 100644 --- a/src/main/scala/millfork/output/AbstractInliningCalculator.scala +++ b/src/main/scala/millfork/output/AbstractInliningCalculator.scala @@ -12,8 +12,11 @@ import scala.collection.mutable /** * @author Karol Stasiak */ + +case class InliningResult(potentiallyInlineableFunctions: Map[String, Int], nonInlineableFunctions: Set[String]) + abstract class AbstractInliningCalculator[T <: AbstractCode] { - def codeForInlining(fname: String, functionsAlreadyKnownToBeNonInlineable: Set[String], code: List[T]): Option[List[T]] + def codeForInlining(fname: String, functionsThatCanBeCalledFromInlinedFunctions: Set[String], code: List[T]): Option[List[T]] def inline(code: List[T], inlinedFunctions: Map[String, List[T]], jobContext: JobContext): List[T] private val sizes = Seq(64, 64, 8, 6, 5, 5, 4) diff --git a/src/main/scala/millfork/output/MosAssembler.scala b/src/main/scala/millfork/output/MosAssembler.scala index d0fd913d..0fe79a54 100644 --- a/src/main/scala/millfork/output/MosAssembler.scala +++ b/src/main/scala/millfork/output/MosAssembler.scala @@ -154,15 +154,20 @@ class MosAssembler(program: Program, case AssemblyLine(op, _, _, _) => !OpcodeClasses.ReadsD(op) && !OpcodeClasses.OverwritesD(op) } genericPropertyScan(DoesntReadMemory) { - case AssemblyLine(op, _, Implied | Immediate | WordImmediate, _) => true + case AssemblyLine(op, Implied | Immediate | WordImmediate, _, _) => true case AssemblyLine(op, _, _, _) if OpcodeClasses.ReadsMemoryIfNotImpliedOrImmediate(op) => false case _ => true } genericPropertyScan(DoesntWriteMemory) { - case AssemblyLine(op, _, Implied | Immediate | WordImmediate, _) => true + case AssemblyLine(op, Implied | Immediate | WordImmediate, _, _) => true case AssemblyLine(op, _, _, _) if OpcodeClasses.ChangesMemoryIfNotImplied(op) || OpcodeClasses.ChangesMemoryAlways(op) => false case _ => true } + genericPropertyScan(IsLeaf) { + case AssemblyLine(JSR | BSR, Implied | Immediate | WordImmediate, _, _) => false + case AssemblyLine(JMP, Absolute, th:Thing, _) => th.name.startsWith(".") + case _ => true + } } override def bytePseudoopcode: String = "!byte" diff --git a/src/main/scala/millfork/output/MosInliningCalculator.scala b/src/main/scala/millfork/output/MosInliningCalculator.scala index 67606ba7..5f195d41 100644 --- a/src/main/scala/millfork/output/MosInliningCalculator.scala +++ b/src/main/scala/millfork/output/MosInliningCalculator.scala @@ -13,8 +13,6 @@ import scala.collection.mutable * @author Karol Stasiak */ -case class 
InliningResult(potentiallyInlineableFunctions: Map[String, Int], nonInlineableFunctions: Set[String]) - object MosInliningCalculator extends AbstractInliningCalculator[AssemblyLine] { private val sizes = Seq(64, 64, 8, 6, 5, 5, 4) @@ -22,22 +20,42 @@ object MosInliningCalculator extends AbstractInliningCalculator[AssemblyLine] { private val badOpcodes = Set(RTI, RTS, JSR, BRK, RTL, BSR, BYTE) ++ OpcodeClasses.ChangesStack private val jumpingRelatedOpcodes = Set(LABEL, JMP) ++ OpcodeClasses.ShortBranching - def codeForInlining(fname: String, functionsAlreadyKnownToBeNonInlineable: Set[String], code: List[AssemblyLine]): Option[List[AssemblyLine]] = { + def codeForInlining(fname: String, functionsThatCanBeCalledFromInlinedFunctions: Set[String], code: List[AssemblyLine]): Option[List[AssemblyLine]] = { if (code.isEmpty) return None - val lastOpcode = code.last.opcode - if (lastOpcode != RTS && lastOpcode != RTL) return None + val localLabels = code.flatMap{ + case AssemblyLine(LABEL, _, MemoryAddressConstant(Label(l)), _) => Some(l) + case _ => None + } + val lastLineOfCode = code.last + lastLineOfCode match { + case AssemblyLine(RTS | RTL, _, _, _) => + case AssemblyLine(JMP, AddrMode.Absolute, _, _) => + case _ => return None + } var result = code.init + if (lastLineOfCode.opcode == JMP) { + result = result :+ lastLineOfCode.copy(opcode = JSR) + } while (result.nonEmpty && OpcodeClasses.NoopDiscardsFlags(result.last.opcode)) { result = result.init } if (result.head.opcode == LABEL && result.head.parameter == Label(fname).toAddress) result = result.tail if (result.exists{ case AssemblyLine(op, AddrMode.Absolute | AddrMode.Relative | AddrMode.DoesNotExist, MemoryAddressConstant(Label(l)), _) if jumpingRelatedOpcodes(op) => - !l.startsWith(".") - case AssemblyLine(JSR, AddrMode.Absolute, MemoryAddressConstant(th:ExternFunction), _) => false + if (!localLabels.contains(l) && !l.startsWith(".")) { + println("Bad jump " + l) + true + } else false + case AssemblyLine(JSR, AddrMode.Absolute, MemoryAddressConstant(th:ExternFunction), _) => + false case AssemblyLine(JSR, AddrMode.Absolute, MemoryAddressConstant(th:NormalFunction), _) => - !functionsAlreadyKnownToBeNonInlineable(th.name) - case AssemblyLine(op, _, _, _) if jumpingRelatedOpcodes(op) || badOpcodes(op) => true + if(!functionsThatCanBeCalledFromInlinedFunctions(th.name)){ + println("Bad call " + th) + true + } else false + case AssemblyLine(op, _, _, _) if jumpingRelatedOpcodes(op) || badOpcodes(op) => + println("Bad opcode " + op) + true case _ => false }) return None Some(result) diff --git a/src/main/scala/millfork/output/Z80InliningCalculator.scala b/src/main/scala/millfork/output/Z80InliningCalculator.scala index 7f54c1f3..324f08c4 100644 --- a/src/main/scala/millfork/output/Z80InliningCalculator.scala +++ b/src/main/scala/millfork/output/Z80InliningCalculator.scala @@ -17,7 +17,7 @@ object Z80InliningCalculator extends AbstractInliningCalculator[ZLine] { private val badOpcodes = Set(RET, RETI, RETN, CALL, BYTE, POP, PUSH) private val jumpingRelatedOpcodes = Set(LABEL, JP, JR) - override def codeForInlining(fname: String, functionsAlreadyKnownToBeNonInlineable: Set[String], code: List[ZLine]): Option[List[ZLine]] = { + override def codeForInlining(fname: String, functionsThatCanBeCalledFromInlinedFunctions: Set[String], code: List[ZLine]): Option[List[ZLine]] = { if (code.isEmpty) return None code.last match { case ZLine(RET, NoRegisters, _, _) => @@ -35,7 +35,7 @@ object Z80InliningCalculator extends 
AbstractInliningCalculator[ZLine] { case ZLine(CALL, _, NumericConstant(_, _), _) => false case ZLine(JP, OneRegister(_), _, _) => false case ZLine(CALL, _, MemoryAddressConstant(th: NormalFunction), _) => - !functionsAlreadyKnownToBeNonInlineable(th.name) + !functionsThatCanBeCalledFromInlinedFunctions(th.name) case ZLine(op, _, _, _) if jumpingRelatedOpcodes(op) || badOpcodes(op) => true case _ => false }) return None diff --git a/src/test/scala/millfork/test/InliningSuite.scala b/src/test/scala/millfork/test/InliningSuite.scala new file mode 100644 index 00000000..cb4a65d0 --- /dev/null +++ b/src/test/scala/millfork/test/InliningSuite.scala @@ -0,0 +1,47 @@ +package millfork.test + +import millfork.Cpu +import millfork.test.emu.{EmuCrossPlatformBenchmarkRun, EmuOptimizedInlinedRun, EmuSizeOptimizedCrossPlatformRun} +import org.scalatest.{FunSuite, Matchers} + +/** + * @author Karol Stasiak + */ +class InliningSuite extends FunSuite with Matchers { + + test("Should inline square") { + EmuSizeOptimizedCrossPlatformRun(Cpu.Mos, Cpu.Z80)( + """ + | import zp_reg + | byte output @$c000 + | inline byte square(byte x) { + | return x * x + | } + | void main () { + | output = square(6) + | } + """.stripMargin)(_.readByte(0xc000) should equal(36)) + } + + test("Should inline <<") { + EmuSizeOptimizedCrossPlatformRun(Cpu.Mos, Cpu.Z80)( + """ + | byte output @$c000 + | word output2 @$c006 + | inline byte thing(byte x) { + | return x << x + | } + | inline word sh(word x, byte y) { + | return x << y + | } + | void main () { + | output = thing(6) + | output2 = sh(84, 4) + | } + """.stripMargin) { m => + m.readByte(0xc000) should equal(6.<<(6).&(0xff)) + m.readWord(0xc006) should equal(84.<<(4).&(0xffff)) + } + + } +} \ No newline at end of file
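
A minimal sketch, outside the compiler, of the unrolling step that the new ConstantInlinedShifting rules perform: when forward flow analysis proves that the loop counter (X or Y on 6502, B on Z80) holds a known constant at the loop label, the captured loop body is replicated that many times and the counter is reset to zero. The names below (UnrollSketch, unroll, the string stand-ins) are illustrative only; the real rules work on AssemblyLine/ZLine values via List.fill(iters)(shift).flatten.

object UnrollSketch {
  // Mirrors the replacement lambda in ConstantInlinedShifting:
  //   List.fill(iters)(shift).flatten :+ AssemblyLine.immediate(LDX, 0)
  def unroll[A](iterations: Int, body: List[A], counterReset: A): List[A] =
    List.fill(iterations)(body).flatten :+ counterReset

  def main(args: Array[String]): Unit = {
    // Stand-ins for assembly lines: a 16-bit right shift whose count is known to be 3.
    val body = List("LSR output+1", "ROR output")
    println(unroll(3, body, "LDX #0"))
    // List(LSR output+1, ROR output, LSR output+1, ROR output, LSR output+1, ROR output, LDX #0)
  }
}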
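
The Z80 ShiftingKnownValue rules fold a shift of a register whose value is statically known into an immediate load; when a later instruction still reads the carry flag, they reconstruct it from bit 7 of the old value with SCF (carry set) or OR A (carry clear). A small sketch of that arithmetic, under the simplifying assumption of a single SLA (the helper name is hypothetical):

object FoldSlaSketch {
  // Returns the folded 8-bit result and the carry that SLA would have produced.
  def foldSla(v: Int): (Int, Boolean) = ((v << 1) & 0xff, (v & 0x80) != 0)

  def main(args: Array[String]): Unit = {
    assert(foldSla(0x81) == (0x02, true))  // bit 7 set   -> emit SCF after the load
    assert(foldSla(0x3f) == (0x7e, false)) // bit 7 clear -> emit OR A after the load
  }
}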
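
The MosInliningCalculator change also admits functions whose compiled body ends in a JMP to another function, rewriting the trailing JMP into a JSR when the body is pasted at the call site. A hypothetical follow-up test in the style of InliningSuite could exercise that path; the noinline modifier and the function names here are assumptions for illustration, not part of this patch.

package millfork.test

import millfork.Cpu
import millfork.test.emu.EmuSizeOptimizedCrossPlatformRun
import org.scalatest.{FunSuite, Matchers}

class TailCallInliningSketch extends FunSuite with Matchers {
  test("Should inline a function ending in a tail call") {
    EmuSizeOptimizedCrossPlatformRun(Cpu.Mos, Cpu.Z80)(
      """
        | byte output @$c000
        | noinline byte add_one(byte x) {
        |   return x + 1
        | }
        | inline byte wrapper(byte x) {
        |   return add_one(x)
        | }
        | void main () {
        |   output = wrapper(41)
        | }
      """.stripMargin)(_.readByte(0xc000) should equal(42))
  }
}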