From c8492173ee12a72d4c73319fd89e8756298d94f1 Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Mon, 27 Apr 2020 12:02:45 +0200 Subject: [PATCH] 6502: Improve index register optimizations --- .../scala/millfork/OptimizationPresets.scala | 4 + .../mos/opt/AlwaysGoodOptimizations.scala | 38 ++++++++ ...ariablesToIndexRegistersOptimization.scala | 25 +++++- .../opt/VariableToRegisterOptimization.scala | 87 ++++++++++++++++--- 4 files changed, 140 insertions(+), 14 deletions(-) diff --git a/src/main/scala/millfork/OptimizationPresets.scala b/src/main/scala/millfork/OptimizationPresets.scala index c68cc936..9d66b33c 100644 --- a/src/main/scala/millfork/OptimizationPresets.scala +++ b/src/main/scala/millfork/OptimizationPresets.scala @@ -61,6 +61,7 @@ object OptimizationPresets { AlwaysGoodOptimizations.PointlessMath, AlwaysGoodOptimizations.PointlessOperationFromFlow, AlwaysGoodOptimizations.SimplifiableComparison, + AlwaysGoodOptimizations.ReuseIndex, VariableToRegisterOptimization, TwoVariablesToIndexRegistersOptimization, ChangeIndexRegisterOptimizationPreferringX2Y, @@ -68,6 +69,7 @@ object OptimizationPresets { TwoVariablesToIndexRegistersOptimization, ChangeIndexRegisterOptimizationPreferringY2X, VariableToRegisterOptimization, + AlwaysGoodOptimizations.ReuseIndex, TwoVariablesToIndexRegistersOptimization, AlwaysGoodOptimizations.ConstantFlowAnalysis, LaterOptimizations.DoubleLoadToDifferentRegisters, @@ -244,6 +246,7 @@ object OptimizationPresets { AlwaysGoodOptimizations.RearrangeMath, AlwaysGoodOptimizations.RemoveNops, AlwaysGoodOptimizations.ReplacingArithmeticsWithBitOps, + AlwaysGoodOptimizations.ReuseIndex, AlwaysGoodOptimizations.ReverseFlowAnalysis, AlwaysGoodOptimizations.ShiftingJustWrittenValue, AlwaysGoodOptimizations.SimplifiableBitOpsSequence, @@ -280,6 +283,7 @@ object OptimizationPresets { AlwaysGoodOptimizations.ReverseFlowAnalysis, AlwaysGoodOptimizations.SimplifiableCondition, LaterOptimizations.DontUseIndexRegisters, + 
AlwaysGoodOptimizations.ReuseIndex, VariableToRegisterOptimization, TwoVariablesToIndexRegistersOptimization, AlwaysGoodOptimizations.PointlessLoadAfterLoadOrStore, diff --git a/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala b/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala index 120f11e8..c1762d8e 100644 --- a/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala +++ b/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala @@ -3186,4 +3186,42 @@ object AlwaysGoodOptimizations { }).flatten) ) + + /** When the same absolute/zero-page operand is loaded first into one index register and later into the other, switches the first load and the absolute-indexed accesses that follow it to the other register; the later load itself is kept unchanged. */ lazy val ReuseIndex = new RuleBasedAssemblyOptimization("Reuse right index register", + needsFlowInfo = FlowInfoRequirement.NoRequirement, + + (Elidable & HasOpcode(LDX) & MatchParameter(0) & HasAddrModeIn(Absolute, ZeroPage)) ~ + (Elidable & Linear & Not(ConcernsY) & + (Not(ConcernsX) | HasOpcodeIn(LDA, STA, LDA_W, STA_W, ADC, ADC_W, SBC, SBC_W, ORA, ORA_W, AND, AND_W, EOR, EOR_W, CMP, CMP_W) & HasAddrMode(AbsoluteX))).*.capture(1) ~ + ((Elidable & HasOpcode(LDY) & MatchParameter(0) & HasAddrModeIn(Absolute, ZeroPage))).capture(2) ~~> { (code, ctx) => + code.head.copy(opcode = LDY) :: (ctx.get[List[AssemblyLine]](1).map(l => if (l.addrMode == AbsoluteX) l.copy(addrMode = AbsoluteY) else l) ++ ctx.get[List[AssemblyLine]](2)) + }, + + (Elidable & HasOpcode(LDY) & MatchParameter(0) & HasAddrModeIn(Absolute, ZeroPage)) ~ + (Elidable & Linear & Not(ConcernsX) & + (Not(ConcernsY) | HasOpcodeIn(LDA, STA, LDA_W, STA_W, ADC, ADC_W, SBC, SBC_W, ORA, ORA_W, AND, AND_W, EOR, EOR_W, CMP, CMP_W) & HasAddrMode(AbsoluteY))).*.capture(1) ~ + ((Elidable & HasOpcode(LDX) & MatchParameter(0) & HasAddrModeIn(Absolute, ZeroPage))).capture(2) ~~> { (code, ctx) => + code.head.copy(opcode = LDX) :: (ctx.get[List[AssemblyLine]](1).map(l => if (l.addrMode == AbsoluteY) l.copy(addrMode = AbsoluteX) else l) ++ ctx.get[List[AssemblyLine]](2)) + }, + + (Elidable & HasOpcode(LDX) & MatchParameter(0) & HasAddrModeIn(Absolute, 
ZeroPage)) ~ + (Elidable & Linear & Not(ConcernsY) & + (Not(ConcernsX) | HasOpcodeIn(LDA, STA, LDA_W, STA_W, ADC, ADC_W, SBC, SBC_W, ORA, ORA_W, AND, AND_W, EOR, EOR_W, CMP, CMP_W) & HasAddrMode(AbsoluteX))).*.capture(1) ~ + ((HasOpcode(LDX) & Not(MatchParameter(0))) ~ + (Linear & Not(ConcernsY)).* ~ + (Elidable & HasOpcode(LDY) & MatchParameter(0) & HasAddrModeIn(Absolute, ZeroPage))).capture(2) ~~> { (code, ctx) => + code.head.copy(opcode = LDY) :: (ctx.get[List[AssemblyLine]](1).map(l => if (l.addrMode == AbsoluteX) l.copy(addrMode = AbsoluteY) else l) ++ ctx.get[List[AssemblyLine]](2)) + }, + + (Elidable & HasOpcode(LDY) & MatchParameter(0) & HasAddrModeIn(Absolute, ZeroPage)) ~ + (Elidable & Linear & Not(ConcernsX) & + (Not(ConcernsY) | HasOpcodeIn(LDA, STA, LDA_W, STA_W, ADC, ADC_W, SBC, SBC_W, ORA, ORA_W, AND, AND_W, EOR, EOR_W, CMP, CMP_W) & HasAddrMode(AbsoluteY))).*.capture(1) ~ + ((HasOpcode(LDY) & Not(MatchParameter(0))) ~ + (Linear & Not(ConcernsX)).* ~ /* the rewrite loads X at the very start, so nothing up to the final LDX may read or write X (mirror of the Not(ConcernsY) guard in the previous rule) */ + (Elidable & HasOpcode(LDX) & MatchParameter(0) & HasAddrModeIn(Absolute, ZeroPage))).capture(2) ~~> { (code, ctx) => + code.head.copy(opcode = LDX) :: (ctx.get[List[AssemblyLine]](1).map(l => if (l.addrMode == AbsoluteY) l.copy(addrMode = AbsoluteX) else l) ++ ctx.get[List[AssemblyLine]](2)) + }, + + + ) } diff --git a/src/main/scala/millfork/assembly/mos/opt/TwoVariablesToIndexRegistersOptimization.scala b/src/main/scala/millfork/assembly/mos/opt/TwoVariablesToIndexRegistersOptimization.scala index 17da6712..ad78dee2 100644 --- a/src/main/scala/millfork/assembly/mos/opt/TwoVariablesToIndexRegistersOptimization.scala +++ b/src/main/scala/millfork/assembly/mos/opt/TwoVariablesToIndexRegistersOptimization.scala @@ -74,8 +74,13 @@ object TwoVariablesToIndexRegistersOptimization extends AssemblyOptimization[Ass case AssemblyLine0(_, _, MemoryAddressConstant(th)) => Some(th.name) case _ => None }.toSet + if (stillUsedVariables.isEmpty) { + return code + } + val variablesWithAddressesTaken = code.flatMap { 
case AssemblyLine0(_, _, SubbyteConstant(MemoryAddressConstant(th), _)) => Some(th.name) + case AssemblyLine0(_, WordImmediate, MemoryAddressConstant(th)) => Some(th.name) case _ => None }.toSet val localVariables = f.environment.getAllLocalVariables.filter { @@ -83,6 +88,10 @@ object TwoVariablesToIndexRegistersOptimization extends AssemblyOptimization[Ass typ.size == 1 && !paramVariables(name) && stillUsedVariables(name) && !variablesWithAddressesTaken(name) && !v.isVolatile case _ => false } + if (localVariables.isEmpty) { + return code + } + val variablesWithRegisterHint = f.environment.getAllLocalVariables.filter { case v@MemoryVariable(name, typ, VariableAllocationMethod.Register) => typ.size == 1 && !paramVariables(name) && stillUsedVariables(name) && !variablesWithAddressesTaken(name) && !v.isVolatile @@ -318,32 +327,40 @@ object TwoVariablesToIndexRegistersOptimization extends AssemblyOptimization[Ass if th.name == vx => if (imp.z == Unimportant && imp.n == Unimportant) { tailcall(inlineVars(vx, vy, vx, loadedY, xs)) - } else { + } else if (imp.c == Unimportant) { tailcall(inlineVars(vx, vy, vx, loadedY, xs)).map(AssemblyLine.immediate(CPX, 0).pos(s) :: _) + } else { + tailcall(inlineVars(vx, vy, vx, loadedY, xs)).map(AssemblyLine.implied(INX).pos(s) :: AssemblyLine.implied(DEX).pos(s) :: _) } case (AssemblyLine(LDY, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), imp) :: xs if th.name == vx => if (imp.z == Unimportant && imp.n == Unimportant) { tailcall(inlineVars(vx, vy, loadedX, vx, xs)) - } else { + } else if (imp.c == Unimportant) { tailcall(inlineVars(vx, vy, loadedX, vx, xs)).map(AssemblyLine.immediate(CPX, 0).pos(s) :: _) + } else { + tailcall(inlineVars(vx, vy, loadedX, vx, xs)).map(AssemblyLine.implied(INX).pos(s) :: AssemblyLine.implied(DEX).pos(s) :: _) /* the elided LDY loaded vx, so loadedY must become vx exactly as in the two branches above; INX/DEX re-create N/Z without touching the carry */ } case (AssemblyLine(LDY, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), imp) :: xs if th.name == vy => if (imp.z == Unimportant && imp.n == Unimportant) { inlineVars(vx, vy, 
loadedX, vy, xs) - } else { + } else if (imp.c == Unimportant) { tailcall(inlineVars(vx, vy, loadedX, vy, xs)).map(AssemblyLine.immediate(CPY, 0).pos(s) :: _) + } else { + tailcall(inlineVars(vx, vy, loadedX, vy, xs)).map(AssemblyLine.implied(INY).pos(s) :: AssemblyLine.implied(DEY).pos(s) :: _) } case (AssemblyLine(LDX, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), imp) :: xs if th.name == vy => if (imp.z == Unimportant && imp.n == Unimportant) { inlineVars(vx, vy, vy, loadedY, xs) - } else { + } else if (imp.c == Unimportant) { tailcall(inlineVars(vx, vy, vy, loadedY, xs)).map(AssemblyLine.immediate(CPY, 0).pos(s) :: _) + } else { + tailcall(inlineVars(vx, vy, vy, loadedY, xs)).map(AssemblyLine.implied(INY).pos(s) :: AssemblyLine.implied(DEY).pos(s) :: _) } case (x@AssemblyLine(LDY, _, _, _, _), imp) :: xs => diff --git a/src/main/scala/millfork/assembly/mos/opt/VariableToRegisterOptimization.scala b/src/main/scala/millfork/assembly/mos/opt/VariableToRegisterOptimization.scala index dbb71067..c71b9a38 100644 --- a/src/main/scala/millfork/assembly/mos/opt/VariableToRegisterOptimization.scala +++ b/src/main/scala/millfork/assembly/mos/opt/VariableToRegisterOptimization.scala @@ -417,13 +417,33 @@ object VariableToRegisterOptimization extends AssemblyOptimization[AssemblyLine] case (AssemblyLine0(SEP | REP, _, _), _) :: xs => None - case (AssemblyLine0(STY | LDY, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: xs if th.name == vx => - if (features.indexRegisterTransfers) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2)) - else None + case (AssemblyLine0(STY | LDY, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: xs if th.name == vx && (features.indexRegisterTransfers) => + canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2)) - case (AssemblyLine0(STX | LDX, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: xs if th.name == 
vy => - if (features.indexRegisterTransfers) canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2)) - else None + case (AssemblyLine0(STX | LDX, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: xs if th.name == vy && (features.indexRegisterTransfers) => + canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2)) + + case (AssemblyLine0(LDY, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: + (AssemblyLine0(LDA | STA | ADC | SBC | ORA | EOR | AND | CMP, AbsoluteY, _), f) :: xs if th.name == vx && f.y == Unimportant => + canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2)) + + case (AssemblyLine0(LDX, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: + (AssemblyLine0(LDA | STA | ADC | SBC | ORA | EOR | AND | CMP, AbsoluteX, _), f) :: xs if th.name == vy && f.x == Unimportant => + canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2)) + + case (AssemblyLine0(LDY, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: + (AssemblyLine0(SEC | CLC, _, _), _) :: + (AssemblyLine0(LDA | STA | ADC | SBC | ORA | EOR | AND | CMP, AbsoluteY, _), f) :: xs if th.name == vx && f.y == Unimportant => + canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2)) + + case (AssemblyLine0(LDX, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: + (AssemblyLine0(SEC | CLC, _, _), _) :: + (AssemblyLine0(LDA | STA | ADC | SBC | ORA | EOR | AND | CMP, AbsoluteX, _), f) :: xs if th.name == vy && f.x == Unimportant => + canBeInlined(xCandidate, yCandidate, zCandidate, features, xs).map(_ + CyclesAndBytes(bytes = 2, cycles = 2)) + + case (AssemblyLine0(STY | LDY, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: xs if th.name == vx => None + + case (AssemblyLine0(STX | LDX, Absolute | ZeroPage, 
MemoryAddressConstant(th)), _) :: xs if th.name == vy => None case (AssemblyLine(op, Absolute | ZeroPage, MemoryAddressConstant(th), elidability, _), _) :: xs if opcodesIdentityTable(op) && features.blastProcessing => @@ -1016,13 +1036,54 @@ object VariableToRegisterOptimization extends AssemblyOptimization[AssemblyLine] tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.implied(TYA).pos(s) :: _) case (AssemblyLine(LDY, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), _) :: xs - if th.name == vx => + if th.name == vx && features.indexRegisterTransfers => tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.implied(TXY).pos(s) :: _) case (AssemblyLine(LDX, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), _) :: xs - if th.name == vy => + if th.name == vy && features.indexRegisterTransfers => tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.implied(TYX).pos(s) :: _) + case (l0@AssemblyLine0(LDY, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: + (l1@AssemblyLine0(LDA | STA | ADC | SBC | AND | ORA | EOR | CMP, AbsoluteY, _), f):: xs + if th.name == vx => + if (l1.opcode != STA || f.n == Unimportant && f.z == Unimportant) { + tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(l1.copy(addrMode = AbsoluteX) :: _) + } else if (f.c == Unimportant) { + tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.immediate(CPX, 0).pos(l0.source) :: l1.copy(addrMode = AbsoluteX) :: _) + } else { + tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.implied(INX).pos(l0.source) :: AssemblyLine.implied(DEX).pos(l0.source) :: l1.copy(addrMode = AbsoluteX) :: _) + } + + + case (l0@AssemblyLine0(LDX, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: + (l1@AssemblyLine0(LDA | STA | ADC | SBC | 
AND | ORA | EOR | CMP, AbsoluteX, _), f):: xs + if th.name == vy => + if (l1.opcode != STA || f.n == Unimportant && f.z == Unimportant) { + tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(l1.copy(addrMode = AbsoluteY) :: _) + } else if (f.c == Unimportant) { + tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.immediate(CPY, 0).pos(l0.source) :: l1.copy(addrMode = AbsoluteY) :: _) + } else { + tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.implied(INY).pos(l0.source) :: AssemblyLine.implied(DEY).pos(l0.source) :: l1.copy(addrMode = AbsoluteY) :: _) + } + + case (l0@AssemblyLine0(LDY, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: + (l5@AssemblyLine0(SEC | CLC, _, _), _) :: + (l1@AssemblyLine0(LDA | STA | ADC | SBC | AND | ORA | EOR | CMP, AbsoluteY, _), f):: xs + if th.name == vx => + tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(rest => if (l1.opcode != STA || f.n == Unimportant && f.z == Unimportant) l5 :: l1.copy(addrMode = AbsoluteX) :: rest else AssemblyLine.immediate(CPX, 0).pos(l0.source) :: l5 :: l1.copy(addrMode = AbsoluteX) :: rest) /* STA leaves the N/Z of the elided LDY visible; CPX re-creates them and the following SEC/CLC overwrites the carry it sets */ + + case (l0@AssemblyLine0(LDX, Absolute | ZeroPage, MemoryAddressConstant(th)), _) :: + (l5@AssemblyLine0(SEC | CLC, _, _), _) :: + (l1@AssemblyLine0(LDA | STA | ADC | SBC | AND | ORA | EOR | CMP, AbsoluteX, _), f):: xs + if th.name == vy => + tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(rest => if (l1.opcode != STA || f.n == Unimportant && f.z == Unimportant) l5 :: l1.copy(addrMode = AbsoluteY) :: rest else AssemblyLine.immediate(CPY, 0).pos(l0.source) :: l5 :: l1.copy(addrMode = AbsoluteY) :: rest) /* STA leaves the N/Z of the elided LDX visible; CPY re-creates them and the following SEC/CLC overwrites the carry it sets */ + + case (AssemblyLine(LDY, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), _) :: xs + if th.name == vx => features.log.fatal("Unexpected LDY") + + case (AssemblyLine(LDX, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), _) :: xs + if th.name == vy => features.log.fatal("Unexpected LDX") + case (AssemblyLine(LDA, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), _) :: xs if th.name == vz => tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.implied(TZA).pos(s) :: _) @@ -1071,13 
+1132,19 @@ object VariableToRegisterOptimization extends AssemblyOptimization[AssemblyLine] tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.implied(TYA).pos(s) :: _) case (AssemblyLine(STX, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), _) :: xs - if th.name == vy => + if th.name == vy && features.indexRegisterTransfers => tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.implied(TXY).pos(s) :: _) case (AssemblyLine(STY, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), _) :: xs - if th.name == vx => + if th.name == vx && features.indexRegisterTransfers => tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.implied(TYX).pos(s) :: _) + case (AssemblyLine(STX, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), _) :: xs + if th.name == vy => features.log.fatal("Unexpected STX") + + case (AssemblyLine(STY, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), _) :: xs + if th.name == vx => features.log.fatal("Unexpected STY") + case (AssemblyLine(STZ, Absolute | ZeroPage, MemoryAddressConstant(th), _, s), _) :: xs if th.name == vx => if (features.izIsAlwaysZero) tailcall(inlineVars(xCandidate, yCandidate, zCandidate, aCandidate, features, xs)).map(AssemblyLine.immediate(LDX, 0).pos(s) :: _)