From 8b086f6c2f8a041db35b501a74d44ceea8ea3f2b Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Sat, 6 Jan 2018 00:21:28 +0100 Subject: [PATCH] More optimisations --- .../scala/millfork/OptimizationPresets.scala | 2 + .../opt/AlwaysGoodOptimizations.scala | 34 +++++++++ .../assembly/opt/LaterOptimizations.scala | 45 ++++++++++-- src/main/scala/millfork/env/Constant.scala | 6 +- .../SecondAssemblyOptimizationSuite.scala | 70 +++++++++++++++++++ 5 files changed, 152 insertions(+), 5 deletions(-) create mode 100644 src/test/scala/millfork/test/SecondAssemblyOptimizationSuite.scala diff --git a/src/main/scala/millfork/OptimizationPresets.scala b/src/main/scala/millfork/OptimizationPresets.scala index daf0d387..6983429d 100644 --- a/src/main/scala/millfork/OptimizationPresets.scala +++ b/src/main/scala/millfork/OptimizationPresets.scala @@ -108,6 +108,7 @@ object OptimizationPresets { LaterOptimizations.UseYInsteadOfStack, LaterOptimizations.IndexSwitchingOptimization, LaterOptimizations.LoadingBranchesOptimization, + LaterOptimizations.IncreaseWithLimit, ) val Good: List[AssemblyOptimization] = List[AssemblyOptimization]( @@ -125,6 +126,7 @@ object OptimizationPresets { AlwaysGoodOptimizations.IndexSequenceOptimization, AlwaysGoodOptimizations.MathOperationOnTwoIdenticalMemoryOperands, AlwaysGoodOptimizations.ModificationOfJustWrittenValue, + AlwaysGoodOptimizations.OperationsAroundShifting, AlwaysGoodOptimizations.PoinlessFlagChange, AlwaysGoodOptimizations.PointlessLoadAfterLoadOrStore, AlwaysGoodOptimizations.PoinlessLoadBeforeAnotherLoad, diff --git a/src/main/scala/millfork/assembly/opt/AlwaysGoodOptimizations.scala b/src/main/scala/millfork/assembly/opt/AlwaysGoodOptimizations.scala index 66f62f86..34095b56 100644 --- a/src/main/scala/millfork/assembly/opt/AlwaysGoodOptimizations.scala +++ b/src/main/scala/millfork/assembly/opt/AlwaysGoodOptimizations.scala @@ -14,6 +14,7 @@ import millfork.env._ * * @author Karol Stasiak */ +//noinspection ZeroIndexToHead object AlwaysGoodOptimizations { val counter = new AtomicInteger(30000) @@ -851,4 +852,37 @@ object AlwaysGoodOptimizations { (Elidable & HasOpcode(LABEL) & HasCallerCount(0)) ~~> (_ => Nil) ) + val OperationsAroundShifting = new RuleBasedAssemblyOptimization("Operations around shifting", + needsFlowInfo = FlowInfoRequirement.BothFlows, + (Elidable & HasOpcode(CLC)).? ~ + (Elidable & HasOpcode(ADC) & HasClear(State.C) & HasClear(State.D) & MatchImmediate(1)) ~ + HasOpcode(ASL).+.capture(2) ~ + (Elidable & HasOpcode(CLC)) ~ + (Elidable & HasOpcode(ADC) & HasClear(State.D) & MatchImmediate(3) & DoesntMatterWhatItDoesWith(State.C, State.Z, State.N)) ~~> {(code, ctx) => + val shifts = ctx.get[List[AssemblyLine]](2) + val const = ctx.get[Constant](1).asl(shifts.length) + ctx.get[Constant](3) + shifts ++ List(AssemblyLine.implied(CLC), AssemblyLine.immediate(ADC, const)) + }, + (Elidable & HasOpcode(AND) & MatchImmediate(1)) ~ + HasOpcode(ASL).+.capture(2) ~ + (Elidable & HasOpcode(AND) & MatchImmediate(3) & DoesntMatterWhatItDoesWith(State.C, State.Z, State.N)) ~~> {(code, ctx) => + val shifts = ctx.get[List[AssemblyLine]](2) + val const = CompoundConstant(MathOperator.And, ctx.get[Constant](1).asl(shifts.length), ctx.get[Constant](3)).quickSimplify + shifts :+ AssemblyLine.immediate(AND, const) + }, + (Elidable & HasOpcode(EOR) & MatchImmediate(1)) ~ + HasOpcode(ASL).+.capture(2) ~ + (Elidable & HasOpcode(EOR) & MatchImmediate(3) & DoesntMatterWhatItDoesWith(State.C, State.Z, State.N)) ~~> {(code, ctx) => + val shifts = ctx.get[List[AssemblyLine]](2) + val const = CompoundConstant(MathOperator.Exor, ctx.get[Constant](1).asl(shifts.length), ctx.get[Constant](3)).quickSimplify + shifts :+ AssemblyLine.immediate(EOR, const) + }, + (Elidable & HasOpcode(ORA) & MatchImmediate(1)) ~ + HasOpcode(ASL).+.capture(2) ~ + (Elidable & HasOpcode(ORA) & MatchImmediate(3) & DoesntMatterWhatItDoesWith(State.C, State.Z, State.N)) ~~> {(code, ctx) => + val shifts = ctx.get[List[AssemblyLine]](2) + val const = CompoundConstant(MathOperator.Or, ctx.get[Constant](1).asl(shifts.length), ctx.get[Constant](3)).quickSimplify + shifts :+ AssemblyLine.immediate(ORA, const) + }, + ) } diff --git a/src/main/scala/millfork/assembly/opt/LaterOptimizations.scala b/src/main/scala/millfork/assembly/opt/LaterOptimizations.scala index baab0892..bfd70ad1 100644 --- a/src/main/scala/millfork/assembly/opt/LaterOptimizations.scala +++ b/src/main/scala/millfork/assembly/opt/LaterOptimizations.scala @@ -11,6 +11,7 @@ import millfork.env.{Constant, NormalFunction, NumericConstant} * * @author Karol Stasiak */ +//noinspection ZeroIndexToHead object LaterOptimizations { @@ -275,7 +276,7 @@ object LaterOptimizations { val LoadingBranchesOptimization = new RuleBasedAssemblyOptimization("Loading branches optimization", needsFlowInfo = FlowInfoRequirement.BackwardFlow, (Elidable & HasOpcode(LDA) & HasAddrModeIn(LdxAddrModes) & DoesntMatterWhatItDoesWith(State.X)) ~ - (Linear & Not(ConcernsX) & Not(ChangesA) & Not(HasOpcode(CMP)) & (Not(ReadsA) | Elidable & HasOpcode(STA) & HasAddrModeIn(StxAddrModes)) ).*.capture(39) ~ + (Linear & Not(ConcernsX) & Not(ChangesA) & Not(HasOpcode(CMP)) & (Not(ReadsA) | Elidable & HasOpcode(STA) & HasAddrModeIn(StxAddrModes))).*.capture(39) ~ (Elidable & HasOpcode(CMP) & HasAddrModeIn(CpxyAddrModes)).?.capture(40) ~ (Elidable & HasOpcodeIn(OpcodeClasses.ShortConditionalBranching) & MatchParameter(22)).capture(41) ~ (Elidable & HasOpcode(LDA)).capture(31) ~ @@ -284,7 +285,7 @@ object LaterOptimizations { (Elidable & HasOpcode(LDA)).capture(32) ~ (Elidable & HasOpcode(LABEL) & MatchParameter(21) & HasCallerCount(1) & DoesntMatterWhatItDoesWith(State.A, State.X, State.N, State.Z)) ~~> { (code, ctx) => val ldx = List(code.head.copy(opcode = LDX)) - val stx = ctx.get[List[AssemblyLine]](39).map(l => if (l.opcode == STA) l.copy(opcode = STX) else l ) + val stx = ctx.get[List[AssemblyLine]](39).map(l => if (l.opcode == STA) l.copy(opcode = STX) else l) val cpx = ctx.get[List[AssemblyLine]](40).map(_.copy(opcode = CPX)) val branch = ctx.get[List[AssemblyLine]](41) val label = ctx.get[List[AssemblyLine]](42) @@ -293,7 +294,7 @@ object LaterOptimizations { List(loadIfJumped, ldx, stx, cpx, branch, loadIfNotJumped, label).flatten }, (Elidable & HasOpcode(LDA) & HasAddrModeIn(LdyAddrModes) & DoesntMatterWhatItDoesWith(State.Y)) ~ - (Linear & Not(ConcernsY) & Not(ChangesA) & Not(HasOpcode(CMP)) & (Not(ReadsA) | Elidable & HasOpcode(STA) & HasAddrModeIn(StyAddrModes)) ).*.capture(39) ~ + (Linear & Not(ConcernsY) & Not(ChangesA) & Not(HasOpcode(CMP)) & (Not(ReadsA) | Elidable & HasOpcode(STA) & HasAddrModeIn(StyAddrModes))).*.capture(39) ~ (Elidable & HasOpcode(CMP) & HasAddrModeIn(CpxyAddrModes)).?.capture(40) ~ (Elidable & HasOpcodeIn(OpcodeClasses.ShortConditionalBranching) & MatchParameter(22)).capture(41) ~ (Elidable & HasOpcode(LDA)).capture(31) ~ @@ -302,7 +303,7 @@ object LaterOptimizations { (Elidable & HasOpcode(LDA)).capture(32) ~ (Elidable & HasOpcode(LABEL) & MatchParameter(21) & HasCallerCount(1) & DoesntMatterWhatItDoesWith(State.A, State.Y, State.N, State.Z)) ~~> { (code, ctx) => val ldy = List(code.head.copy(opcode = LDY)) - val sty = ctx.get[List[AssemblyLine]](39).map(l => if (l.opcode == STA) l.copy(opcode = STY) else l ) + val sty = ctx.get[List[AssemblyLine]](39).map(l => if (l.opcode == STA) l.copy(opcode = STY) else l) val cpy = ctx.get[List[AssemblyLine]](40).map(_.copy(opcode = CPY)) val branch = ctx.get[List[AssemblyLine]](41) val label = ctx.get[List[AssemblyLine]](42) @@ -348,6 +349,42 @@ object LaterOptimizations { }, ) + val IncreaseWithLimit = new RuleBasedAssemblyOptimization("Increase with a limit", + needsFlowInfo = FlowInfoRequirement.BackwardFlow, + (Elidable & HasOpcode(INC) & HasAddrModeIn(Set(Absolute, ZeroPage)) & MatchParameter(0)) ~ + (Elidable & HasOpcode(LDA) & HasAddrModeIn(Set(Absolute, ZeroPage)) & MatchParameter(0)) ~ + (Elidable & HasOpcode(CMP) & HasAddrModeIn(CpxyAddrModes)) ~ + (HasOpcode(BNE) & MatchParameter(14831)) ~ + (Elidable & HasOpcode(LDA) & HasAddrModeIn(LdyAddrModes)) ~ + (Elidable & HasOpcode(STA) & HasAddrModeIn(Set(Absolute, ZeroPage)) & MatchParameter(0) & DoesntMatterWhatItDoesWith(State.A, State.Y)) ~ + (HasOpcode(LABEL) & MatchParameter(14831)) ~~> { code => + List( + code(1).copy(opcode = LDY), + AssemblyLine.implied(INY), + code(2).copy(opcode = CPY), + code(3), + code(4).copy(opcode = LDY), + code(6), + code(5).copy(opcode = STY)) + }, + (Elidable & HasOpcode(INC) & HasAddrModeIn(Set(Absolute, ZeroPage)) & MatchParameter(0)) ~ + (Elidable & HasOpcode(LDA) & HasAddrModeIn(Set(Absolute, ZeroPage)) & MatchParameter(0)) ~ + (Elidable & HasOpcode(CMP) & HasAddrModeIn(CpxyAddrModes)) ~ + (HasOpcode(BNE) & MatchParameter(14831)) ~ + (Elidable & HasOpcode(LDA) & HasAddrModeIn(LdxAddrModes)) ~ + (Elidable & HasOpcode(STA) & HasAddrModeIn(Set(Absolute, ZeroPage)) & MatchParameter(0) & DoesntMatterWhatItDoesWith(State.A, State.X)) ~ + (HasOpcode(LABEL) & MatchParameter(14831)) ~~> { code => + List( + code(1).copy(opcode = LDX), + AssemblyLine.implied(INX), + code(2).copy(opcode = CPX), + code(3), + code(4).copy(opcode = LDX), + code(6), + code(5).copy(opcode = STX)) + }, + ) + val All = List( DoubleLoadToDifferentRegisters, DoubleLoadToTheSameRegister, diff --git a/src/main/scala/millfork/env/Constant.scala b/src/main/scala/millfork/env/Constant.scala index fddaf26d..1faac691 100644 --- a/src/main/scala/millfork/env/Constant.scala +++ b/src/main/scala/millfork/env/Constant.scala @@ -77,7 +77,11 @@ case class NumericConstant(value: Long, requiredSize: Int) extends Constant { override def isLowestByteAlwaysEqual(i: Int) : Boolean = (value & 0xff) == (i&0xff) - override def asl(i: Int) = NumericConstant(value << i, requiredSize + i / 8) + override def asl(i: Int): Constant = { + val newSize = requiredSize + i / 8 + val mask = (1 << (8 * newSize)) - 1 + NumericConstant((value << i) & mask, newSize) + } override def +(that: Constant): Constant = that + value diff --git a/src/test/scala/millfork/test/SecondAssemblyOptimizationSuite.scala b/src/test/scala/millfork/test/SecondAssemblyOptimizationSuite.scala new file mode 100644 index 00000000..f0caeede --- /dev/null +++ b/src/test/scala/millfork/test/SecondAssemblyOptimizationSuite.scala @@ -0,0 +1,70 @@ +package millfork.test + +import millfork.assembly.opt.{AlwaysGoodOptimizations, LaterOptimizations, VariableToRegisterOptimization} +import millfork.test.emu.{EmuBenchmarkRun, EmuRun, EmuUltraBenchmarkRun} +import millfork.{Cpu, OptimizationPresets} +import org.scalatest.{FunSuite, Matchers} + +/** + * @author Karol Stasiak + */ +class SecondAssemblyOptimizationSuite extends FunSuite with Matchers { + + test("Add-shift-add") { + EmuBenchmarkRun( + """ + | byte output @$c000 + | void main () { + | byte a + | a = two() + | output = ((a + 3) << 2) + 9 + | } + | byte two() { return 2 } + """.stripMargin) { m => m.readByte(0xc000) should equal(29) } + } + + test("And-shift-and") { + EmuBenchmarkRun( + """ + | byte output @$c000 + | void main () { + | byte a + | a = ee() + | output = ((a & $dd) << 1) & $55 + | } + | byte ee() { return $ee } + """.stripMargin) { m => m.readByte(0xc000) should equal(0x10) } + } + + test("Add with limit") { + EmuBenchmarkRun( + """ + | byte output @$c000 + | const byte start = 5 + | const byte limit = 234 + | void main () { + | output += 1 + | if output == limit { + | output = start + | } + | } + """.stripMargin) { m => m.readByte(0xc000) should equal(1) } + } + + test("User register instead of stack") { + EmuBenchmarkRun( + """ + | array output [4] @$c000 + | void main () { + | output[0] = double(2) + | } + | asm byte double(byte a) { + | ? asl + | ? pha + | lda output + | ? pla + | ? rts + | } + """.stripMargin) { m => m.readByte(0xc000) should equal(4) } + } +}