diff --git a/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala b/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala index 1c820c08..be87a1c0 100644 --- a/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala +++ b/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala @@ -8,6 +8,7 @@ import millfork.assembly.mos.OpcodeClasses._ import millfork.assembly.mos.{AddrMode, opt, _} import millfork.assembly.mos.AddrMode._ import millfork.env._ +import millfork.error.FatalErrorReporting /** * These optimizations should not remove opportunities for more complex optimizations to trigger. @@ -1598,18 +1599,18 @@ object AlwaysGoodOptimizations { val originalStart = if (shiftBeforeStore) { (Elidable & HasOpcode(ASL) & HasAddrMode(Implied)) ~ - (Elidable & HasOpcode(STA) & HasAddrMode(Absolute) & MatchParameter(0)) ~ + (Elidable & HasOpcode(STA) & HasAddrModeIn(Absolute, ZeroPage) & MatchParameter(0)) ~ (Elidable & HasOpcode(LDA) & HasImmediate(0)) ~ (Elidable & HasOpcode(ROL) & HasAddrMode(Implied)) ~ - (Elidable & HasOpcode(STA) & HasAddrMode(Absolute) & MatchParameter(1)) + (Elidable & HasOpcode(STA) & HasAddrModeIn(Absolute, ZeroPage) & MatchParameter(1)) } else { - (Elidable & HasOpcode(STA) & HasAddrMode(Absolute) & MatchParameter(0)) ~ + (Elidable & HasOpcode(STA) & HasAddrModeIn(Absolute, ZeroPage) & MatchParameter(0)) ~ (Elidable & HasOpcode(LDA) & HasImmediate(0)) ~ - (Elidable & HasOpcode(STA) & HasAddrMode(Absolute) & MatchParameter(1)) + (Elidable & HasOpcode(STA) & HasAddrModeIn(Absolute, ZeroPage) & MatchParameter(1)) } val shifting = (0 until shiftAmountAfterStore).map(_ => - (Elidable & HasOpcode(ASL) & HasAddrMode(Absolute) & MatchParameter(0)) ~ - (Elidable & HasOpcode(ROL) & HasAddrMode(Absolute) & MatchParameter(1) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.A)) + (Elidable & HasOpcode(ASL) & HasAddrModeIn(Absolute, ZeroPage) & MatchParameter(0)) ~ + (Elidable & HasOpcode(ROL) & HasAddrModeIn(Absolute, ZeroPage) & MatchParameter(1) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.A)) ).reduce(_ ~ _) val rightShiftCount = 8 - (if (shiftBeforeStore) shiftAmountAfterStore + 1 else shiftAmountAfterStore) @@ -2053,6 +2054,24 @@ object AlwaysGoodOptimizations { (Elidable & HasOpcodeIn(ORA, EOR) & HasImmediate(1)) ~~> { code => List(AssemblyLine.implied(SEC), code.head.copy(opcode = ROL)) }, + + (Elidable & HasOpcode(BIT) & DoesntMatterWhatItDoesWith(State.V, State.C, State.Z) & HasAddrModeIn(Absolute, ZeroPage) & MatchAddrMode(2) & MatchParameter(3)) ~ + DebugMatching ~ + (Elidable & HasOpcodeIn(BPL, BMI) & MatchParameter(1)) ~ + DebugMatching ~ + (Linear & DoesNotConcernMemoryAt(2, 3)).* ~ + (Elidable & HasOpcode(LABEL) & MatchParameter(1) & HasCallerCount(1)) ~ + (Elidable & HasOpcode(ASL) & MatchAddrMode(2) & MatchParameter(3)) ~~> { code => + List(code.last, remapN2C(code(1))) ++ (code.drop(2).init) + }, + + (Elidable & HasOpcode(LDA) & DoesntMatterWhatItDoesWith(State.A, State.C, State.Z) & HasAddrModeIn(Absolute, ZeroPage) & MatchAddrMode(2) & MatchParameter(3)) ~ + (Elidable & HasOpcodeIn(BPL, BMI) & MatchParameter(1)) ~ + (Linear & DoesNotConcernMemoryAt(2, 3) & Not(ConcernsA)).* ~ + (Elidable & HasOpcode(LABEL) & MatchParameter(1) & HasCallerCount(1)) ~ + (Elidable & HasOpcode(ASL) & MatchAddrMode(2) & MatchParameter(3)) ~~> { code => + List(code.last, remapN2C(code(1))) ++ (code.drop(2) :+ code.init.last) + }, ) private def blockIsIdempotentWhenItComesToIndexRegisters(i: Int) = Where(ctx => { @@ -2233,18 +2252,39 @@ object AlwaysGoodOptimizations { }, ) - private def remapZ2N(line: AssemblyLine) = line.opcode match { + private def remapZ2N(line: AssemblyLine): AssemblyLine = line.opcode match { case BNE => line.copy(opcode = BMI) case BEQ => line.copy(opcode = BPL) + case _ => FatalErrorReporting.reportFlyingPig(s"Tried to treat ${line.opcode} as a branch on Z") } - private def remapZ2V(line: AssemblyLine) = line.opcode match { + + private def remapC2N(line: AssemblyLine): AssemblyLine = line.opcode match { + case BCS => line.copy(opcode = BMI) + case BCC => line.copy(opcode = BPL) + case _ => FatalErrorReporting.reportFlyingPig(s"Tried to treat ${line.opcode} as a branch on C") + } + + private def remapN2C(line: AssemblyLine): AssemblyLine = line.opcode match { + case BMI => line.copy(opcode = BCS) + case BPL => line.copy(opcode = BCC) + case _ => FatalErrorReporting.reportFlyingPig(s"Tried to treat ${line.opcode} as a branch on N") + } + + private def remapZ2CInverse(line: AssemblyLine): AssemblyLine = line.opcode match { + case BNE => line.copy(opcode = BCS) + case BEQ => line.copy(opcode = BCC) + case _ => FatalErrorReporting.reportFlyingPig(s"Tried to treat ${line.opcode} as a branch on C") + } + + private def remapZ2V(line: AssemblyLine): AssemblyLine = line.opcode match { case BNE => line.copy(opcode = BVS) case BEQ => line.copy(opcode = BVC) + case _ => FatalErrorReporting.reportFlyingPig(s"Tried to treat ${line.opcode} as a branch on Z") } val SimplifiableCondition = new RuleBasedAssemblyOptimization("Simplifiable condition", needsFlowInfo = FlowInfoRequirement.BackwardFlow, - HasOpcode(LDA) ~ + HasOpcodeIn(LDA, TXA, TYA, TZA, ADC, SBC, AND, ORA, EOR) ~ (Elidable & HasOpcode(AND) & HasImmediate(0x80)) ~ (Elidable & HasOpcodeIn(BNE, BEQ) & DoesntMatterWhatItDoesWith(State.A, State.N, State.Z)) ~~> {code => List(code(0), remapZ2N(code(2))) @@ -2254,6 +2294,16 @@ object AlwaysGoodOptimizations { (Elidable & HasOpcodeIn(BNE, BEQ) & DoesntMatterWhatItDoesWith(State.A, State.N, State.Z)) ~~> {code => List(code(1).copy(opcode = LDA), remapZ2N(code(2))) }, + HasOpcodeIn(LDA, TXA, TYA, TZA, ADC, SBC, AND, ORA, EOR) ~ + (Elidable & HasOpcode(ASL)) ~ + (Elidable & HasOpcodeIn(BCS, BCC) & DoesntMatterWhatItDoesWith(State.A, State.N, State.C, State.Z)) ~~> {code => + List(code(0), remapC2N(code(2))) + }, + (HasOpcodeIn(LDA, AND) & HasImmediate(0)) ~ + (Elidable & HasOpcode(ROL)) ~ + (Elidable & HasOpcodeIn(BEQ, BNE) & DoesntMatterWhatItDoesWith(State.A, State.N, State.C, State.Z)) ~~> {code => + List(code(0), remapZ2CInverse(code(2))) + }, ) val PointlessSignCheck: RuleBasedAssemblyOptimization = { diff --git a/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala b/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala index d1ea432b..a2e5f7e2 100644 --- a/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala +++ b/src/main/scala/millfork/assembly/mos/opt/RuleBasedAssemblyOptimization.scala @@ -824,7 +824,7 @@ case object ReadsY extends TrivialAssemblyLinePattern { case object ConcernsC extends TrivialAssemblyLinePattern { override def apply(line: AssemblyLine): Boolean = - OpcodeClasses.ReadsC(line.opcode) && OpcodeClasses.ChangesC(line.opcode) + OpcodeClasses.ReadsC(line.opcode) || OpcodeClasses.ChangesC(line.opcode) } case object ConcernsA extends TrivialAssemblyLinePattern { diff --git a/src/main/scala/millfork/assembly/mos/opt/ZeropageRegisterOptimizations.scala b/src/main/scala/millfork/assembly/mos/opt/ZeropageRegisterOptimizations.scala index 5285fa54..7d492aef 100644 --- a/src/main/scala/millfork/assembly/mos/opt/ZeropageRegisterOptimizations.scala +++ b/src/main/scala/millfork/assembly/mos/opt/ZeropageRegisterOptimizations.scala @@ -307,6 +307,33 @@ object ZeropageRegisterOptimizations { }) ) + val SimplifiableAddingOfOneBit = new RuleBasedAssemblyOptimization("Simplifiable adding of one bit", + needsFlowInfo = FlowInfoRequirement.BothFlows, + (Elidable & HasOpcode(AND) & HasImmediate(1)) ~ + (Elidable & HasOpcode(STA) & RefersTo("__reg", 0) & MatchAddrMode(0) & MatchParameter(1) & DoesntMatterWhatItDoesWith(State.A, State.N, State.Z, State.C)) ~ + (Linear & Not(ConcernsC) & DoesNotConcernMemoryAt(0, 1)).*.capture(5) ~ + (Elidable & HasOpcode(ASL) & HasAddrMode(Implied)) ~ + (Elidable & HasOpcode(CLC)) ~ + (Elidable & HasClear(State.D) & HasOpcode(ADC) & MatchAddrMode(0) & MatchParameter(1) & DoesntMatterWhatItDoesWith(State.V)) ~~> { (code, ctx) => + AssemblyLine.implied(ROR) :: (ctx.get[List[AssemblyLine]](5) :+ AssemblyLine.implied(ROL)) + }, + (Elidable & HasOpcode(ASL) & HasAddrMode(Implied)) ~ + (Elidable & HasOpcode(STA) & RefersTo("__reg", 0) & MatchAddrMode(0) & MatchParameter(1) & DoesntMatterWhatItDoesWith(State.A, State.N, State.Z, State.C)) ~ + (Linear & Not(HasOpcode(AND)) & Not(ConcernsC) & DoesNotConcernMemoryAt(0, 1)).*.capture(5) ~ + (Elidable & HasOpcode(AND) & HasImmediate(1)) ~ + (Elidable & HasOpcode(CLC)) ~ + (Elidable & HasClear(State.D) & HasOpcode(ADC) & MatchAddrMode(0) & MatchParameter(1) & DoesntMatterWhatItDoesWith(State.V)) ~~> { (code, ctx) => + code(1) :: (ctx.get[List[AssemblyLine]](5) ++ List(AssemblyLine.implied(ROR), code(1).copy(opcode = LDA), AssemblyLine.implied(ROL))) + }, + (Elidable & HasOpcode(ASL) & HasAddrMode(Implied)) ~ + (Elidable & HasOpcode(STA) & RefersTo("__reg", 0) & MatchAddrMode(0) & MatchParameter(1) & DoesntMatterWhatItDoesWith(State.A, State.N, State.Z, State.C)) ~ + (Linear & Not(HasOpcode(ANC)) & Not(ConcernsC) & DoesNotConcernMemoryAt(0, 1)).*.capture(5) ~ + (Elidable & HasOpcode(ANC) & HasImmediate(1)) ~ + (Elidable & HasClear(State.D) & HasOpcode(ADC) & MatchAddrMode(0) & MatchParameter(1) & DoesntMatterWhatItDoesWith(State.V)) ~~> { (code, ctx) => + code(1) :: (ctx.get[List[AssemblyLine]](5) ++ List(AssemblyLine.implied(ROR), code(1).copy(opcode = LDA), AssemblyLine.implied(ROL))) + }, + ) + val All: List[AssemblyOptimization[AssemblyLine]] = List( ConstantDecimalMath, ConstantMultiplication, @@ -315,6 +342,7 @@ object ZeropageRegisterOptimizations { DeadRegStore, DeadRegStoreFromFlow, PointlessLoad, + SimplifiableAddingOfOneBit, StashInRegInsteadOfStack, ) diff --git a/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala b/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala index 9beadede..256032f5 100644 --- a/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala +++ b/src/test/scala/millfork/test/AssemblyOptimizationSuite.scala @@ -562,4 +562,55 @@ class AssemblyOptimizationSuite extends FunSuite with Matchers { } } + test("Shift, mask and increase 1") { + EmuCrossPlatformBenchmarkRun(Cpu.Mos)( + """ + | byte output @$c000 + | void main() { + | output = twicePlusLowBit(5, 5) + | } + | noinline byte twicePlusLowBit (byte x, byte y) { + | return (y & 1) + x * 2 + | } + """.stripMargin + ) { m => + m.readByte(0xc000) should equal(11) + } + } + + test("Shift, mask and increase 2") { + EmuCrossPlatformBenchmarkRun(Cpu.Mos)( + """ + | byte output @$c000 + | void main() { + | output = twicePlusLowBit(5, 5) + | } + | noinline byte twicePlusLowBit (byte x, byte y) { + | return x * 2 + (y & 1) + | } + """.stripMargin + ) { m => + m.readByte(0xc000) should equal(11) + } + } + + test("Shift, mask, increase and test") { + EmuCrossPlatformBenchmarkRun(Cpu.Mos)( + """ + | byte output @$c000 + | void main() { + | stuff(0, 4) + | } + | inline void stuff (byte x, byte y) { + | if ((f(y) & 1) + x * 2) == 0 { + | output = 11 + | } + | } + | noinline byte f(byte y) { return y } + """.stripMargin + ) { m => + m.readByte(0xc000) should equal(11) + } + } + } diff --git a/src/test/scala/millfork/test/BitPackingSuite.scala b/src/test/scala/millfork/test/BitPackingSuite.scala index 9bdf4b23..5246f05c 100644 --- a/src/test/scala/millfork/test/BitPackingSuite.scala +++ b/src/test/scala/millfork/test/BitPackingSuite.scala @@ -39,6 +39,26 @@ class BitPackingSuite extends FunSuite with Matchers { } } + test("Population count") { + EmuCrossPlatformBenchmarkRun(Cpu.Mos)(""" + | byte output @$c000 + | noinline byte popcnt(byte x) { + | byte result + | result = 0 + | while x != 0 { + | if x & $80 != 0 { result += 1 } + | x <<= 1 + | } + | return result + | } + | void main () { + | output = popcnt(5) + | } + """.stripMargin){m => + m.readByte(0xc000) should equal(2) + } + } + test("Unpack bits from a word") { EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp)(""" | array output[16]