From 943a56a30f0a44bc98c88985c1800f789b39503f Mon Sep 17 00:00:00 2001
From: Karol Stasiak
Date: Tue, 30 Jul 2019 14:47:52 +0200
Subject: [PATCH] 6502: Fix some cases of word additions; optimize bitmasking

---
 .../scala/millfork/OptimizationPresets.scala  |   2 +
 .../mos/opt/AlwaysGoodOptimizations.scala     | 155 +++++++++++++++++-
 .../compiler/mos/PseudoregisterBuiltIns.scala |  33 ++++-
 src/test/scala/millfork/test/BitOpSuite.scala |  17 ++
 .../scala/millfork/test/BooleanSuite.scala    |  17 ++
 .../scala/millfork/test/WordMathSuite.scala   |  17 ++
 6 files changed, 233 insertions(+), 8 deletions(-)

diff --git a/src/main/scala/millfork/OptimizationPresets.scala b/src/main/scala/millfork/OptimizationPresets.scala
index 52b5c60a..ff3a2bb8 100644
--- a/src/main/scala/millfork/OptimizationPresets.scala
+++ b/src/main/scala/millfork/OptimizationPresets.scala
@@ -32,6 +32,7 @@ object OptimizationPresets {
       AlwaysGoodOptimizations.IdempotentDuplicateRemoval,
       AlwaysGoodOptimizations.BranchInPlaceRemoval,
       UnusedLabelRemoval,
+      AlwaysGoodOptimizations.OptimizableMasking,
       AlwaysGoodOptimizations.UnconditionalJumpRemoval,
       UnusedLabelRemoval,
       AlwaysGoodOptimizations.RearrangeMath,
@@ -212,6 +213,7 @@ object OptimizationPresets {
     AlwaysGoodOptimizations.NonetAddition,
     AlwaysGoodOptimizations.NonetBitOp,
     AlwaysGoodOptimizations.OperationsAroundShifting,
+    AlwaysGoodOptimizations.OptimizableMasking,
     AlwaysGoodOptimizations.OptimizeZeroComparisons,
     AlwaysGoodOptimizations.PoinlessFlagChange,
     AlwaysGoodOptimizations.PointlessAccumulatorShifting,
diff --git a/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala b/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala
index 6ad36c3e..ddac0fdb 100644
--- a/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala
+++ b/src/main/scala/millfork/assembly/mos/opt/AlwaysGoodOptimizations.scala
@@ -2849,7 +2849,7 @@ object AlwaysGoodOptimizations {
     },
   )
 
-  val ConstantInlinedShifting = new RuleBasedAssemblyOptimization("Constant inlined shifting",
+  lazy val ConstantInlinedShifting = new RuleBasedAssemblyOptimization("Constant inlined shifting",
     needsFlowInfo = FlowInfoRequirement.BothFlows,
 
     // TODO: set limits on the loop iteration to avoid huge unrolled code
@@ -2874,7 +2874,7 @@ object AlwaysGoodOptimizations {
 
   )
 
-  val SimplifiableComparison = new RuleBasedAssemblyOptimization("Simplifiable comparison",
+  lazy val SimplifiableComparison = new RuleBasedAssemblyOptimization("Simplifiable comparison",
     needsFlowInfo = FlowInfoRequirement.BackwardFlow,
 
     (Elidable & HasOpcode(LDA)) ~
@@ -2909,4 +2909,155 @@ object AlwaysGoodOptimizations {
     },
   )
 
+
+  // (mask, bit index) pairs for the single-bit masks handled by OptimizableMasking.
+  // NOTE(review): bit 7 (128) is not listed -- presumably on purpose, because the rules accept BPL
+  // as an always-taken branch after loading a mask, which would not hold for 128; confirm.
+  private val powersOf2: List[(Int, Int)] = List(
+    1 -> 0,
+    2 -> 1,
+    4 -> 2,
+    8 -> 3,
+    16 -> 4,
+    32 -> 5,
+    64 -> 6
+  )
+
+  // Turns "AND #sourceMask, branch, load 0 or #targetMask" into AND (plus EOR #sourceMask when
+  // the result is inverted) followed by at most one ASL/LSR, and TAY/TAX for index registers.
+  // The branch that skips the second load has to be always taken after the first load:
+  // BNE only after loading the non-zero targetMask, BEQ only after loading 0;
+  // JMP, BRA and BPL are accepted in both cases.
+  lazy val OptimizableMasking = new RuleBasedAssemblyOptimization("Simplifiable masking",
+    needsFlowInfo = FlowInfoRequirement.BackwardFlow,
+
+    MultipleAssemblyRules((for{
+      (sourceMask, sourceShift) <- powersOf2
+      (targetMask, targetShift) <- powersOf2
+      shiftOp = if (sourceShift > targetShift) LSR else ASL
+      shift = if (sourceShift > targetShift) sourceShift - targetShift else targetShift - sourceShift
+      if shift < 2
+    } yield {
+      List(
+
+        // LDA:
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDA) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcodeIn(JMP, BNE, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDA) & HasImmediate(0)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C)) ~~>
+          (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source))),
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDA) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcodeIn(JMP, BNE, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDA) & HasImmediate(0)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C)) ~~>
+          (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source))),
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDA) & HasImmediate(0)) ~
+          (Elidable & HasOpcodeIn(JMP, BEQ, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDA) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C)) ~~>
+          (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source))),
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDA) & HasImmediate(0)) ~
+          (Elidable & HasOpcodeIn(JMP, BEQ, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDA) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C)) ~~>
+          (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source))),
+
+        // LDY
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDY) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcodeIn(JMP, BNE, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & (HasOpcode(LDY) & HasImmediate(0) | HasOpcode(TAY))) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
+          (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAY))),
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDY) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcodeIn(JMP, BNE, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDY) & HasImmediate(0)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
+          (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAY))),
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDY) & HasImmediate(0)) ~
+          (Elidable & HasOpcodeIn(JMP, BEQ, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDY) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
+          (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAY))),
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
+          (Elidable & (HasOpcode(LDY) & HasImmediate(0) | HasOpcode(TAY))) ~
+          (Elidable & HasOpcodeIn(JMP, BEQ, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDY) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
+          (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAY))),
+
+
+        // LDX
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDX) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcodeIn(JMP, BNE, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & (HasOpcode(LDX) & HasImmediate(0) | HasOpcode(TAX))) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
+          (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAX))),
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDX) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcodeIn(JMP, BNE, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDX) & HasImmediate(0)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
+          (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAX))),
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDX) & HasImmediate(0)) ~
+          (Elidable & HasOpcodeIn(JMP, BEQ, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDX) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
+          (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAX))),
+
+        (HasOpcode(AND) & HasImmediate(sourceMask)) ~
+          (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
+          (Elidable & (HasOpcode(LDX) & HasImmediate(0) | HasOpcode(TAX))) ~
+          (Elidable & HasOpcodeIn(JMP, BEQ, BRA, BPL) & MatchParameter(11)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
+          (Elidable & HasOpcode(LDX) & HasImmediate(targetMask)) ~
+          (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
+          (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAX)))
+
+
+      )
+    }).flatten)
+
+  )
 }
diff --git a/src/main/scala/millfork/compiler/mos/PseudoregisterBuiltIns.scala b/src/main/scala/millfork/compiler/mos/PseudoregisterBuiltIns.scala
index 44cb0e56..a5e2221e 100644
--- a/src/main/scala/millfork/compiler/mos/PseudoregisterBuiltIns.scala
+++ b/src/main/scala/millfork/compiler/mos/PseudoregisterBuiltIns.scala
@@ -80,6 +80,8 @@ object PseudoregisterBuiltIns {
           niceReads.prepend(List(AssemblyLine.immediate(LDA, constant.loByte)) -> List(AssemblyLine.immediate(LDA, constant.hiByte)))
         }
       }
+      if (doesMemoryAccessOverlap(niceReads.flatMap(_._1).toList, stores(1))) return None
+      if (doesMemoryAccessOverlap(niceReads.flatMap(_._2).toList, stores(0))) return None
       for (b <- 0 to 1) {
         for (read <- niceReads) {
           if (b == 0) result ++= read._1
@@ -122,11 +124,15 @@ object PseudoregisterBuiltIns {
       return List(AssemblyLine.immediate(LDA, 0), AssemblyLine.immediate(LDX, 0))
     }
     val reg = ctx.env.get[VariableInMemory]("__reg.loword")
-    val head = params.head match {
+    val addToRegs = params.map{ case (sub, param) => addToReg(ctx, param, sub, decimal) }
+    val newHead = params.indices.find{ i =>
+      !params(i)._1 && addToRegs(i).exists(l => l.opcode == PHA || l.opcode == PHA_W)
+    }.getOrElse(0)
+    val head = params(newHead) match {
       case (false, e) => MosExpressionCompiler.compile(ctx, e, Some(MosExpressionCompiler.getExpressionType(ctx, e) -> reg), BranchSpec.None)
       case (true, e) => ???
     }
-    params.tail.foldLeft[List[AssemblyLine]](head){case (code, (sub, param)) => code ++ addToReg(ctx, param, sub, decimal)} ++ List(
+    params.indices.filter(_ != newHead).foldLeft[List[AssemblyLine]](head){case (code, index) => code ++ addToRegs(index)} ++ List(
       AssemblyLine.zeropage(LDA, reg),
       AssemblyLine.zeropage(LDX, reg, 1),
     )
@@ -152,11 +158,15 @@ object PseudoregisterBuiltIns {
       return List(AssemblyLine.accu16, AssemblyLine.immediate(LDA_W, 0), AssemblyLine.accu8)
     }
     val reg = ctx.env.get[VariableInMemory]("__reg.loword")
-    val head = params.head match {
+    val addToRegs = params.map{ case (sub, param) => addToReg(ctx, param, sub, decimal) }
+    val newHead = params.indices.find{ i =>
+      !params(i)._1 && addToRegs(i).exists(l => l.opcode == PHA || l.opcode == PHA_W)
+    }.getOrElse(0)
+    val head = params(newHead) match {
       case (false, e) => MosExpressionCompiler.compile(ctx, e, Some(MosExpressionCompiler.getExpressionType(ctx, e) -> reg), BranchSpec.None)
       case (true, e) => ???
     }
-    params.tail.foldLeft[List[AssemblyLine]](head){case (code, (sub, param)) => code ++ addToReg(ctx, param, sub, decimal)} ++ List(
+    params.indices.filter(_ != newHead).foldLeft[List[AssemblyLine]](head){case (code, index) => code ++ addToRegs(index)} ++ List(
       AssemblyLine.accu16,
       AssemblyLine.zeropage(LDA_W, reg),
       AssemblyLine.accu8
@@ -238,8 +248,7 @@ object PseudoregisterBuiltIns {
             AssemblyLine.zeropage(STA, reg),
             AssemblyLine.zeropage(LDA, reg, 1),
             h.copy(opcode = op),
-            AssemblyLine.zeropage(STA, reg, 1),
-            AssemblyLine.zeropage(LDA, reg)))
+            AssemblyLine.zeropage(STA, reg, 1)))
         case List(
           AssemblyLine0(REP, Immediate, NumericConstant(0x20, _)),
           l@AssemblyLine0(LDA_W, addrMode, _),
@@ -675,4 +684,16 @@ object PseudoregisterBuiltIns {
     case AssemblyLine0(_, _, CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(2, _))) if th.name == "__reg" => true
     case _ => false
   }
+
+  /**
+    * Tells whether some instruction in `l1` and some instruction in `l2` have equal parameters
+    * while exactly one of the two is an STA. Immediate and implied instructions are skipped.
+    */
+  def doesMemoryAccessOverlap(l1: List[AssemblyLine], l2: List[AssemblyLine]): Boolean = {
+    def accessesMemory(line: AssemblyLine): Boolean = line.addrMode != Immediate && line.addrMode != Implied
+    val candidates = l2.filter(accessesMemory)
+    l1.filter(accessesMemory).exists { a1 =>
+      candidates.exists(a2 => (a1.opcode == STA) != (a2.opcode == STA) && a1.parameter == a2.parameter)
+    }
+  }
 }
diff --git a/src/test/scala/millfork/test/BitOpSuite.scala b/src/test/scala/millfork/test/BitOpSuite.scala
index e5a99ec7..f57684fc 100644
--- a/src/test/scala/millfork/test/BitOpSuite.scala
+++ b/src/test/scala/millfork/test/BitOpSuite.scala
@@ -72,4 +72,21 @@ class BitOpSuite extends FunSuite with Matchers {
         | noinline void barrier(){}
       """.stripMargin)(_.readWord(0xc000) should equal(0xc00c))
   }
+
+  test("Bit testing optimizations") {
+    val code ="""
+      | byte output @$c000
+      | noinline byte f() = 5
+      | noinline bool g(byte x) = x & 1 == 0
+      | void main () {
+      |     byte x
+      |     x = f()
+      |
+      |     if x & 4 == 0 { x = 0 } else { x = 2 }
+      |     output = x
+      | }
+      |
+    """.stripMargin
+    EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80)(code)(_.readByte(0xc000) should equal(2))
+  }
 }
diff --git a/src/test/scala/millfork/test/BooleanSuite.scala b/src/test/scala/millfork/test/BooleanSuite.scala
index bdacc196..ed257a72 100644
--- a/src/test/scala/millfork/test/BooleanSuite.scala
+++ b/src/test/scala/millfork/test/BooleanSuite.scala
@@ -138,4 +138,21 @@ class BooleanSuite extends FunSuite with Matchers {
       """.stripMargin
     EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80)(code)(_.readByte(0xc000) should equal(code.sliding(4).count(_ == "pass")))
   }
+
+  test("Fat boolean optimization") {
+    val code ="""
+      | byte output @$c000
+      | noinline bool f(byte x) = x & 1 != 0
+      | noinline bool g(byte x) = x & 1 == 0
+      | void main () {
+      |     output = 5
+      |     if f(3) { output += 1 }
+      |     if g(2) { output += 1 }
+      |     if f(2) { output += 100 }
+      |     if g(3) { output += 100 }
+      | }
+      |
+    """.stripMargin
+    EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80)(code)(_.readByte(0xc000) should equal(7))
+  }
 }
diff --git a/src/test/scala/millfork/test/WordMathSuite.scala b/src/test/scala/millfork/test/WordMathSuite.scala
index 4ac8e76e..7afa7266 100644
--- a/src/test/scala/millfork/test/WordMathSuite.scala
+++ b/src/test/scala/millfork/test/WordMathSuite.scala
@@ -236,6 +236,23 @@ class WordMathSuite extends FunSuite with Matchers with AppendedClues {
     }
   }
 
+  test("Word addition 5") {
+    EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Sixteen, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086)("""
+        | word output @$c000
+        | void main () {
+        |   word v
+        |   word u
+        |   v = $308
+        |   output = $102
+        |   barrier()
+        |   output = (output.lo:output.hi) + v
+        | }
+        | noinline void barrier() { }
+      """.stripMargin){ m =>
+      m.readWord(0xc000) should equal(0x509)
+    }
+  }
+
   test("Word bit ops 2") {
     EmuCrossPlatformBenchmarkRun(Cpu.Sixteen, Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086)("""
       | word output @$c000