From a1b868e3df3cb74dd0dd405e86c24ff05aeb1b45 Mon Sep 17 00:00:00 2001
From: Karol Stasiak
Date: Thu, 6 Jun 2019 13:35:26 +0200
Subject: [PATCH] Optimize byte division by power of two

When the divisor of an unsigned byte division or modulo evaluates to a
constant power of two, emit shifts and masks instead of calling the
generic division code:

* MOS: modulo becomes AND #(q-1); division becomes one LSR per bit set
  in q-1.
* Z80: modulo becomes AND q-1. Division by 16 becomes SWAP A + AND 15
  when EmitSharpOpcodes is set; otherwise one SRL A per bit when
  EmitExtended80Opcodes is set; otherwise one RRCA per bit followed by
  an AND that clears the bits rotated into the top of A.

On both targets a constant divisor of 1 now yields the dividend itself
(division) or zero (modulo). The dividend load on Z80 is factored into
a local loadPToA() helper, and every line emitted by the Z80 constant
divisor branches carries the position of the divisor expression.

Also fold SWAP of a register whose matched value is known into an
8-bit immediate load of the nibble-swapped value, and add ByteMathSuite
cases for power-of-two divisors.
---
 .../z80/opt/AlwaysGoodI80Optimizations.scala  |  7 ++++
 .../millfork/compiler/mos/BuiltIns.scala      |  8 ++++-
 .../millfork/compiler/z80/Z80Multiply.scala   | 32 ++++++++++++++-----
 .../scala/millfork/test/ByteMathSuite.scala   | 15 +++++++++
 4 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala
index 25d07c1c..28f181aa 100644
--- a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala
+++ b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala
@@ -736,6 +736,13 @@ object AlwaysGoodI80Optimizations {
       }
     ),
 
+    for7Registers(reg =>
+      (Elidable & MatchRegister(reg, 0) & HasRegisterParam(reg) & HasOpcode(SWAP) & DoesntMatterWhatItDoesWithFlagsExceptCarry) ~~> { (code, ctx) =>
+        val value = ctx.get[Int](0)
+        val swappedValue = ((value >> 4) & 0xf) | ((value << 4) & 0xf0)
+        List(ZLine.ldImm8(reg, swappedValue).pos(code.head.source))
+      }
+    ),
   )
 
   val FreeHL = new RuleBasedAssemblyOptimization("Free HL",
diff --git a/src/main/scala/millfork/compiler/mos/BuiltIns.scala b/src/main/scala/millfork/compiler/mos/BuiltIns.scala
index 8ee8cd1e..89d7594a 100644
--- a/src/main/scala/millfork/compiler/mos/BuiltIns.scala
+++ b/src/main/scala/millfork/compiler/mos/BuiltIns.scala
@@ -1032,7 +1032,13 @@ object BuiltIns {
           Nil
         } else if (qq > 255) {
           if (modulo) MosExpressionCompiler.compileToA(ctx, p)
-          else List(AssemblyLine.immediate(LDA, 0))
+          else List(AssemblyLine.immediate(LDA, 0).position(q.position))
+        } else if (qq == 1) {
+          if (modulo) List(AssemblyLine.immediate(LDA, 0).position(q.position))
+          else MosExpressionCompiler.compileToA(ctx, p)
+        } else if (isPowerOfTwoUpTo15(qq)) {
+          if (modulo) MosExpressionCompiler.compileToA(ctx, p) :+ AssemblyLine.immediate(AND, qq - 1).position(q.position)
+          else MosExpressionCompiler.compileToA(ctx, p) ++ List.fill(java.lang.Long.bitCount(qq-1))(AssemblyLine.implied(LSR).position(q.position))
         } else {
           compileUnsignedByteDivision(ctx, p, qq.toInt, modulo)
         }
diff --git a/src/main/scala/millfork/compiler/z80/Z80Multiply.scala b/src/main/scala/millfork/compiler/z80/Z80Multiply.scala
index 91c61138..2754106b 100644
--- a/src/main/scala/millfork/compiler/z80/Z80Multiply.scala
+++ b/src/main/scala/millfork/compiler/z80/Z80Multiply.scala
@@ -133,6 +133,15 @@ object Z80Multiply {
     * Calculate A = p / q or A = p %% q
     */
   def compileUnsignedByteDivision(ctx: CompilationContext, p: Either[LocalVariableAddressOperand, LhsExpression], q: Expression, modulo: Boolean): List[ZLine] = {
+    def loadPToA(): List[ZLine] = {
+      p match {
+        case Right(pp) => Z80ExpressionCompiler.compileToA(ctx, pp)
+        case Left(LocalVariableAddressViaHL) => List(ZLine.ld8(ZRegister.A, ZRegister.MEM_HL).position(q.position))
+        case Left(LocalVariableAddressViaIX(offset)) => List(ZLine.ldViaIx(ZRegister.A, offset).position(q.position))
+        case Left(LocalVariableAddressViaIY(offset)) => List(ZLine.ldViaIy(ZRegister.A, offset).position(q.position))
+      }
+    }
+
     ctx.env.eval(q) match {
       case Some(NumericConstant(qq, _)) =>
         if (qq < 0) {
@@ -141,15 +150,20 @@ object Z80Multiply {
         } else if (qq == 0) {
           ctx.log.error("Unsigned division by zero", q.position)
           Nil
+        } else if (qq == 1) {
+          if (modulo) List(ZLine.ldImm8(ZRegister.A, 0).position(q.position))
+          else loadPToA()
         } else if (qq > 255) {
-          if (modulo) {
-            p match {
-              case Right(pp) => Z80ExpressionCompiler.compileToA(ctx, pp)
-              case Left(LocalVariableAddressViaHL) => List(ZLine.ld8(ZRegister.A, ZRegister.MEM_HL))
-              case Left(LocalVariableAddressViaIX(offset)) => List(ZLine.ldViaIx(ZRegister.A, offset))
-              case Left(LocalVariableAddressViaIY(offset)) => List(ZLine.ldViaIy(ZRegister.A, offset))
-            }
-          } else List(ZLine.ldImm8(ZRegister.A, 0))
+          if (modulo) loadPToA()
+          else List(ZLine.ldImm8(ZRegister.A, 0).position(q.position))
+        } else if (isPowerOfTwoUpTo15(qq)) {
+          val mask = (qq - 1).toInt
+          val shift = Integer.bitCount(mask)
+          val postShiftMask = (1 << (8 - shift)) - 1
+          if (modulo) loadPToA() :+ ZLine.imm8(ZOpcode.AND, mask).position(q.position)
+          else if (shift == 4 && ctx.options.flag(CompilationFlag.EmitSharpOpcodes)) loadPToA() ++ List(ZLine.register(ZOpcode.SWAP, ZRegister.A).position(q.position), ZLine.imm8(ZOpcode.AND, 15).position(q.position))
+          else if (ctx.options.flag(CompilationFlag.EmitExtended80Opcodes)) loadPToA() ++ List.fill(shift)(ZLine.register(ZOpcode.SRL, ZRegister.A).position(q.position))
+          else loadPToA() ++ List.fill(shift)(ZLine.implied(ZOpcode.RRCA).position(q.position)) :+ ZLine.imm8(ZOpcode.AND, postShiftMask).position(q.position)
         } else {
           compileUnsignedByteDivisionImpl(ctx, p, qq.toInt, modulo)
         }
@@ -247,4 +261,6 @@ object Z80Multiply {
       }.toList
     }
   }
+
+  private def isPowerOfTwoUpTo15(n: Long): Boolean = if (n <= 0 || n >= 0x8000) false else 0 == ((n-1) & n)
 }
diff --git a/src/test/scala/millfork/test/ByteMathSuite.scala b/src/test/scala/millfork/test/ByteMathSuite.scala
index 7d856c9d..b38001ee 100644
--- a/src/test/scala/millfork/test/ByteMathSuite.scala
+++ b/src/test/scala/millfork/test/ByteMathSuite.scala
@@ -375,4 +375,19 @@ class ByteMathSuite extends FunSuite with Matchers with AppendedClues {
       m.readByte(0xc003) should equal(x % y) withClue s"$x %% $y"
     }
   }
+
+  test("Byte division 3") {
+    divisionCase1(0, 2)
+    divisionCase1(1, 2)
+    divisionCase1(2, 2)
+    divisionCase1(250, 128)
+    divisionCase1(0, 4)
+    divisionCase1(0, 8)
+    divisionCase1(1, 4)
+    divisionCase1(6, 8)
+    divisionCase1(73, 16)
+    divisionCase1(75, 128)
+    divisionCase1(42, 128)
+    divisionCase1(142, 128)
+  }
 }