1
0
mirror of https://github.com/KarolS/millfork.git synced 2024-09-30 00:56:56 +00:00

Optimize byte division by power of two

This commit is contained in:
Karol Stasiak 2019-06-06 13:35:26 +02:00
parent e31737ad40
commit a1b868e3df
4 changed files with 53 additions and 9 deletions

View File

@ -736,6 +736,13 @@ object AlwaysGoodI80Optimizations {
}
),
// Constant-folds SWAP applied to a register whose value was bound to slot 0 by the pattern:
// the instruction is replaced by an immediate load of the byte with its two nibbles exchanged.
for7Registers(reg =>
  (Elidable & MatchRegister(reg, 0) & HasRegisterParam(reg) & HasOpcode(SWAP) & DoesntMatterWhatItDoesWithFlagsExceptCarry) ~~> { (code, ctx) =>
    val known = ctx.get[Int](0)
    // Bits 4-7 move down to bits 0-3, and bits 0-3 move up to bits 4-7.
    val highNibble = (known >> 4) & 0xf
    val lowNibble = known & 0xf
    val folded = (lowNibble << 4) | highNibble
    List(ZLine.ldImm8(reg, folded).pos(code.head.source))
  }
),
)
val FreeHL = new RuleBasedAssemblyOptimization("Free HL",

View File

@ -1032,7 +1032,13 @@ object BuiltIns {
Nil
} else if (qq > 255) {
if (modulo) MosExpressionCompiler.compileToA(ctx, p)
else List(AssemblyLine.immediate(LDA, 0))
else List(AssemblyLine.immediate(LDA, 0).position(q.position))
} else if (qq == 1) {
if (modulo) List(AssemblyLine.immediate(LDA, 0).position(q.position))
else MosExpressionCompiler.compileToA(ctx, p)
} else if (isPowerOfTwoUpTo15(qq)) {
if (modulo) MosExpressionCompiler.compileToA(ctx, p) :+ AssemblyLine.immediate(AND, qq - 1).position(q.position)
else MosExpressionCompiler.compileToA(ctx, p) ++ List.fill(java.lang.Long.bitCount(qq-1))(AssemblyLine.implied(LSR).position(q.position))
} else {
compileUnsignedByteDivision(ctx, p, qq.toInt, modulo)
}

View File

@ -133,6 +133,15 @@ object Z80Multiply {
* Calculate A = p / q or A = p %% q
*/
def compileUnsignedByteDivision(ctx: CompilationContext, p: Either[LocalVariableAddressOperand, LhsExpression], q: Expression, modulo: Boolean): List[ZLine] = {
// Emits the code that puts the dividend `p` into register A.
// An expression operand is compiled via Z80ExpressionCompiler; an address operand
// becomes a single load instruction tagged with the source position of `q`.
def loadPToA(): List[ZLine] = p match {
  case Right(expression) =>
    Z80ExpressionCompiler.compileToA(ctx, expression)
  case Left(address) =>
    val load = address match {
      case LocalVariableAddressViaHL => ZLine.ld8(ZRegister.A, ZRegister.MEM_HL)
      case LocalVariableAddressViaIX(offset) => ZLine.ldViaIx(ZRegister.A, offset)
      case LocalVariableAddressViaIY(offset) => ZLine.ldViaIy(ZRegister.A, offset)
    }
    List(load.position(q.position))
}
ctx.env.eval(q) match {
case Some(NumericConstant(qq, _)) =>
if (qq < 0) {
@ -141,15 +150,20 @@ object Z80Multiply {
} else if (qq == 0) {
ctx.log.error("Unsigned division by zero", q.position)
Nil
} else if (qq == 1) {
if (modulo) List(ZLine.ldImm8(ZRegister.A, 0).position(q.position))
else loadPToA()
} else if (qq > 255) {
if (modulo) {
p match {
case Right(pp) => Z80ExpressionCompiler.compileToA(ctx, pp)
case Left(LocalVariableAddressViaHL) => List(ZLine.ld8(ZRegister.A, ZRegister.MEM_HL))
case Left(LocalVariableAddressViaIX(offset)) => List(ZLine.ldViaIx(ZRegister.A, offset))
case Left(LocalVariableAddressViaIY(offset)) => List(ZLine.ldViaIy(ZRegister.A, offset))
}
} else List(ZLine.ldImm8(ZRegister.A, 0))
if (modulo) loadPToA()
else List(ZLine.ldImm8(ZRegister.A, 0))
} else if (isPowerOfTwoUpTo15(qq)) {
val mask = (qq - 1).toInt
val shift = Integer.bitCount(mask)
val postShiftMask = (1 << (8 - shift)) - 1
if (modulo) loadPToA() :+ ZLine.imm8(ZOpcode.AND, mask)
else if (shift == 4 && ctx.options.flag(CompilationFlag.EmitSharpOpcodes)) loadPToA() ++ List(ZLine.register(ZOpcode.SWAP, ZRegister.A), ZLine.imm8(ZOpcode.AND, 15))
else if (ctx.options.flag(CompilationFlag.EmitExtended80Opcodes)) loadPToA() ++ List.fill(shift)(ZLine.register(ZOpcode.SRL, ZRegister.A))
else loadPToA() ++ List.fill(shift)(ZLine.implied(ZOpcode.RRCA)) :+ ZLine.imm8(ZOpcode.AND, postShiftMask)
} else {
compileUnsignedByteDivisionImpl(ctx, p, qq.toInt, modulo)
}
@ -247,4 +261,6 @@ object Z80Multiply {
}.toList
}
}
/**
  * Checks whether `n` is a positive power of two strictly smaller than 0x8000.
  *
  * @param n the candidate value
  * @return true for 1, 2, 4, ..., 0x4000; false for everything else,
  *         including zero, negative numbers and 0x8000 itself
  */
private def isPowerOfTwoUpTo15(n: Long): Boolean = {
  val inRange = n > 0 && n < 0x8000
  // A power of two has exactly one bit set, so clearing its lowest set bit leaves zero.
  inRange && (n & (n - 1)) == 0
}
}

View File

@ -375,4 +375,19 @@ class ByteMathSuite extends FunSuite with Matchers with AppendedClues {
m.readByte(0xc003) should equal(x % y) withClue s"$x %% $y"
}
}
test("Byte division 3") {
  // Every divisor here is a power of two (2, 4, 8, 16, 128).
  // The pairs are (dividend, divisor) and run in the order listed.
  val powerOfTwoCases = Seq(
    0 -> 2,
    1 -> 2,
    2 -> 2,
    250 -> 128,
    0 -> 4,
    0 -> 8,
    1 -> 4,
    6 -> 8,
    73 -> 16,
    75 -> 128,
    42 -> 128,
    142 -> 128
  )
  powerOfTwoCases.foreach { case (dividend, divisor) =>
    divisionCase1(dividend, divisor)
  }
}
}