1
0
mirror of https://github.com/KarolS/millfork.git synced 2025-01-12 03:30:09 +00:00

Optimizations:

– optimize multiplication and division by constants
– optimize multiplication, division and shifts for size
This commit is contained in:
Karol Stasiak 2019-06-24 15:20:39 +02:00
parent 8ea3957023
commit 663e38c264
5 changed files with 137 additions and 31 deletions

View File

@ -960,6 +960,19 @@ object BuiltIns {
}
def compileByteMultiplication(ctx: CompilationContext, v: Expression, c: Int): List[AssemblyLine] = {
c match {
case 0 =>
if (v.isPure) return List(AssemblyLine.immediate(LDA, 0))
else return MosExpressionCompiler.compileToA(ctx, v) ++ List(AssemblyLine.immediate(LDA, 0))
case 1 => return MosExpressionCompiler.compileToA(ctx, v)
case 2 | 4 | 8 | 16 | 32 =>
return MosExpressionCompiler.compileToA(ctx, v) ++ List.fill(Integer.numberOfTrailingZeros(c))(AssemblyLine.implied(ASL))
case 128 =>
return MosExpressionCompiler.compileToA(ctx, v) ++ List(AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.immediate(AND, 0x80))
case 64 =>
return MosExpressionCompiler.compileToA(ctx, v) ++ List(AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.immediate(AND, 0xC0))
case _ =>
}
val result = ListBuffer[AssemblyLine]()
// TODO: optimise
val addingCode = simpleOperation(ADC, ctx, v, IndexChoice.PreferY, preserveA = false, commutative = false, decimal = false)
@ -975,13 +988,28 @@ object BuiltIns {
result += AssemblyLine.implied(ASL)
}
if ((mult & mask) != 0) {
result ++= List(AssemblyLine.implied(CLC), adc)
result += AssemblyLine.implied(CLC)
result += adc
empty = false
}
mask >>>= 1
}
result.toList
val sizeIfCalling = addingCode.map(_.sizeInBytes).sum + 9
val sizeIfUnrolling = result.map(_.sizeInBytes).sum
var shouldUnroll = true
if (ctx.options.zpRegisterSize >= 2) {
if (ctx.options.flag(CompilationFlag.OptimizeForSize)) {
shouldUnroll = sizeIfUnrolling <= sizeIfCalling
} else if (!ctx.options.flag(CompilationFlag.OptimizeForSpeed)) {
shouldUnroll = sizeIfUnrolling <= sizeIfCalling + 6
}
}
if (shouldUnroll){
result.toList
} else {
indexing ++ List(adc.copy(opcode = LDA)) ++ PseudoregisterBuiltIns.compileByteMultiplication(ctx, None, LiteralExpression(c, 1), storeInRegLo = false)
}
}
//noinspection ZeroIndexToHead
@ -991,13 +1019,26 @@ object BuiltIns {
variables.length match {
case 0 => List(AssemblyLine.immediate(LDA, constant & 0xff))
case 1 =>
val sim = simplicity(ctx.env, variables.head._1)
if (sim >= 'I') {
compileByteMultiplication(ctx, variables.head._1, constant)
if (constant == 1) {
MosExpressionCompiler.compileToA(ctx, variables.head._1)
} else {
MosExpressionCompiler.compileToA(ctx, variables.head._1) ++
List(AssemblyLine.zeropage(STA, ctx.env.get[ThingInMemory]("__reg.b0"))) ++
compileByteMultiplication(ctx, VariableExpression("__reg.b0"), constant)
val sim = simplicity(ctx.env, variables.head._1)
if (sim >= 'I') {
compileByteMultiplication(ctx, variables.head._1, constant)
} else {
constant match {
case 2 | 4 | 8 | 16 | 32 =>
MosExpressionCompiler.compileToA(ctx, variables.head._1) ++ List.fill(Integer.numberOfTrailingZeros(constant))(AssemblyLine.implied(ASL))
case 128 =>
MosExpressionCompiler.compileToA(ctx, variables.head._1) ++ List(AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.immediate(AND, 0x80))
case 64 =>
MosExpressionCompiler.compileToA(ctx, variables.head._1) ++ List(AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.immediate(AND, 0xC0))
case _ =>
MosExpressionCompiler.compileToA(ctx, variables.head._1) ++
List(AssemblyLine.zeropage(STA, ctx.env.get[ThingInMemory]("__reg.b0"))) ++
compileByteMultiplication(ctx, VariableExpression("__reg.b0"), constant)
}
}
}
case 2 =>
if (constant == 1)
@ -1036,6 +1077,8 @@ object BuiltIns {
} else if (qq == 1) {
if (modulo) List(AssemblyLine.immediate(LDA, 0).position(q.position))
else MosExpressionCompiler.compileToA(ctx, p)
} else if (qq >= 128 && !modulo) {
MosExpressionCompiler.compileToA(ctx, p) ++ List(AssemblyLine.immediate(CMP, qq), AssemblyLine.immediate(LDA, 0), AssemblyLine.implied(ROL))
} else if (isPowerOfTwoUpTo15(qq)) {
if (modulo) MosExpressionCompiler.compileToA(ctx, p) :+ AssemblyLine.immediate(AND, qq - 1).position(q.position)
else MosExpressionCompiler.compileToA(ctx, p) ++ List.fill(java.lang.Long.bitCount(qq-1))(AssemblyLine.implied(LSR).position(q.position))

View File

@ -272,6 +272,14 @@ object PseudoregisterBuiltIns {
}
}
/**
 * Decides whether a shift by the constant amount `count` should be emitted as
 * `count` repeated copies of the single-step shift code.
 *
 * The per-step size used for the decision is `bodySize16bit` when
 * `CompilationFlag.EmitNative65816Opcodes` is enabled and `bodySize8bit` otherwise
 * (presumably both are sizes in bytes of one shift step).
 *
 *  - With `OptimizeForSpeed` the answer is always `true`.
 *  - With `OptimizeForSize` the repeated code must be smaller than one step plus 5.
 *  - Otherwise the repeated code must be smaller than one step plus 13.
 *
 * For step sizes of 4 (8-bit) and 2 (16-bit) this allows shifts up to <<2 (<<3 on 65816)
 * when optimizing for size, and up to <<4 (<<7 on 65816) by default.
 * NOTE(review): compileWordShiftOps calls this with (2, 4), which yields the opposite
 * limits (<<3 / <<7 on 8-bit, <<2 / <<4 on 65816) -- confirm the intended argument order.
 *
 * @param ctx           compilation context whose option flags are consulted
 * @param count         the constant shift amount
 * @param bodySize8bit  size of one shift step when native 65816 opcodes are not emitted
 * @param bodySize16bit size of one shift step when native 65816 opcodes are emitted
 * @return `true` if the shift should be unrolled
 */
private def unrollShift(ctx: CompilationContext, count: Long, bodySize8bit: Int, bodySize16bit: Int): Boolean = {
  // TODO: figure out how this interacts with the optimizer
  val stepSize =
    if (ctx.options.flag(CompilationFlag.EmitNative65816Opcodes)) bodySize16bit
    else bodySize8bit
  // Extra size tolerated on top of a single step; None means "no limit".
  val allowance: Option[Int] =
    if (ctx.options.flag(CompilationFlag.OptimizeForSpeed)) None
    else if (ctx.options.flag(CompilationFlag.OptimizeForSize)) Some(5)
    else Some(13)
  allowance.forall(extra => count * stepSize < stepSize + extra)
}
def compileWordShiftOps(left: Boolean, ctx: CompilationContext, l: Expression, r: Expression): List[AssemblyLine] = {
if (ctx.options.zpRegisterSize < 2) {
ctx.log.error("Word shifting requires the zeropage pseudoregister", l.position)
@ -279,12 +287,12 @@ object PseudoregisterBuiltIns {
}
val b = ctx.env.get[Type]("byte")
val w = ctx.env.get[Type]("word")
val reg = ctx.env.get[VariableInMemory]("__reg")
val reg = ctx.env.get[VariableInMemory]("__reg.loword")
val firstParamCompiled = MosExpressionCompiler.compile(ctx, l, Some(MosExpressionCompiler.getExpressionType(ctx, l) -> reg), NoBranching)
ctx.env.eval(r) match {
case Some(NumericConstant(0, _)) =>
List(AssemblyLine.zeropage(LDA, reg), AssemblyLine.zeropage(LDX, reg, 1))
case Some(NumericConstant(v, _)) if v > 0 =>
case Some(NumericConstant(v, _)) if v > 0 && unrollShift(ctx, v, 2, 4) =>
if (ctx.options.flag(CompilationFlag.EmitNative65816Opcodes)) {
firstParamCompiled ++
List(AssemblyLine.accu16) ++
@ -468,17 +476,30 @@ object PseudoregisterBuiltIns {
case (2 | 1, 1) => // ok
case _ => ctx.log.fatal("Invalid code path", param2.position)
}
(ctx.env.eval(param1), ctx.env.eval(param2)) match {
case (Some(l), Some(r)) =>
val operator = if (modulo) MathOperator.Modulo else MathOperator.Divide
val product = CompoundConstant(operator, l, r).quickSimplify
return List(AssemblyLine.immediate(LDA, product.loByte), AssemblyLine.immediate(LDX, product.hiByte))
// TODO: powers of 2, like with *
case _ =>
}
val b = ctx.env.get[Type]("byte")
val w = ctx.env.get[Type]("word")
val reg = ctx.env.get[VariableInMemory]("__reg")
(ctx.env.eval(param1), ctx.env.eval(param2)) match {
case (Some(l), Some(r)) =>
if (r.isProvablyZero) {
ctx.log.error("Unsigned division by zero", param2.position)
}
val operator = if (modulo) MathOperator.Modulo else MathOperator.Divide
val product = CompoundConstant(operator, l, r).quickSimplify
return List(AssemblyLine.immediate(LDA, product.loByte), AssemblyLine.immediate(LDX, product.hiByte))
case (_, Some(NumericConstant(p, _))) =>
if (p == 0) {
ctx.log.error("Unsigned division by zero", param2.position)
}
if (p == 1) {
if (modulo) return MosExpressionCompiler.compile(ctx, param1, None, BranchSpec.None) ++ List(AssemblyLine.immediate(LDA, 0), AssemblyLine.immediate(LDX, 0))
else return MosExpressionCompiler.compile(ctx, param1, Some(w -> RegisterVariable(MosRegister.AX, w)), BranchSpec.None)
} else if (p < 256 && isPowerOfTwoUpTo15(p)) {
if (modulo) return MosExpressionCompiler.compile(ctx, param1, Some(w -> RegisterVariable(MosRegister.AX, w)), BranchSpec.None) ++ List(AssemblyLine.immediate(AND, p - 1), AssemblyLine.immediate(LDX, 0))
else return compileWordShiftOps(left = false, ctx, param1, LiteralExpression(Integer.numberOfTrailingZeros(p.toInt), 1))
}
case _ =>
}
val code1 = MosExpressionCompiler.compile(ctx, param1, Some(w -> RegisterVariable(MosRegister.AX, w)), BranchSpec.None)
val code2 = MosExpressionCompiler.compile(ctx, param2, Some(b -> RegisterVariable(MosRegister.A, b)), BranchSpec.None)
val load = if (!usesRegLo(code2) && !usesRegHi(code2)) {

View File

@ -912,24 +912,24 @@ object Z80ExpressionCompiler extends AbstractExpressionCompiler[ZLine] {
if (f.functionName == "%%=") {
calculateAddressToAppropriatePointer(ctx, l, forWriting = true) match {
case Some((LocalVariableAddressViaHL, List(ZLine0(LD_16, TwoRegisters(ZRegister.HL, ZRegister.IMM_16), addr)))) =>
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r) ++ List(
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r, modulo = true) ++ List(
ZLine.ldAbs8(addr, ZRegister.A),
ZLine.register(XOR, ZRegister.A),
ZLine.ldAbs8(addr+1, ZRegister.A)
)
case Some((lvo@LocalVariableAddressViaHL, code)) =>
code ++ stashHLIfChanged(ctx, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r)) ++ List(
code ++ stashHLIfChanged(ctx, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r, modulo = true)) ++ List(
ZLine.ld8(ZRegister.MEM_HL, ZRegister.A),
ZLine.register(INC_16, ZRegister.HL),
ZLine.ldImm8(ZRegister.MEM_HL, 0)
)
case Some((lvo@LocalVariableAddressViaIX(offset), code)) =>
code ++ Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r) ++ List(
code ++ Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r, modulo = true) ++ List(
ZLine.ldViaIx(offset, ZRegister.A),
ZLine.ld0ViaIx(offset + 1)
)
case Some((lvo@LocalVariableAddressViaIY(offset), code)) =>
code ++ Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r) ++ List(
code ++ Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r, modulo = true) ++ List(
ZLine.ldViaIy(offset, ZRegister.A),
ZLine.ld0ViaIy(offset + 1)
)
@ -942,7 +942,7 @@ object Z80ExpressionCompiler extends AbstractExpressionCompiler[ZLine] {
case Some((lvo@LocalVariableAddressViaHL, code)) =>
code ++
stashHLIfChanged(ctx,
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(LocalVariableAddressViaHL), r) ++ (
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(LocalVariableAddressViaHL), r, modulo = false) ++ (
if (ctx.options.flags(CompilationFlag.EmitIntel8080Opcodes)) List(ZLine.implied(EX_DE_HL))
else List(ZLine.ld8(ZRegister.E, ZRegister.L), ZLine.ld8(ZRegister.D, ZRegister.H))
)
@ -954,11 +954,11 @@ object Z80ExpressionCompiler extends AbstractExpressionCompiler[ZLine] {
)
case Some((lvo@LocalVariableAddressViaIX(offset), code)) =>
code ++
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r) ++
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r, modulo = false) ++
storeHLViaIX(ctx, offset, 2, signedSource = false)
case Some((lvo@LocalVariableAddressViaIY(offset), code)) =>
code ++
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r) ++
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r, modulo = false) ++
storeHLViaIY(ctx, offset, 2, signedSource = false)
case _ =>
ctx.log.error("Invalid left-hand side", l.position)
@ -969,14 +969,15 @@ object Z80ExpressionCompiler extends AbstractExpressionCompiler[ZLine] {
case "/" | "%%" =>
assertSizesForDivision(ctx, params, inPlace = false)
val (l, r, size) = assertArithmeticBinary(ctx, params)
val modulo = f.functionName == "%%"
size match {
case 1 =>
targetifyA(ctx, target, Z80Multiply.compileUnsignedByteDivision(ctx, Right(l), r, f.functionName == "%%"), isSigned = false)
targetifyA(ctx, target, Z80Multiply.compileUnsignedByteDivision(ctx, Right(l), r, modulo), isSigned = false)
case 2 =>
if (f.functionName == "%%") {
targetifyA(ctx, target, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r), isSigned = false)
if (modulo) {
targetifyA(ctx, target, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r, modulo = true), isSigned = false)
} else {
targetifyHL(ctx, target, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r))
targetifyHL(ctx, target, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r, modulo = false))
}
}
case "*'=" =>

View File

@ -106,7 +106,7 @@ object Z80Multiply {
/**
* Calculate HL = p / q and A = p %% q
*/
def compileUnsignedWordByByteDivision(ctx: CompilationContext, p: Either[LocalVariableAddressOperand, Expression], q: Expression): List[ZLine] = {
def compileUnsignedWordByByteDivision(ctx: CompilationContext, p: Either[LocalVariableAddressOperand, Expression], q: Expression, modulo: Boolean): List[ZLine] = {
val pb = p match {
case Right(pp) => Z80ExpressionCompiler.compileToHL(ctx, pp)
case Left(LocalVariableAddressViaHL) => List(
@ -118,6 +118,42 @@ object Z80Multiply {
case Left(LocalVariableAddressViaIX(offset)) => List(ZLine.ldViaIx(ZRegister.L, offset), ZLine.ldViaIx(ZRegister.H, offset+1))
case Left(LocalVariableAddressViaIY(offset)) => List(ZLine.ldViaIy(ZRegister.L, offset), ZLine.ldViaIy(ZRegister.H, offset+1))
}
ctx.env.eval(q) match {
case Some(NumericConstant(0, _)) =>
ctx.log.error("Unsigned division by zero", q.position)
return pb
case Some(NumericConstant(1, _)) =>
if (modulo) {
return pb :+ ZLine.ldImm8(ZRegister.A, 0)
} else {
return pb
}
case Some(NumericConstant(qc, _)) if qc <= 255 && isPowerOfTwoUpTo15(qc) =>
val count = Integer.numberOfTrailingZeros(qc.toInt)
if (modulo) {
return pb ++ List(ZLine.ld8(ZRegister.A, ZRegister.L), ZLine.imm8(ZOpcode.AND, qc.toInt - 1))
} else {
val extendedOps = ctx.options.flag(CompilationFlag.EmitExtended80Opcodes)
val shiftHL = if (extendedOps) {
(0L until count).flatMap(_ => List(
ZLine.register(ZOpcode.SRL, ZRegister.H),
ZLine.register(ZOpcode.RR, ZRegister.L)
))
} else {
(0 until count).flatMap(_ => List(
ZLine.ld8(ZRegister.A, ZRegister.H),
ZLine.register(ZOpcode.OR, ZRegister.A),
ZLine.implied(ZOpcode.RRA),
ZLine.ld8(ZRegister.H, ZRegister.A),
ZLine.ld8(ZRegister.A, ZRegister.L),
ZLine.implied(ZOpcode.RRA),
ZLine.ld8(ZRegister.L, ZRegister.A)
))
}
return pb ++ shiftHL
}
case _ =>
}
val qb = Z80ExpressionCompiler.compileToA(ctx, q)
val load = if (qb.exists(Z80ExpressionCompiler.changesHL)) {
pb ++ Z80ExpressionCompiler.stashHLIfChanged(ctx, qb)
@ -168,7 +204,7 @@ object Z80Multiply {
compileUnsignedByteDivisionImpl(ctx, p, qq.toInt, modulo)
}
case _ =>
val call = compileUnsignedWordByByteDivision(ctx, p, q)
val call = compileUnsignedWordByByteDivision(ctx, p, q, modulo = modulo)
if (modulo) {
call
} else {
@ -253,6 +289,9 @@ object Z80Multiply {
count match {
case 0 => List(ZLine.ldImm8(A, 0))
case 1 => Nil
case 128 => List(ZLine.implied(RRCA), ZLine.imm8(AND, 0x80))
case 64 => List(ZLine.implied(RRCA), ZLine.implied(RRCA), ZLine.imm8(AND, 0xC0))
case 32 => List(ZLine.implied(RRCA), ZLine.implied(RRCA), ZLine.implied(RRCA), ZLine.imm8(AND, 0xE0))
case n if n > 0 && n.-(1).&(n).==(0) => List.fill(Integer.numberOfTrailingZeros(n))(ZLine.register(ADD, A))
case _ =>
ZLine.ld8(E,A) :: Integer.toString(count & 0xff, 2).tail.flatMap{

View File

@ -157,6 +157,7 @@ class ByteMathSuite extends FunSuite with Matchers with AppendedClues {
private def multiplyCase1(x: Int, y: Int): Unit = {
EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086)(
s"""
| import zp_reg
| byte output @$$c000
| void main () {
| output = $x
@ -185,6 +186,7 @@ class ByteMathSuite extends FunSuite with Matchers with AppendedClues {
private def multiplyCase2(x: Int, y: Int): Unit = {
EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086)(
s"""
| import zp_reg
| byte output @$$c000
| void main () {
| byte a