mirror of
https://github.com/KarolS/millfork.git
synced 2025-01-12 03:30:09 +00:00
Optimizations:
– optimize multiplication and division by constants
– optimize multiplication, division and shifts for size
This commit is contained in:
parent
8ea3957023
commit
663e38c264
@ -960,6 +960,19 @@ object BuiltIns {
|
||||
}
|
||||
|
||||
def compileByteMultiplication(ctx: CompilationContext, v: Expression, c: Int): List[AssemblyLine] = {
|
||||
c match {
|
||||
case 0 =>
|
||||
if (v.isPure) return List(AssemblyLine.immediate(LDA, 0))
|
||||
else return MosExpressionCompiler.compileToA(ctx, v) ++ List(AssemblyLine.immediate(LDA, 0))
|
||||
case 1 => return MosExpressionCompiler.compileToA(ctx, v)
|
||||
case 2 | 4 | 8 | 16 | 32 =>
|
||||
return MosExpressionCompiler.compileToA(ctx, v) ++ List.fill(Integer.numberOfTrailingZeros(c))(AssemblyLine.implied(ASL))
|
||||
case 128 =>
|
||||
return MosExpressionCompiler.compileToA(ctx, v) ++ List(AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.immediate(AND, 0x80))
|
||||
case 64 =>
|
||||
return MosExpressionCompiler.compileToA(ctx, v) ++ List(AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.immediate(AND, 0xC0))
|
||||
case _ =>
|
||||
}
|
||||
val result = ListBuffer[AssemblyLine]()
|
||||
// TODO: optimise
|
||||
val addingCode = simpleOperation(ADC, ctx, v, IndexChoice.PreferY, preserveA = false, commutative = false, decimal = false)
|
||||
@ -975,13 +988,28 @@ object BuiltIns {
|
||||
result += AssemblyLine.implied(ASL)
|
||||
}
|
||||
if ((mult & mask) != 0) {
|
||||
result ++= List(AssemblyLine.implied(CLC), adc)
|
||||
result += AssemblyLine.implied(CLC)
|
||||
result += adc
|
||||
empty = false
|
||||
}
|
||||
|
||||
mask >>>= 1
|
||||
}
|
||||
result.toList
|
||||
val sizeIfCalling = addingCode.map(_.sizeInBytes).sum + 9
|
||||
val sizeIfUnrolling = result.map(_.sizeInBytes).sum
|
||||
var shouldUnroll = true
|
||||
if (ctx.options.zpRegisterSize >= 2) {
|
||||
if (ctx.options.flag(CompilationFlag.OptimizeForSize)) {
|
||||
shouldUnroll = sizeIfUnrolling <= sizeIfCalling
|
||||
} else if (!ctx.options.flag(CompilationFlag.OptimizeForSpeed)) {
|
||||
shouldUnroll = sizeIfUnrolling <= sizeIfCalling + 6
|
||||
}
|
||||
}
|
||||
if (shouldUnroll){
|
||||
result.toList
|
||||
} else {
|
||||
indexing ++ List(adc.copy(opcode = LDA)) ++ PseudoregisterBuiltIns.compileByteMultiplication(ctx, None, LiteralExpression(c, 1), storeInRegLo = false)
|
||||
}
|
||||
}
|
||||
|
||||
//noinspection ZeroIndexToHead
|
||||
@ -991,13 +1019,26 @@ object BuiltIns {
|
||||
variables.length match {
|
||||
case 0 => List(AssemblyLine.immediate(LDA, constant & 0xff))
|
||||
case 1 =>
|
||||
val sim = simplicity(ctx.env, variables.head._1)
|
||||
if (sim >= 'I') {
|
||||
compileByteMultiplication(ctx, variables.head._1, constant)
|
||||
if (constant == 1) {
|
||||
MosExpressionCompiler.compileToA(ctx, variables.head._1)
|
||||
} else {
|
||||
MosExpressionCompiler.compileToA(ctx, variables.head._1) ++
|
||||
List(AssemblyLine.zeropage(STA, ctx.env.get[ThingInMemory]("__reg.b0"))) ++
|
||||
compileByteMultiplication(ctx, VariableExpression("__reg.b0"), constant)
|
||||
val sim = simplicity(ctx.env, variables.head._1)
|
||||
if (sim >= 'I') {
|
||||
compileByteMultiplication(ctx, variables.head._1, constant)
|
||||
} else {
|
||||
constant match {
|
||||
case 2 | 4 | 8 | 16 | 32 =>
|
||||
MosExpressionCompiler.compileToA(ctx, variables.head._1) ++ List.fill(Integer.numberOfTrailingZeros(constant))(AssemblyLine.implied(ASL))
|
||||
case 128 =>
|
||||
MosExpressionCompiler.compileToA(ctx, variables.head._1) ++ List(AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.immediate(AND, 0x80))
|
||||
case 64 =>
|
||||
MosExpressionCompiler.compileToA(ctx, variables.head._1) ++ List(AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.implied(ROR), AssemblyLine.immediate(AND, 0xC0))
|
||||
case _ =>
|
||||
MosExpressionCompiler.compileToA(ctx, variables.head._1) ++
|
||||
List(AssemblyLine.zeropage(STA, ctx.env.get[ThingInMemory]("__reg.b0"))) ++
|
||||
compileByteMultiplication(ctx, VariableExpression("__reg.b0"), constant)
|
||||
}
|
||||
}
|
||||
}
|
||||
case 2 =>
|
||||
if (constant == 1)
|
||||
@ -1036,6 +1077,8 @@ object BuiltIns {
|
||||
} else if (qq == 1) {
|
||||
if (modulo) List(AssemblyLine.immediate(LDA, 0).position(q.position))
|
||||
else MosExpressionCompiler.compileToA(ctx, p)
|
||||
} else if (qq >= 128 && !modulo) {
|
||||
MosExpressionCompiler.compileToA(ctx, p) ++ List(AssemblyLine.immediate(CMP, qq), AssemblyLine.immediate(LDA, 0), AssemblyLine.implied(ROL))
|
||||
} else if (isPowerOfTwoUpTo15(qq)) {
|
||||
if (modulo) MosExpressionCompiler.compileToA(ctx, p) :+ AssemblyLine.immediate(AND, qq - 1).position(q.position)
|
||||
else MosExpressionCompiler.compileToA(ctx, p) ++ List.fill(java.lang.Long.bitCount(qq-1))(AssemblyLine.implied(LSR).position(q.position))
|
||||
|
@ -272,6 +272,14 @@ object PseudoregisterBuiltIns {
|
||||
}
|
||||
}
|
||||
|
||||
/**
  * Decides whether a shift by the constant `count` should take the caller's unrolled path.
  *
  * The per-step body size is `bodySize16bit` when `EmitNative65816Opcodes` is enabled and
  * `bodySize8bit` otherwise. Unrolling is always accepted under `OptimizeForSpeed`; otherwise
  * the total unrolled size (`count * bodySize`) has to stay below one body plus a fixed
  * allowance: 5 under `OptimizeForSize`, 13 by default.
  *
  * @param ctx           compilation context whose option flags drive the decision
  * @param count         constant shift amount
  * @param bodySize8bit  size of one shift step without native 65816 opcodes (presumably in bytes)
  * @param bodySize16bit size of one shift step with native 65816 opcodes (presumably in bytes)
  * @return `true` if the shift should be unrolled
  */
private def unrollShift(ctx: CompilationContext, count: Long, bodySize8bit: Int, bodySize16bit: Int): Boolean = {
  // TODO: figure out how this interacts with the optimizer
  val stepSize =
    if (ctx.options.flag(CompilationFlag.EmitNative65816Opcodes)) bodySize16bit
    else bodySize8bit

  // True when `count` copies of the step stay under one step plus `allowance`.
  def withinAllowance(allowance: Int): Boolean = count * stepSize < stepSize + allowance

  // NOTE(review): the limits previously quoted here were "<<2 (<<3 on 65816)" for the size
  // allowance and "<<4 (<<7 on 65816)" for the default one. Those figures correspond to step
  // sizes of 4 (8-bit) and 2 (65816); compileWordShiftOps passes (2, 4), which yields
  // <<3 / <<2 and <<7 / <<4 instead. Confirm which of the two is intended.
  if (ctx.options.flag(CompilationFlag.OptimizeForSpeed)) {
    true
  } else if (ctx.options.flag(CompilationFlag.OptimizeForSize)) {
    withinAllowance(5)
  } else {
    withinAllowance(13)
  }
}
|
||||
|
||||
def compileWordShiftOps(left: Boolean, ctx: CompilationContext, l: Expression, r: Expression): List[AssemblyLine] = {
|
||||
if (ctx.options.zpRegisterSize < 2) {
|
||||
ctx.log.error("Word shifting requires the zeropage pseudoregister", l.position)
|
||||
@ -279,12 +287,12 @@ object PseudoregisterBuiltIns {
|
||||
}
|
||||
val b = ctx.env.get[Type]("byte")
|
||||
val w = ctx.env.get[Type]("word")
|
||||
val reg = ctx.env.get[VariableInMemory]("__reg")
|
||||
val reg = ctx.env.get[VariableInMemory]("__reg.loword")
|
||||
val firstParamCompiled = MosExpressionCompiler.compile(ctx, l, Some(MosExpressionCompiler.getExpressionType(ctx, l) -> reg), NoBranching)
|
||||
ctx.env.eval(r) match {
|
||||
case Some(NumericConstant(0, _)) =>
|
||||
List(AssemblyLine.zeropage(LDA, reg), AssemblyLine.zeropage(LDX, reg, 1))
|
||||
case Some(NumericConstant(v, _)) if v > 0 =>
|
||||
case Some(NumericConstant(v, _)) if v > 0 && unrollShift(ctx, v, 2, 4) =>
|
||||
if (ctx.options.flag(CompilationFlag.EmitNative65816Opcodes)) {
|
||||
firstParamCompiled ++
|
||||
List(AssemblyLine.accu16) ++
|
||||
@ -468,17 +476,30 @@ object PseudoregisterBuiltIns {
|
||||
case (2 | 1, 1) => // ok
|
||||
case _ => ctx.log.fatal("Invalid code path", param2.position)
|
||||
}
|
||||
(ctx.env.eval(param1), ctx.env.eval(param2)) match {
|
||||
case (Some(l), Some(r)) =>
|
||||
val operator = if (modulo) MathOperator.Modulo else MathOperator.Divide
|
||||
val product = CompoundConstant(operator, l, r).quickSimplify
|
||||
return List(AssemblyLine.immediate(LDA, product.loByte), AssemblyLine.immediate(LDX, product.hiByte))
|
||||
// TODO: powers of 2, like with *
|
||||
case _ =>
|
||||
}
|
||||
val b = ctx.env.get[Type]("byte")
|
||||
val w = ctx.env.get[Type]("word")
|
||||
val reg = ctx.env.get[VariableInMemory]("__reg")
|
||||
(ctx.env.eval(param1), ctx.env.eval(param2)) match {
|
||||
case (Some(l), Some(r)) =>
|
||||
if (r.isProvablyZero) {
|
||||
ctx.log.error("Unsigned division by zero", param2.position)
|
||||
}
|
||||
val operator = if (modulo) MathOperator.Modulo else MathOperator.Divide
|
||||
val product = CompoundConstant(operator, l, r).quickSimplify
|
||||
return List(AssemblyLine.immediate(LDA, product.loByte), AssemblyLine.immediate(LDX, product.hiByte))
|
||||
case (_, Some(NumericConstant(p, _))) =>
|
||||
if (p == 0) {
|
||||
ctx.log.error("Unsigned division by zero", param2.position)
|
||||
}
|
||||
if (p == 1) {
|
||||
if (modulo) return MosExpressionCompiler.compile(ctx, param1, None, BranchSpec.None) ++ List(AssemblyLine.immediate(LDA, 0), AssemblyLine.immediate(LDX, 0))
|
||||
else return MosExpressionCompiler.compile(ctx, param1, Some(w -> RegisterVariable(MosRegister.AX, w)), BranchSpec.None)
|
||||
} else if (p < 256 && isPowerOfTwoUpTo15(p)) {
|
||||
if (modulo) return MosExpressionCompiler.compile(ctx, param1, Some(w -> RegisterVariable(MosRegister.AX, w)), BranchSpec.None) ++ List(AssemblyLine.immediate(AND, p - 1), AssemblyLine.immediate(LDX, 0))
|
||||
else return compileWordShiftOps(left = false, ctx, param1, LiteralExpression(Integer.numberOfTrailingZeros(p.toInt), 1))
|
||||
}
|
||||
case _ =>
|
||||
}
|
||||
val code1 = MosExpressionCompiler.compile(ctx, param1, Some(w -> RegisterVariable(MosRegister.AX, w)), BranchSpec.None)
|
||||
val code2 = MosExpressionCompiler.compile(ctx, param2, Some(b -> RegisterVariable(MosRegister.A, b)), BranchSpec.None)
|
||||
val load = if (!usesRegLo(code2) && !usesRegHi(code2)) {
|
||||
|
@ -912,24 +912,24 @@ object Z80ExpressionCompiler extends AbstractExpressionCompiler[ZLine] {
|
||||
if (f.functionName == "%%=") {
|
||||
calculateAddressToAppropriatePointer(ctx, l, forWriting = true) match {
|
||||
case Some((LocalVariableAddressViaHL, List(ZLine0(LD_16, TwoRegisters(ZRegister.HL, ZRegister.IMM_16), addr)))) =>
|
||||
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r) ++ List(
|
||||
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r, modulo = true) ++ List(
|
||||
ZLine.ldAbs8(addr, ZRegister.A),
|
||||
ZLine.register(XOR, ZRegister.A),
|
||||
ZLine.ldAbs8(addr+1, ZRegister.A)
|
||||
)
|
||||
case Some((lvo@LocalVariableAddressViaHL, code)) =>
|
||||
code ++ stashHLIfChanged(ctx, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r)) ++ List(
|
||||
code ++ stashHLIfChanged(ctx, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r, modulo = true)) ++ List(
|
||||
ZLine.ld8(ZRegister.MEM_HL, ZRegister.A),
|
||||
ZLine.register(INC_16, ZRegister.HL),
|
||||
ZLine.ldImm8(ZRegister.MEM_HL, 0)
|
||||
)
|
||||
case Some((lvo@LocalVariableAddressViaIX(offset), code)) =>
|
||||
code ++ Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r) ++ List(
|
||||
code ++ Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r, modulo = true) ++ List(
|
||||
ZLine.ldViaIx(offset, ZRegister.A),
|
||||
ZLine.ld0ViaIx(offset + 1)
|
||||
)
|
||||
case Some((lvo@LocalVariableAddressViaIY(offset), code)) =>
|
||||
code ++ Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r) ++ List(
|
||||
code ++ Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r, modulo = true) ++ List(
|
||||
ZLine.ldViaIy(offset, ZRegister.A),
|
||||
ZLine.ld0ViaIy(offset + 1)
|
||||
)
|
||||
@ -942,7 +942,7 @@ object Z80ExpressionCompiler extends AbstractExpressionCompiler[ZLine] {
|
||||
case Some((lvo@LocalVariableAddressViaHL, code)) =>
|
||||
code ++
|
||||
stashHLIfChanged(ctx,
|
||||
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(LocalVariableAddressViaHL), r) ++ (
|
||||
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(LocalVariableAddressViaHL), r, modulo = false) ++ (
|
||||
if (ctx.options.flags(CompilationFlag.EmitIntel8080Opcodes)) List(ZLine.implied(EX_DE_HL))
|
||||
else List(ZLine.ld8(ZRegister.E, ZRegister.L), ZLine.ld8(ZRegister.D, ZRegister.H))
|
||||
)
|
||||
@ -954,11 +954,11 @@ object Z80ExpressionCompiler extends AbstractExpressionCompiler[ZLine] {
|
||||
)
|
||||
case Some((lvo@LocalVariableAddressViaIX(offset), code)) =>
|
||||
code ++
|
||||
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r) ++
|
||||
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r, modulo = false) ++
|
||||
storeHLViaIX(ctx, offset, 2, signedSource = false)
|
||||
case Some((lvo@LocalVariableAddressViaIY(offset), code)) =>
|
||||
code ++
|
||||
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r) ++
|
||||
Z80Multiply.compileUnsignedWordByByteDivision(ctx, Left(lvo), r, modulo = false) ++
|
||||
storeHLViaIY(ctx, offset, 2, signedSource = false)
|
||||
case _ =>
|
||||
ctx.log.error("Invalid left-hand side", l.position)
|
||||
@ -969,14 +969,15 @@ object Z80ExpressionCompiler extends AbstractExpressionCompiler[ZLine] {
|
||||
case "/" | "%%" =>
|
||||
assertSizesForDivision(ctx, params, inPlace = false)
|
||||
val (l, r, size) = assertArithmeticBinary(ctx, params)
|
||||
val modulo = f.functionName == "%%"
|
||||
size match {
|
||||
case 1 =>
|
||||
targetifyA(ctx, target, Z80Multiply.compileUnsignedByteDivision(ctx, Right(l), r, f.functionName == "%%"), isSigned = false)
|
||||
targetifyA(ctx, target, Z80Multiply.compileUnsignedByteDivision(ctx, Right(l), r, modulo), isSigned = false)
|
||||
case 2 =>
|
||||
if (f.functionName == "%%") {
|
||||
targetifyA(ctx, target, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r), isSigned = false)
|
||||
if (modulo) {
|
||||
targetifyA(ctx, target, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r, modulo = true), isSigned = false)
|
||||
} else {
|
||||
targetifyHL(ctx, target, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r))
|
||||
targetifyHL(ctx, target, Z80Multiply.compileUnsignedWordByByteDivision(ctx, Right(l), r, modulo = false))
|
||||
}
|
||||
}
|
||||
case "*'=" =>
|
||||
|
@ -106,7 +106,7 @@ object Z80Multiply {
|
||||
/**
|
||||
* Calculate HL = p / q and A = p %% q
|
||||
*/
|
||||
def compileUnsignedWordByByteDivision(ctx: CompilationContext, p: Either[LocalVariableAddressOperand, Expression], q: Expression): List[ZLine] = {
|
||||
def compileUnsignedWordByByteDivision(ctx: CompilationContext, p: Either[LocalVariableAddressOperand, Expression], q: Expression, modulo: Boolean): List[ZLine] = {
|
||||
val pb = p match {
|
||||
case Right(pp) => Z80ExpressionCompiler.compileToHL(ctx, pp)
|
||||
case Left(LocalVariableAddressViaHL) => List(
|
||||
@ -118,6 +118,42 @@ object Z80Multiply {
|
||||
case Left(LocalVariableAddressViaIX(offset)) => List(ZLine.ldViaIx(ZRegister.L, offset), ZLine.ldViaIx(ZRegister.H, offset+1))
|
||||
case Left(LocalVariableAddressViaIY(offset)) => List(ZLine.ldViaIy(ZRegister.L, offset), ZLine.ldViaIy(ZRegister.H, offset+1))
|
||||
}
|
||||
ctx.env.eval(q) match {
|
||||
case Some(NumericConstant(0, _)) =>
|
||||
ctx.log.error("Unsigned division by zero", q.position)
|
||||
return pb
|
||||
case Some(NumericConstant(1, _)) =>
|
||||
if (modulo) {
|
||||
return pb :+ ZLine.ldImm8(ZRegister.A, 0)
|
||||
} else {
|
||||
return pb
|
||||
}
|
||||
case Some(NumericConstant(qc, _)) if qc <= 255 && isPowerOfTwoUpTo15(qc) =>
|
||||
val count = Integer.numberOfTrailingZeros(qc.toInt)
|
||||
if (modulo) {
|
||||
return pb ++ List(ZLine.ld8(ZRegister.A, ZRegister.L), ZLine.imm8(ZOpcode.AND, qc.toInt - 1))
|
||||
} else {
|
||||
val extendedOps = ctx.options.flag(CompilationFlag.EmitExtended80Opcodes)
|
||||
val shiftHL = if (extendedOps) {
|
||||
(0L until count).flatMap(_ => List(
|
||||
ZLine.register(ZOpcode.SRL, ZRegister.H),
|
||||
ZLine.register(ZOpcode.RR, ZRegister.L)
|
||||
))
|
||||
} else {
|
||||
(0 until count).flatMap(_ => List(
|
||||
ZLine.ld8(ZRegister.A, ZRegister.H),
|
||||
ZLine.register(ZOpcode.OR, ZRegister.A),
|
||||
ZLine.implied(ZOpcode.RRA),
|
||||
ZLine.ld8(ZRegister.H, ZRegister.A),
|
||||
ZLine.ld8(ZRegister.A, ZRegister.L),
|
||||
ZLine.implied(ZOpcode.RRA),
|
||||
ZLine.ld8(ZRegister.L, ZRegister.A)
|
||||
))
|
||||
}
|
||||
return pb ++ shiftHL
|
||||
}
|
||||
case _ =>
|
||||
}
|
||||
val qb = Z80ExpressionCompiler.compileToA(ctx, q)
|
||||
val load = if (qb.exists(Z80ExpressionCompiler.changesHL)) {
|
||||
pb ++ Z80ExpressionCompiler.stashHLIfChanged(ctx, qb)
|
||||
@ -168,7 +204,7 @@ object Z80Multiply {
|
||||
compileUnsignedByteDivisionImpl(ctx, p, qq.toInt, modulo)
|
||||
}
|
||||
case _ =>
|
||||
val call = compileUnsignedWordByByteDivision(ctx, p, q)
|
||||
val call = compileUnsignedWordByByteDivision(ctx, p, q, modulo = modulo)
|
||||
if (modulo) {
|
||||
call
|
||||
} else {
|
||||
@ -253,6 +289,9 @@ object Z80Multiply {
|
||||
count match {
|
||||
case 0 => List(ZLine.ldImm8(A, 0))
|
||||
case 1 => Nil
|
||||
case 128 => List(ZLine.implied(RRCA), ZLine.imm8(AND, 0x80))
|
||||
case 64 => List(ZLine.implied(RRCA), ZLine.implied(RRCA), ZLine.imm8(AND, 0xC0))
|
||||
case 32 => List(ZLine.implied(RRCA), ZLine.implied(RRCA), ZLine.implied(RRCA), ZLine.imm8(AND, 0xE0))
|
||||
case n if n > 0 && n.-(1).&(n).==(0) => List.fill(Integer.numberOfTrailingZeros(n))(ZLine.register(ADD, A))
|
||||
case _ =>
|
||||
ZLine.ld8(E,A) :: Integer.toString(count & 0xff, 2).tail.flatMap{
|
||||
|
@ -157,6 +157,7 @@ class ByteMathSuite extends FunSuite with Matchers with AppendedClues {
|
||||
private def multiplyCase1(x: Int, y: Int): Unit = {
|
||||
EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086)(
|
||||
s"""
|
||||
| import zp_reg
|
||||
| byte output @$$c000
|
||||
| void main () {
|
||||
| output = $x
|
||||
@ -185,6 +186,7 @@ class ByteMathSuite extends FunSuite with Matchers with AppendedClues {
|
||||
private def multiplyCase2(x: Int, y: Int): Unit = {
|
||||
EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086)(
|
||||
s"""
|
||||
| import zp_reg
|
||||
| byte output @$$c000
|
||||
| void main () {
|
||||
| byte a
|
||||
|
Loading…
x
Reference in New Issue
Block a user