1
0
mirror of https://github.com/KarolS/millfork.git synced 2025-01-10 20:29:35 +00:00

Add 16-bit multiplication

This commit is contained in:
Karol Stasiak 2019-09-04 21:17:06 +02:00
parent f18135ee7f
commit 3ce2249399
15 changed files with 365 additions and 36 deletions

View File

@ -2,6 +2,8 @@
## Current version
* Added full 16-bit multiplication.
* Added preliminary support for EasyFlash.
* Allowed defining custom output padding byte value.

View File

@ -92,6 +92,7 @@ TODO
`byte * byte` (zpreg)
`word * byte` (zpreg)
`byte * word` (zpreg)
`word * word` (zpreg)
* `/`, `%%`: unsigned division and unsigned modulo
`unsigned byte / unsigned byte` (zpreg)
@ -201,6 +202,7 @@ An expression of form `a[f()] += b` may call `f` an undefined number of times.
`mutable byte *= constant byte`
`mutable byte *= byte` (zpreg)
`mutable word *= unsigned byte` (zpreg)
`mutable word *= word` (zpreg)
* `*'=`: decimal multiplication in place
`mutable byte *'= constant byte`

View File

@ -83,7 +83,7 @@ __divmod_u16u8u16u8_skip:
? RET
}
inline asm word __mul_u16u8u16() {
noinline asm word __mul_u16u8u16() {
? LD HL,0
? LD B,8
__mul_u16u8u16_loop:
@ -107,3 +107,46 @@ __mul_u16u8u16_skip:
#endif
? RET
}
#if CPUFEATURE_Z80 || CPUFEATURE_GAMEBOY
// Unsigned 16-bit by 16-bit multiplication (variant for CPUFEATURE_Z80 or CPUFEATURE_GAMEBOY).
// Operands are taken from BC and DE; the low 16 bits of the product are left in HL.
// Classic shift-and-add over 16 iterations, with A as the loop counter.
// BC is never written; DE is shifted left once per iteration.
noinline asm word __mul_u16u16u16() {
ld hl,0
ld a,16
__mul_u16u16u16_loop:
// double the partial product, then shift the next (most significant) bit of DE into carry
add hl,hl
rl e
rl d
// add BC only when the bit shifted out of DE was set
jr nc,__mul_u16u16u16_skip
add hl,bc
__mul_u16u16u16_skip:
dec a
jr nz,__mul_u16u16u16_loop
ret
}
#else
// Unsigned 16-bit by 16-bit multiplication (variant used when neither CPUFEATURE_Z80
// nor CPUFEATURE_GAMEBOY is set).
// Clears HL and then performs 16 shift-and-add steps as four runs of __mul_u16u16u16_q;
// the last run is entered with a jump, so its final return goes straight back to our caller.
noinline asm word __mul_u16u16u16() {
ld hl,0
call __mul_u16u16u16_q
call __mul_u16u16u16_q
call __mul_u16u16u16_q
jp __mul_u16u16u16_q
}
// Helper for __mul_u16u16u16: performs four consecutive multiplication steps
// by invoking __mul_u16u16u16_s four times (the last one as a tail jump).
noinline asm word __mul_u16u16u16_q(word hl) {
call __mul_u16u16u16_s
call __mul_u16u16u16_s
call __mul_u16u16u16_s
jp __mul_u16u16u16_s
}
// Helper for __mul_u16u16u16: a single shift-and-add step.
// Doubles HL, doubles DE through A (E+E, then D+D with carry),
// and adds BC to HL only if doubling DE produced a carry.
noinline asm word __mul_u16u16u16_s(word hl) {
add hl,hl
ld a,e
add a,e
ld e,a
ld a,d
adc a,d
ld d,a
// no carry out of DE: this bit of the multiplier was clear, nothing to add
ret nc
add hl,bc
ret
}
#endif

View File

@ -128,4 +128,156 @@ noinline asm word call(word ax) {
JMP ((__reg + 2))
}
// Unsigned 16-bit by 16-bit multiplication using the zeropage pseudoregister.
// Operands: (__reg[1]:__reg[0]) and (__reg[3]:__reg[2]).
// Result: the low 16 bits of the product, returned in A (low byte) and X (high byte)
// and also left in (__reg[1]:__reg[0]). __reg+2 and __reg+3 are overwritten.
// The first operand is saved in __reg+4/__reg+5 when ZPREG_SIZE >= 6,
// or on the hardware stack otherwise. Y is the loop counter.
noinline asm word __mul_u16u16u16() {
// Save the first operand so that __reg+0/__reg+1 can accumulate the product
#if ZPREG_SIZE < 6
? LDA __reg+1
? PHA
? LDA __reg
? PHA
// X now addresses the saved operand: $101,X is its low byte, $102,X its high byte
? TSX
#else
? LDA __reg
? STA __reg+4
? LDA __reg+1
? STA __reg+5
#endif
// Clear the product accumulator
#if CPUFEATURE_65C02 && not(CPUFEATURE_65CE02)
? STZ __reg
? STZ __reg+1
#else
? LDA #0
? STA __reg
? STA __reg+1
#endif
? LDY #16
__mul_u16u16u32_loop:
// Shift the 32-bit value __reg+3..__reg+0 left by one bit;
// the bit shifted out of __reg+3 decides whether the saved operand is added
? ASL __reg
? ROL __reg+1
? ROL __reg+2
? ROL __reg+3
? BCC __mul_u16u16u32_skip
#if ZPREG_SIZE < 6
? LDA $101,X
#else
? LDA __reg+4
#endif
? CLC
? ADC __reg
? STA __reg
#if ZPREG_SIZE < 6
? LDA $102,X
#else
? LDA __reg+5
#endif
? ADC __reg+1
? STA __reg+1
? BCC __mul_u16u16u32_skip
// propagate the carry of the 16-bit addition into the next byte
? INC __reg+2
__mul_u16u16u32_skip:
? DEY
? BNE __mul_u16u16u32_loop
// Drop the two saved operand bytes from the stack
#if ZPREG_SIZE < 6
#if OPTIMIZE_FOR_SPEED
? INX
? INX
? TXS
#else
? PLA
? PLA
#endif
#endif
? LDA __reg
? LDX __reg+1
? RTS
}
// Unsigned 16-bit division with remainder.
// Divides (__reg[1]:__reg[0]) by (__reg[3]:__reg[2]).
// On return the quotient is in (__reg[1]:__reg[0]) and the remainder in (__reg[3]:__reg[2]).
// When ZPREG_SIZE >= 6 the divisor is kept in __reg+4/__reg+5 and X counts the iterations;
// otherwise the divisor and the loop counter live on the hardware stack and are addressed through X.
// Y is used as scratch for the low byte of the trial subtraction.
noinline asm word __divmod_u16u16u16u16() {
// Save the divisor so that __reg+2/__reg+3 can accumulate the remainder
#if ZPREG_SIZE < 6
? LDA __reg+3
? PHA
? LDA __reg+2
? PHA
#else
? LDA __reg+2
? STA __reg+4
? LDA __reg+3
? STA __reg+5
#endif
// Clear the remainder
#if CPUFEATURE_65C02 && not(CPUFEATURE_65CE02)
? STZ __reg+2
? STZ __reg+3
#else
? LDA #0
? STA __reg+2
? STA __reg+3
#endif
#if ZPREG_SIZE < 6
? LDA #16
? PHA
// Capture the stack pointer only after all three bytes have been pushed, so that
// $101,X is the loop counter, $102,X the divisor low byte and $103,X the divisor high byte,
// and so that the three INX before TXS below restore the stack pointer exactly.
? TSX
#else
? LDX #16
#endif
__divmod_u16u16u16u16_loop:
// Shift the next dividend bit into the remainder
? ASL __reg
? ROL __reg+1
? ROL __reg+2
? ROL __reg+3
// Trial subtraction: remainder - divisor, low byte kept in Y, high byte in A
? LDA __reg+2
sec
#if ZPREG_SIZE < 6
? SBC $102,X
#else
? SBC __reg+4
#endif
? TAY
? LDA __reg+3
#if ZPREG_SIZE < 6
? SBC $103,X
#else
? SBC __reg+5
#endif
// Borrow means the divisor does not fit: keep the remainder, quotient bit stays 0
? BCC __divmod_u16u16u16u16_skip
? STA __reg+3
? STY __reg+2
// ASL left bit 0 of __reg clear, so INC sets the new quotient bit
? INC __reg
__divmod_u16u16u16u16_skip:
#if ZPREG_SIZE < 6
? DEC $101,X
#else
? DEX
#endif
? BNE __divmod_u16u16u16u16_loop
// Drop the counter and the two divisor bytes from the stack
#if ZPREG_SIZE < 6
#if OPTIMIZE_FOR_SIZE
? PLA
? PLA
? PLA
#else
? INX
? INX
? INX
? TXS
#endif
#endif
? RTS
}
// Unsigned 16-bit division: runs __divmod_u16u16u16u16 and returns the quotient,
// loading __reg+0 into A (low byte) and __reg+1 into X (high byte).
asm word __div_u16u16u16u16() {
JSR __divmod_u16u16u16u16
? LDA __reg
? LDX __reg+1
? RTS
}
// Unsigned 16-bit modulo: runs __divmod_u16u16u16u16 and returns the remainder,
// loading __reg+2 into A (low byte) and __reg+3 into X (high byte).
asm word __mod_u16u16u16u16() {
JSR __divmod_u16u16u16u16
? LDA __reg+2
? LDX __reg+3
? RTS
}
#endif

View File

@ -14,11 +14,14 @@ object ZeropageRegisterOptimizations {
val functionsThatUsePseudoregisterAsInput: Map[String, Set[Int]] = Map(
"call" -> Set(2, 3),
"__mul_u16u16u16" -> Set(0, 1, 2, 3),
"__mul_u8u8u8" -> Set(0, 1),
"__mod_u8u8u8u8" -> Set(0, 1),
"__div_u8u8u8u8" -> Set(0, 1),
"__mod_u16u8u16u8" -> Set(0, 1, 2),
"__div_u16u8u16u8" -> Set(0, 1, 2),
"__div_u16u16u16u16" -> Set(0, 1, 2, 3),
"__mod_u16u16u16u16" -> Set(0, 1, 2, 3),
"__mul_u16u8u16" -> Set(0, 1, 2),
"__adc_decimal" -> Set(2, 3),
"__sbc_decimal" -> Set(2, 3),

View File

@ -42,8 +42,9 @@ object CoarseFlowAnalyzer {
result
}
val preservesB: Set[String] = Set("__mul_u8u8u8")
val preservesC: Set[String] = if (z80) Set("__mul_u8u8u8") else Set()
// TODO: u16u16u16u16 division
val preservesB: Set[String] = Set("__mul_u8u8u8", "__mul_u16u16u16")
val preservesC: Set[String] = if (z80) Set("__mul_u8u8u8", "__mul_u16u16u16") else Set("__mul_u16u16u16")
val preservesD: Set[String] = Set()
val preservesE: Set[String] = Set("__divmod_u16u8u16u8")
val preservesH: Set[String] = Set("__mul_u8u8u8")

View File

@ -183,11 +183,12 @@ object ReverseFlowAnalyzer {
val cache = new FlowCache[ZLine, CpuImportance]("z80 reverse")
// TODO: u16u16u16u16 division
val readsA: Set[String] = Set("__mul_u8u8u8", "__mul_u16u8u16", "call")
val readsB: Set[String] = Set("")
val readsC: Set[String] = Set("")
val readsD: Set[String] = Set("__mul_u8u8u8","__mul_u16u8u16", "__divmod_u16u8u16u8", "call")
val readsE: Set[String] = Set("__mul_u16u8u16", "call")
val readsB: Set[String] = Set("__mul_u16u16u16")
val readsC: Set[String] = Set("__mul_u16u16u16")
val readsD: Set[String] = Set("__mul_u8u8u8","__mul_u16u8u16", "__divmod_u16u8u16u8", "call", "__mul_u16u16u16")
val readsE: Set[String] = Set("__mul_u16u8u16", "call", "__mul_u16u16u16")
val readsH: Set[String] = Set("__divmod_u16u8u16u8", "call")
val readsL: Set[String] = Set("__divmod_u16u8u16u8", "call")

View File

@ -50,32 +50,23 @@ class AbstractExpressionCompiler[T <: AbstractCode] {
val lSize = lType.size
val rType = getExpressionType(ctx, params(1))
val rSize = rType.size
if (lSize != 1 && lSize != 2) {
ctx.log.error("Long multiplication not supported", params.head.position)
}
if (rSize != 1 && rSize != 2) {
ctx.log.error("Long multiplication not supported", params.head.position)
}
if (inPlace) {
if (lSize != 1 && lSize != 2) {
ctx.log.error("Long multiplication not supported", params.head.position)
}
if (rSize != 1) {
ctx.log.error("Long multiplication not supported", params.head.position)
}
if (lSize == 2 && rType.isSigned) {
if (lSize == 2 && rSize == 1 && rType.isSigned) {
ctx.log.error("Signed multiplication not supported", params.head.position)
}
} else {
if (lSize > 2 || rSize > 2 || lSize + rSize > 3) {
ctx.log.error("Long multiplication not supported", params.head.position)
}
if (lSize == 2 && rType.isSigned) {
if (lSize == 2 && rSize == 1 && rType.isSigned) {
ctx.log.error("Signed multiplication not supported", params.head.position)
}
if (rSize == 2 && lType.isSigned) {
if (rSize == 2 && lSize == 1 && lType.isSigned) {
ctx.log.error("Signed multiplication not supported", params.head.position)
}
if (lSize + rSize > 2) {
if (params.size != 2) {
ctx.log.error("Cannot multiply more than 2 large numbers at once", params.headOption.flatMap(_.position))
return
}
}
}
}

View File

@ -574,13 +574,61 @@ object PseudoregisterBuiltIns {
private def isPowerOfTwoUpTo15(n: Long): Boolean = if (n <= 0 || n >= 0x8000) false else 0 == ((n-1) & n)
/**
  * Emits code for a 16-bit by 16-bit multiplication through the `__mul_u16u16u16` routine.
  *
  * The generated code stores one operand in `__reg+0`/`__reg+1` and the other in
  * `__reg+2`/`__reg+3`, and then calls the routine. The order in which the operands are
  * evaluated is chosen so that evaluating one of them does not overwrite the part of the
  * pseudoregister already holding the other; if neither order is safe, the value computed
  * first is kept on the hardware stack in the meantime.
  *
  * @param ctx              compilation context
  * @param param1OrRegister the first operand, or `None` if its value is already in A (low byte) and X (high byte)
  * @param param2           the second operand
  * @return the generated code, or `Nil` (after logging an error) if the zeropage pseudoregister is smaller than 4 bytes
  */
def compileWordWordMultiplication(ctx: CompilationContext, param1OrRegister: Option[Expression], param2: Expression): List[AssemblyLine] = {
  if (ctx.options.zpRegisterSize < 4) {
    // When the first operand is already in a register it has no expression to take a position from,
    // so fall back to the position of the second operand instead of reporting no position at all.
    ctx.log.error(
      "Variable word-word multiplication requires the zeropage pseudoregister of size at least 4",
      param1OrRegister.flatMap(_.position).orElse(param2.position))
    Nil
  } else {
    val w = ctx.env.get[Type]("word")
    val reg = ctx.env.get[VariableInMemory]("__reg")

    // AX -> __reg+0/__reg+1
    def storeFirst: List[AssemblyLine] = List(AssemblyLine.zeropage(STA, reg), AssemblyLine.zeropage(STX, reg, 1))
    // AX -> __reg+2/__reg+3
    def storeSecond: List[AssemblyLine] = List(AssemblyLine.zeropage(STA, reg, 2), AssemblyLine.zeropage(STX, reg, 3))
    // push A (low byte) first, then X (high byte); pops therefore yield the high byte first
    def pushAX: List[AssemblyLine] = List(AssemblyLine.implied(PHA), AssemblyLine.implied(TXA), AssemblyLine.implied(PHA))

    val load: List[AssemblyLine] = param1OrRegister match {
      case Some(param1) =>
        val code1 = MosExpressionCompiler.compile(ctx, param1, Some(w -> RegisterVariable(MosRegister.AX, w)), BranchSpec.None)
        val code2 = MosExpressionCompiler.compile(ctx, param2, Some(w -> RegisterVariable(MosRegister.AX, w)), BranchSpec.None)
        if (!usesRegLo(code2) && !usesRegHi(code2)) {
          // evaluating param2 leaves __reg+0/__reg+1 alone: first operand first
          code1 ++ storeFirst ++ code2 ++ storeSecond
        } else if (!usesReg2(code1) && !usesReg3(code1)) {
          // evaluating param1 leaves __reg+2/__reg+3 alone: second operand first
          code2 ++ storeSecond ++ code1 ++ storeFirst
        } else {
          // neither order is safe: keep param2 on the stack while param1 is evaluated
          code2 ++ pushAX ++ code1 ++ storeFirst ++ List(
            AssemblyLine.implied(PLA),
            AssemblyLine.zeropage(STA, reg, 3),
            AssemblyLine.implied(PLA),
            AssemblyLine.zeropage(STA, reg, 2)
          )
        }
      case None =>
        val code2 = MosExpressionCompiler.compile(ctx, param2, Some(w -> RegisterVariable(MosRegister.AX, w)), BranchSpec.None)
        if (!usesRegLo(code2) && !usesRegHi(code2)) {
          storeFirst ++ code2 ++ storeSecond
        } else {
          // keep the first operand (currently in AX) on the stack while param2 is evaluated
          pushAX ++ code2 ++ storeSecond ++ List(
            AssemblyLine.implied(PLA),
            AssemblyLine.zeropage(STA, reg, 1),
            AssemblyLine.implied(PLA),
            AssemblyLine.zeropage(STA, reg)
          )
        }
    }
    load :+ AssemblyLine.absoluteOrLongAbsolute(JSR, ctx.env.get[FunctionInMemory]("__mul_u16u16u16"), ctx.options)
  }
}
def compileWordMultiplication(ctx: CompilationContext, param1OrRegister: Option[Expression], param2: Expression, storeInRegLo: Boolean): List[AssemblyLine] = {
if (ctx.options.zpRegisterSize < 3) {
ctx.log.error("Variable word multiplication requires the zeropage pseudoregister of size at least 3", param1OrRegister.flatMap(_.position))
ctx.log.error("Variable word-byte multiplication requires the zeropage pseudoregister of size at least 3", param1OrRegister.flatMap(_.position))
return Nil
}
(param1OrRegister.fold(2)(e => AbstractExpressionCompiler.getExpressionType(ctx, e).size),
AbstractExpressionCompiler.getExpressionType(ctx, param2).size) match {
case (2, 2) => return compileWordWordMultiplication(ctx, param1OrRegister, param2)
case (1, 2) => return compileWordMultiplication(ctx, Some(param2), param1OrRegister.get, storeInRegLo)
case (2 | 1, 1) => // ok
case _ => ctx.log.fatal("Invalid code path", param2.position)
@ -717,6 +765,12 @@ object PseudoregisterBuiltIns {
case _ => false
}
/**
  * Conservatively reports whether the given code may touch `__reg+3`.
  * Any JSR, BSR, TCD or TDC instruction counts, as does any instruction
  * whose parameter is the address of `__reg` plus 3.
  */
def usesReg3(code: List[AssemblyLine]): Boolean = {
  def mayTouchReg3(line: AssemblyLine): Boolean = line match {
    case AssemblyLine0(JSR | BSR | TCD | TDC, _, _) =>
      true
    case AssemblyLine0(_, _, CompoundConstant(MathOperator.Plus, MemoryAddressConstant(target), NumericConstant(3, _))) =>
      target.name == "__reg"
    case _ =>
      false
  }
  code.exists(mayTouchReg3)
}
def doesMemoryAccessOverlap(l1: List[AssemblyLine], l2: List[AssemblyLine]): Boolean = {
for{
a1 <- l1

View File

@ -897,7 +897,7 @@ object Z80ExpressionCompiler extends AbstractExpressionCompiler[ZLine] {
targetifyA(ctx, target, Z80Multiply.compile8BitMultiply(ctx, params), isSigned = false)
case 2 =>
//noinspection ZeroIndexToHead
targetifyHL(ctx, target, Z80Multiply.compile16And8BitMultiplyToHL(ctx, params(0), params(1)))
targetifyHL(ctx, target, Z80Multiply.compile16BitMultiplyToHL(ctx, params(0), params(1)))
}
case "|" =>
getArithmeticParamMaxSize(ctx, params) match {
@ -1073,7 +1073,7 @@ object Z80ExpressionCompiler extends AbstractExpressionCompiler[ZLine] {
case 1 =>
Z80Multiply.compile8BitInPlaceMultiply(ctx, l, r)
case 2 =>
Z80Multiply.compile16And8BitInPlaceMultiply(ctx, l, r)
Z80Multiply.compile16BitInPlaceMultiply(ctx, l, r)
}
case "/=" | "%%=" =>
assertSizesForDivision(ctx, params, inPlace = true)

View File

@ -29,6 +29,14 @@ object Z80Multiply {
ctx.env.get[ThingInMemory]("__mul_u16u8u16").toAddress))
}
/**
  * Emits the call to the `__mul_u16u16u16` runtime routine,
  * which computes HL = BC * DE.
  */
private def multiplication16And16(ctx: CompilationContext): List[ZLine] = {
  val routine = ctx.env.get[ThingInMemory]("__mul_u16u16u16")
  ZLine(ZOpcode.CALL, NoRegisters, routine.toAddress) :: Nil
}
/**
* Calculate A = l * r
*/
@ -256,13 +264,35 @@ object Z80Multiply {
result.toList
}
/**
  * Calculate HL = l * r for two 16-bit operands.
  *
  * If both operands are compile-time constants, the product is folded into a single load of HL.
  * If exactly one is constant, the other is compiled into DE and the constant is loaded into BC.
  * Otherwise `l` goes to DE and `r` to BC, ordered (or with DE stashed) so that
  * loading one register pair does not destroy the other.
  */
def compile16x16BitMultiplyToHL(ctx: CompilationContext, l: Expression, r: Expression): List[ZLine] = {
  val leftConstant = ctx.env.eval(l)
  val rightConstant = ctx.env.eval(r)
  (leftConstant, rightConstant) match {
    case (Some(lc), Some(rc)) =>
      val product = CompoundConstant(MathOperator.Times, lc, rc).quickSimplify.subword(0)
      List(ZLine.ldImm16(ZRegister.HL, product))
    case (Some(lc), None) =>
      Z80ExpressionCompiler.compileToDE(ctx, r) ++ List(ZLine.ldImm16(ZRegister.BC, lc)) ++ multiplication16And16(ctx)
    case (None, Some(rc)) =>
      Z80ExpressionCompiler.compileToDE(ctx, l) ++ List(ZLine.ldImm16(ZRegister.BC, rc)) ++ multiplication16And16(ctx)
    case (None, None) =>
      val intoDE = Z80ExpressionCompiler.compileToDE(ctx, l)
      val intoBC = Z80ExpressionCompiler.compileToBC(ctx, r)
      val deLoadClobbersBC = intoDE.exists(Z80ExpressionCompiler.changesBC)
      val bcLoadClobbersDE = intoBC.exists(Z80ExpressionCompiler.changesDE)
      val loadRegisters =
        if (!bcLoadClobbersDE) {
          // loading BC last is safe because it leaves DE intact
          intoDE ++ intoBC
        } else if (deLoadClobbersBC) {
          // each load destroys the other pair: protect DE while BC is being loaded
          intoDE ++ Z80ExpressionCompiler.stashDEIfChanged(ctx, intoBC)
        } else {
          // loading DE leaves BC intact, so load BC first
          intoBC ++ intoDE
        }
      loadRegisters ++ multiplication16And16(ctx)
  }
}
/**
* Calculate HL = l * r
*/
def compile16And8BitMultiplyToHL(ctx: CompilationContext, l: Expression, r: Expression): List[ZLine] = {
def compile16BitMultiplyToHL(ctx: CompilationContext, l: Expression, r: Expression): List[ZLine] = {
(AbstractExpressionCompiler.getExpressionType(ctx, l).size,
AbstractExpressionCompiler.getExpressionType(ctx, r).size) match {
case (1, 2) => return compile16And8BitMultiplyToHL(ctx, r, l)
case (2, 2) => return compile16x16BitMultiplyToHL(ctx, l, r)
case (1, 2) => return compile16BitMultiplyToHL(ctx, r, l)
case (2 | 1, 1) => // ok
case _ => ctx.log.fatal("Invalid code path", l.position)
}
@ -280,8 +310,8 @@ object Z80Multiply {
/**
* Calculate l = l * r
*/
def compile16And8BitInPlaceMultiply(ctx: CompilationContext, l: LhsExpression, r: Expression): List[ZLine] = {
compile16And8BitMultiplyToHL(ctx, l, r) ++ Z80ExpressionCompiler.storeHL(ctx, l, signedSource = false)
def compile16BitInPlaceMultiply(ctx: CompilationContext, l: LhsExpression, r: Expression): List[ZLine] = {
compile16BitMultiplyToHL(ctx, l, r) ++ Z80ExpressionCompiler.storeHL(ctx, l, signedSource = false)
}
/**

View File

@ -13,8 +13,10 @@ object UnusedFunctions extends NodeOptimization {
private val operatorImplementations: List[(String, Int, String)] = List(
("*", 2, "__mul_u8u8u8"),
("*", 3, "__mul_u16u8u16"),
("*", 4, "__mul_u16u16u16"),
("*=", 2, "__mul_u8u8u8"),
("*=", 2, "__mul_u16u8u16"),
("*=", 4, "__mul_u16u16u16"),
("/=", 0, "__divmod_u16u8u16u8"),
("/", 0, "__divmod_u16u8u16u8"),
("%%=", 0, "__divmod_u16u8u16u8"),
@ -31,6 +33,7 @@ object UnusedFunctions extends NodeOptimization {
("/", 2, "__mod_u16u8u16u8"),
("/=", 2, "__div_u16u8u16u8"),
("/", 2, "__div_u16u8u16u8"),
// TODO: u16u16u16u16 division
("+'", 4, "__adc_decimal"),
("+'=", 4, "__adc_decimal"),
("-'", 4, "__sub_decimal"),

View File

@ -274,7 +274,11 @@ abstract class AbstractAssembler[T <: AbstractCode](private val program: Program
}
}
val objectsThatMayBeUnused = Set("__mul_u8u8u8", "__constant8", "identity$", "__mul_u16u8u16", "__divmod_u16u8u16u8", "__mod_u8u8u8u8", "__div_u8u8u8u8") ++
val objectsThatMayBeUnused = Set("__constant8", "identity$",
"__mul_u8u8u8", "__mul_u16u8u16", "__mul_u16u16u16",
"__mod_u8u8u8u8", "__div_u8u8u8u8",
"__divmod_u16u8u16u8", "__mod_u16u8u16u8", "__div_u16u8u16u8",
"__divmod_u16u16u16u16", "__mod_u16u16u16u16", "__div_u16u16u16u16") ++
compiledFunctions.keySet.filter(_.endsWith(".trampoline"))
val unusedRuntimeObjects = objectsThatMayBeUnused.filterNot(name =>{
compiledFunctions.exists{

View File

@ -1,6 +1,6 @@
package millfork.test
import millfork.Cpu
import millfork.test.emu.{EmuBenchmarkRun, EmuCmosBenchmarkRun, EmuCrossPlatformBenchmarkRun}
import millfork.test.emu.{EmuBenchmarkRun, EmuCmosBenchmarkRun, EmuCrossPlatformBenchmarkRun, EmuUnoptimizedCrossPlatformRun}
import org.scalatest.{AppendedClues, FunSuite, Matchers}
/**
@ -628,4 +628,40 @@ class WordMathSuite extends FunSuite with Matchers with AppendedClues {
m.readByte(0xc006) should equal(x % y) withClue s"= $x %% $y"
}
}
// Exercises 16-bit by 16-bit multiplication with zero operands, small values,
// byte-sized and word-sized operands on either side, and products that overflow 16 bits.
test("Word/word multiplication 1") {
  val operandPairs = List(
    (0, 0),
    (0, 5),
    (7, 0),
    (2, 5),
    (7, 2),
    (100, 2),
    (1000, 2),
    (2, 1000),
    (1522, 1000),
    (54, 4),
    (35000, 9),
    (43, 35000),
    (53459, 48233)
  )
  operandPairs.foreach { case (left, right) => multiplyCaseWW1(left, right) }
}
/**
  * Compiles and runs a small Millfork program through EmuUnoptimizedCrossPlatformRun
  * for Cpu.Mos, Cpu.Z80 and Cpu.Intel8080 and checks word-by-word multiplication.
  *
  * The program multiplies in place (`output0 *= word(y)`, read back from $c000) and
  * as an expression on values passed through a noinline identity function
  * (`id(x) * id(y)`, read back from $c002). Both are compared with the low 16 bits of `x * y`.
  * `output2` is declared by the program but not used.
  *
  * @param x the first factor
  * @param y the second factor
  */
private def multiplyCaseWW1(x: Int, y: Int): Unit = {
EmuUnoptimizedCrossPlatformRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080)(
s"""
| import zp_reg
| word output0 @$$c000
| word output1 @$$c002
| word output2 @$$c004
| void main () {
| output0 = $x
| output0 *= word($y)
| output1 = id($x) * id($y)
| }
| noinline word id(word w) = w
""".
stripMargin){m =>
// x * y may overflow Int, but the wrapped value still has the correct low 16 bits
m.readWord(0xc000) should equal((x * y) & 0xffff) withClue s"= $x * $y (c000)"
m.readWord(0xc002) should equal((x * y) & 0xffff) withClue s"= $x * $y (c002)"
}
}
}

View File

@ -37,9 +37,16 @@ object EmuRun {
), None, 4, Map(), JobContext(TestErrorReporting.log, new LabelGenerator))
val PreprocessingResult(preprocessedSource, features, _) = Preprocessor.preprocessForTest(options, source)
TestErrorReporting.log.info(s"Parsing $filename")
MosParser("", preprocessedSource, "", options, features).toAst match {
val parser = MosParser("", preprocessedSource, "", options, features)
parser.toAst match {
case Success(x, _) => Some(x)
case _ => None
case f: Failure[_, _] =>
TestErrorReporting.log.error(f.toString)
TestErrorReporting.log.error(f.extra.toString)
TestErrorReporting.log.error(f.lastParser.toString)
TestErrorReporting.log.error("Syntax error", Some(parser.lastPosition))
TestErrorReporting.log.error("Parsing error")
None
}
}