From cbe6d03e603219f6038ec5ead1f482953a6e84a2 Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Fri, 28 Sep 2018 23:45:26 +0200 Subject: [PATCH] 8080: Optimized multiplication --- include/i80_math.mfk | 51 ++++++++++++++++ include/stdlib_i80.mfk | 2 + .../z80/opt/AlwaysGoodI80Optimizations.scala | 18 ++++++ .../assembly/z80/opt/CoarseFlowAnalyzer.scala | 21 ++++++- .../z80/opt/ReverseFlowAnalyzer.scala | 59 +++++++++++-------- .../millfork/compiler/z80/Z80Multiply.scala | 53 ++--------------- .../millfork/node/opt/UnusedFunctions.scala | 8 ++- .../scala/millfork/test/emu/EmuZ80Run.scala | 48 +++++++++++++-- 8 files changed, 179 insertions(+), 81 deletions(-) create mode 100644 include/i80_math.mfk diff --git a/include/i80_math.mfk b/include/i80_math.mfk new file mode 100644 index 00000000..adcd65c2 --- /dev/null +++ b/include/i80_math.mfk @@ -0,0 +1,51 @@ + +#if not(ARCH_I80) +#warn i80_math module should be used only on 8080-like targets +#endif + +#pragma zilog_syntax + +#if CPUFEATURE_Z80 || CPUFEATURE_GAMEBOY + +inline asm byte __mul_u8u8u8() { + ? LD E,A + ? LD A, 0 + ? JR __mul_u8u8u8_start + __mul_u8u8u8_add: + ? ADD A,E + __mul_u8u8u8_loop: + ? SLA E + __mul_u8u8u8_start: + ? SRL D + ? JR C, __mul_u8u8u8_add + ? JR NZ, __mul_u8u8u8_loop + ? RET +} + +#else + +inline asm byte __mul_u8u8u8() { + ? LD E,A + ? LD C, 0 + ? JP __mul_u8u8u8_start + __mul_u8u8u8_add: + ? LD A,C + ? ADD A,E + ? LD C,A + __mul_u8u8u8_loop: + ? LD A,E + ? ADD A,A + ? LD E,A + __mul_u8u8u8_start: + ? OR A + ? LD A,D + ? RRA + ? LD D,A + ? JP C, __mul_u8u8u8_add + ? OR A + ? JP NZ, __mul_u8u8u8_loop + ? LD A,C + ? 
RET +} + +#endif diff --git a/include/stdlib_i80.mfk b/include/stdlib_i80.mfk index 6dadc1aa..c30b02b0 100644 --- a/include/stdlib_i80.mfk +++ b/include/stdlib_i80.mfk @@ -6,6 +6,8 @@ #pragma zilog_syntax +import i80_math + macro asm void poke(word const addr, byte a) { LD (addr), A } diff --git a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala index 1d34cd13..4b957e58 100644 --- a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala +++ b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodI80Optimizations.scala @@ -1030,10 +1030,28 @@ object AlwaysGoodI80Optimizations { ) + val PointlessArithmetic = new RuleBasedAssemblyOptimization("Pointless arithmetic", + needsFlowInfo = FlowInfoRequirement.BackwardFlow, + (Elidable & HasOpcodeIn(Set(ADD, ADC, SUB, SBC, OR, AND, XOR, CP)) & DoesntMatterWhatItDoesWithFlags & DoesntMatterWhatItDoesWith(ZRegister.A)) ~~> (_ => Nil) + ) + + val ConstantMultiplication = new RuleBasedAssemblyOptimization("Constant multiplication", + needsFlowInfo = FlowInfoRequirement.BothFlows, + (Elidable & HasOpcode(CALL) & RefersTo("__mul_u8u8u8", 0) + & MatchRegister(ZRegister.A, 4) + & MatchRegister(ZRegister.D, 5) + & DoesntMatterWhatItDoesWithFlags + & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C)) ~~> { (code, ctx) => + val product = ctx.get[Int](4) * ctx.get[Int](5) + List(ZLine.ldImm8(ZRegister.A, product & 0xff)) /* A is 8 bits wide: the folded constant must be the low byte of A * D, exactly what __mul_u8u8u8 would return */ + }, + ) val All: List[AssemblyOptimization[ZLine]] = List[AssemblyOptimization[ZLine]]( BranchInPlaceRemoval, + ConstantMultiplication, FreeHL, + PointlessArithmetic, PointlessLoad, PointlessStackStashing, ReloadingKnownValueFromMemory, diff --git a/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala b/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala index 76b7f9f4..a61d9e9e 100644 --- a/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala +++ 
b/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala @@ -2,7 +2,7 @@ package millfork.assembly.z80.opt import millfork.assembly.opt.{AnyStatus, SingleStatus, Status} import millfork.assembly.z80._ -import millfork.env.{Label, MemoryAddressConstant, NormalFunction, NumericConstant} +import millfork.env._ import millfork.node.ZRegister import millfork.{CompilationFlag, CompilationOptions, Cpu} @@ -19,6 +19,13 @@ object CoarseFlowAnalyzer { val codeArray = code.toArray val z80 = compilationOptions.flag(CompilationFlag.EmitZ80Opcodes) + val preservesB: Set[String] = Set("__mul_u8u8u8") + val preservesC: Set[String] = if (z80) Set("__mul_u8u8u8") else Set() + val preservesD: Set[String] = Set() + val preservesE: Set[String] = Set() + val preservesH: Set[String] = Set("__mul_u8u8u8") + val preservesL: Set[String] = Set("__mul_u8u8u8") + var changed = true while (changed) { changed = false @@ -38,6 +45,18 @@ object CoarseFlowAnalyzer { case _ => None }).fold(currentStatus)(_ ~ _) + case ZLine(CALL, _, MemoryAddressConstant(fun: FunctionInMemory), _) => + val n = fun.name + val result = initialStatus.copy(memIx = currentStatus.memIx) + currentStatus = result.copy( + b = if (preservesB(n)) currentStatus.b else result.b, + c = if (preservesC(n)) currentStatus.c else result.c, + d = if (preservesD(n)) currentStatus.d else result.d, + e = if (preservesE(n)) currentStatus.e else result.e, + h = if (preservesH(n)) currentStatus.h else result.h, + l = if (preservesL(n)) currentStatus.l else result.l + ) + case ZLine(CALL, _, _, _) => currentStatus = initialStatus.copy(memIx = currentStatus.memIx) case ZLine(BYTE, _, _, _) => diff --git a/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala b/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala index 53f67d1e..a7d75b9d 100644 --- a/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala +++ b/src/main/scala/millfork/assembly/z80/opt/ReverseFlowAnalyzer.scala @@ -175,6 +175,14 
@@ case class CpuImportance(a: Importance = UnknownImportance, object ReverseFlowAnalyzer { + val readsA = Set("__mul_u8u8u8") + val readsB = Set("") + val readsC = Set("") + val readsD = Set("__mul_u8u8u8") + val readsE = Set("") + val readsH = Set("") + val readsL = Set("") + //noinspection RedundantNewCaseClass def analyze(f: NormalFunction, code: List[ZLine]): List[CpuImportance] = { val importanceArray = Array.fill[CpuImportance](code.length)(new CpuImportance()) @@ -336,16 +344,17 @@ object ReverseFlowAnalyzer { case ZLine(PUSH, OneRegister(r), _, _) => currentImportance = currentImportance.butReadsRegister(r) case ZLine(CALL | JP, NoRegisters, MemoryAddressConstant(fun: FunctionInMemory), _) => + val n = fun.name fun.params match { case NormalParamSignature(List(v)) if v.typ.size == 1 => currentImportance = currentImportance.copy( a = Important, - b = Unimportant, - c = Unimportant, - d = Unimportant, - e = Unimportant, - h = Unimportant, - l = Unimportant, + b = if (readsB(n)) Important else Unimportant, + c = if (readsC(n)) Important else Unimportant, + d = if (readsD(n)) Important else Unimportant, + e = if (readsE(n)) Important else Unimportant, + h = if (readsH(n)) Important else Unimportant, + l = if (readsL(n)) Important else Unimportant, hlNumeric = Unimportant, iyh = Unimportant, iyl = Unimportant, @@ -357,11 +366,11 @@ object ReverseFlowAnalyzer { ) case NormalParamSignature(List(v)) if v.typ.size == 2 => currentImportance = currentImportance.copy( - a = Unimportant, - b = Unimportant, - c = Unimportant, - d = Unimportant, - e = Unimportant, + a = if (readsA(n)) Important else Unimportant, + b = if (readsB(n)) Important else Unimportant, + c = if (readsC(n)) Important else Unimportant, + d = if (readsD(n)) Important else Unimportant, + e = if (readsE(n)) Important else Unimportant, h = Important, l = Important, hlNumeric = Unimportant, @@ -375,10 +384,10 @@ object ReverseFlowAnalyzer { ) case NormalParamSignature(List(v)) if v.typ.size == 3 => 
currentImportance = currentImportance.copy( - a = Unimportant, - b = Unimportant, - c = Unimportant, - d = Unimportant, + a = if (readsA(n)) Important else Unimportant, + b = if (readsB(n)) Important else Unimportant, + c = if (readsC(n)) Important else Unimportant, + d = if (readsD(n)) Important else Unimportant, e = Important, h = Important, l = Important, @@ -393,9 +402,9 @@ object ReverseFlowAnalyzer { ) case NormalParamSignature(List(v)) if v.typ.size == 4 => currentImportance = currentImportance.copy( - a = Unimportant, - b = Unimportant, - c = Unimportant, + a = if (readsA(n)) Important else Unimportant, + b = if (readsB(n)) Important else Unimportant, + c = if (readsC(n)) Important else Unimportant, d = Important, e = Important, h = Important, @@ -411,13 +420,13 @@ object ReverseFlowAnalyzer { ) case NormalParamSignature(_) | AssemblyParamSignature(Nil) => currentImportance = currentImportance.copy( - a = Unimportant, - b = Unimportant, - c = Unimportant, - d = Unimportant, - e = Unimportant, - h = Unimportant, - l = Unimportant, + a = if (readsA(n)) Important else Unimportant, + b = if (readsB(n)) Important else Unimportant, + c = if (readsC(n)) Important else Unimportant, + d = if (readsD(n)) Important else Unimportant, + e = if (readsE(n)) Important else Unimportant, + h = if (readsH(n)) Important else Unimportant, + l = if (readsL(n)) Important else Unimportant, hlNumeric = Unimportant, iyh = Unimportant, iyl = Unimportant, diff --git a/src/main/scala/millfork/compiler/z80/Z80Multiply.scala b/src/main/scala/millfork/compiler/z80/Z80Multiply.scala index 98594c7a..c5109935 100644 --- a/src/main/scala/millfork/compiler/z80/Z80Multiply.scala +++ b/src/main/scala/millfork/compiler/z80/Z80Multiply.scala @@ -1,9 +1,8 @@ package millfork.compiler.z80 -import millfork.CompilationFlag import millfork.assembly.z80._ -import millfork.compiler.{BranchSpec, CompilationContext} -import millfork.env.{CompoundConstant, Constant, MathOperator, NumericConstant} +import 
millfork.compiler.CompilationContext +import millfork.env._ import millfork.node.{ConstantArrayElementExpression, Expression, LhsExpression, ZRegister} /** @@ -15,52 +14,8 @@ object Z80Multiply { * Compiles A = A * D */ private def multiplication(ctx: CompilationContext): List[ZLine] = { - import millfork.assembly.z80.ZOpcode._ - import ZRegister._ - import ZLine._ - if(ctx.options.flag(CompilationFlag.EmitExtended80Opcodes)) { - val lblAdd = ctx.nextLabel("mu") - val lblLoop = ctx.nextLabel("mu") - val lblStart = ctx.nextLabel("mu") - List( - ld8(E, A), - ldImm8(A, 0), - jumpR(ctx, lblStart), - label(lblAdd), - register(ADD, E), - label(lblLoop), - register(SLA, E), - label(lblStart), - register(SRL, D), - jumpR(ctx, lblAdd, IfFlagSet(ZFlag.C)), - jumpR(ctx, lblLoop, IfFlagClear(ZFlag.Z))) - } else { - // TODO: optimize - val lblAdd = ctx.nextLabel("mu") - val lblLoop = ctx.nextLabel("mu") - val lblStart = ctx.nextLabel("mu") - List( - ld8(E, A), - ldImm8(C, 0), - jumpR(ctx, lblStart), - label(lblAdd), - ld8(A, C), - register(ADD, E), - ld8(C, A), - label(lblLoop), - ld8(A, E), - register(ADD, A), - ld8(E, A), - label(lblStart), - register(OR, A), - ld8(A, D), - implied(RRA), - ld8(D, A), - jumpR(ctx, lblAdd, IfFlagSet(ZFlag.C)), - register(OR, A), - jumpR(ctx, lblLoop, IfFlagClear(ZFlag.Z)), - ld8(A, C)) - } + List(ZLine(ZOpcode.CALL, NoRegisters, + ctx.env.get[ThingInMemory]("__mul_u8u8u8").toAddress)) } /** diff --git a/src/main/scala/millfork/node/opt/UnusedFunctions.scala b/src/main/scala/millfork/node/opt/UnusedFunctions.scala index 36198633..06a61927 100644 --- a/src/main/scala/millfork/node/opt/UnusedFunctions.scala +++ b/src/main/scala/millfork/node/opt/UnusedFunctions.scala @@ -1,6 +1,6 @@ package millfork.node.opt -import millfork.{CompilationFlag, CompilationOptions} +import millfork.{CompilationFlag, CompilationOptions, CpuFamily} import millfork.env._ import millfork.error.ConsoleLogger import millfork.node._ @@ -33,8 +33,12 @@ object UnusedFunctions 
extends NodeOptimization { }.toSet val allCalledFunctions = getAllCalledFunctions(nodes).toSet var unusedFunctions = allNormalFunctions -- allCalledFunctions + val effectiveZpSize = options.platform.cpuFamily match { + case CpuFamily.M6502 => options.zpRegisterSize + case _ => 999999 + } for((op, zp, fun) <- operatorImplementations) { - if (allCalledFunctions.contains(op) && options.zpRegisterSize >= zp) { + if (allCalledFunctions.contains(op) && effectiveZpSize >= zp) { unusedFunctions -= fun } } diff --git a/src/test/scala/millfork/test/emu/EmuZ80Run.scala b/src/test/scala/millfork/test/emu/EmuZ80Run.scala index be9648f9..27c02059 100644 --- a/src/test/scala/millfork/test/emu/EmuZ80Run.scala +++ b/src/test/scala/millfork/test/emu/EmuZ80Run.scala @@ -1,5 +1,8 @@ package millfork.test.emu +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Paths} + import com.codingrodent.microprocessor.Z80.{CPUConstants, Z80Core} import eu.rekawek.coffeegb.AddressSpace import eu.rekawek.coffeegb.cpu.{Cpu, InterruptManager, SpeedMode} @@ -10,17 +13,49 @@ import millfork.assembly.z80.ZLine import millfork.compiler.{CompilationContext, LabelGenerator} import millfork.env.{Environment, InitializedArray, InitializedMemoryVariable, NormalFunction} import millfork.error.ConsoleLogger -import millfork.node.StandardCallGraph +import millfork.node.{Program, StandardCallGraph} import millfork.node.opt.NodeOptimization import millfork.output.{MemoryBank, Z80Assembler} -import millfork.parser.{PreprocessingResult, Preprocessor, Z80Parser} -import millfork.{CompilationFlag, CompilationOptions, CpuFamily, JobContext} +import millfork.parser.{MosParser, PreprocessingResult, Preprocessor, Z80Parser} +import millfork._ import millfork.compiler.z80.Z80Compiler import org.scalatest.Matchers +import scala.collection.JavaConverters._ +import scala.collection.mutable + /** * @author Karol Stasiak */ +object EmuZ80Run { + + private def preload(cpu: millfork.Cpu.Value, filename: 
String): Option[Program] = { + TestErrorReporting.log.info(s"Loading $filename for $cpu") + val source = Files.readAllLines(Paths.get(filename), StandardCharsets.US_ASCII).asScala.mkString("\n") + val options = CompilationOptions(EmuPlatform.get(cpu), Map( + CompilationFlag.LenientTextEncoding -> true + ), None, 0, Map(), JobContext(TestErrorReporting.log, new LabelGenerator)) + val PreprocessingResult(preprocessedSource, features, _) = Preprocessor.preprocessForTest(options, source) + TestErrorReporting.log.debug(s"Features: $features") + TestErrorReporting.log.info(s"Parsing $filename") + val parser = Z80Parser(filename, preprocessedSource, "", options, features, useIntelSyntax = false) + parser.toAst match { + case Success(x, _) => Some(x) + case f: Failure[_, _] => + TestErrorReporting.log.error(f.toString) + TestErrorReporting.log.error(f.extra.toString) + TestErrorReporting.log.error(f.lastParser.toString) + TestErrorReporting.log.error("Syntax error", Some(parser.lastPosition)) + TestErrorReporting.log.error("Parsing error") + ??? 
+ } + } + + private lazy val cache: mutable.Map[millfork.Cpu.Value, Option[Program]] = mutable.Map[millfork.Cpu.Value, Option[Program]]() + + def cachedMath(cpu: millfork.Cpu.Value): Program = synchronized { cache.getOrElseUpdate(cpu, preload(cpu, "include/i80_math.mfk")).getOrElse(throw new IllegalStateException()) } +} + class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization], assemblyOptimizations: List[AssemblyOptimization[ZLine]]) extends Matchers { def inline: Boolean = false @@ -58,7 +93,12 @@ class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimizatio // prepare - val program = nodeOptimizations.foldLeft(unoptimized)((p, opt) => p.applyNodeOptimization(opt, options)) + val withLibraries = { + var tmp = unoptimized + tmp += EmuZ80Run.cachedMath(cpu) + tmp + } + val program = nodeOptimizations.foldLeft(withLibraries)((p, opt) => p.applyNodeOptimization(opt, options)) val callGraph = new StandardCallGraph(program, log) val env = new Environment(None, "", CpuFamily.I80, options.jobContext) env.collectDeclarations(program, options)