diff --git a/docs/api/command-line.md b/docs/api/command-line.md index 24d8e952..b29cd2b8 100644 --- a/docs/api/command-line.md +++ b/docs/api/command-line.md @@ -148,6 +148,11 @@ See the [documentation about inlining](../abi/inlining.md). Computationally easy It enables certain optimization similar to what inlining would enable, but without actual inlining. `.ini` equivalent: `ipo`. +* `-foptimize-stdlib`, `-fno-optimize-stdlib` – +Whether the compiler should replace some standard library calls that have constant parameters with more efficient variants. +Currently affects `putstrz` and `strzlen`, but may affect more functions in the future. +`.ini` equivalent: `optimize_stdlib`. + * `-Os`, `--size` – Optimize for size, sacrificing some speed (experimental). * `-Of`, `--fast` – Optimize for speed, even if it increases the size a bit (experimental). diff --git a/src/main/scala/millfork/CompilationOptions.scala b/src/main/scala/millfork/CompilationOptions.scala index b2ef2a92..52efbce4 100644 --- a/src/main/scala/millfork/CompilationOptions.scala +++ b/src/main/scala/millfork/CompilationOptions.scala @@ -316,7 +316,9 @@ object CompilationFlag extends Enumeration { UseIntelSyntaxForInput, UseIntelSyntaxForOutput, // optimization options: - DangerousOptimizations, InlineFunctions, InterproceduralOptimization, OptimizeForSize, OptimizeForSpeed, OptimizeForSonicSpeed, + OptimizeForSize, OptimizeForSpeed, OptimizeForSonicSpeed, + DangerousOptimizations, InlineFunctions, InterproceduralOptimization, + OptimizeStdlib, // memory allocation options VariableOverlap, CompactReturnDispatchParams, LUnixRelocatableCode, // runtime check options @@ -353,6 +355,7 @@ object CompilationFlag extends Enumeration { "output_intel_syntax" -> UseIntelSyntaxForOutput, "input_intel_syntax" -> UseIntelSyntaxForInput, "ipo" -> InterproceduralOptimization, + "optimize_stdlib" -> OptimizeStdlib, "inline" -> InlineFunctions, "dangerous_optimizations" -> DangerousOptimizations, "decimal_mode" -> DecimalMode, diff 
--git a/src/main/scala/millfork/Main.scala b/src/main/scala/millfork/Main.scala index 475e8234..bf02f165 100644 --- a/src/main/scala/millfork/Main.scala +++ b/src/main/scala/millfork/Main.scala @@ -449,6 +449,9 @@ object Main { boolean("-fipo", "-fno-ipo").action { (c, v) => c.changeFlag(CompilationFlag.InterproceduralOptimization, v) }.description("Interprocedural optimization.") + boolean("-foptimize-stdlib", "-fno-optimize-stdlib").action { (c, v) => + c.changeFlag(CompilationFlag.OptimizeStdlib, v) + }.description("Optimize standard library calls.") flag("-Os", "--size").action { c => c.changeFlag(CompilationFlag.OptimizeForSize, true). changeFlag(CompilationFlag.OptimizeForSpeed, false). @@ -468,9 +471,9 @@ object Main { flag("--dangerous-optimizations").action { c => c.changeFlag(CompilationFlag.DangerousOptimizations, true) }.description("Use dangerous optimizations (experimental).").hidden() - boolean("-fdangerous-optimizations", "-fnodangerous-optimizations").action { (c, v) => + boolean("-fdangerous-optimizations", "-fno-dangerous-optimizations").action { (c, v) => c.changeFlag(CompilationFlag.DangerousOptimizations, v) - }.description("Use dangerous optimizations (experimental).") + }.description("Use dangerous optimizations (experimental). 
Implies -fipo.") fluff("", "Warning options:", "") diff --git a/src/main/scala/millfork/compiler/AbstractStatementPreprocessor.scala b/src/main/scala/millfork/compiler/AbstractStatementPreprocessor.scala index 523454eb..57e0eee2 100644 --- a/src/main/scala/millfork/compiler/AbstractStatementPreprocessor.scala +++ b/src/main/scala/millfork/compiler/AbstractStatementPreprocessor.scala @@ -1,6 +1,6 @@ package millfork.compiler -import millfork.CompilationFlag +import millfork.{CompilationFlag, CpuFamily, node} import millfork.env._ import millfork.node._ import AbstractExpressionCompiler.getExpressionType @@ -38,6 +38,7 @@ abstract class AbstractStatementPreprocessor(ctx: CompilationContext, statements }) protected val nonreentrantVars: Set[String] = trackableVars -- reentrantVars + protected val optimizeStdlib: Boolean = ctx.options.flag(CompilationFlag.OptimizeStdlib) def apply(): List[ExecutableStatement] = { optimizeStmts(statements, Map())._1 @@ -59,6 +60,33 @@ abstract class AbstractStatementPreprocessor(ctx: CompilationContext, statements def optimizeStmt(stmt: ExecutableStatement, currentVarValues: VV): (ExecutableStatement, VV) = { var cv = currentVarValues val pos = stmt.position + // stdlib: + if (optimizeStdlib) { + stmt match { + case ExpressionStatement(FunctionCallExpression("putstrz", List(TextLiteralExpression(text)))) => + text.lastOption match { + case Some(LiteralExpression(0, _)) => + text.size match { + case 1 => + ctx.log.debug("Removing putstrz with empty argument", stmt.position) + return EmptyStatement(Nil) -> currentVarValues + case 2 => + ctx.log.debug("Replacing putstrz with putchar", stmt.position) + return ExpressionStatement(FunctionCallExpression("putchar", List(text.head))) -> currentVarValues + case 3 => + if (ctx.options.platform.cpuFamily == CpuFamily.M6502) { + ctx.log.debug("Replacing putstrz with putchar", stmt.position) + return IfStatement(FunctionCallExpression("==", List(LiteralExpression(1, 1), LiteralExpression(1, 1))), 
List( + ExpressionStatement(FunctionCallExpression("putchar", List(text.head))), + ExpressionStatement(FunctionCallExpression("putchar", List(text(1)))) + ), Nil) -> currentVarValues + } + case _ => + } + } + case _ => + } + } // generic warnings: stmt match { case ExpressionStatement(expr@FunctionCallExpression("strzlen" | "putstrz" | "strzcmp" | "strzcopy", params)) => @@ -190,6 +218,19 @@ abstract class AbstractStatementPreprocessor(ctx: CompilationContext, statements def optimizeExpr(expr: Expression, currentVarValues: VV): Expression = { val pos = expr.position + // stdlib: + if (optimizeStdlib) { + expr match { + case FunctionCallExpression("strzlen", List(TextLiteralExpression(text))) => + text.lastOption match { + case Some(LiteralExpression(0, _)) if text.size <= 256 => + ctx.log.debug("Replacing strzlen with constant argument", expr.position) + return LiteralExpression(text.size - 1, 1) + case _ => + } + case _ => + } + } // generic warnings: expr match { case FunctionCallExpression("*" | "*=", params) => diff --git a/src/main/scala/millfork/node/opt/UnusedFunctions.scala b/src/main/scala/millfork/node/opt/UnusedFunctions.scala index 65c2c7ad..87d46d4b 100644 --- a/src/main/scala/millfork/node/opt/UnusedFunctions.scala +++ b/src/main/scala/millfork/node/opt/UnusedFunctions.scala @@ -27,6 +27,10 @@ object UnusedFunctions extends NodeOptimization { ("*'=", 4, "__adc_decimal"), ) + private val functionsThatShouldBeKeptConditionally: List[(String, String)] = List( + "putstrz" -> "putchar" + ) + override def optimize(nodes: List[Node], options: CompilationOptions): List[Node] = { val aliases = nodes.flatMap{ case AliasDefinitionStatement(source, target, _) => Some(source -> target) @@ -37,7 +41,10 @@ object UnusedFunctions extends NodeOptimization { case v: FunctionDeclarationStatement => if (v.address.isDefined && v.statements.isDefined || v.interrupt || v.name == "main" || panicRequired && v.name == "_panic") Nil else List(v.name) case _ => Nil }.toSet - val 
allCalledFunctions = resolveAliases(aliases, getAllCalledFunctions(nodes).toSet) + var allCalledFunctions = resolveAliases(aliases, getAllCalledFunctions(nodes).toSet) + for((original, replacement) <- functionsThatShouldBeKeptConditionally) { + if (allCalledFunctions(original)) allCalledFunctions += replacement + } var unusedFunctions = allNormalFunctions -- allCalledFunctions val effectiveZpSize = options.platform.cpuFamily match { case CpuFamily.M6502 => options.zpRegisterSize diff --git a/src/test/resources/include/dummy_stdio.mfk b/src/test/resources/include/dummy_stdio.mfk new file mode 100644 index 00000000..d24cb465 --- /dev/null +++ b/src/test/resources/include/dummy_stdio.mfk @@ -0,0 +1,10 @@ +noinline void putchar(byte b) { } +noinline void putstrz(pointer p) { putchar(0) } +byte strzlen(pointer str) { + byte index + index = 0 + while str[index] != 0 { + index += 1 + } + return index +} diff --git a/src/test/scala/millfork/test/StatementOptimizationSuite.scala b/src/test/scala/millfork/test/StatementOptimizationSuite.scala index 0c11e6ec..97ad7bf3 100644 --- a/src/test/scala/millfork/test/StatementOptimizationSuite.scala +++ b/src/test/scala/millfork/test/StatementOptimizationSuite.scala @@ -55,4 +55,23 @@ class StatementOptimizationSuite extends FunSuite with Matchers { m.readByte(0xc009) should equal(56) } } + + + test("Stdlib optimization 1") { + EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Intel8080, Cpu.Sharp)( + """ + | import stdio + | byte output @$c000 + | void main() { + | output = strzlen("test"z) + | putstrz(""z) + | putstrz("a"z) + | putstrz("bc"z) + | putstrz("def"z) + | } + """.stripMargin + ) { m => + m.readByte(0xc000) should equal(4) + } + } } diff --git a/src/test/scala/millfork/test/emu/EmuRun.scala b/src/test/scala/millfork/test/emu/EmuRun.scala index d89d8473..1c960c36 100644 --- a/src/test/scala/millfork/test/emu/EmuRun.scala +++ b/src/test/scala/millfork/test/emu/EmuRun.scala @@ -45,7 +45,9 @@ object EmuRun { private lazy val 
cachedZpregO: Option[Program]= preload("include/zp_reg.mfk") private lazy val cachedBcdO: Option[Program] = preload("include/bcd_6502.mfk") + private lazy val cachedStdioO: Option[Program] = preload("src/test/resources/include/dummy_stdio.mfk") def cachedZpreg: Program = synchronized { cachedZpregO.getOrElse(throw new IllegalStateException()) } + def cachedStdio: Program = synchronized { cachedStdioO.getOrElse(throw new IllegalStateException()) } def cachedBcd: Program = synchronized { cachedBcdO.getOrElse(throw new IllegalStateException()) } } @@ -133,6 +135,7 @@ class EmuRun(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization], CompilationFlag.LenientTextEncoding -> true, CompilationFlag.EmitIllegals -> this.emitIllegals, CompilationFlag.InlineFunctions -> this.inline, + CompilationFlag.OptimizeStdlib -> this.inline, CompilationFlag.InterproceduralOptimization -> true, CompilationFlag.CompactReturnDispatchParams -> true, CompilationFlag.SoftwareStack -> softwareStack, @@ -172,6 +175,8 @@ class EmuRun(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization], var tmp = unoptimized if(source.contains("import zp_reg")) tmp += EmuRun.cachedZpreg + if(source.contains("import stdio")) + tmp += EmuRun.cachedStdio if(!options.flag(CompilationFlag.DecimalMode) && (source.contains("+'") || source.contains("-'") || source.contains("<<'") || source.contains("*'"))) tmp += EmuRun.cachedBcd tmp diff --git a/src/test/scala/millfork/test/emu/EmuZ80Run.scala b/src/test/scala/millfork/test/emu/EmuZ80Run.scala index 91f590bb..50bcdc66 100644 --- a/src/test/scala/millfork/test/emu/EmuZ80Run.scala +++ b/src/test/scala/millfork/test/emu/EmuZ80Run.scala @@ -51,9 +51,12 @@ object EmuZ80Run { } } - private lazy val cache: mutable.Map[millfork.Cpu.Value, Option[Program]] = mutable.Map[millfork.Cpu.Value, Option[Program]]() + private lazy val cache: mutable.Map[(millfork.Cpu.Value, String), Option[Program]] = mutable.Map[(millfork.Cpu.Value, String), 
Option[Program]]() + private def get(cpu: millfork.Cpu.Value, path: String): Program = + synchronized { cache.getOrElseUpdate(cpu->path, preload(cpu, path)).getOrElse(throw new IllegalStateException()) } - def cachedMath(cpu: millfork.Cpu.Value): Program = synchronized { cache.getOrElseUpdate(cpu, preload(cpu, "include/i80_math.mfk")).getOrElse(throw new IllegalStateException()) } + def cachedMath(cpu: millfork.Cpu.Value): Program = get(cpu, "include/i80_math.mfk") + def cachedStdio(cpu: millfork.Cpu.Value): Program = get(cpu, "src/test/resources/include/dummy_stdio.mfk") } class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization], assemblyOptimizations: List[AssemblyOptimization[ZLine]]) extends Matchers { @@ -75,6 +78,7 @@ class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimizatio val platform = EmuPlatform.get(cpu) val extraFlags = Map( CompilationFlag.InlineFunctions -> this.inline, + CompilationFlag.OptimizeStdlib -> this.inline, CompilationFlag.OptimizeForSize -> this.optimizeForSize, CompilationFlag.EmitIllegals -> (cpu == millfork.Cpu.Z80), CompilationFlag.LenientTextEncoding -> true) @@ -96,6 +100,9 @@ class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimizatio val withLibraries = { var tmp = unoptimized tmp += EmuZ80Run.cachedMath(cpu) + if (source.contains("import stdio")) { + tmp += EmuZ80Run.cachedStdio(cpu) + } tmp } val program = nodeOptimizations.foldLeft(withLibraries.applyImportantAliases)((p, opt) => p.applyNodeOptimization(opt, options))