1
0
mirror of https://github.com/KarolS/millfork.git synced 2025-01-22 08:32:29 +00:00

Optimize some stdlib calls

This commit is contained in:
Karol Stasiak 2018-12-21 22:33:27 +01:00
parent 673727b973
commit e43fb39781
9 changed files with 107 additions and 7 deletions

View File

@ -148,6 +148,11 @@ See the [documentation about inlining](../abi/inlining.md). Computationally easy
It enables certain optimization similar to what inlining would enable, but without actual inlining. It enables certain optimization similar to what inlining would enable, but without actual inlining.
`.ini` equivalent: `ipo`. `.ini` equivalent: `ipo`.
* `-foptimize-stdlib`, `-fno-optimize-stdlib`
Whether to replace some standard library calls that have constant parameters with more efficient variants.
Currently affects `putstrz` and `strzlen`, but may affect more functions in the future.
`.ini` equivalent: `optimize_stdlib`.
* `-Os`, `--size` Optimize for size, sacrificing some speed (experimental). * `-Os`, `--size` Optimize for size, sacrificing some speed (experimental).
* `-Of`, `--fast` Optimize for speed, even if it increases the size a bit (experimental). * `-Of`, `--fast` Optimize for speed, even if it increases the size a bit (experimental).

View File

@ -316,7 +316,9 @@ object CompilationFlag extends Enumeration {
UseIntelSyntaxForInput, UseIntelSyntaxForInput,
UseIntelSyntaxForOutput, UseIntelSyntaxForOutput,
// optimization options: // optimization options:
DangerousOptimizations, InlineFunctions, InterproceduralOptimization, OptimizeForSize, OptimizeForSpeed, OptimizeForSonicSpeed, OptimizeForSize, OptimizeForSpeed, OptimizeForSonicSpeed,
DangerousOptimizations, InlineFunctions, InterproceduralOptimization,
OptimizeStdlib,
// memory allocation options // memory allocation options
VariableOverlap, CompactReturnDispatchParams, LUnixRelocatableCode, VariableOverlap, CompactReturnDispatchParams, LUnixRelocatableCode,
// runtime check options // runtime check options
@ -353,6 +355,7 @@ object CompilationFlag extends Enumeration {
"output_intel_syntax" -> UseIntelSyntaxForOutput, "output_intel_syntax" -> UseIntelSyntaxForOutput,
"input_intel_syntax" -> UseIntelSyntaxForInput, "input_intel_syntax" -> UseIntelSyntaxForInput,
"ipo" -> InterproceduralOptimization, "ipo" -> InterproceduralOptimization,
"optimize_stdlib" -> OptimizeStdlib,
"inline" -> InlineFunctions, "inline" -> InlineFunctions,
"dangerous_optimizations" -> DangerousOptimizations, "dangerous_optimizations" -> DangerousOptimizations,
"decimal_mode" -> DecimalMode, "decimal_mode" -> DecimalMode,

View File

@ -449,6 +449,9 @@ object Main {
boolean("-fipo", "-fno-ipo").action { (c, v) => boolean("-fipo", "-fno-ipo").action { (c, v) =>
c.changeFlag(CompilationFlag.InterproceduralOptimization, v) c.changeFlag(CompilationFlag.InterproceduralOptimization, v)
}.description("Interprocedural optimization.") }.description("Interprocedural optimization.")
boolean("-foptimize-stdlib", "-fno-optimize-stdlib").action { (c, v) =>
c.changeFlag(CompilationFlag.OptimizeStdlib, v)
}.description("Optimize standard library calls.")
flag("-Os", "--size").action { c => flag("-Os", "--size").action { c =>
c.changeFlag(CompilationFlag.OptimizeForSize, true). c.changeFlag(CompilationFlag.OptimizeForSize, true).
changeFlag(CompilationFlag.OptimizeForSpeed, false). changeFlag(CompilationFlag.OptimizeForSpeed, false).
@ -468,9 +471,9 @@ object Main {
flag("--dangerous-optimizations").action { c => flag("--dangerous-optimizations").action { c =>
c.changeFlag(CompilationFlag.DangerousOptimizations, true) c.changeFlag(CompilationFlag.DangerousOptimizations, true)
}.description("Use dangerous optimizations (experimental).").hidden() }.description("Use dangerous optimizations (experimental).").hidden()
boolean("-fdangerous-optimizations", "-fnodangerous-optimizations").action { (c, v) => boolean("-fdangerous-optimizations", "-fno-dangerous-optimizations").action { (c, v) =>
c.changeFlag(CompilationFlag.DangerousOptimizations, v) c.changeFlag(CompilationFlag.DangerousOptimizations, v)
}.description("Use dangerous optimizations (experimental).") }.description("Use dangerous optimizations (experimental). Implies -fipo.")
fluff("", "Warning options:", "") fluff("", "Warning options:", "")

View File

@ -1,6 +1,6 @@
package millfork.compiler package millfork.compiler
import millfork.CompilationFlag import millfork.{CompilationFlag, CpuFamily, node}
import millfork.env._ import millfork.env._
import millfork.node._ import millfork.node._
import AbstractExpressionCompiler.getExpressionType import AbstractExpressionCompiler.getExpressionType
@ -38,6 +38,7 @@ abstract class AbstractStatementPreprocessor(ctx: CompilationContext, statements
}) })
protected val nonreentrantVars: Set[String] = trackableVars -- reentrantVars protected val nonreentrantVars: Set[String] = trackableVars -- reentrantVars
protected val optimizeStdlib: Boolean = ctx.options.flag(CompilationFlag.OptimizeStdlib)
def apply(): List[ExecutableStatement] = { def apply(): List[ExecutableStatement] = {
optimizeStmts(statements, Map())._1 optimizeStmts(statements, Map())._1
@ -59,6 +60,33 @@ abstract class AbstractStatementPreprocessor(ctx: CompilationContext, statements
def optimizeStmt(stmt: ExecutableStatement, currentVarValues: VV): (ExecutableStatement, VV) = { def optimizeStmt(stmt: ExecutableStatement, currentVarValues: VV): (ExecutableStatement, VV) = {
var cv = currentVarValues var cv = currentVarValues
val pos = stmt.position val pos = stmt.position
// stdlib:
// When the OptimizeStdlib flag is set, rewrite `putstrz` calls whose only argument is a
// zero-terminated text literal into cheaper equivalents. Any call that does not match one
// of the handled shapes falls through unchanged to the generic processing below.
if (optimizeStdlib) {
  stmt match {
    case ExpressionStatement(FunctionCallExpression("putstrz", List(TextLiteralExpression(text)))) =>
      text.lastOption match {
        case Some(LiteralExpression(0, _)) =>
          // text.size counts the terminating zero, so size 1 means an empty string.
          text.size match {
            case 1 =>
              ctx.log.debug("Removing putstrz with empty argument", stmt.position)
              return EmptyStatement(Nil) -> currentVarValues
            case 2 =>
              ctx.log.debug("Replacing putstrz with putchar", stmt.position)
              return ExpressionStatement(FunctionCallExpression("putchar", List(text.head))) -> currentVarValues
            case 3 =>
              // NOTE(review): the two-character rewrite is restricted to the 6502 family;
              // the reason is not visible here -- presumably a code-size trade-off, confirm.
              if (ctx.options.platform.cpuFamily == CpuFamily.M6502) {
                ctx.log.debug("Replacing putstrz with putchar", stmt.position)
                // This method returns a single statement, so both putchar calls are wrapped
                // in an always-true `if (1 == 1)` that serves as a statement container.
                return IfStatement(FunctionCallExpression("==", List(LiteralExpression(1, 1), LiteralExpression(1, 1))), List(
                  ExpressionStatement(FunctionCallExpression("putchar", List(text.head))),
                  ExpressionStatement(FunctionCallExpression("putchar", List(text(1))))
                ), Nil) -> currentVarValues
              }
            case _ =>
          }
        case _ =>
          // The literal is empty or does not end with a zero terminator: leave the call
          // untouched. Without this case the match would throw a MatchError.
      }
    case _ =>
  }
}
// generic warnings: // generic warnings:
stmt match { stmt match {
case ExpressionStatement(expr@FunctionCallExpression("strzlen" | "putstrz" | "strzcmp" | "strzcopy", params)) => case ExpressionStatement(expr@FunctionCallExpression("strzlen" | "putstrz" | "strzcmp" | "strzcopy", params)) =>
@ -190,6 +218,19 @@ abstract class AbstractStatementPreprocessor(ctx: CompilationContext, statements
def optimizeExpr(expr: Expression, currentVarValues: VV): Expression = { def optimizeExpr(expr: Expression, currentVarValues: VV): Expression = {
val pos = expr.position val pos = expr.position
// stdlib:
// When the OptimizeStdlib flag is set, fold `strzlen` of a zero-terminated text literal
// into a constant byte literal.
if (optimizeStdlib) {
  expr match {
    case FunctionCallExpression("strzlen", List(TextLiteralExpression(text))) =>
      // Position of the first literal zero in the text, or -1 if there is none.
      val firstZeroIndex = text.indexWhere {
        case LiteralExpression(0, _) => true
        case _ => false
      }
      // Fold only when the first zero is the terminator itself: strzlen stops at the first
      // zero it meets, so an earlier zero would make `text.size - 1` the wrong answer.
      // The size limit keeps the resulting length (at most 255) within a single byte.
      if (firstZeroIndex >= 0 && firstZeroIndex == text.size - 1 && text.size <= 256) {
        ctx.log.debug("Replacing strzlen with constant argument", expr.position)
        return LiteralExpression(text.size - 1, 1)
      }
    case _ =>
  }
}
// generic warnings: // generic warnings:
expr match { expr match {
case FunctionCallExpression("*" | "*=", params) => case FunctionCallExpression("*" | "*=", params) =>

View File

@ -27,6 +27,10 @@ object UnusedFunctions extends NodeOptimization {
("*'=", 4, "__adc_decimal"), ("*'=", 4, "__adc_decimal"),
) )
// Pairs of (original, replacement) function names: whenever `original` is among the called
// functions, `replacement` is also treated as called and therefore not removed as unused.
// The statement preprocessor may rewrite `putstrz` calls into `putchar` calls when stdlib
// optimization is enabled, so `putchar` has to survive if `putstrz` is used.
private val functionsThatShouldBeKeptConditionally: List[(String, String)] = List(
"putstrz" -> "putchar"
)
override def optimize(nodes: List[Node], options: CompilationOptions): List[Node] = { override def optimize(nodes: List[Node], options: CompilationOptions): List[Node] = {
val aliases = nodes.flatMap{ val aliases = nodes.flatMap{
case AliasDefinitionStatement(source, target, _) => Some(source -> target) case AliasDefinitionStatement(source, target, _) => Some(source -> target)
@ -37,7 +41,10 @@ object UnusedFunctions extends NodeOptimization {
case v: FunctionDeclarationStatement => if (v.address.isDefined && v.statements.isDefined || v.interrupt || v.name == "main" || panicRequired && v.name == "_panic") Nil else List(v.name) case v: FunctionDeclarationStatement => if (v.address.isDefined && v.statements.isDefined || v.interrupt || v.name == "main" || panicRequired && v.name == "_panic") Nil else List(v.name)
case _ => Nil case _ => Nil
}.toSet }.toSet
val allCalledFunctions = resolveAliases(aliases, getAllCalledFunctions(nodes).toSet) var allCalledFunctions = resolveAliases(aliases, getAllCalledFunctions(nodes).toSet)
for((original, replacement) <- functionsThatShouldBeKeptConditionally) {
if (allCalledFunctions(original)) allCalledFunctions += replacement
}
var unusedFunctions = allNormalFunctions -- allCalledFunctions var unusedFunctions = allNormalFunctions -- allCalledFunctions
val effectiveZpSize = options.platform.cpuFamily match { val effectiveZpSize = options.platform.cpuFamily match {
case CpuFamily.M6502 => options.zpRegisterSize case CpuFamily.M6502 => options.zpRegisterSize

View File

@ -0,0 +1,10 @@
// Dummy stand-in for the stdio `putchar`, used by the test suite: accepts a byte and does nothing.
// Marked noinline, so calls to it are not inlined.
noinline void putchar(byte b) { }
// Dummy stand-in for the stdio `putstrz`: ignores its pointer argument and just calls putchar(0).
// NOTE(review): presumably the putchar call exists only to keep putchar referenced -- confirm.
noinline void putstrz(pointer p) { putchar(0) }
// Returns the number of bytes before the first zero byte at `str`.
// The counter is a single byte, so the result is only meaningful for strings shorter than 256 bytes.
byte strzlen(pointer str) {
byte index
index = 0
// advance until the zero terminator is found
while str[index] != 0 {
index += 1
}
return index
}

View File

@ -55,4 +55,23 @@ class StatementOptimizationSuite extends FunSuite with Matchers {
m.readByte(0xc009) should equal(56) m.readByte(0xc009) should equal(56)
} }
} }
// Exercises the stdlib call optimizations on the 6502, 8080 and Sharp CPUs:
//  - strzlen of a constant zero-terminated literal must evaluate to the string length (4);
//  - putstrz is called with literals of 0, 1, 2 and 3 characters so that each branch of the
//    putstrz rewrite (removal, single putchar, two putchars, no rewrite) gets compiled.
// Only the strzlen result is asserted; the putstrz calls merely have to compile and run.
test("Stdlib optimization 1") {
EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Intel8080, Cpu.Sharp)(
"""
| import stdio
| byte output @$c000
| void main() {
| output = strzlen("test"z)
| putstrz(""z)
| putstrz("a"z)
| putstrz("bc"z)
| putstrz("def"z)
| }
""".stripMargin
) { m =>
// "test" has four characters before the terminator
m.readByte(0xc000) should equal(4)
}
}
} }

View File

@ -45,7 +45,9 @@ object EmuRun {
private lazy val cachedZpregO: Option[Program]= preload("include/zp_reg.mfk") private lazy val cachedZpregO: Option[Program]= preload("include/zp_reg.mfk")
private lazy val cachedBcdO: Option[Program] = preload("include/bcd_6502.mfk") private lazy val cachedBcdO: Option[Program] = preload("include/bcd_6502.mfk")
private lazy val cachedStdioO: Option[Program] = preload("src/test/resources/include/dummy_stdio.mfk")
def cachedZpreg: Program = synchronized { cachedZpregO.getOrElse(throw new IllegalStateException()) } def cachedZpreg: Program = synchronized { cachedZpregO.getOrElse(throw new IllegalStateException()) }
def cachedStdio: Program = synchronized { cachedStdioO.getOrElse(throw new IllegalStateException()) }
def cachedBcd: Program = synchronized { cachedBcdO.getOrElse(throw new IllegalStateException()) } def cachedBcd: Program = synchronized { cachedBcdO.getOrElse(throw new IllegalStateException()) }
} }
@ -133,6 +135,7 @@ class EmuRun(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization],
CompilationFlag.LenientTextEncoding -> true, CompilationFlag.LenientTextEncoding -> true,
CompilationFlag.EmitIllegals -> this.emitIllegals, CompilationFlag.EmitIllegals -> this.emitIllegals,
CompilationFlag.InlineFunctions -> this.inline, CompilationFlag.InlineFunctions -> this.inline,
CompilationFlag.OptimizeStdlib -> this.inline,
CompilationFlag.InterproceduralOptimization -> true, CompilationFlag.InterproceduralOptimization -> true,
CompilationFlag.CompactReturnDispatchParams -> true, CompilationFlag.CompactReturnDispatchParams -> true,
CompilationFlag.SoftwareStack -> softwareStack, CompilationFlag.SoftwareStack -> softwareStack,
@ -172,6 +175,8 @@ class EmuRun(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization],
var tmp = unoptimized var tmp = unoptimized
if(source.contains("import zp_reg")) if(source.contains("import zp_reg"))
tmp += EmuRun.cachedZpreg tmp += EmuRun.cachedZpreg
if(source.contains("import stdio"))
tmp += EmuRun.cachedStdio
if(!options.flag(CompilationFlag.DecimalMode) && (source.contains("+'") || source.contains("-'") || source.contains("<<'") || source.contains("*'"))) if(!options.flag(CompilationFlag.DecimalMode) && (source.contains("+'") || source.contains("-'") || source.contains("<<'") || source.contains("*'")))
tmp += EmuRun.cachedBcd tmp += EmuRun.cachedBcd
tmp tmp

View File

@ -51,9 +51,12 @@ object EmuZ80Run {
} }
} }
private lazy val cache: mutable.Map[millfork.Cpu.Value, Option[Program]] = mutable.Map[millfork.Cpu.Value, Option[Program]]() private lazy val cache: mutable.Map[(millfork.Cpu.Value, String), Option[Program]] = mutable.Map[(millfork.Cpu.Value, String), Option[Program]]()
/**
  * Returns the preloaded library program for the given CPU and source path, loading it on
  * first use and caching the outcome (including a failed load) under the `(cpu, path)` key.
  *
  * @param cpu  CPU the library is preloaded for
  * @param path path of the library source file
  * @return the preloaded program
  * @throws IllegalStateException if preloading yielded no program
  */
private def get(cpu: millfork.Cpu.Value, path: String): Program = synchronized {
  cache
    .getOrElseUpdate(cpu -> path, preload(cpu, path))
    // Name the library and CPU so a failed preload is diagnosable from the stack trace alone.
    .getOrElse(throw new IllegalStateException(s"Failed to preload $path for $cpu"))
}
def cachedMath(cpu: millfork.Cpu.Value): Program = synchronized { cache.getOrElseUpdate(cpu, preload(cpu, "include/i80_math.mfk")).getOrElse(throw new IllegalStateException()) } def cachedMath(cpu: millfork.Cpu.Value): Program = get(cpu, "include/i80_math.mfk")
def cachedStdio(cpu: millfork.Cpu.Value): Program = get(cpu, "src/test/resources/include/dummy_stdio.mfk")
} }
class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization], assemblyOptimizations: List[AssemblyOptimization[ZLine]]) extends Matchers { class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization], assemblyOptimizations: List[AssemblyOptimization[ZLine]]) extends Matchers {
@ -75,6 +78,7 @@ class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimizatio
val platform = EmuPlatform.get(cpu) val platform = EmuPlatform.get(cpu)
val extraFlags = Map( val extraFlags = Map(
CompilationFlag.InlineFunctions -> this.inline, CompilationFlag.InlineFunctions -> this.inline,
CompilationFlag.OptimizeStdlib -> this.inline,
CompilationFlag.OptimizeForSize -> this.optimizeForSize, CompilationFlag.OptimizeForSize -> this.optimizeForSize,
CompilationFlag.EmitIllegals -> (cpu == millfork.Cpu.Z80), CompilationFlag.EmitIllegals -> (cpu == millfork.Cpu.Z80),
CompilationFlag.LenientTextEncoding -> true) CompilationFlag.LenientTextEncoding -> true)
@ -96,6 +100,9 @@ class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimizatio
val withLibraries = { val withLibraries = {
var tmp = unoptimized var tmp = unoptimized
tmp += EmuZ80Run.cachedMath(cpu) tmp += EmuZ80Run.cachedMath(cpu)
if (source.contains("import stdio")) {
tmp += EmuZ80Run.cachedStdio(cpu)
}
tmp tmp
} }
val program = nodeOptimizations.foldLeft(withLibraries.applyImportantAliases)((p, opt) => p.applyNodeOptimization(opt, options)) val program = nodeOptimizations.foldLeft(withLibraries.applyImportantAliases)((p, opt) => p.applyNodeOptimization(opt, options))