1
0
mirror of https://github.com/KarolS/millfork.git synced 2025-01-10 20:29:35 +00:00

Optimization refactoring:

– -Of should imply -finline
– certain optimizations should be controllable
– unused global symbols should be removed even if we're not optimizing
This commit is contained in:
Karol Stasiak 2018-12-27 01:18:29 +01:00
parent b77f9dd5f8
commit f12463aef0
13 changed files with 162 additions and 58 deletions

View File

@ -139,35 +139,68 @@ Use a software stack for stack variables.
## Optimization options
* `-O0` Disable all optimizations.
* `-O0` Disable all optimizations except unused global symbol removal.
* `-O`, `-O2` ... `-O8` Optimize code, various levels. For most code, anything above `-O4` doesn't improve it anymore.
* `-O9` Optimize code using superoptimizer (experimental). Computationally expensive, decent results.
* `-O9` Optimize code using superoptimizer (experimental). Computationally very expensive, decent results.
* `-finline`, `-fno-inline` Whether should inline functions automatically.
See the [documentation about inlining](../abi/inlining.md). Computationally easy, can give decent gains.
`.ini` equivalent: `inline`.
Default: no
* `-fipo`, `-fno-ipo` Whether should perform interprocedural optimization.
It enables certain optimizations similar to those that inlining would enable, but without actual inlining.
`.ini` equivalent: `ipo`.
Default: no.
* `-fregister-variables`, `-fno-register-variables` Whether should allow moving local variables into CPU registers.
Default: yes.
* `-foptimize-stdlib`, `-fno-optimize-stdlib`
Whether should replace some standard library calls with constant parameters with more efficient variants.
Currently affects `putstrz` and `strzlen`, but may affect more functions in the future.
`.ini` equivalent: `optimize_stdlib`.
Default: no.
* `-ffunction-fallthrough`, `-fno-function-fallthrough`
Whether should replace a tail call by simply putting one function after another.
`.ini` equivalent: `function_fallthrough`.
Default: yes.
* `-ffunction-deduplication`, `-fno-function-deduplication`
Whether identical functions should be merged into one function.
`.ini` equivalent: `function_deduplication`.
Default: yes.
* `-fsubroutine-extraction`, `-fno-subroutine-extraction`
Whether identical fragments of functions should be extracted into subroutines.
`.ini` equivalent: `subroutine_extraction`.
Default: no.
* `-Os`, `--size` Optimize for size, sacrificing some speed (experimental).
Also enables `-fsubroutine-extraction`.
* `-Of`, `--fast` Optimize for speed, even if it increases the size a bit (experimental).
Also enables `-finline`.
* `-Ob`, `--blast-processing` Optimize for speed, even if it increases the size a lot (experimental).
Enables `-finline` automatically.
Also enables `-finline`.
* `-Og`, `--optimize-debugging` Disables optimizations that make debugging harder.
Sets
`-fno-optimize-stdlib`,
`-fno-variable-overlap`,
`-fno-register-variables`,
`-fno-function-fallthrough`,
`-fno-function-deduplication`,
`-fno-subroutine-extraction`
automatically.
* `-fdangerous-optimizations` Use dangerous optimizations (experimental).
Dangerous optimizations are more likely to result in broken code.
Enables `-fipo` automatically.
Also enables `-fipo` and `-foptimize-stdlib` automatically.
`.ini` equivalent: `dangerous_optimizations`.
Note: for compatibility with versions 0.3.0 and earlier,

View File

@ -58,7 +58,7 @@ Default: the same as `encoding`.
* `prevent_jmp_indirect_bug` whether the compiler should try to avoid the indirect JMP bug,
default is `false` on 65C02-compatible or non-6502 processors and `true` elsewhere
* `compact_dispatch_params` whether parameter values in return dispatch statements may overlap other objects, default is `true`
* `compact_dispatch_params` whether parameter values in return dispatch statements may overlap other objects, default is `true`.
This may cause problems if the parameter table is stored next to a hardware register that has side effects when reading.
* `lunix` generate relocatable code for LUnix/LNG, default is `false`
@ -71,6 +71,12 @@ Default: the same as `encoding`.
* `ipo` - enable interprocedural optimization, default is `false`.
* `function_fallthrough` whether should replace a tail call by simply putting one function after another, default is `true`.
* `function_deduplication` whether identical functions should be merged into one function, default is `true`.
* `subroutine_extraction` whether identical fragments of functions should be extracted into subroutines, default is `false`.
* `lenient_encoding` - allow for automatic substitution of invalid characters in string literals using the default encodings, default is `false`.
* `use_shadow_registers_for_irq` use Z80 shadow registers in interrupt routines, default is `true` for Z80 and `false` otherwise

View File

@ -218,13 +218,13 @@ object Cpu extends Enumeration {
import CompilationFlag._
private val mosAlwaysDefaultFlags = Set(
VariableOverlap, CompactReturnDispatchParams
private val alwaysDefaultFlags = Set(
VariableOverlap, CompactReturnDispatchParams, FunctionFallthrough, RegisterVariables, FunctionDeduplication
)
private val i80AlwaysDefaultFlags = Set(
VariableOverlap, CompactReturnDispatchParams
)
private val mosAlwaysDefaultFlags = alwaysDefaultFlags
private val i80AlwaysDefaultFlags = alwaysDefaultFlags
def defaultFlags(x: Cpu.Value): Set[CompilationFlag.Value] = x match {
case StrictMos =>
@ -316,8 +316,9 @@ object CompilationFlag extends Enumeration {
UseIntelSyntaxForInput,
UseIntelSyntaxForOutput,
// optimization options:
OptimizeForSize, OptimizeForSpeed, OptimizeForSonicSpeed,
OptimizeForSize, OptimizeForSpeed, OptimizeForSonicSpeed, OptimizeForDebugging,
DangerousOptimizations, InlineFunctions, InterproceduralOptimization,
FunctionFallthrough, RegisterVariables, FunctionDeduplication, SubroutineExtraction,
OptimizeStdlib,
// memory allocation options
VariableOverlap, CompactReturnDispatchParams, LUnixRelocatableCode,
@ -356,6 +357,9 @@ object CompilationFlag extends Enumeration {
"input_intel_syntax" -> UseIntelSyntaxForInput,
"ipo" -> InterproceduralOptimization,
"optimize_stdlib" -> OptimizeStdlib,
"function_fallthrough" -> FunctionFallthrough,
"function_deduplication" -> FunctionDeduplication,
"subroutine_extraction" -> SubroutineExtraction,
"inline" -> InlineFunctions,
"dangerous_optimizations" -> DangerousOptimizations,
"decimal_mode" -> DecimalMode,

View File

@ -0,0 +1,55 @@
package millfork
import millfork.error.Logger
/**
  * Settings collected from the command line before compilation starts.
  *
  * `flags` holds only the compilation flags that were set explicitly (either to
  * `true` or to `false`); flags absent from the map have not been decided yet.
  *
  * @author Karol Stasiak
  */
case class Context(errorReporting: Logger,
                   inputFileNames: List[String],
                   outputFileName: Option[String] = None,
                   runFileName: Option[String] = None,
                   optimizationLevel: Option[Int] = None,
                   zpRegisterSize: Option[Int] = None,
                   platform: Option[String] = None,
                   outputAssembly: Boolean = false,
                   outputLabels: Boolean = false,
                   includePath: List[String] = Nil,
                   flags: Map[CompilationFlag.Value, Boolean] = Map(),
                   features: Map[String, Long] = Map(),
                   verbosity: Option[Int] = None) {

  /**
    * Records an explicit value for a flag.
    *
    * If the flag already has a value, the context is returned unchanged; when the
    * existing value differs from `b`, an error is additionally reported.
    *
    * @param f the flag to set
    * @param b the requested value
    * @return a context with the flag set, or this context if the flag was already set
    */
  def changeFlag(f: CompilationFlag.Value, b: Boolean): Context = {
    if (flags.contains(f)) {
      if (flags(f) != b) {
        errorReporting.error("Conflicting flags")
      }
      this
    } else {
      copy(flags = this.flags + (f -> b))
    }
  }

  /**
    * Adds the flags implied by other flags, without overriding anything that was set explicitly.
    *
    * An implication fires only when its triggering flag is set to `true`.
    * Checking mere presence in `flags` is not enough: an option may store `false`
    * for a flag it wants to exclude, and that must not trigger what the flag implies.
    *
    * @return a context whose flags additionally contain all implied values
    */
  def filloutFlags(): Context = {
    // Yields the consequences only if the trigger flag is explicitly enabled.
    def implied(trigger: CompilationFlag.Value)
               (consequences: (CompilationFlag.Value, Boolean)*): Map[CompilationFlag.Value, Boolean] =
      if (flags.getOrElse(trigger, false)) consequences.toMap else Map.empty

    // Later groups win over earlier ones, so the debugging settings take precedence
    // over what the dangerous optimizations would enable.
    val addons =
      implied(CompilationFlag.OptimizeForSpeed)(
        CompilationFlag.InlineFunctions -> true
      ) ++ implied(CompilationFlag.OptimizeForSize)(
        CompilationFlag.SubroutineExtraction -> true
      ) ++ implied(CompilationFlag.DangerousOptimizations)(
        CompilationFlag.InterproceduralOptimization -> true,
        CompilationFlag.OptimizeStdlib -> true
      ) ++ implied(CompilationFlag.OptimizeForDebugging)(
        CompilationFlag.VariableOverlap -> false,
        CompilationFlag.RegisterVariables -> false,
        CompilationFlag.FunctionDeduplication -> false,
        CompilationFlag.SubroutineExtraction -> false,
        CompilationFlag.FunctionFallthrough -> false,
        CompilationFlag.OptimizeStdlib -> false
      )

    // Explicitly set flags are on the right-hand side, so they override the implied ones.
    copy(flags = addons ++ flags)
  }
}

View File

@ -18,34 +18,7 @@ import millfork.node.StandardCallGraph
import millfork.output._
import millfork.parser.{MosSourceLoadingQueue, ZSourceLoadingQueue}
/**
  * Settings collected from the command line before compilation starts.
  *
  * @author Karol Stasiak
  */
case class Context(errorReporting: Logger,
                   inputFileNames: List[String],
                   outputFileName: Option[String] = None,
                   runFileName: Option[String] = None,
                   optimizationLevel: Option[Int] = None,
                   zpRegisterSize: Option[Int] = None,
                   platform: Option[String] = None,
                   outputAssembly: Boolean = false,
                   outputLabels: Boolean = false,
                   includePath: List[String] = Nil,
                   flags: Map[CompilationFlag.Value, Boolean] = Map(),
                   features: Map[String, Long] = Map(),
                   verbosity: Option[Int] = None) {

  /**
    * Records an explicit value for a flag. A flag that already has a value is never
    * overwritten; asking for the opposite value reports an error instead.
    */
  def changeFlag(f: CompilationFlag.Value, b: Boolean): Context =
    flags.get(f) match {
      case None =>
        copy(flags = flags + (f -> b))
      case Some(current) =>
        if (current != b) {
          errorReporting.error("Conflicting flags")
        }
        this
    }
}
object Main {
@ -77,7 +50,7 @@ object Main {
errorReporting.trace("This program comes with ABSOLUTELY NO WARRANTY.")
errorReporting.trace("This is free software, and you are welcome to redistribute it under certain conditions")
errorReporting.trace("You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/")
val c = fixMissingIncludePath(c0)
val c = fixMissingIncludePath(c0).filloutFlags()
if (c.includePath.isEmpty) {
errorReporting.warn("Failed to detect the default include directory, consider using the -I option")
}
@ -213,7 +186,7 @@ object Main {
val program = if (optLevel > 0) {
OptimizationPresets.NodeOpt.foldLeft(unoptimized)((p, opt) => p.applyNodeOptimization(opt, options))
} else {
unoptimized
OptimizationPresets.NodeOpt0.foldLeft(unoptimized)((p, opt) => p.applyNodeOptimization(opt, options))
}
val callGraph = new StandardCallGraph(program, options.log)
@ -265,7 +238,7 @@ object Main {
val program = if (optLevel > 0) {
OptimizationPresets.NodeOpt.foldLeft(unoptimized)((p, opt) => p.applyNodeOptimization(opt, options))
} else {
unoptimized
OptimizationPresets.NodeOpt0.foldLeft(unoptimized)((p, opt) => p.applyNodeOptimization(opt, options))
}
val callGraph = new StandardCallGraph(program, options.log)
@ -430,10 +403,10 @@ object Main {
}.description("Whether hardware decimal mode should be used (6502 only).")
boolean("-fvariable-overlap", "-fno-variable-overlap").action { (c, v) =>
c.changeFlag(CompilationFlag.VariableOverlap, v)
}.description("Whether variables should overlap if their scopes do not intersect.")
}.description("Whether variables should overlap if their scopes do not intersect. Enabled by default.")
boolean("-fcompact-dispatch-params", "-fno-compact-dispatch-params").action { (c, v) =>
c.changeFlag(CompilationFlag.CompactReturnDispatchParams, v)
}.description("Whether parameter values in return dispatch statements may overlap other objects.")
}.description("Whether parameter values in return dispatch statements may overlap other objects. Enabled by default.")
// Toggles the array bounds checking flag (previously this changed VariableOverlap by mistake).
boolean("-fbounds-checking", "-fno-bounds-checking").action { (c, v) =>
  c.changeFlag(CompilationFlag.CheckIndexOutOfBounds, v)
}.description("Whether should insert bounds checking on array access.")
@ -506,28 +479,42 @@ object Main {
boolean("-foptimize-stdlib", "-fno-optimize-stdlib").action { (c, v) =>
c.changeFlag(CompilationFlag.OptimizeStdlib, v)
}.description("Optimize standard library calls.")
boolean("-fsubroutine-extraction", "-fno-subroutine-extraction").action { (c, v) =>
c.changeFlag(CompilationFlag.SubroutineExtraction, v)
}.description("Extract identical code fragments into subroutines.")
boolean("-ffunction-fallthrough", "-fno-function-fallthrough").action { (c, v) =>
c.changeFlag(CompilationFlag.FunctionFallthrough, v)
}.description("Replace tail calls by simply putting one function after another. Enabled by default.")
boolean("-ffunction-deduplication", "-fno-function-deduplication").action { (c, v) =>
c.changeFlag(CompilationFlag.FunctionDeduplication, v)
}.description("Merge identical functions into one function. Enabled by default.")
boolean("-fregister-variables", "-fno-register-variables").action { (c, v) =>
c.changeFlag(CompilationFlag.RegisterVariables, v)
}.description("Allow moving local variables into CPU registers. Enabled by default.")
flag("-Os", "--size").action { c =>
c.changeFlag(CompilationFlag.OptimizeForSize, true).
changeFlag(CompilationFlag.OptimizeForSpeed, false).
changeFlag(CompilationFlag.OptimizeForSonicSpeed, false)
}.description("Prefer smaller code even if it is slightly slower (experimental).")
}.description("Prefer smaller code even if it is slightly slower (experimental). Implies -fsubroutine-extraction.")
flag("-Of", "--fast").action { c =>
c.changeFlag(CompilationFlag.OptimizeForSize, false).
changeFlag(CompilationFlag.OptimizeForSpeed, true).
changeFlag(CompilationFlag.OptimizeForSonicSpeed, false)
}.description("Prefer faster code even if it is slightly bigger (experimental).")
}.description("Prefer faster code even if it is slightly bigger (experimental). Implies -finline.")
flag("-Ob", "--blast-processing").action { c =>
c.changeFlag(CompilationFlag.OptimizeForSize, false).
changeFlag(CompilationFlag.OptimizeForSpeed, true).
changeFlag(CompilationFlag.OptimizeForSonicSpeed, true).
changeFlag(CompilationFlag.InlineFunctions, true)
changeFlag(CompilationFlag.OptimizeForSonicSpeed, true)
}.description("Prefer faster code even if it is much bigger (experimental). Implies -finline.")
flag("--dangerous-optimizations").action { c =>
c.changeFlag(CompilationFlag.DangerousOptimizations, true)
}.description("Use dangerous optimizations (experimental).").hidden()
boolean("-fdangerous-optimizations", "-fno-dangerous-optimizations").action { (c, v) =>
c.changeFlag(CompilationFlag.DangerousOptimizations, v)
}.description("Use dangerous optimizations (experimental). Implies -fipo.")
}.description("Use dangerous optimizations (experimental). Implies -fipo and -foptimize-stdlib.")
flag("-Og", "--optimize-debugging").action { c =>
c.changeFlag(CompilationFlag.OptimizeForDebugging, true)
}.description("Disable optimizations that make debugging harder (experimental).")
fluff("", "Warning options:", "")

View File

@ -15,6 +15,10 @@ object OptimizationPresets {
UnusedLocalVariables,
UnusedGlobalVariables,
)
// Node optimizations applied when the optimization level is not greater than 0,
// in place of the full NodeOpt list.
val NodeOpt0 = List(
UnusedFunctions,
UnusedGlobalVariables,
)
val AssOpt: List[AssemblyOptimization[AssemblyLine]] = List[AssemblyOptimization[AssemblyLine]](
UnusedLabelRemoval,
AlwaysGoodOptimizations.NonetAddition,

View File

@ -1,6 +1,6 @@
package millfork.assembly
import millfork.CompilationOptions
import millfork.{CompilationFlag, CompilationOptions}
import millfork.compiler.LabelGenerator
import millfork.env.{NormalFunction, ThingInMemory}
import millfork.error.Logger
@ -23,4 +23,6 @@ trait AssemblyOptimization[T <: AbstractCode] {
def name: String
def optimize(f: NormalFunction, code: List[T], context: OptimizationContext): List[T]
/**
 * Compilation flags that all have to be enabled for this optimization to be used.
 * Empty by default, which means the optimization has no such requirement.
 */
def requiredFlags: Set[CompilationFlag.Value] = Set.empty
}

View File

@ -17,6 +17,8 @@ import scala.util.control.TailCalls.{TailRec, done, tailcall}
*/
object VariableToRegisterOptimization extends AssemblyOptimization[AssemblyLine] {
override def requiredFlags: Set[CompilationFlag.Value] = Set(CompilationFlag.RegisterVariables)
object CyclesAndBytes {
val Zero = CyclesAndBytes(0, 0)
}

View File

@ -13,6 +13,8 @@ import scala.collection.mutable.ListBuffer
*/
object ByteVariableToRegisterOptimization extends AssemblyOptimization[ZLine] {
override def requiredFlags: Set[CompilationFlag.Value] = Set(CompilationFlag.RegisterVariables)
override def name = "Allocating variables to single registers"
object CyclesAndBytes {

View File

@ -14,6 +14,8 @@ import scala.collection.mutable.ListBuffer
*/
object WordVariableToRegisterOptimization extends AssemblyOptimization[ZLine] {
override def requiredFlags: Set[CompilationFlag.Value] = Set(CompilationFlag.RegisterVariables)
override def name = "Allocating variables to register pairs"
object CyclesAndBytes {

View File

@ -163,7 +163,7 @@ abstract class AbstractAssembler[T <: AbstractCode](private val program: Program
def deduplicate(options: CompilationOptions, compiledFunctions: mutable.Map[String, CompiledFunction[T]]): Unit
def assemble(callGraph: CallGraph, optimizations: Seq[AssemblyOptimization[T]], options: CompilationOptions): AssemblerOutput = {
def assemble(callGraph: CallGraph, unfilteredOptimizations: Seq[AssemblyOptimization[T]], options: CompilationOptions): AssemblerOutput = {
mem.programName = options.outputFileName.getOrElse("MILLFORK")
val platform = options.platform
val variableAllocators = platform.variableAllocators
@ -174,6 +174,8 @@ abstract class AbstractAssembler[T <: AbstractCode](private val program: Program
zpOccupied(i + 1) = false
}
val optimizations = unfilteredOptimizations.filter(_.requiredFlags.forall(options.flag))
val assembly = mutable.ArrayBuffer[String]()
val inliningResult = inliningCalculator.calculate(

View File

@ -13,16 +13,20 @@ import scala.collection.mutable.ListBuffer
abstract class Deduplicate[T <: AbstractCode](env: Environment, options: CompilationOptions) {
def apply(compiledFunctions: mutable.Map[String, CompiledFunction[T]]): Unit = {
if (options.flag(CompilationFlag.OptimizeForSize)) {
if (options.flag(CompilationFlag.SubroutineExtraction)) {
runStage(compiledFunctions, extractCommonCode)
}
runStage(compiledFunctions, deduplicateIdenticalFunctions)
runStage(compiledFunctions, eliminateTailJumps)
runStage(compiledFunctions, eliminateTailJumps)
runStage(compiledFunctions, eliminateTailJumps)
runStage(compiledFunctions, eliminateRemainingTrivialTailJumps)
runStage(compiledFunctions, eliminateRemainingTrivialTailJumps)
runStage(compiledFunctions, eliminateRemainingTrivialTailJumps)
if (options.flag(CompilationFlag.FunctionDeduplication)) {
runStage(compiledFunctions, deduplicateIdenticalFunctions)
}
if (options.flag(CompilationFlag.FunctionFallthrough)) {
runStage(compiledFunctions, eliminateTailJumps)
runStage(compiledFunctions, eliminateTailJumps)
runStage(compiledFunctions, eliminateTailJumps)
runStage(compiledFunctions, eliminateRemainingTrivialTailJumps)
runStage(compiledFunctions, eliminateRemainingTrivialTailJumps)
runStage(compiledFunctions, eliminateRemainingTrivialTailJumps)
}
fixDoubleRedirects(compiledFunctions)
// println(compiledFunctions.map {
// case (k, v) => k + " " + (v match {

View File

@ -144,6 +144,7 @@ class EmuRun(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization],
CompilationFlag.EmitNative65816Opcodes -> (platform.cpu == millfork.Cpu.Sixteen && native16),
CompilationFlag.Emit65CE02Opcodes -> (platform.cpu == millfork.Cpu.CE02),
CompilationFlag.EmitHudsonOpcodes -> (platform.cpu == millfork.Cpu.HuC6280),
CompilationFlag.SubroutineExtraction -> optimizeForSize,
CompilationFlag.OptimizeForSize -> optimizeForSize,
CompilationFlag.OptimizeForSpeed -> blastProcessing,
CompilationFlag.OptimizeForSonicSpeed -> blastProcessing