diff --git a/docs/api/command-line.md b/docs/api/command-line.md index f12f0205..ae4f33c1 100644 --- a/docs/api/command-line.md +++ b/docs/api/command-line.md @@ -139,35 +139,68 @@ Use a software stack for stack variables. ## Optimization options -* `-O0` – Disable all optimizations. +* `-O0` – Disable all optimizations except unused global symbol removal. * `-O`, `-O2` ... `-O8` – Optimize code, various levels. For most code, anything above `-O4` doesn't improve it anymore. -* `-O9` – Optimize code using superoptimizer (experimental). Computationally expensive, decent results. +* `-O9` – Optimize code using superoptimizer (experimental). Computationally very expensive, decent results. * `-finline`, `-fno-inline` – Whether should inline functions automatically. See the [documentation about inlining](../abi/inlining.md). Computationally easy, can give decent gains. `.ini` equivalent: `inline`. +Default: no * `-fipo`, `-fno-ipo` – Whether should perform interprocedural optimization. It enables certain optimization similar to what inlining would enable, but without actual inlining. `.ini` equivalent: `ipo`. +Default: no. + +* `-fregister-variables`, `-fno-register-variables` – Whether should allow moving local variables into CPU registers. +Default: yes. * `-foptimize-stdlib`, `-fno-optimize-stdlib` – Whether should replace some standard library calls with constant parameters with more efficient variants. Currently affects `putstrz` and `strzlen`, but may affect more functions in the future. `.ini` equivalent: `optimize_stdlib`. +Default: no. + +* `-ffunction-fallthrough`, `-fno-function-fallthrough` – +Whether should replace a tail call by simply putting one function after another. +`.ini` equivalent: `function_fallthrough`. +Default: yes. + +* `-ffunction-deduplication`, `-fno-function-deduplication` – +Whether identical functions should be merged into one function. +`.ini` equivalent: `function_deduplication`. +Default: yes. 
+ +* `-fsubroutine-extraction`, `-fno-subroutine-extraction` – +Whether identical fragments of functions should be extracted into subroutines. +`.ini` equivalent: `subroutine_extraction`. +Default: no. * `-Os`, `--size` – Optimize for size, sacrificing some speed (experimental). +Also enables `-fsubroutine-extraction`. * `-Of`, `--fast` – Optimize for speed, even if it increases the size a bit (experimental). +Also enables `-finline`. * `-Ob`, `--blast-processing` – Optimize for speed, even if it increases the size a lot (experimental). -Enables `-finline` automatically. +Also enables `-finline`. + +* `-Og`, `--optimize-debugging` – Disables optimizations that make debugging harder. +Sets +`-fno-optimize-stdlib`, +`-fno-variable-overlap`, +`-fno-register-variables`, +`-fno-function-fallthrough`, +`-fno-function-deduplication`, +`-fno-subroutine-extraction` + automatically. * `-fdangerous-optimizations` – Use dangerous optimizations (experimental). Dangerous optimizations are more likely to result in broken code. -Enables `-fipo` automatically. +Also enables `-fipo` and `-foptimize-stdlib` automatically. `.ini` equivalent: `dangerous_optimizations`. Note: for compatibility with versions 0.3.0 and earlier, diff --git a/docs/api/custom-platform.md b/docs/api/custom-platform.md index 5970309a..680a030c 100644 --- a/docs/api/custom-platform.md +++ b/docs/api/custom-platform.md @@ -58,7 +58,7 @@ Default: the same as `encoding`. * `prevent_jmp_indirect_bug` – whether the compiler should try to avoid the indirect JMP bug, default is `false` on 65C02-compatible or non-6502 processors and `true` elsewhere - * `compact_dispatch_params` – whether parameter values in return dispatch statements may overlap other objects, default is `true` + * `compact_dispatch_params` – whether parameter values in return dispatch statements may overlap other objects, default is `true`. 
This may cause problems if the parameter table is stored next to a hardware register that has side effects when reading. * `lunix` – generate relocatable code for LUnix/LNG, default is `false` @@ -71,6 +71,12 @@ Default: the same as `encoding`. * `ipo` - enable interprocedural optimization, default is `false`. + * `function_fallthrough` – whether should replace a tail call by simply putting one function after another, default is `true`. + + * `function_deduplication` – whether identical functions should be merged into one function, default is `true`. + + * `subroutine_extraction` – whether identical fragments of functions should be extracted into subroutines, default is `false`. + * `lenient_encoding` - allow for automatic substitution of invalid characters in string literals using the default encodings, default is `false`. * `use_shadow_registers_for_irq` – use Z80 shadow registers in interrupt routines, default is `true` for Z80 and `false` otherwise diff --git a/src/main/scala/millfork/CompilationOptions.scala b/src/main/scala/millfork/CompilationOptions.scala index 52efbce4..587cf47b 100644 --- a/src/main/scala/millfork/CompilationOptions.scala +++ b/src/main/scala/millfork/CompilationOptions.scala @@ -218,13 +218,13 @@ object Cpu extends Enumeration { import CompilationFlag._ - private val mosAlwaysDefaultFlags = Set( - VariableOverlap, CompactReturnDispatchParams + private val alwaysDefaultFlags = Set( + VariableOverlap, CompactReturnDispatchParams, FunctionFallthrough, RegisterVariables, FunctionDeduplication ) - private val i80AlwaysDefaultFlags = Set( - VariableOverlap, CompactReturnDispatchParams - ) + private val mosAlwaysDefaultFlags = alwaysDefaultFlags + + private val i80AlwaysDefaultFlags = alwaysDefaultFlags def defaultFlags(x: Cpu.Value): Set[CompilationFlag.Value] = x match { case StrictMos => @@ -316,8 +316,9 @@ object CompilationFlag extends Enumeration { UseIntelSyntaxForInput, UseIntelSyntaxForOutput, // optimization options: - 
OptimizeForSize, OptimizeForSpeed, OptimizeForSonicSpeed, + OptimizeForSize, OptimizeForSpeed, OptimizeForSonicSpeed, OptimizeForDebugging, DangerousOptimizations, InlineFunctions, InterproceduralOptimization, + FunctionFallthrough, RegisterVariables, FunctionDeduplication, SubroutineExtraction, OptimizeStdlib, // memory allocation options VariableOverlap, CompactReturnDispatchParams, LUnixRelocatableCode, @@ -356,6 +357,9 @@ object CompilationFlag extends Enumeration { "input_intel_syntax" -> UseIntelSyntaxForInput, "ipo" -> InterproceduralOptimization, "optimize_stdlib" -> OptimizeStdlib, + "function_fallthrough" -> FunctionFallthrough, + "function_deduplication" -> FunctionDeduplication, + "subroutine_extraction" -> SubroutineExtraction, "inline" -> InlineFunctions, "dangerous_optimizations" -> DangerousOptimizations, "decimal_mode" -> DecimalMode, diff --git a/src/main/scala/millfork/Context.scala b/src/main/scala/millfork/Context.scala new file mode 100644 index 00000000..aa36f5ff --- /dev/null +++ b/src/main/scala/millfork/Context.scala @@ -0,0 +1,55 @@ +package millfork + +import millfork.error.Logger + +/** + * @author Karol Stasiak + */ + +case class Context(errorReporting: Logger, + inputFileNames: List[String], + outputFileName: Option[String] = None, + runFileName: Option[String] = None, + optimizationLevel: Option[Int] = None, + zpRegisterSize: Option[Int] = None, + platform: Option[String] = None, + outputAssembly: Boolean = false, + outputLabels: Boolean = false, + includePath: List[String] = Nil, + flags: Map[CompilationFlag.Value, Boolean] = Map(), + features: Map[String, Long] = Map(), + verbosity: Option[Int] = None) { + def changeFlag(f: CompilationFlag.Value, b: Boolean): Context = { + if (flags.contains(f)) { + if (flags(f) != b) { + errorReporting.error("Conflicting flags") + } + this + } else { + copy(flags = this.flags + (f -> b)) + } + } + + def filloutFlags(): Context = { + var addons = Map[CompilationFlag.Value, Boolean]() + if 
(flags.contains(CompilationFlag.OptimizeForSpeed)) { + addons += CompilationFlag.InlineFunctions -> true + } + if (flags.contains(CompilationFlag.OptimizeForSize)) { + addons += CompilationFlag.SubroutineExtraction -> true + } + if (flags.contains(CompilationFlag.DangerousOptimizations)) { + addons += CompilationFlag.InterproceduralOptimization -> true + addons += CompilationFlag.OptimizeStdlib -> true + } + if (flags.contains(CompilationFlag.OptimizeForDebugging)) { + addons += CompilationFlag.VariableOverlap -> false + addons += CompilationFlag.RegisterVariables -> false + addons += CompilationFlag.FunctionDeduplication -> false + addons += CompilationFlag.SubroutineExtraction -> false + addons += CompilationFlag.FunctionFallthrough -> false + addons += CompilationFlag.OptimizeStdlib -> false + } + copy(flags = flags ++ addons.filterKeys(k => !flags.contains(k))) + } +} diff --git a/src/main/scala/millfork/Main.scala b/src/main/scala/millfork/Main.scala index 308f3003..05b3827c 100644 --- a/src/main/scala/millfork/Main.scala +++ b/src/main/scala/millfork/Main.scala @@ -18,34 +18,7 @@ import millfork.node.StandardCallGraph import millfork.output._ import millfork.parser.{MosSourceLoadingQueue, ZSourceLoadingQueue} -/** - * @author Karol Stasiak - */ -case class Context(errorReporting: Logger, - inputFileNames: List[String], - outputFileName: Option[String] = None, - runFileName: Option[String] = None, - optimizationLevel: Option[Int] = None, - zpRegisterSize: Option[Int] = None, - platform: Option[String] = None, - outputAssembly: Boolean = false, - outputLabels: Boolean = false, - includePath: List[String] = Nil, - flags: Map[CompilationFlag.Value, Boolean] = Map(), - features: Map[String, Long] = Map(), - verbosity: Option[Int] = None) { - def changeFlag(f: CompilationFlag.Value, b: Boolean): Context = { - if (flags.contains(f)) { - if (flags(f) != b) { - errorReporting.error("Conflicting flags") - } - this - } else { - copy(flags = this.flags + (f -> b)) - } - 
} -} object Main { @@ -77,7 +50,7 @@ object Main { errorReporting.trace("This program comes with ABSOLUTELY NO WARRANTY.") errorReporting.trace("This is free software, and you are welcome to redistribute it under certain conditions") errorReporting.trace("You should have received a copy of the GNU General Public License along with this program. If not, see https://www.gnu.org/licenses/") - val c = fixMissingIncludePath(c0) + val c = fixMissingIncludePath(c0).filloutFlags() if (c.includePath.isEmpty) { errorReporting.warn("Failed to detect the default include directory, consider using the -I option") } @@ -213,7 +186,7 @@ object Main { val program = if (optLevel > 0) { OptimizationPresets.NodeOpt.foldLeft(unoptimized)((p, opt) => p.applyNodeOptimization(opt, options)) } else { - unoptimized + OptimizationPresets.NodeOpt0.foldLeft(unoptimized)((p, opt) => p.applyNodeOptimization(opt, options)) } val callGraph = new StandardCallGraph(program, options.log) @@ -265,7 +238,7 @@ object Main { val program = if (optLevel > 0) { OptimizationPresets.NodeOpt.foldLeft(unoptimized)((p, opt) => p.applyNodeOptimization(opt, options)) } else { - unoptimized + OptimizationPresets.NodeOpt0.foldLeft(unoptimized)((p, opt) => p.applyNodeOptimization(opt, options)) } val callGraph = new StandardCallGraph(program, options.log) @@ -430,10 +403,10 @@ object Main { }.description("Whether hardware decimal mode should be used (6502 only).") boolean("-fvariable-overlap", "-fno-variable-overlap").action { (c, v) => c.changeFlag(CompilationFlag.VariableOverlap, v) - }.description("Whether variables should overlap if their scopes do not intersect.") + }.description("Whether variables should overlap if their scopes do not intersect. 
Enabled by default.") boolean("-fcompact-dispatch-params", "-fno-compact-dispatch-params").action { (c, v) => c.changeFlag(CompilationFlag.CompactReturnDispatchParams, v) - }.description("Whether parameter values in return dispatch statements may overlap other objects.") + }.description("Whether parameter values in return dispatch statements may overlap other objects. Enabled by default.") boolean("-fbounds-checking", "-fno-bounds-checking").action { (c, v) => c.changeFlag(CompilationFlag.VariableOverlap, v) }.description("Whether should insert bounds checking on array access.") @@ -506,28 +479,42 @@ object Main { boolean("-foptimize-stdlib", "-fno-optimize-stdlib").action { (c, v) => c.changeFlag(CompilationFlag.OptimizeStdlib, v) }.description("Optimize standard library calls.") + boolean("-fsubroutine-extraction", "-fno-subroutine-extraction").action { (c, v) => + c.changeFlag(CompilationFlag.SubroutineExtraction, v) + }.description("Extract identical code fragments into subroutines.") + boolean("-ffunction-fallthrough", "-fno-function-fallthrough").action { (c, v) => + c.changeFlag(CompilationFlag.FunctionFallthrough, v) + }.description("Replace tail calls by simply putting one function after another. Enabled by default.") + boolean("-ffunction-deduplication", "-fno-function-deduplication").action { (c, v) => + c.changeFlag(CompilationFlag.FunctionDeduplication, v) + }.description("Merge identical functions into one function. Enabled by default.") + boolean("-fregister-variables", "-fno-register-variables").action { (c, v) => + c.changeFlag(CompilationFlag.RegisterVariables, v) + }.description("Allow moving local variables into CPU registers. Enabled by default.") flag("-Os", "--size").action { c => c.changeFlag(CompilationFlag.OptimizeForSize, true). changeFlag(CompilationFlag.OptimizeForSpeed, false). 
changeFlag(CompilationFlag.OptimizeForSonicSpeed, false) - }.description("Prefer smaller code even if it is slightly slower (experimental).") + }.description("Prefer smaller code even if it is slightly slower (experimental). Implies -fsubroutine-extraction.") flag("-Of", "--fast").action { c => c.changeFlag(CompilationFlag.OptimizeForSize, false). changeFlag(CompilationFlag.OptimizeForSpeed, true). changeFlag(CompilationFlag.OptimizeForSonicSpeed, false) - }.description("Prefer faster code even if it is slightly bigger (experimental).") + }.description("Prefer faster code even if it is slightly bigger (experimental). Implies -finline.") flag("-Ob", "--blast-processing").action { c => c.changeFlag(CompilationFlag.OptimizeForSize, false). changeFlag(CompilationFlag.OptimizeForSpeed, true). - changeFlag(CompilationFlag.OptimizeForSonicSpeed, true). - changeFlag(CompilationFlag.InlineFunctions, true) + changeFlag(CompilationFlag.OptimizeForSonicSpeed, true) }.description("Prefer faster code even if it is much bigger (experimental). Implies -finline.") flag("--dangerous-optimizations").action { c => c.changeFlag(CompilationFlag.DangerousOptimizations, true) }.description("Use dangerous optimizations (experimental).").hidden() boolean("-fdangerous-optimizations", "-fno-dangerous-optimizations").action { (c, v) => c.changeFlag(CompilationFlag.DangerousOptimizations, v) - }.description("Use dangerous optimizations (experimental). Implies -fipo.") + }.description("Use dangerous optimizations (experimental). 
Implies -fipo and -foptimize-stdlib.") + flag("-Og", "--optimize-debugging").action { c => + c.changeFlag(CompilationFlag.OptimizeForDebugging, true) + }.description("Disable optimizations that make debugging harder (experimental).") fluff("", "Warning options:", "") diff --git a/src/main/scala/millfork/OptimizationPresets.scala b/src/main/scala/millfork/OptimizationPresets.scala index fbecce56..a4c5dd57 100644 --- a/src/main/scala/millfork/OptimizationPresets.scala +++ b/src/main/scala/millfork/OptimizationPresets.scala @@ -15,6 +15,10 @@ object OptimizationPresets { UnusedLocalVariables, UnusedGlobalVariables, ) + val NodeOpt0 = List( + UnusedFunctions, + UnusedGlobalVariables, + ) val AssOpt: List[AssemblyOptimization[AssemblyLine]] = List[AssemblyOptimization[AssemblyLine]]( UnusedLabelRemoval, AlwaysGoodOptimizations.NonetAddition, diff --git a/src/main/scala/millfork/assembly/AssemblyOptimization.scala b/src/main/scala/millfork/assembly/AssemblyOptimization.scala index 9b23665b..11a85090 100644 --- a/src/main/scala/millfork/assembly/AssemblyOptimization.scala +++ b/src/main/scala/millfork/assembly/AssemblyOptimization.scala @@ -1,6 +1,6 @@ package millfork.assembly -import millfork.CompilationOptions +import millfork.{CompilationFlag, CompilationOptions} import millfork.compiler.LabelGenerator import millfork.env.{NormalFunction, ThingInMemory} import millfork.error.Logger @@ -23,4 +23,6 @@ trait AssemblyOptimization[T <: AbstractCode] { def name: String def optimize(f: NormalFunction, code: List[T], context: OptimizationContext): List[T] + + def requiredFlags: Set[CompilationFlag.Value] = Set.empty } diff --git a/src/main/scala/millfork/assembly/mos/opt/VariableToRegisterOptimization.scala b/src/main/scala/millfork/assembly/mos/opt/VariableToRegisterOptimization.scala index 6f4debdf..e1ba9d14 100644 --- a/src/main/scala/millfork/assembly/mos/opt/VariableToRegisterOptimization.scala +++ 
b/src/main/scala/millfork/assembly/mos/opt/VariableToRegisterOptimization.scala @@ -17,6 +17,8 @@ import scala.util.control.TailCalls.{TailRec, done, tailcall} */ object VariableToRegisterOptimization extends AssemblyOptimization[AssemblyLine] { + override def requiredFlags: Set[CompilationFlag.Value] = Set(CompilationFlag.RegisterVariables) + object CyclesAndBytes { val Zero = CyclesAndBytes(0, 0) } diff --git a/src/main/scala/millfork/assembly/z80/opt/ByteVariableToRegisterOptimization.scala b/src/main/scala/millfork/assembly/z80/opt/ByteVariableToRegisterOptimization.scala index a743bc57..c0659c7c 100644 --- a/src/main/scala/millfork/assembly/z80/opt/ByteVariableToRegisterOptimization.scala +++ b/src/main/scala/millfork/assembly/z80/opt/ByteVariableToRegisterOptimization.scala @@ -13,6 +13,8 @@ import scala.collection.mutable.ListBuffer */ object ByteVariableToRegisterOptimization extends AssemblyOptimization[ZLine] { + override def requiredFlags: Set[CompilationFlag.Value] = Set(CompilationFlag.RegisterVariables) + override def name = "Allocating variables to single registers" object CyclesAndBytes { diff --git a/src/main/scala/millfork/assembly/z80/opt/WordVariableToRegisterOptimization.scala b/src/main/scala/millfork/assembly/z80/opt/WordVariableToRegisterOptimization.scala index b6cc72ef..e458a3d4 100644 --- a/src/main/scala/millfork/assembly/z80/opt/WordVariableToRegisterOptimization.scala +++ b/src/main/scala/millfork/assembly/z80/opt/WordVariableToRegisterOptimization.scala @@ -14,6 +14,8 @@ import scala.collection.mutable.ListBuffer */ object WordVariableToRegisterOptimization extends AssemblyOptimization[ZLine] { + override def requiredFlags: Set[CompilationFlag.Value] = Set(CompilationFlag.RegisterVariables) + override def name = "Allocating variables to register pairs" object CyclesAndBytes { diff --git a/src/main/scala/millfork/output/AbstractAssembler.scala b/src/main/scala/millfork/output/AbstractAssembler.scala index f7e37d4a..d4d41042 100644 --- 
a/src/main/scala/millfork/output/AbstractAssembler.scala +++ b/src/main/scala/millfork/output/AbstractAssembler.scala @@ -163,7 +163,7 @@ abstract class AbstractAssembler[T <: AbstractCode](private val program: Program def deduplicate(options: CompilationOptions, compiledFunctions: mutable.Map[String, CompiledFunction[T]]): Unit - def assemble(callGraph: CallGraph, optimizations: Seq[AssemblyOptimization[T]], options: CompilationOptions): AssemblerOutput = { + def assemble(callGraph: CallGraph, unfilteredOptimizations: Seq[AssemblyOptimization[T]], options: CompilationOptions): AssemblerOutput = { mem.programName = options.outputFileName.getOrElse("MILLFORK") val platform = options.platform val variableAllocators = platform.variableAllocators @@ -174,6 +174,8 @@ abstract class AbstractAssembler[T <: AbstractCode](private val program: Program zpOccupied(i + 1) = false } + val optimizations = unfilteredOptimizations.filter(_.requiredFlags.forall(options.flag)) + val assembly = mutable.ArrayBuffer[String]() val inliningResult = inliningCalculator.calculate( diff --git a/src/main/scala/millfork/output/Deduplicate.scala b/src/main/scala/millfork/output/Deduplicate.scala index 042ac656..32360c86 100644 --- a/src/main/scala/millfork/output/Deduplicate.scala +++ b/src/main/scala/millfork/output/Deduplicate.scala @@ -13,16 +13,20 @@ import scala.collection.mutable.ListBuffer abstract class Deduplicate[T <: AbstractCode](env: Environment, options: CompilationOptions) { def apply(compiledFunctions: mutable.Map[String, CompiledFunction[T]]): Unit = { - if (options.flag(CompilationFlag.OptimizeForSize)) { + if (options.flag(CompilationFlag.SubroutineExtraction)) { runStage(compiledFunctions, extractCommonCode) } - runStage(compiledFunctions, deduplicateIdenticalFunctions) - runStage(compiledFunctions, eliminateTailJumps) - runStage(compiledFunctions, eliminateTailJumps) - runStage(compiledFunctions, eliminateTailJumps) - runStage(compiledFunctions, 
eliminateRemainingTrivialTailJumps) - runStage(compiledFunctions, eliminateRemainingTrivialTailJumps) - runStage(compiledFunctions, eliminateRemainingTrivialTailJumps) + if (options.flag(CompilationFlag.FunctionDeduplication)) { + runStage(compiledFunctions, deduplicateIdenticalFunctions) + } + if (options.flag(CompilationFlag.FunctionFallthrough)) { + runStage(compiledFunctions, eliminateTailJumps) + runStage(compiledFunctions, eliminateTailJumps) + runStage(compiledFunctions, eliminateTailJumps) + runStage(compiledFunctions, eliminateRemainingTrivialTailJumps) + runStage(compiledFunctions, eliminateRemainingTrivialTailJumps) + runStage(compiledFunctions, eliminateRemainingTrivialTailJumps) + } fixDoubleRedirects(compiledFunctions) // println(compiledFunctions.map { // case (k, v) => k + " " + (v match { diff --git a/src/test/scala/millfork/test/emu/EmuRun.scala b/src/test/scala/millfork/test/emu/EmuRun.scala index 1c960c36..2ef8cda5 100644 --- a/src/test/scala/millfork/test/emu/EmuRun.scala +++ b/src/test/scala/millfork/test/emu/EmuRun.scala @@ -144,6 +144,7 @@ class EmuRun(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization], CompilationFlag.EmitNative65816Opcodes -> (platform.cpu == millfork.Cpu.Sixteen && native16), CompilationFlag.Emit65CE02Opcodes -> (platform.cpu == millfork.Cpu.CE02), CompilationFlag.EmitHudsonOpcodes -> (platform.cpu == millfork.Cpu.HuC6280), + CompilationFlag.SubroutineExtraction -> optimizeForSize, CompilationFlag.OptimizeForSize -> optimizeForSize, CompilationFlag.OptimizeForSpeed -> blastProcessing, CompilationFlag.OptimizeForSonicSpeed -> blastProcessing