1
0
mirror of https://github.com/KarolS/millfork.git synced 2025-01-11 12:29:46 +00:00

Simple loop unrolling; --blast-processing command line option

This commit is contained in:
Karol Stasiak 2018-02-26 12:22:10 +01:00
parent 6f9ee33514
commit 3c3ac9d70e
8 changed files with 216 additions and 17 deletions

View File

@ -2,15 +2,17 @@
## Current version
* **Breaking change!** Renamed `inline` to `macro`
* **Breaking change!** Renamed `inline` to `macro`.
* Added support for parameters for macros written in Millfork
* Added support for parameters for macros written in Millfork.
* Enabled calling macros with index expression parameters.
* Added optimizer hints: `inline`, `noinline`, `register`
* Added optimizer hints: `inline`, `noinline`, `register`.
* Added command line flags `--size`, `--fast`, `--blast-processing`.
* Added `*'=` and `<<<<` operators
* Added `*'=` and `<<<<` operators.
* Added return dispatch statements.

View File

@ -64,6 +64,12 @@ This may cause problems if the parameter table is stored next to a hardware regi
* `--inline` Inline functions automatically (experimental). See the [documentation about inlining](../abi/inlining.md). Computationally easy, can give decent gains.
* `--size` Optimize for size, sacrificing some speed (experimental).
* `--fast` Optimize for speed, even if it increases the size a bit (experimental).
* `--blast-processing` Optimize for speed, even if it increases the size a lot (experimental).
* `--detailed-flow` Use detailed flow analysis (experimental). Very computationally expensive and not that great.
* `--dangerous-optimizations` Use dangerous optimizations (experimental). Dangerous optimizations are more likely to result in broken code.

View File

@ -75,7 +75,7 @@ object CompilationFlag extends Enumeration {
// compilation options:
EmitIllegals, EmitCmosOpcodes, DecimalMode, ReadOnlyArrays, PreventJmpIndirectBug,
// optimization options:
DetailedFlowAnalysis, DangerousOptimizations, InlineFunctions, OptimizeForSize, OptimizeForSpeed,
DetailedFlowAnalysis, DangerousOptimizations, InlineFunctions, OptimizeForSize, OptimizeForSpeed, OptimizeForSonicSpeed,
// memory allocation options
VariableOverlap, CompactReturnDispatchParams,
// runtime check options

View File

@ -226,14 +226,22 @@ object Main {
flag("--inline").action { c =>
c.changeFlag(CompilationFlag.InlineFunctions, true)
}.description("Inline functions automatically.")
flag("-Of", "--fast").action { c =>
c.changeFlag(CompilationFlag.OptimizeForSize, false)
c.changeFlag(CompilationFlag.OptimizeForSpeed, true)
}.description("Optimize for speed (experimental).")
flag("-Os", "--size").action { c =>
c.changeFlag(CompilationFlag.OptimizeForSize, true)
c.changeFlag(CompilationFlag.OptimizeForSpeed, false)
}.description("Optimize for size (experimental).")
c.changeFlag(CompilationFlag.OptimizeForSonicSpeed, false)
}.description("Optimize for size at cost of lower speed (experimental).")
flag("-Of", "--fast").action { c =>
c.changeFlag(CompilationFlag.OptimizeForSize, false)
c.changeFlag(CompilationFlag.OptimizeForSpeed, true)
c.changeFlag(CompilationFlag.OptimizeForSonicSpeed, false)
}.description("Optimize for speed at cost of bigger size (experimental).")
flag("-Ob", "--blast-processing").action { c =>
c.changeFlag(CompilationFlag.OptimizeForSize, false)
c.changeFlag(CompilationFlag.OptimizeForSpeed, true)
c.changeFlag(CompilationFlag.OptimizeForSonicSpeed, true)
c.changeFlag(CompilationFlag.InlineFunctions, true)
}.description("Optimize for speed at cost of much bigger size (experimental). Implies --inline.")
flag("--detailed-flow").action { c =>
c.changeFlag(CompilationFlag.DetailedFlowAnalysis, true)
}.description("Use detailed flow analysis (experimental).")

View File

@ -39,6 +39,7 @@ object OptimizationPresets {
LaterOptimizations.PointlessLoadAfterStore,
AlwaysGoodOptimizations.PointlessOperationAfterLoad,
AlwaysGoodOptimizations.IdempotentDuplicateRemoval,
LoopUnrolling.LoopUnrolling,
AlwaysGoodOptimizations.ConstantIndexPropagation,
AlwaysGoodOptimizations.PointlessLoadBeforeReturn,
AlwaysGoodOptimizations.PoinlessFlagChange,
@ -141,6 +142,7 @@ object OptimizationPresets {
AlwaysGoodOptimizations.IncrementingIndexRegistersAfterTransfer,
AlwaysGoodOptimizations.IndexComparisonOptimization,
AlwaysGoodOptimizations.IndexSequenceOptimization,
LoopUnrolling.LoopUnrolling,
AlwaysGoodOptimizations.MathOperationOnTwoIdenticalMemoryOperands,
AlwaysGoodOptimizations.ModificationOfJustWrittenValue,
AlwaysGoodOptimizations.NonetAddition,

View File

@ -0,0 +1,147 @@
package millfork.assembly.opt
import java.util.concurrent.atomic.AtomicInteger
import millfork.{CompilationFlag, CompilationOptions}
import millfork.assembly.AssemblyLine
import millfork.assembly.OpcodeClasses._
import millfork.assembly.Opcode._
import millfork.assembly.AddrMode._
import millfork.env.{Constant, Label, MemoryAddressConstant}
/**
  * Assembly-level optimization that replaces short, counted loops driven by the
  * X or Y index register with straight-line copies of the loop body.
  *
  * @author Karol Stasiak
  */
object LoopUnrolling {

  /** Kind of loop counter a feasibility check is performed for. `Var` is not used by any rule in this object. */
  object Unrolling extends Enumeration {
    val X, Y, Var = Value
  }

  /** Source of the numeric suffixes used by [[getNextLabel]]. */
  val counter = new AtomicInteger(40000)

  /**
    * Builds a fresh label prefix from `prefix` and the next value of [[counter]].
    *
    * @param prefix text placed between the leading dot and the `__NNNNN` suffix
    * @return a string of the form `.<prefix>__<5-digit counter value>`
    */
  def getNextLabel(prefix: String): String = f".$prefix%s__${counter.getAndIncrement()}%05d"

  // Keys under which the rules below store captured fragments in the matching context.
  // The numeric values carry no meaning of their own; they only have to be distinct.
  private val Initialization = 634
  private val Start = 453
  private val End = 312
  private val Skip = 1596
  private val Back = 5473
  private val Body = 6354
  private val Step = 63546
  private val BodyWithStep = 6355

  /**
    * Decides whether the loop captured in `ctx` should be unrolled.
    *
    * @param ctx           matching context holding the captured `Start`, optional `End`, `Body` and `Step`
    * @param branchingSize size contribution of the loop's branch instructions to the original code
    *                      (the rules pass 4 when a leading `BEQ` is matched and 2 otherwise)
    * @param index         which kind of counter drives the loop
    * @return `true` if unrolling is both supported and acceptable under the active size/speed flags
    */
  def isFeasible(ctx: AssemblyMatchingContext, branchingSize: Int, index: Unrolling.Value): Boolean = {
    if (!ctx.isExternallyLinearBlock(Body)) return false
    val bodyCode = ctx.get[List[AssemblyLine]](Body)
    val start = ctx.get[Int](Start)
    // A missing comparison instruction means the loop terminates when the counter reaches zero.
    val end = ctx.getOrDefault[Int](End, 0)
    // The matched loops test the counter only after the body and the step have run.
    // When the counter starts at its terminal value, the first step moves it away from that value,
    // so the loop keeps running until the 8-bit counter wraps around (256 iterations).
    // Unrolling would emit abs(start - end) == 0 copies and silently delete the loop, so never unroll here.
    if (start == end) return false
    val increasing = isIncreasing(ctx)
    if (increasing != (start < end)) return false // overflow not supported
    val count = Math.abs(start - end)
    if (count > 32) return false
    if (count > 8 && !ctx.compilationOptions.flag(CompilationFlag.OptimizeForSonicSpeed)) return false
    if (count > 3 && !ctx.compilationOptions.flag(CompilationFlag.OptimizeForSpeed)) return false
    val onlyUsedForArrayIndexing = index match {
      case Unrolling.Var => false
      case Unrolling.X => bodyCode.forall(line => !ConcernsX(line) || line.addrMode == AbsoluteX)
      case Unrolling.Y => bodyCode.forall(line => !ConcernsY(line) || line.addrMode == AbsoluteY)
    }
    val stepSize = index match {
      case Unrolling.Var => 3
      case _ => 1
    }
    val cmpExists = ctx.getOrDefault[Int](End, -1) >= 0
    val bodySize = bodyCode.map(_.sizeInBytes).sum
    val sizeBefore = branchingSize + bodySize + stepSize + (if (cmpExists) 2 else 0)
    // The step is left out of the estimate when the counter is only used for indexing.
    val sizeAfter = count * (bodySize + (if (onlyUsedForArrayIndexing) 0 else stepSize))
    if (sizeAfter <= sizeBefore) return true
    if (!ctx.compilationOptions.flag(CompilationFlag.OptimizeForSpeed)) return false
    if (ctx.compilationOptions.flag(CompilationFlag.OptimizeForSonicSpeed)) {
      (sizeAfter - sizeBefore < 128) && (sizeAfter < sizeBefore * 32)
    } else {
      (sizeAfter - sizeBefore < 64) && (sizeAfter < sizeBefore * 8)
    }
  }

  /** Tells whether the captured step instruction is one of the incrementing opcodes. */
  private def isIncreasing(ctx: AssemblyMatchingContext): Boolean =
    ctx.get[List[AssemblyLine]](Step).headOption.exists { line =>
      val opcode = line.opcode
      opcode == INX || opcode == INY || opcode == INC || opcode == ISC
    }

  /**
    * Renames every label that is defined inside `code`, together with all references to it
    * inside `code`, by prepending a fresh prefix, so that several copies of the same fragment
    * do not define the same label twice.
    */
  private def fixLabels(code: List[AssemblyLine]): List[AssemblyLine] = {
    val localLabels = code.flatMap {
      case AssemblyLine(LABEL, _, MemoryAddressConstant(Label(l)), _) => Some(l)
      case _ => None
    }.toSet
    val labelPrefix = getNextLabel("ur")
    code.map {
      case s@AssemblyLine(_, _, MemoryAddressConstant(Label(l)), _) if localLabels(l) =>
        s.copy(parameter = MemoryAddressConstant(Label(labelPrefix + l)))
      case s => s
    }
  }

  /**
    * Builds the replacement for a matched loop: the original counter initialization followed by
    * `abs(start - end)` relabelled copies of the body together with its step instruction.
    */
  private def unrollMatchedLoop(ctx: AssemblyMatchingContext): List[AssemblyLine] = {
    val start = ctx.get[Int](Start)
    val end = ctx.getOrDefault[Int](End, 0)
    val bodyWithStep = ctx.get[List[AssemblyLine]](BodyWithStep)
    // List.fill re-evaluates its element for every copy, so each copy gets its own label prefix.
    val copies = List.fill(Math.abs(start - end))(fixLabels(bodyWithStep)).flatten
    ctx.get[List[AssemblyLine]](Initialization) ++ copies
  }

  val LoopUnrolling = new RuleBasedAssemblyOptimization("Loop unrolling",
    needsFlowInfo = FlowInfoRequirement.NoRequirement,

    // X-driven loop guarded by a leading BEQ that jumps over the whole loop
    (Elidable & HasOpcode(LDX) & MatchNumericImmediate(Start)).capture(Initialization) ~
      (Elidable & HasOpcode(BEQ) & MatchParameter(Skip)) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Back)) ~
      ((Elidable & Not(HasOpcodeIn(Set(RTS, JSR, RTI))) & Not(ChangesX)).*.capture(Body) ~
        (Elidable & HasOpcodeIn(Set(DEX, INX))).capture(Step)
        ).capture(BodyWithStep) ~
      (Elidable & HasOpcode(CPX) & MatchNumericImmediate(End)).? ~
      (Elidable & HasOpcode(BNE) & MatchParameter(Back)) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Skip)) ~
      Where(ctx => isFeasible(ctx, 4, Unrolling.X)) ~~> { (_, ctx) =>
      unrollMatchedLoop(ctx)
    },

    // X-driven loop without the leading BEQ
    (Elidable & HasOpcode(LDX) & MatchNumericImmediate(Start)).capture(Initialization) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Back)) ~
      ((Elidable & Not(HasOpcodeIn(Set(RTS, JSR, RTI))) & Not(ChangesX)).*.capture(Body) ~
        (Elidable & HasOpcodeIn(Set(DEX, INX))).capture(Step)
        ).capture(BodyWithStep) ~
      (Elidable & HasOpcode(CPX) & MatchNumericImmediate(End)).? ~
      (Elidable & HasOpcode(BNE) & MatchParameter(Back)) ~
      Where(ctx => isFeasible(ctx, 2, Unrolling.X)) ~~> { (_, ctx) =>
      unrollMatchedLoop(ctx)
    },

    // Y-driven loop guarded by a leading BEQ that jumps over the whole loop
    (Elidable & HasOpcode(LDY) & MatchNumericImmediate(Start)).capture(Initialization) ~
      (Elidable & HasOpcode(BEQ) & MatchParameter(Skip)) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Back)) ~
      ((Elidable & Not(HasOpcodeIn(Set(RTS, JSR, RTI))) & Not(ChangesY)).*.capture(Body) ~
        (Elidable & HasOpcodeIn(Set(DEY, INY))).capture(Step)
        ).capture(BodyWithStep) ~
      (Elidable & HasOpcode(CPY) & MatchNumericImmediate(End)).? ~
      (Elidable & HasOpcode(BNE) & MatchParameter(Back)) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Skip)) ~
      Where(ctx => isFeasible(ctx, 4, Unrolling.Y)) ~~> { (_, ctx) =>
      unrollMatchedLoop(ctx)
    },

    // Y-driven loop without the leading BEQ
    (Elidable & HasOpcode(LDY) & MatchNumericImmediate(Start)).capture(Initialization) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Back)) ~
      ((Elidable & Not(HasOpcodeIn(Set(RTS, JSR, RTI))) & Not(ChangesY)).*.capture(Body) ~
        (Elidable & HasOpcodeIn(Set(DEY, INY))).capture(Step)
        ).capture(BodyWithStep) ~
      (Elidable & HasOpcode(CPY) & MatchNumericImmediate(End)).? ~
      (Elidable & HasOpcode(BNE) & MatchParameter(Back)) ~
      Where(ctx => isFeasible(ctx, 2, Unrolling.Y)) ~~> { (_, ctx) =>
      unrollMatchedLoop(ctx)
    }
  )
}

View File

@ -41,7 +41,7 @@ class RuleBasedAssemblyOptimization(val name: String, val needsFlowInfo: FlowInf
case Nil => Nil
case head :: tail =>
for ((rule, index) <- rules.zipWithIndex) {
val ctx = new AssemblyMatchingContext
val ctx = new AssemblyMatchingContext(options)
rule.pattern.matchTo(ctx, code) match {
case Some(rest: List[(FlowInfo, AssemblyLine)]) =>
val matchedChunkToOptimize: List[AssemblyLine] = code.take(code.length - rest.length).map(_._2)
@ -69,7 +69,7 @@ class RuleBasedAssemblyOptimization(val name: String, val needsFlowInfo: FlowInf
}
}
class AssemblyMatchingContext {
class AssemblyMatchingContext(val compilationOptions: CompilationOptions) {
private val map = mutable.Map[Int, Any]()
override def toString: String = map.mkString(", ")
@ -101,7 +101,8 @@ class AssemblyMatchingContext {
}
}
def get[T: Manifest](i: Int): T = {
private def getImpl[T: Manifest](i: Int): AnyRef = {
if (!map.contains(i)) return null
val t = map(i)
val clazz = implicitly[Manifest[T]].runtimeClass match {
case java.lang.Integer.TYPE => classOf[java.lang.Integer]
@ -110,7 +111,7 @@ class AssemblyMatchingContext {
case x => x
}
if (clazz.isInstance(t)) {
t.asInstanceOf[T]
t.asInstanceOf[AnyRef]
} else {
if (i eq null) {
ErrorReporting.fatal(s"Value at index $i is null")
@ -120,6 +121,23 @@ class AssemblyMatchingContext {
}
}
/**
  * Fetches the value stored under key `i`, viewed as a `T`.
  * When `getImpl` yields `null`, the problem is reported through `ErrorReporting.fatal`.
  */
def get[T: Manifest](i: Int): T =
  Option(getImpl[T](i))
    .getOrElse(ErrorReporting.fatal(s"Value at index $i is null"))
    .asInstanceOf[T]
/**
  * Fetches the value stored under key `i`, viewed as a `T`,
  * or hands back `defau` when `getImpl` yields `null`.
  */
def getOrDefault[T: Manifest](i: Int, defau: T): T =
  Option(getImpl[T](i)).fold(defau)(_.asInstanceOf[T])
def isExternallyLinearBlock(i: Int): Boolean = {
val labels = mutable.Set[String]()
val jumps = mutable.Set[String]()
@ -749,6 +767,18 @@ case class MatchImmediate(i: Int) extends AssemblyLinePattern {
override def toString: String = s"(?<$i>#)"
}
/**
  * Line pattern that accepts an instruction in immediate addressing mode whose parameter
  * simplifies to a numeric constant; the low 8 bits of that constant are handed to
  * `ctx.addObject` under key `i`, and the result of that call is the match result.
  */
case class MatchNumericImmediate(i: Int) extends AssemblyLinePattern {
  override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: AssemblyLine): Boolean =
    line.addrMode == AddrMode.Immediate && (line.parameter.quickSimplify match {
      case NumericConstant(number, _) => ctx.addObject(i, number.toInt & 0xff)
      case _ => false
    })

  override def toString: String = s"(?<$i>#)"
}
case class DoesntChangeIndexingInAddrMode(i: Int) extends AssemblyLinePattern {
override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: AssemblyLine): Boolean =

View File

@ -147,9 +147,13 @@ class Assembler(private val program: Program, private val rootEnv: Environment)
val potentiallyInlineable: Map[String, Int] =
InliningCalculator.getPotentiallyInlineableFunctions(
program,
options.flags(CompilationFlag.InlineFunctions),
if (options.flags(CompilationFlag.OptimizeForSpeed)) 1.3 else 1.0,
if (options.flags(CompilationFlag.OptimizeForSpeed)) 8.0 else 1.2)
options.flags(CompilationFlag.InlineFunctions) || options.flags(CompilationFlag.OptimizeForSonicSpeed),
if (options.flags(CompilationFlag.OptimizeForSonicSpeed)) 4.0
else if (options.flags(CompilationFlag.OptimizeForSpeed)) 1.3
else 1.0,
if (options.flags(CompilationFlag.OptimizeForSonicSpeed)) 12.0
else if (options.flags(CompilationFlag.OptimizeForSpeed)) 8.0
else 1.2)
var inlinedFunctions = Map[String, List[AssemblyLine]]()
val compiledFunctions = mutable.Map[String, List[AssemblyLine]]()