mirror of
https://github.com/KarolS/millfork.git
synced 2025-01-11 12:29:46 +00:00
Simple loop unrolling; --blast-processing command line option
This commit is contained in:
parent
6f9ee33514
commit
3c3ac9d70e
10
CHANGELOG.md
10
CHANGELOG.md
@ -2,15 +2,17 @@
|
||||
|
||||
## Current version
|
||||
|
||||
* **Breaking change!** Renamed `inline` to `macro`
|
||||
* **Breaking change!** Renamed `inline` to `macro`.
|
||||
|
||||
* Added support for parameters for macros written in Millfork
|
||||
* Added support for parameters for macros written in Millfork.
|
||||
|
||||
* Enabled calling macros with index expression parameters.
|
||||
|
||||
* Added optimizer hints: `inline`, `noinline`, `register`
|
||||
* Added optimizer hints: `inline`, `noinline`, `register`.
|
||||
|
||||
* Added command line flags `--size`, `--fast`, `--blast-processing`.
|
||||
|
||||
* Added `*'=` and `<<<<` operators
|
||||
* Added `*'=` and `<<<<` operators.
|
||||
|
||||
* Added return dispatch statements.
|
||||
|
||||
|
@ -64,6 +64,12 @@ This may cause problems if the parameter table is stored next to a hardware regi
|
||||
|
||||
* `--inline` – Inline functions automatically (experimental). See the [documentation about inlining](../abi/inlining.md). Computationally easy, can give decent gains.
|
||||
|
||||
* `--size` – Optimize for size, sacrificing some speed (experimental).
|
||||
|
||||
* `--fast` – Optimize for speed, even if it increases the size a bit (experimental).
|
||||
|
||||
* `--blast-processing` – Optimize for speed, even if it increases the size a lot (experimental).
|
||||
|
||||
* `--detailed-flow` – Use detailed flow analysis (experimental). Very computationally expensive and not that great.
|
||||
|
||||
* `--dangerous-optimizations` – Use dangerous optimizations (experimental). Dangerous optimizations are more likely to result in broken code.
|
||||
|
@ -75,7 +75,7 @@ object CompilationFlag extends Enumeration {
|
||||
// compilation options:
|
||||
EmitIllegals, EmitCmosOpcodes, DecimalMode, ReadOnlyArrays, PreventJmpIndirectBug,
|
||||
// optimization options:
|
||||
DetailedFlowAnalysis, DangerousOptimizations, InlineFunctions, OptimizeForSize, OptimizeForSpeed,
|
||||
DetailedFlowAnalysis, DangerousOptimizations, InlineFunctions, OptimizeForSize, OptimizeForSpeed, OptimizeForSonicSpeed,
|
||||
// memory allocation options
|
||||
VariableOverlap, CompactReturnDispatchParams,
|
||||
// runtime check options
|
||||
|
@ -226,14 +226,22 @@ object Main {
|
||||
flag("--inline").action { c =>
|
||||
c.changeFlag(CompilationFlag.InlineFunctions, true)
|
||||
}.description("Inline functions automatically.")
|
||||
flag("-Of", "--fast").action { c =>
|
||||
c.changeFlag(CompilationFlag.OptimizeForSize, false)
|
||||
c.changeFlag(CompilationFlag.OptimizeForSpeed, true)
|
||||
}.description("Optimize for speed (experimental).")
|
||||
flag("-Os", "--size").action { c =>
|
||||
c.changeFlag(CompilationFlag.OptimizeForSize, true)
|
||||
c.changeFlag(CompilationFlag.OptimizeForSpeed, false)
|
||||
}.description("Optimize for size (experimental).")
|
||||
c.changeFlag(CompilationFlag.OptimizeForSonicSpeed, false)
|
||||
}.description("Optimize for size at cost of lower speed (experimental).")
|
||||
flag("-Of", "--fast").action { c =>
|
||||
c.changeFlag(CompilationFlag.OptimizeForSize, false)
|
||||
c.changeFlag(CompilationFlag.OptimizeForSpeed, true)
|
||||
c.changeFlag(CompilationFlag.OptimizeForSonicSpeed, false)
|
||||
}.description("Optimize for speed at cost of bigger size (experimental).")
|
||||
flag("-Ob", "--blast-processing").action { c =>
|
||||
c.changeFlag(CompilationFlag.OptimizeForSize, false)
|
||||
c.changeFlag(CompilationFlag.OptimizeForSpeed, true)
|
||||
c.changeFlag(CompilationFlag.OptimizeForSonicSpeed, true)
|
||||
c.changeFlag(CompilationFlag.InlineFunctions, true)
|
||||
}.description("Optimize for speed at cost of much bigger size (experimental). Implies --inline.")
|
||||
flag("--detailed-flow").action { c =>
|
||||
c.changeFlag(CompilationFlag.DetailedFlowAnalysis, true)
|
||||
}.description("Use detailed flow analysis (experimental).")
|
||||
|
@ -39,6 +39,7 @@ object OptimizationPresets {
|
||||
LaterOptimizations.PointlessLoadAfterStore,
|
||||
AlwaysGoodOptimizations.PointlessOperationAfterLoad,
|
||||
AlwaysGoodOptimizations.IdempotentDuplicateRemoval,
|
||||
LoopUnrolling.LoopUnrolling,
|
||||
AlwaysGoodOptimizations.ConstantIndexPropagation,
|
||||
AlwaysGoodOptimizations.PointlessLoadBeforeReturn,
|
||||
AlwaysGoodOptimizations.PoinlessFlagChange,
|
||||
@ -141,6 +142,7 @@ object OptimizationPresets {
|
||||
AlwaysGoodOptimizations.IncrementingIndexRegistersAfterTransfer,
|
||||
AlwaysGoodOptimizations.IndexComparisonOptimization,
|
||||
AlwaysGoodOptimizations.IndexSequenceOptimization,
|
||||
LoopUnrolling.LoopUnrolling,
|
||||
AlwaysGoodOptimizations.MathOperationOnTwoIdenticalMemoryOperands,
|
||||
AlwaysGoodOptimizations.ModificationOfJustWrittenValue,
|
||||
AlwaysGoodOptimizations.NonetAddition,
|
||||
|
147
src/main/scala/millfork/assembly/opt/LoopUnrolling.scala
Normal file
147
src/main/scala/millfork/assembly/opt/LoopUnrolling.scala
Normal file
@ -0,0 +1,147 @@
|
||||
package millfork.assembly.opt
|
||||
|
||||
import java.util.concurrent.atomic.AtomicInteger
|
||||
|
||||
import millfork.{CompilationFlag, CompilationOptions}
|
||||
import millfork.assembly.AssemblyLine
|
||||
import millfork.assembly.OpcodeClasses._
|
||||
import millfork.assembly.Opcode._
|
||||
import millfork.assembly.AddrMode._
|
||||
import millfork.env.{Constant, Label, MemoryAddressConstant}
|
||||
|
||||
/**
  * Assembly-level optimization that replaces short counted loops driven by the
  * X or Y register with straight-line copies of the loop body.
  *
  * Only loops whose counter is loaded from a numeric immediate and stepped by a
  * single increment/decrement instruction are recognised; see the rules in
  * [[LoopUnrolling.LoopUnrolling]].
  *
  * @author Karol Stasiak
  */
object LoopUnrolling {

  /** Kind of storage that holds the loop counter. */
  object Unrolling extends Enumeration {
    val X, Y, Var = Value
  }

  /** Source of unique numeric suffixes for labels generated by [[getNextLabel]]. */
  val counter = new AtomicInteger(40000)

  /** Returns a fresh label of the form `.<prefix>__<number padded to 5 digits>`. */
  def getNextLabel(prefix: String): String = f".$prefix%s__${counter.getAndIncrement()}%05d"

  // Keys under which the rules below store their captures in the matching context.
  // The numeric values are arbitrary; they only have to be distinct.
  private val Initialization = 634
  private val Start = 453
  private val End = 312
  private val Skip = 1596
  private val Back = 5473
  private val Body = 6354
  private val Step = 63546
  private val BodyWithStep = 6355

  /**
    * Decides whether the loop captured in `ctx` should be unrolled.
    *
    * @param ctx           matching context holding the `Body`, `Step`, `Start` and (optionally) `End` captures
    * @param branchingSize size of the branch instructions that unrolling removes, used in the size estimate
    * @param index         what holds the loop counter
    * @return `true` if the loop has a known, small iteration count and the size
    *         increase is acceptable under the current optimization flags
    */
  def isFeasible(ctx: AssemblyMatchingContext, branchingSize: Int, index: Unrolling.Value): Boolean = {
    // NOTE(review): presumably rejects bodies with jumps crossing the body boundary; confirm against isExternallyLinearBlock
    if (!ctx.isExternallyLinearBlock(Body)) return false
    val bodyCode = ctx.get[List[AssemblyLine]](Body)
    val start = ctx.get[Int](Start)
    // without an explicit compare the loop ends when the counter reaches zero
    val end = ctx.getOrDefault[Int](End, 0)
    // The exit test runs after the step, so start == end means the 8-bit counter
    // has to wrap all the way around (256 iterations). The rewrite would emit
    // |start - end| == 0 copies, which is wrong, so never unroll this case.
    if (start == end) return false
    val increasing = isIncreasing(ctx)
    if (increasing != (start < end)) return false // overflow not supported
    val count = Math.abs(start - end)
    if (count > 32) return false
    if (count > 8 && !ctx.compilationOptions.flag(CompilationFlag.OptimizeForSonicSpeed)) return false
    if (count > 3 && !ctx.compilationOptions.flag(CompilationFlag.OptimizeForSpeed)) return false
    // If the register is only read through indexed addressing, the step is not
    // counted in the unrolled size estimate below.
    val onlyUsedForArrayIndexing = index match {
      case Unrolling.Var => false
      case Unrolling.X => bodyCode.forall(line => !ConcernsX(line) || line.addrMode == AbsoluteX)
      case Unrolling.Y => bodyCode.forall(line => !ConcernsY(line) || line.addrMode == AbsoluteY)
    }
    // size estimate of the step instruction
    val stepSize = index match {
      case Unrolling.Var => 3
      case _ => 1
    }
    val cmpExists = ctx.getOrDefault[Int](End, -1) >= 0
    val bodySize = bodyCode.map(_.sizeInBytes).sum
    val sizeBefore = branchingSize + bodySize + stepSize + (if (cmpExists) 2 else 0)
    val sizeAfter = count * (bodySize + (if (onlyUsedForArrayIndexing) 0 else stepSize))
    if (sizeAfter <= sizeBefore) return true
    if (!ctx.compilationOptions.flag(CompilationFlag.OptimizeForSpeed)) return false
    if (ctx.compilationOptions.flag(CompilationFlag.OptimizeForSonicSpeed)) {
      (sizeAfter - sizeBefore < 128) && (sizeAfter < sizeBefore * 32)
    } else {
      (sizeAfter - sizeBefore < 64) && (sizeAfter < sizeBefore * 8)
    }
  }

  /**
    * Feasibility check for loops that are preceded by a `BEQ` over the whole loop.
    *
    * Loading a zero immediate sets the Z flag, so with `Start == 0` the `BEQ`
    * is always taken and the loop body never executes; such loops must not be
    * replaced with copies of the body. For a non-zero start the `BEQ` is never
    * taken and the loop behaves exactly like the unguarded form.
    */
  private def isGuardedLoopFeasible(ctx: AssemblyMatchingContext, index: Unrolling.Value): Boolean =
    ctx.get[Int](Start) != 0 && isFeasible(ctx, 4, index)

  /** Tells whether the captured step instruction counts upwards. */
  private def isIncreasing(ctx: AssemblyMatchingContext): Boolean = {
    val opcode = ctx.get[List[AssemblyLine]](Step).head.opcode
    opcode == INX || opcode == INY || opcode == INC || opcode == ISC
  }

  /**
    * Renames every label defined inside `code`, together with all references to
    * it within `code`, using a fresh prefix, so that several copies of the same
    * fragment can coexist.
    */
  private def fixLabels(code: List[AssemblyLine]): List[AssemblyLine] = {
    val localLabels = code.collect {
      case AssemblyLine(LABEL, _, MemoryAddressConstant(Label(l)), _) => l
    }.toSet
    val labelPrefix = getNextLabel("ur")
    code.map {
      case s@AssemblyLine(_, _, MemoryAddressConstant(Label(l)), _) if localLabels(l) =>
        s.copy(parameter = MemoryAddressConstant(Label(labelPrefix + l)))
      case s => s
    }
  }

  /**
    * Builds the replacement for a matched loop: the counter initialization
    * followed by `|start - end|` copies of the body with its step instruction,
    * each copy with its own set of labels.
    */
  private def unrolled(ctx: AssemblyMatchingContext): List[AssemblyLine] = {
    val start = ctx.get[Int](Start)
    val end = ctx.getOrDefault[Int](End, 0)
    val bodyWithStep = ctx.get[List[AssemblyLine]](BodyWithStep)
    // List.fill re-evaluates its argument, so every copy gets fresh labels
    val copies = List.fill(Math.abs(start - end))(fixLabels(bodyWithStep)).flatten
    ctx.get[List[AssemblyLine]](Initialization) ++ copies
  }

  val LoopUnrolling = new RuleBasedAssemblyOptimization("Loop unrolling",
    needsFlowInfo = FlowInfoRequirement.NoRequirement,
    // X counter, loop guarded by a leading BEQ
    (Elidable & HasOpcode(LDX) & MatchNumericImmediate(Start)).capture(Initialization) ~
      (Elidable & HasOpcode(BEQ) & MatchParameter(Skip)) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Back)) ~
      ((Elidable & Not(HasOpcodeIn(Set(RTS, JSR, RTI))) & Not(ChangesX)).*.capture(Body) ~
        (Elidable & HasOpcodeIn(Set(DEX, INX))).capture(Step)
        ).capture(BodyWithStep) ~
      (Elidable & HasOpcode(CPX) & MatchNumericImmediate(End)).? ~
      (Elidable & HasOpcode(BNE) & MatchParameter(Back)) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Skip)) ~
      Where(ctx => isGuardedLoopFeasible(ctx, Unrolling.X)) ~~> { (code, ctx) =>
      unrolled(ctx)
    },
    // X counter, unguarded loop
    (Elidable & HasOpcode(LDX) & MatchNumericImmediate(Start)).capture(Initialization) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Back)) ~
      ((Elidable & Not(HasOpcodeIn(Set(RTS, JSR, RTI))) & Not(ChangesX)).*.capture(Body) ~
        (Elidable & HasOpcodeIn(Set(DEX, INX))).capture(Step)
        ).capture(BodyWithStep) ~
      (Elidable & HasOpcode(CPX) & MatchNumericImmediate(End)).? ~
      (Elidable & HasOpcode(BNE) & MatchParameter(Back)) ~
      Where(ctx => isFeasible(ctx, 2, Unrolling.X)) ~~> { (code, ctx) =>
      unrolled(ctx)
    },
    // Y counter, loop guarded by a leading BEQ
    (Elidable & HasOpcode(LDY) & MatchNumericImmediate(Start)).capture(Initialization) ~
      (Elidable & HasOpcode(BEQ) & MatchParameter(Skip)) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Back)) ~
      ((Elidable & Not(HasOpcodeIn(Set(RTS, JSR, RTI))) & Not(ChangesY)).*.capture(Body) ~
        (Elidable & HasOpcodeIn(Set(DEY, INY))).capture(Step)
        ).capture(BodyWithStep) ~
      (Elidable & HasOpcode(CPY) & MatchNumericImmediate(End)).? ~
      (Elidable & HasOpcode(BNE) & MatchParameter(Back)) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Skip)) ~
      Where(ctx => isGuardedLoopFeasible(ctx, Unrolling.Y)) ~~> { (code, ctx) =>
      unrolled(ctx)
    },
    // Y counter, unguarded loop
    (Elidable & HasOpcode(LDY) & MatchNumericImmediate(Start)).capture(Initialization) ~
      (Elidable & HasOpcode(LABEL) & MatchParameter(Back)) ~
      ((Elidable & Not(HasOpcodeIn(Set(RTS, JSR, RTI))) & Not(ChangesY)).*.capture(Body) ~
        (Elidable & HasOpcodeIn(Set(DEY, INY))).capture(Step)
        ).capture(BodyWithStep) ~
      (Elidable & HasOpcode(CPY) & MatchNumericImmediate(End)).? ~
      (Elidable & HasOpcode(BNE) & MatchParameter(Back)) ~
      Where(ctx => isFeasible(ctx, 2, Unrolling.Y)) ~~> { (code, ctx) =>
      unrolled(ctx)
    }
  )
}
|
@ -41,7 +41,7 @@ class RuleBasedAssemblyOptimization(val name: String, val needsFlowInfo: FlowInf
|
||||
case Nil => Nil
|
||||
case head :: tail =>
|
||||
for ((rule, index) <- rules.zipWithIndex) {
|
||||
val ctx = new AssemblyMatchingContext
|
||||
val ctx = new AssemblyMatchingContext(options)
|
||||
rule.pattern.matchTo(ctx, code) match {
|
||||
case Some(rest: List[(FlowInfo, AssemblyLine)]) =>
|
||||
val matchedChunkToOptimize: List[AssemblyLine] = code.take(code.length - rest.length).map(_._2)
|
||||
@ -69,7 +69,7 @@ class RuleBasedAssemblyOptimization(val name: String, val needsFlowInfo: FlowInf
|
||||
}
|
||||
}
|
||||
|
||||
class AssemblyMatchingContext {
|
||||
class AssemblyMatchingContext(val compilationOptions: CompilationOptions) {
|
||||
private val map = mutable.Map[Int, Any]()
|
||||
|
||||
override def toString: String = map.mkString(", ")
|
||||
@ -101,7 +101,8 @@ class AssemblyMatchingContext {
|
||||
}
|
||||
}
|
||||
|
||||
def get[T: Manifest](i: Int): T = {
|
||||
private def getImpl[T: Manifest](i: Int): AnyRef = {
|
||||
if (!map.contains(i)) return null
|
||||
val t = map(i)
|
||||
val clazz = implicitly[Manifest[T]].runtimeClass match {
|
||||
case java.lang.Integer.TYPE => classOf[java.lang.Integer]
|
||||
@ -110,7 +111,7 @@ class AssemblyMatchingContext {
|
||||
case x => x
|
||||
}
|
||||
if (clazz.isInstance(t)) {
|
||||
t.asInstanceOf[T]
|
||||
t.asInstanceOf[AnyRef]
|
||||
} else {
|
||||
if (i eq null) {
|
||||
ErrorReporting.fatal(s"Value at index $i is null")
|
||||
@ -120,6 +121,23 @@ class AssemblyMatchingContext {
|
||||
}
|
||||
}
|
||||
|
||||
/**
  * Fetches the value stored under key `i`, reporting a fatal error if the
  * lookup yields null.
  *
  * @param i key of the stored value
  * @tparam T expected type of the stored value
  */
def get[T: Manifest](i: Int): T = {
  val stored = getImpl[T](i)
  if (stored eq null) ErrorReporting.fatal(s"Value at index $i is null")
  stored.asInstanceOf[T]
}
|
||||
|
||||
/**
  * Fetches the value stored under key `i`, or `defau` if the lookup yields null.
  *
  * @param i     key of the stored value
  * @param defau value returned when the lookup yields null
  * @tparam T expected type of the stored value
  */
def getOrDefault[T: Manifest](i: Int, defau: T): T =
  Option(getImpl[T](i)).fold(defau)(_.asInstanceOf[T])
|
||||
|
||||
def isExternallyLinearBlock(i: Int): Boolean = {
|
||||
val labels = mutable.Set[String]()
|
||||
val jumps = mutable.Set[String]()
|
||||
@ -749,6 +767,18 @@ case class MatchImmediate(i: Int) extends AssemblyLinePattern {
|
||||
override def toString: String = s"(?<$i>#)"
|
||||
}
|
||||
|
||||
/**
  * Matches a line in immediate addressing mode whose parameter simplifies to a
  * numeric constant, and stores the low 8 bits of that constant in the
  * matching context under key `i`.
  */
case class MatchNumericImmediate(i: Int) extends AssemblyLinePattern {
  override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: AssemblyLine): Boolean =
    line.addrMode == AddrMode.Immediate && (line.parameter.quickSimplify match {
      // keep only the low byte of the constant
      case NumericConstant(number, _) => ctx.addObject(i, number.toInt & 0xff)
      case _ => false
    })

  override def toString: String = s"(?<$i>#)"
}
|
||||
|
||||
|
||||
case class DoesntChangeIndexingInAddrMode(i: Int) extends AssemblyLinePattern {
|
||||
override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: AssemblyLine): Boolean =
|
||||
|
@ -147,9 +147,13 @@ class Assembler(private val program: Program, private val rootEnv: Environment)
|
||||
val potentiallyInlineable: Map[String, Int] =
|
||||
InliningCalculator.getPotentiallyInlineableFunctions(
|
||||
program,
|
||||
options.flags(CompilationFlag.InlineFunctions),
|
||||
if (options.flags(CompilationFlag.OptimizeForSpeed)) 1.3 else 1.0,
|
||||
if (options.flags(CompilationFlag.OptimizeForSpeed)) 8.0 else 1.2)
|
||||
options.flags(CompilationFlag.InlineFunctions) || options.flags(CompilationFlag.OptimizeForSonicSpeed),
|
||||
if (options.flags(CompilationFlag.OptimizeForSonicSpeed)) 4.0
|
||||
else if (options.flags(CompilationFlag.OptimizeForSpeed)) 1.3
|
||||
else 1.0,
|
||||
if (options.flags(CompilationFlag.OptimizeForSonicSpeed)) 12.0
|
||||
else if (options.flags(CompilationFlag.OptimizeForSpeed)) 8.0
|
||||
else 1.2)
|
||||
|
||||
var inlinedFunctions = Map[String, List[AssemblyLine]]()
|
||||
val compiledFunctions = mutable.Map[String, List[AssemblyLine]]()
|
||||
|
Loading…
x
Reference in New Issue
Block a user