first vm peephole optimizer

2025-02-10 14:32:20 +00:00 · 2022-06-21 23:34:36 +02:00 · 2022-06-21 23:34:36 +02:00 · 69f3106062
commit 69f3106062
parent 8ab99f6129
4 changed files with 74 additions and 2 deletions
--- a/codeGenVirtual/src/prog8/codegen/virtual/AssemblyProgram.kt
+++ b/codeGenVirtual/src/prog8/codegen/virtual/AssemblyProgram.kt
@@ -68,6 +68,7 @@ internal class AssemblyProgram(override val name: String,

    fun addGlobalInits(chunk: VmCodeChunk) = globalInits.addAll(chunk.lines)
    fun addBlock(block: VmCodeChunk) = blocks.add(block)
+    fun getBlocks(): List<VmCodeChunk> = blocks
 }

 internal sealed class VmCodeLine
--- a/codeGenVirtual/src/prog8/codegen/virtual/CodeGen.kt
+++ b/codeGenVirtual/src/prog8/codegen/virtual/CodeGen.kt
@@ -62,6 +62,11 @@ class CodeGen(internal val program: PtProgram,
            vmprog.addBlock(translate(block))
        }

+        if(options.optimize) {
+            val optimizer = VmPeepholeOptimizer(vmprog, allocations)
+            optimizer.optimize()
+        }
+
        println("Vm codegen: amount of vm registers=${vmRegisters.peekNext()}")

        return vmprog
--- a/codeGenVirtual/src/prog8/codegen/virtual/VmPeepholeOptimizer.kt
+++ b/codeGenVirtual/src/prog8/codegen/virtual/VmPeepholeOptimizer.kt
@@ -0,0 +1,63 @@
+package prog8.codegen.virtual
+
+import prog8.vm.Instruction
+import prog8.vm.Opcode
+
+internal class VmPeepholeOptimizer(private val vmprog: AssemblyProgram, private val allocations: VariableAllocator) {
+    fun optimize() {
+        vmprog.getBlocks().forEach { block ->
+            do {
+                val indexedInstructions = block.lines.withIndex()
+                    .filter { it.value is VmCodeInstruction }
+                    .map { IndexedValue(it.index, (it.value as VmCodeInstruction).ins)}
+                val changed = optimizeRemoveNops(block, indexedInstructions)
+                        || optimizeDoubleLoadsAndStores(block, indexedInstructions)
+                        // TODO other optimizations:
+                        //  useless arithmetic (div/mul by 1, add/sub 0, ...)
+                        //  useless logical (bitwise (x)or 0, bitwise and by ffff, shl followed by shr or vice versa (no carry)... )
+                        //  jump/branch to label immediately below
+                        //  branch instructions with reg1==reg2
+                        //  conditional set instructions with reg1==reg2
+                        //  push followed by pop to same target, or different target replace with load
+                        //  double sec, clc
+                        //  sec+clc or clc+sec
+                        //  more complex optimizations such as unused registers, ...
+            } while(changed)
+        }
+    }
+
+    private fun optimizeRemoveNops(block: VmCodeChunk, indexedInstructions: List<IndexedValue<Instruction>>): Boolean {
+        var changed = false
+        indexedInstructions.reversed().forEach { (idx, ins) ->
+            if (ins.opcode == Opcode.NOP) {
+                changed = true
+                block.lines.removeAt(idx)
+            }
+        }
+        return changed
+    }
+
+    private fun optimizeDoubleLoadsAndStores(block: VmCodeChunk, indexedInstructions: List<IndexedValue<Instruction>>): Boolean {
+        var changed = false
+        indexedInstructions.forEach { (idx, ins) ->
+
+            // TODO: detect multiple loads to the same target, only keep first
+            // TODO: detect multiple stores to the same target, only keep first
+            // TODO: detect multiple ffrom/fto to the same target, only keep first
+            // TODO: detect multiple sequential rnd with same reg1, only keep one
+            // TODO: double same xors/nots/negs, remove the pair completely as they cancel out
+            // TODO: multiple same ands, ors, only keep first
+        }
+        return changed
+    }
+}
+
+private interface ICodeChange { // TODO not used? remove?
+    fun perform(block: VmCodeChunk)
+
+    class Remove(val idx: Int): ICodeChange {
+        override fun perform(block: VmCodeChunk) {
+            block.lines.removeAt(idx)
+        }
+    }
+}
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -3,7 +3,10 @@ TODO

 For next release
 ^^^^^^^^^^^^^^^^
-...
+- add McCarthy evaluation to shortcircuit and/or expressions.  Both conditional expressions and assignments!
+- add some more optimizations in vmPeepholeOptimizer
+- vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is.
+  this info is needed for more advanced optimizations and later code generation steps.


 Need help with
@@ -17,7 +20,6 @@ Future Things and Ideas
 ^^^^^^^^^^^^^^^^^^^^^^^
 Compiler:

- add McCarthy evaluation to shortcircuit and/or expressions. First do ifs by splitting them up? Then do expressions that compute a value?
 - vm: implement remaining sin/cos functions in math.p8
 - vm: somehow deal with asmsubs otherwise the vm IR can't fully encode all of prog8
 - vm: don't store symbol names in instructions to make optimizing the IR easier? but what about jumps to labels. And it's no longer readable by humans.
@@ -26,6 +28,7 @@ Compiler:
 - when the vm is stable and *if* its language can get promoted to prog8 IL, the variable allocation should be changed.
  It's now done before the vm code generation, but the IL should probably not depend on the allocations already performed.
  So the CodeGen doesn't do VariableAlloc *before* the codegen, but as a last step.
+- generate WASM from the new ast (or from vm code?) to run prog8 on a browser canvas?
 - createAssemblyAndAssemble(): make it possible to actually get rid of the VarDecl nodes by fixing the rest of the code mentioned there.
  but probably better to rewrite the 6502 codegen on top of the new Ast.
 - simplifyConditionalExpression() should not split expression if it still results in stack-based evaluation, but how does it know?