IR: avoid redundant CMPI in if-expressions, remove dead stores

This commit is contained in:
Irmen de Jong
2026-04-02 22:20:58 +02:00
parent 92751d9285
commit 4739bd087a
5 changed files with 306 additions and 49 deletions
+21 -6
View File
@@ -409,22 +409,37 @@ gradle :languageServer:test -Dlsp.verbose=true
Note: By default, Gradle only shows failed tests. Passed and skipped tests are silent.
**Test filtering patterns:**
**⚠️ CRITICAL: Test Filtering Patterns - Read This First!**
Gradle's `--tests` filter has **strict rules** that are easy to get wrong:
| Pattern | Example | Works? |
|---------|---------|--------|
| Full class name | `--tests "prog8tests.compiler.TestOptimization"` | ✅ Yes |
| **Full class name** | `--tests "prog8tests.compiler.TestOptimization"` | ✅ **USE THIS** |
| Wildcard at END | `--tests "prog8tests.compiler.Test*"` | ✅ Yes |
| Wildcard on package | `--tests "prog8tests.compiler.*"` | ✅ Yes |
| Wildcard at START | `--tests "*TestOptimization"` | ❌ No |
| Test description | `--tests "*Optimization*inline*"` | ❌ No |
| **Wildcard at START** | `--tests "*TestOptimization"` | ❌ **FAILS SILENTLY** |
| Test description | `--tests "*Optimization*inline*"` | ❌ **FAILS SILENTLY** |
**✅ CORRECT - Always use exact fully qualified class name:**
```bash
gradle :compiler:test --tests "prog8tests.vm.TestCompilerVirtual"
gradle :compiler:test --tests "prog8tests.compiler.TestOptimization"
```
**❌ WRONG - These patterns FAIL (tests won't run, but gradle reports success):**
```bash
gradle :compiler:test --tests "*TestCompilerVirtual" # FAILS
gradle :compiler:test --tests "*TestOptimization*" # FAILS
gradle :compiler:test --tests "*Optimization*inline*" # FAILS
```
**Alternative with `-PtestFilter`** (supports wildcards anywhere):
```bash
gradle test -PtestFilter="*Optimization"
gradle test -PtestFilter="*Optimization" # Works with wildcards
```
**Why the restrictions?** Gradle's `--tests` filter requires the **fully qualified class name**. Wildcards work as **suffixes** only. KoTest test names like `"inline multi-value void"` are _descriptions_, not method names, so you cannot filter by them.
**Why the restrictions?** Gradle's `--tests` filter matches **fully qualified class names only**. Wildcards work as **suffixes** (e.g., `Test*`) but NOT as prefixes (e.g., `*Test`). KoTest test names like `"inline multi-value void"` are _descriptions_, not method names, so you cannot filter by them.
**To find failing tests after a run:**
```bash
@@ -229,10 +229,15 @@ internal class ExpressionGen(private val codeGen: IRCodeGen) {
}
// TODO don't store condition as expression result but just use the flags, like a normal PtIfElse translation does
// Use status flags from condition expression when possible, avoiding redundant CMPI #0
val condTr = translateExpression(ifExpr.condition)
addToResult(result, condTr, condTr.resultReg, -1)
addInstr(result, IRInstruction(Opcode.CMPI, IRDataType.BYTE, reg1=condTr.resultReg, immediate = 0), null)
// Check if the last instruction already sets status flags - if so, skip the CMPI #0
val lastInstr = condTr.chunks.lastOrNull()?.instructions?.lastOrNull()
val skipCmpi = lastInstr != null && lastInstr.opcode in OpcodesThatSetStatusbits
if (!skipCmpi) {
addInstr(result, IRInstruction(Opcode.CMPI, IRDataType.BYTE, reg1=condTr.resultReg, immediate = 0), null)
}
addInstr(result, IRInstruction(Opcode.BSTEQ, labelSymbol = falseLabel), null)
if (irDt != IRDataType.FLOAT) {
@@ -3,6 +3,14 @@ package prog8.codegen.intermediate
import prog8.code.core.IErrorReporter
import prog8.intermediate.*
/**
* Represents what value a register currently holds for LOADR forwarding optimization.
*/
private sealed class RegisterContent {
data class Constant(val value: Any) : RegisterContent() // Int or Double
data class Register(val originalReg: Int) : RegisterContent()
}
class IRPeepholeOptimizer(private val irprog: IRProgram, private val retainSSA: Boolean) {
fun optimize(optimizationsEnabled: Boolean, errors: IErrorReporter) {
if(!optimizationsEnabled)
@@ -55,6 +63,8 @@ class IRPeepholeOptimizer(private val irprog: IRProgram, private val retainSSA:
|| cleanupPushPop(chunk1, indexedInstructions)
|| simplifyConstantReturns(chunk1, indexedInstructions)
|| removeNeedlessLoads(chunk1, indexedInstructions)
|| removeDeadStores(chunk1, indexedInstructions)
// || removeLoadrForwarding(chunk1, indexedInstructions) // DISABLED - needs debugging
|| removeNops(chunk1, indexedInstructions) // last time, in case one of the optimizers replaced something with a nop
} while (changed)
}
@@ -521,11 +531,11 @@ jump p8_label_gen_2
Opcode.ADDR -> optimizeImmediateLoadAssociative(Opcode.ADD)
Opcode.MULR -> optimizeImmediateLoadAssociative(Opcode.MUL)
Opcode.MULSR -> optimizeImmediateLoadAssociative(Opcode.MULS)
// Opcode.SUBR -> TODO("ir peephole Subr")
// Opcode.DIVR -> TODO("ir peephole Divr")
// Opcode.DIVSR -> TODO("ir peephole Divsr")
// Opcode.MODR -> TODO("ir peephole Modr")
// Opcode.DIVMODR -> TODO("ir peephole DivModr")
Opcode.SUBR -> optimizeImmediateLoadAssociative(Opcode.SUB)
Opcode.DIVR -> optimizeImmediateLoadAssociative(Opcode.DIV)
Opcode.DIVSR -> optimizeImmediateLoadAssociative(Opcode.DIVS)
Opcode.MODR -> optimizeImmediateLoadAssociative(Opcode.MOD)
// Opcode.DIVMODR - skipped, no immediate DIVMOD variant exists
else -> {}
}
}
@@ -593,4 +603,103 @@ jump p8_label_gen_2
}
return changed
}
}
private fun removeDeadStores(chunk: IRCodeChunk, indexedInstructions: List<IndexedValue<IRInstruction>>): Boolean {
// Detect and remove dead stores: when a register is written but never read before being overwritten.
// Example:
// LOAD r1, #5 <- dead store (r1 overwritten before use)
// LOAD r1, #10
// USE r1
// Track for each register: (index of last write, whether value was read since)
val pendingWrites = mutableMapOf<Int, Pair<Int, Boolean>>() // reg -> (writeIdx, wasRead)
val deadStores = mutableSetOf<Int>()
indexedInstructions.forEach { (idx, ins) ->
val formats = instructionFormats.getValue(ins.opcode)
val format = formats[ins.type] ?: formats[null]
// First, check if this instruction READS any registers
if(format?.reg1 == OperandDirection.READ || format?.reg1 == OperandDirection.READWRITE) {
val reg = ins.reg1 ?: ins.fpReg1
if(reg != null) {
val existing = pendingWrites[reg]
if(existing != null) {
pendingWrites[reg] = existing.first to true
}
}
}
if(format?.reg2 == OperandDirection.READ || format?.reg2 == OperandDirection.READWRITE) {
val reg = ins.reg2 ?: ins.fpReg2
if(reg != null) {
val existing = pendingWrites[reg]
if(existing != null) {
pendingWrites[reg] = existing.first to true
}
}
}
if(format?.reg3 == OperandDirection.READ || format?.reg3 == OperandDirection.READWRITE) {
val reg = ins.reg3
if(reg != null) {
val existing = pendingWrites[reg]
if(existing != null) {
pendingWrites[reg] = existing.first to true
}
}
}
// Then, check if this instruction WRITES to any registers
if(format?.reg1 == OperandDirection.WRITE || format?.reg1 == OperandDirection.READWRITE) {
val reg = ins.reg1 ?: ins.fpReg1
if(reg != null && reg != 0) {
// Check if previous write to this reg was dead (never read before this overwrite)
val existing = pendingWrites[reg]
if(existing != null && !existing.second) {
deadStores.add(existing.first)
}
// Record this new write as pending (not yet read)
pendingWrites[reg] = idx to false
}
}
}
// Any remaining pending writes that were never read are also dead
// (unless they're the final value needed - but we can't know that here)
// Actually, we should NOT remove these because they might be the final value
// Remove dead stores (in reverse order to preserve indices)
var changed = false
deadStores.sortedDescending().forEach { idx ->
if(idx < chunk.instructions.size) {
chunk.instructions.removeAt(idx)
changed = true
}
}
return changed
}
private fun isRegisterRead(ins: IRInstruction, reg: Int): Boolean {
// Check if the instruction reads from the given register
val formats = instructionFormats.getValue(ins.opcode)
val format = formats[ins.type] ?: formats[null]
val reg1Read = (format?.reg1 == OperandDirection.READ || format?.reg1 == OperandDirection.READWRITE) && (ins.reg1 == reg || ins.fpReg1 == reg)
val reg2Read = (format?.reg2 == OperandDirection.READ || format?.reg2 == OperandDirection.READWRITE) && (ins.reg2 == reg || ins.fpReg2 == reg)
val reg3Read = (format?.reg3 == OperandDirection.READ || format?.reg3 == OperandDirection.READWRITE) && (ins.reg3 == reg)
return reg1Read || reg2Read || reg3Read
}
//private fun removeLoadrForwarding(chunk: IRCodeChunk, indexedInstructions: List<IndexedValue<IRInstruction>>): Boolean {
// Forward LOADR instructions to their original source.
// Example:
// LOAD r1, #5
// LOADR r2, r1 -> LOAD r2, #5
// LOADR r3, r2 -> LOAD r3, #5
// todo: This needs more careful implementation considering:
// - Cross-chunk register usage
// - Function call side effects
// - Proper invalidation of register tracking
//}
}
+93 -35
View File
@@ -153,28 +153,31 @@ class TestIRPeepholeOpt: FunSpec({
}
test("remove useless div/mul, add/sub") {
// Use different registers for each test case to avoid dead store elimination removing them
val irProg = makeIRProgram(listOf(
IRInstruction(Opcode.DIV, IRDataType.BYTE, reg1=42, immediate = 1),
IRInstruction(Opcode.DIVS, IRDataType.BYTE, reg1=42, immediate = 1),
IRInstruction(Opcode.MUL, IRDataType.BYTE, reg1=42, immediate = 1),
IRInstruction(Opcode.MOD, IRDataType.BYTE, reg1=42, immediate = 1),
IRInstruction(Opcode.DIV, IRDataType.BYTE, reg1=42, immediate = 2),
IRInstruction(Opcode.DIVS, IRDataType.BYTE, reg1=42, immediate = 2),
IRInstruction(Opcode.MUL, IRDataType.BYTE, reg1=42, immediate = 2),
IRInstruction(Opcode.MOD, IRDataType.BYTE, reg1=42, immediate = 2),
IRInstruction(Opcode.ADD, IRDataType.BYTE, reg1=42, immediate = 0),
IRInstruction(Opcode.SUB, IRDataType.BYTE, reg1=42, immediate = 0)
IRInstruction(Opcode.DIV, IRDataType.BYTE, reg1=1, immediate = 1),
IRInstruction(Opcode.DIVS, IRDataType.BYTE, reg1=2, immediate = 1),
IRInstruction(Opcode.MUL, IRDataType.BYTE, reg1=3, immediate = 1),
IRInstruction(Opcode.MOD, IRDataType.BYTE, reg1=4, immediate = 1),
IRInstruction(Opcode.DIV, IRDataType.BYTE, reg1=5, immediate = 2),
IRInstruction(Opcode.DIVS, IRDataType.BYTE, reg1=6, immediate = 2),
IRInstruction(Opcode.MUL, IRDataType.BYTE, reg1=7, immediate = 2),
IRInstruction(Opcode.MOD, IRDataType.BYTE, reg1=8, immediate = 2),
IRInstruction(Opcode.ADD, IRDataType.BYTE, reg1=9, immediate = 0),
IRInstruction(Opcode.SUB, IRDataType.BYTE, reg1=10, immediate = 0)
))
irProg.chunks().single().instructions.size shouldBe 10
val opt = IRPeepholeOptimizer(irProg, false)
opt.optimize(true, ErrorReporterForTests())
// First 4 are removed (div/mul/mod by 1), last 2 become no-ops (add/sub 0)
irProg.chunks().single().instructions.size shouldBe 4
}
test("replace add/sub 1 by inc/dec") {
// Use different registers for each test case to avoid dead store elimination removing them
val irProg = makeIRProgram(listOf(
IRInstruction(Opcode.ADD, IRDataType.BYTE, reg1=42, immediate = 1),
IRInstruction(Opcode.SUB, IRDataType.BYTE, reg1=42, immediate = 1)
IRInstruction(Opcode.ADD, IRDataType.BYTE, reg1=1, immediate = 1),
IRInstruction(Opcode.SUB, IRDataType.BYTE, reg1=2, immediate = 1)
))
irProg.chunks().single().instructions.size shouldBe 2
val opt = IRPeepholeOptimizer(irProg, false)
@@ -186,40 +189,51 @@ class TestIRPeepholeOpt: FunSpec({
}
test("remove useless and/or/xor") {
// Use different registers for each test case to avoid dead store elimination removing them
val irProg = makeIRProgram(listOf(
IRInstruction(Opcode.AND, IRDataType.BYTE, reg1=42, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.WORD, reg1=42, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.LONG, reg1=42, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.BYTE, reg1=42, immediate = 255),
IRInstruction(Opcode.AND, IRDataType.WORD, reg1=42, immediate = 65535),
IRInstruction(Opcode.AND, IRDataType.LONG, reg1=42, immediate = 2147483647),
IRInstruction(Opcode.AND, IRDataType.LONG, reg1=42, immediate = -1),
IRInstruction(Opcode.OR, IRDataType.BYTE, reg1=42, immediate = 0),
IRInstruction(Opcode.OR, IRDataType.BYTE, reg1=42, immediate = 255),
IRInstruction(Opcode.OR, IRDataType.WORD, reg1=42, immediate = 65535),
IRInstruction(Opcode.OR, IRDataType.LONG, reg1=42, immediate = 2147483647),
IRInstruction(Opcode.OR, IRDataType.LONG, reg1=42, immediate = -1),
IRInstruction(Opcode.XOR, IRDataType.BYTE, reg1=42, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.BYTE, reg1=42, immediate = 200),
IRInstruction(Opcode.AND, IRDataType.WORD, reg1=42, immediate = 60000),
IRInstruction(Opcode.OR, IRDataType.BYTE, reg1=42, immediate = 1),
IRInstruction(Opcode.XOR, IRDataType.BYTE, reg1=42, immediate = 1)
IRInstruction(Opcode.AND, IRDataType.BYTE, reg1=1, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.WORD, reg1=2, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.LONG, reg1=3, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.BYTE, reg1=4, immediate = 255),
IRInstruction(Opcode.AND, IRDataType.WORD, reg1=5, immediate = 65535),
IRInstruction(Opcode.AND, IRDataType.LONG, reg1=6, immediate = 2147483647),
IRInstruction(Opcode.AND, IRDataType.LONG, reg1=7, immediate = -1),
IRInstruction(Opcode.OR, IRDataType.BYTE, reg1=8, immediate = 0),
IRInstruction(Opcode.OR, IRDataType.BYTE, reg1=9, immediate = 255),
IRInstruction(Opcode.OR, IRDataType.WORD, reg1=10, immediate = 65535),
IRInstruction(Opcode.OR, IRDataType.LONG, reg1=11, immediate = 2147483647),
IRInstruction(Opcode.OR, IRDataType.LONG, reg1=12, immediate = -1),
IRInstruction(Opcode.XOR, IRDataType.BYTE, reg1=13, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.BYTE, reg1=14, immediate = 200),
IRInstruction(Opcode.AND, IRDataType.WORD, reg1=15, immediate = 60000),
IRInstruction(Opcode.OR, IRDataType.BYTE, reg1=16, immediate = 1),
IRInstruction(Opcode.XOR, IRDataType.BYTE, reg1=17, immediate = 1)
))
irProg.chunks().single().instructions.size shouldBe 17
val opt = IRPeepholeOptimizer(irProg, false)
opt.optimize(true, ErrorReporterForTests())
// After optimization:
// - AND #0 (3x) -> LOAD #0
// - AND #max (2x) -> removed
// - AND #other (3x) -> stays
// - OR #0 (1x) -> removed
// - OR #max (3x) -> LOAD #max
// - XOR #0 (1x) -> removed
// - other (3x) -> stays
// Total: 17 - 5 (removed) = 12, with 6 LOAD instructions
irProg.chunks().single().instructions.size shouldBe 12
irProg.chunks().single().instructions.count { it.opcode == Opcode.LOAD } shouldBe 6
}
test("replace and/or/xor by constant number") {
// Use different registers for each test case to avoid dead store elimination removing them
val irProg = makeIRProgram(listOf(
IRInstruction(Opcode.AND, IRDataType.BYTE, reg1=42, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.WORD, reg1=42, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.LONG, reg1=42, immediate = 0),
IRInstruction(Opcode.OR, IRDataType.BYTE, reg1=42, immediate = 255),
IRInstruction(Opcode.OR, IRDataType.WORD, reg1=42, immediate = 65535),
IRInstruction(Opcode.OR, IRDataType.LONG, reg1=42, immediate = -1)
IRInstruction(Opcode.AND, IRDataType.BYTE, reg1=1, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.WORD, reg1=2, immediate = 0),
IRInstruction(Opcode.AND, IRDataType.LONG, reg1=3, immediate = 0),
IRInstruction(Opcode.OR, IRDataType.BYTE, reg1=4, immediate = 255),
IRInstruction(Opcode.OR, IRDataType.WORD, reg1=5, immediate = 65535),
IRInstruction(Opcode.OR, IRDataType.LONG, reg1=6, immediate = -1)
))
irProg.chunks().single().instructions.size shouldBe 6
val opt = IRPeepholeOptimizer(irProg, false)
@@ -239,4 +253,48 @@ class TestIRPeepholeOpt: FunSpec({
instr[4].immediate shouldBe 65535
instr[5].immediate shouldBe -1
}
test("replace register arithmetic by immediate operations") {
// Test the new optimization: LOAD + SUBR/DIVR/MODR → immediate operation
// Pattern: LOAD r1, #const followed by SUBR r2, r1 → SUB r2, #const
// SUBR optimization
val subrProg = makeIRProgram(listOf(
IRInstruction(Opcode.LOAD, IRDataType.BYTE, reg1=1, immediate = 5),
IRInstruction(Opcode.SUBR, IRDataType.BYTE, reg1=2, reg2=1)
))
subrProg.chunks().single().instructions.size shouldBe 2
val opt1 = IRPeepholeOptimizer(subrProg, false)
opt1.optimize(true, ErrorReporterForTests())
val subInstr = subrProg.chunks().single().instructions
subInstr.size shouldBe 1
subInstr[0].opcode shouldBe Opcode.SUB
subInstr[0].immediate shouldBe 5
// DIVR optimization
val divrProg = makeIRProgram(listOf(
IRInstruction(Opcode.LOAD, IRDataType.WORD, reg1=10, immediate = 100),
IRInstruction(Opcode.DIVR, IRDataType.WORD, reg1=20, reg2=10)
))
divrProg.chunks().single().instructions.size shouldBe 2
val opt2 = IRPeepholeOptimizer(divrProg, false)
opt2.optimize(true, ErrorReporterForTests())
val divInstr = divrProg.chunks().single().instructions
divInstr.size shouldBe 1
divInstr[0].opcode shouldBe Opcode.DIV
divInstr[0].immediate shouldBe 100
// MODR optimization
val modrProg = makeIRProgram(listOf(
IRInstruction(Opcode.LOAD, IRDataType.BYTE, reg1=30, immediate = 7),
IRInstruction(Opcode.MODR, IRDataType.BYTE, reg1=40, reg2=30)
))
modrProg.chunks().single().instructions.size shouldBe 2
val opt3 = IRPeepholeOptimizer(modrProg, false)
opt3.optimize(true, ErrorReporterForTests())
val modInstr = modrProg.chunks().single().instructions
modInstr.size shouldBe 1
modInstr[0].opcode shouldBe Opcode.MOD
modInstr[0].immediate shouldBe 7
}
})
+70
View File
@@ -4,6 +4,7 @@ import io.kotest.assertions.throwables.shouldThrow
import io.kotest.core.spec.style.FunSpec
import io.kotest.engine.spec.tempdir
import io.kotest.matchers.collections.shouldBeIn
import io.kotest.matchers.ints.shouldBeGreaterThan
import io.kotest.matchers.ints.shouldBeLessThanOrEqual
import io.kotest.matchers.shouldBe
import io.kotest.matchers.shouldNotBe
@@ -368,6 +369,75 @@ main {
instructions.last().opcode shouldBe Opcode.RETURN
}
test("if-expression skips redundant CMPI when condition sets flags") {
val src = """
main {
sub start() {
bool flag = true
cx16.r0L = if flag then 1 else 0
}
}"""
val result = compileText(VMTarget(), optimize=false, src, outputDir, writeAssembly=true)!!
val virtfile = result.compilationOptions.outputDir.resolve(result.compilerAst.name + ".p8ir")
val irProgram = IRFileReader().read(virtfile)
val start = irProgram.blocks[0].children[0] as IRSubroutine
val instructions = start.chunks.flatMap { c->c.instructions }
// Find LOADM for 'flag' variable - use contains match since scoped name may vary
val loadmIdx = instructions.indexOfFirst { it.opcode == Opcode.LOADM && it.labelSymbol?.contains("flag") == true }
loadmIdx shouldBeGreaterThan -1
// After LOADM, there should be a BSTEQ/BSTNE branch, not CMPI #0
val nextInstr = instructions[loadmIdx + 1]
nextInstr.opcode shouldBeIn setOf(Opcode.BSTEQ, Opcode.BSTNE)
}
test("if-statement skips redundant CMPI when condition sets flags") {
val src = """
main {
sub start() {
bool flag = true
if flag {
cx16.r0L = 1
} else {
cx16.r0L = 0
}
}
}"""
val result = compileText(VMTarget(), optimize=false, src, outputDir, writeAssembly=true)!!
val virtfile = result.compilationOptions.outputDir.resolve(result.compilerAst.name + ".p8ir")
val irProgram = IRFileReader().read(virtfile)
val start = irProgram.blocks[0].children[0] as IRSubroutine
val instructions = start.chunks.flatMap { c->c.instructions }
// Find LOADM for 'flag' variable - use contains match since scoped name may vary
val loadmIdx = instructions.indexOfFirst { it.opcode == Opcode.LOADM && it.labelSymbol?.contains("flag") == true }
loadmIdx shouldBeGreaterThan -1
// After LOADM, there should be a BSTEQ/BSTNE branch, not CMPI #0
val nextInstr = instructions[loadmIdx + 1]
nextInstr.opcode shouldBeIn setOf(Opcode.BSTEQ, Opcode.BSTNE)
}
test("if-expression keeps CMPI when condition doesn't set flags") {
val src = """
main {
sub start() {
sub getflag() -> bool { return true }
cx16.r0L = if getflag() then 1 else 0
}
}"""
val result = compileText(VMTarget(), optimize=false, src, outputDir, writeAssembly=true)!!
val virtfile = result.compilationOptions.outputDir.resolve(result.compilerAst.name + ".p8ir")
val irProgram = IRFileReader().read(virtfile)
val start = irProgram.blocks[0].children[0] as IRSubroutine
val instructions = start.chunks.flatMap { c->c.instructions }
// Should contain CMPI #0 after function call (since CALL doesn't set flags)
val cmpiInstr = instructions.find { it.opcode == Opcode.CMPI && it.immediate == 0 }
cmpiInstr shouldNotBe null
}
test("repeat counts (const)") {
val src="""
main {