IR: remove broken loadi/storei optimization

This commit is contained in:
Irmen de Jong
2026-01-10 21:43:58 +01:00
parent 34b228cfca
commit 5c7ac0efb7
3 changed files with 13 additions and 62 deletions

View File

@@ -55,7 +55,6 @@ class IRPeepholeOptimizer(private val irprog: IRProgram, private val retainSSA:
|| cleanupPushPop(chunk1, indexedInstructions)
|| simplifyConstantReturns(chunk1, indexedInstructions)
|| removeNeedlessLoads(chunk1, indexedInstructions)
|| useArrayIndexingInsteadOfAdds(chunk1, indexedInstructions)
|| loadfieldsAndStorefields(chunk1, indexedInstructions)
|| removeNops(chunk1, indexedInstructions) // last time, in case one of the optimizers replaced something with a nop
} while (changed)
@@ -597,60 +596,6 @@ jump p8_label_gen_2
return changed
}
/**
 * Peephole optimization that folds an explicit address calculation followed by an indirect access
 * into a single memory access that carries the offset itself.
 *
 * The pattern looked for is (instruction positions relative to the ADD at `idx`):
 *  - `idx-1`: LOAD of a label symbol into an address register
 *  - `idx`  : ADD of an immediate to that same register
 *  - `idx+1`: LOADI or STOREI that uses that same register as the address
 *
 * A matching LOADI sequence is replaced by one LOADM with `labelSymbol` + `symbolOffset`.
 * A matching integer STOREI sequence is replaced by one STOREM, but only when the instruction at `idx-2`
 * is the LOAD of the value register being stored.
 * Float STOREI sequences are recognised but left untouched (that rewrite is not implemented).
 *
 * NOTE(review): after the rewrite the address register is no longer assigned at all; this assumes nothing
 * further on reads that register - confirm, because this optimization has been reported as broken.
 *
 * @param chunk the code chunk whose instruction list is modified in place
 * @param indexedInstructions snapshot of the chunk's instructions paired with their index in the chunk
 * @return true if at least one sequence was rewritten
 */
private fun useArrayIndexingInsteadOfAdds(chunk: IRCodeChunk, indexedInstructions: List<IndexedValue<IRInstruction>>): Boolean {
    var changed = false

    // Puts [replacement] where the address LOAD was (idx-1) and drops the ADD (idx) and the LOADI/STOREI (idx+1).
    // The higher index is removed first so that the lower one is still valid.
    fun collapse(idx: Int, replacement: IRInstruction) {
        chunk.instructions[idx-1] = replacement
        chunk.instructions.removeAt(idx+1)
        chunk.instructions.removeAt(idx)
        changed = true
    }

    // Walk backwards: removals then only affect positions that have already been visited.
    indexedInstructions.reversed().forEach { (idx, ins) ->
        if (ins.opcode != Opcode.ADD || ins.immediate == null)
            return@forEach
        // The pattern needs an instruction before AND after the ADD; without the upper bound check
        // an ADD at the end of the chunk would make the idx+1 lookup throw.
        if (idx < 1 || idx >= indexedInstructions.lastIndex)
            return@forEach

        val load = indexedInstructions[idx-1].value
        if (load.opcode != Opcode.LOAD || load.labelSymbol == null)
            return@forEach

        val lastInstruction = indexedInstructions[idx+1].value
        val isFloat = lastInstruction.type == IRDataType.FLOAT

        when (lastInstruction.opcode) {
            Opcode.LOADI -> {
                // float variant: address is in reg1, value goes to fpReg1
                // integer variant: address is in reg2, value goes to reg1
                val addressRegister = (if (isFloat) lastInstruction.reg1 else lastInstruction.reg2) ?: return@forEach
                if (ins.reg1 != addressRegister || load.reg1 != addressRegister)
                    return@forEach
                val loadm = if (isFloat) {
                    val targetFpRegister = lastInstruction.fpReg1 ?: return@forEach
                    IRInstruction(Opcode.LOADM, lastInstruction.type, fpReg1 = targetFpRegister, labelSymbol = load.labelSymbol, symbolOffset = ins.immediate)
                } else {
                    val targetRegister = lastInstruction.reg1 ?: return@forEach
                    IRInstruction(Opcode.LOADM, lastInstruction.type, reg1 = targetRegister, labelSymbol = load.labelSymbol, symbolOffset = ins.immediate)
                }
                collapse(idx, loadm)
            }
            Opcode.STOREI -> {
                // TODO this optimization is never hit right now because of instruction sequence mismatch? (value evaluation is in between address and store)
                if (isFloat) {
                    // Rewriting float STOREI is not implemented; leave the sequence as it is rather than
                    // aborting the whole compilation with a NotImplementedError.
                    return@forEach
                }
                // The value LOAD is expected right before the address LOAD, so two preceding instructions are needed.
                if (idx < 2)
                    return@forEach
                val addressRegister = lastInstruction.reg2 ?: return@forEach
                val targetRegister = lastInstruction.reg1 ?: return@forEach
                if (ins.reg1 != addressRegister || load.reg1 != addressRegister)
                    return@forEach
                val valueLoad = indexedInstructions[idx-2].value
                if (valueLoad.opcode == Opcode.LOAD && valueLoad.reg1 == targetRegister) {
                    val storem = IRInstruction(Opcode.STOREM, lastInstruction.type, reg1 = valueLoad.reg1, labelSymbol = load.labelSymbol, symbolOffset = ins.immediate)
                    collapse(idx, storem)
                }
            }
            else -> {
                // not an indirect load/store: nothing to fold
            }
        }
    }
    return changed
}
private fun loadfieldsAndStorefields(chunk: IRCodeChunk, indexedInstructions: List<IndexedValue<IRInstruction>>): Boolean {
var changed = false
indexedInstructions.reversed().forEach { (idx, ins) ->

View File

@@ -72,8 +72,6 @@ IR/VM
- if instruction has both integer and float registers, the sequence of the registers is sometimes weird in the .p8ir file (float regs always at the end even when otherwise the target -integer- register is the first one in the list, for example.)
- rollback this exception?: "LOADI has an exception to allow reg1 and reg2 to be the same" + actual exception check in the check "reg1 must not be same as reg2"
- maybe change all branch instructions to have 2 exits (label if branch condition true, and label if false) instead of 1, and get rid of the implicit "next code chunk" link between chunks.
- in funcPoke() and funcPokemon(): switch address and value evaluation, to allow easier STOREI peephole optimization later
- fix IR: TODO("peephole opt STOREI.float
- if float<0 / if word<0 uses sgn or load, but still uses a bgt etc. instruction after that with a #0 operand, even though the sgn and load instructions already set the status bits; so just use bstneg etc.
- make multiple classes of registers and maybe also categorize by life time? , to prepare for better register allocation in the future
SYSCALL_ARGS, // Reserved for syscall arguments (r99000-99099, r99100-99199)
@@ -116,6 +114,8 @@ Libraries
Optimizations
-------------
- optimize expression: uwordvar + 256 and variants. (now uses 2 adds, can use 1 inc?)
- optimize successive additions/subtractions and multiplications (with const) into just a single add/sub, mul
- more optimized operator handling of different types, for example uword a ^ byte b now does a type cast of b to word first
- optimize longEqualsValue() for long const and variable operands to not assign needlessly to R14-R15.
- optimize optimizedBitwiseExpr() for long const and variable operands to not assign needlessly to R14-R15.

View File

@@ -10,28 +10,34 @@ main {
ubyte @shared @nozp ub,ub2,ub3,ub4 = 111
txt.print_ub(peek(&ub2 + 1))
txt.print_ub(peek(&ub2 + 2))
txt.spc()
poke(&ub2+1, 222)
txt.print_ub(peek(&ub2 + 1))
cx16.r0++
poke(&ub2+2, 222)
cx16.r0--
txt.print_ub(peek(&ub2 + 2))
txt.spc()
txt.print_ub(pokemon(&ub2 + 1, 99))
txt.print_ub(pokemon(&ub2 + 2, 99))
txt.spc()
txt.print_ub(peek(&ub2 + 1))
txt.print_ub(peek(&ub2 + 2))
txt.nl()
txt.nl()
txt.print_uw(peekw(&uw2 + 4))
txt.spc()
cx16.r0++
pokew(&uw2+ 4, 9999)
cx16.r0--
txt.print_uw(peekw(&uw2 + 4))
txt.nl()
txt.nl()
txt.print_f(peekf(&fv + sizeof(float)))
txt.spc()
cx16.r0++
pokef(&fv+ sizeof(float), 2.22222)
cx16.r0--
txt.print_f(peekf(&fv + sizeof(float)))
txt.nl()