diff --git a/codeGenIntermediate/src/prog8/codegen/intermediate/IRPeepholeOptimizer.kt b/codeGenIntermediate/src/prog8/codegen/intermediate/IRPeepholeOptimizer.kt index bb392df58..10262ef76 100644 --- a/codeGenIntermediate/src/prog8/codegen/intermediate/IRPeepholeOptimizer.kt +++ b/codeGenIntermediate/src/prog8/codegen/intermediate/IRPeepholeOptimizer.kt @@ -55,7 +55,6 @@ class IRPeepholeOptimizer(private val irprog: IRProgram, private val retainSSA: || cleanupPushPop(chunk1, indexedInstructions) || simplifyConstantReturns(chunk1, indexedInstructions) || removeNeedlessLoads(chunk1, indexedInstructions) - || useArrayIndexingInsteadOfAdds(chunk1, indexedInstructions) || loadfieldsAndStorefields(chunk1, indexedInstructions) || removeNops(chunk1, indexedInstructions) // last time, in case one of the optimizers replaced something with a nop } while (changed) @@ -597,60 +596,6 @@ jump p8_label_gen_2 return changed } - private fun useArrayIndexingInsteadOfAdds(chunk: IRCodeChunk, indexedInstructions: List>): Boolean { - var changed = false - indexedInstructions.reversed().forEach { (idx, ins) -> - if (ins.opcode == Opcode.ADD && ins.immediate!=null && idx>0) { - val load = indexedInstructions[idx-1].value - if((load.opcode==Opcode.LOAD) && load.labelSymbol!=null) { - val lastInstruction = indexedInstructions[idx+1].value - if(lastInstruction.opcode==Opcode.LOADI) { - if(lastInstruction.type== IRDataType.FLOAT) { - if(ins.reg1==lastInstruction.reg1!! && load.reg1==lastInstruction.reg1!!) { - val targetFpRegister = lastInstruction.fpReg1!! - val loadm = IRInstruction(Opcode.LOADM, lastInstruction.type, fpReg1 = targetFpRegister, labelSymbol = load.labelSymbol, symbolOffset = ins.immediate) - chunk.instructions[idx-1] = loadm - chunk.instructions.removeAt(idx+1) - chunk.instructions.removeAt(idx) - changed = true - } - } else { - if(ins.reg1==lastInstruction.reg2!! && load.reg1==lastInstruction.reg2!!) { - val targetRegister = lastInstruction.reg1!! 
- val loadm = IRInstruction(Opcode.LOADM, lastInstruction.type, reg1=targetRegister, labelSymbol = load.labelSymbol, symbolOffset = ins.immediate) - chunk.instructions[idx-1] = loadm - chunk.instructions.removeAt(idx+1) - chunk.instructions.removeAt(idx) - changed = true - } - } - } else if(lastInstruction.opcode==Opcode.STOREI) { - // TODO this optimization is never hit right now because of instruction sequence mismatch? (value evaluation is inbetween address and store) - val valueLoad = indexedInstructions[idx-2].value - val targetRegister = lastInstruction.reg1!! - if(lastInstruction.type==IRDataType.FLOAT) { - if(ins.reg1==lastInstruction.reg1!! && load.reg1==lastInstruction.reg1!!) { - TODO("peephole opt STOREI.float ${chunk.sourceLinesPositions}") - } - } else { - if(ins.reg1==lastInstruction.reg2!! && load.reg1==lastInstruction.reg2!!) { - if(valueLoad.opcode==Opcode.LOAD && valueLoad.reg1==targetRegister) { - val storem = IRInstruction(Opcode.STOREM, lastInstruction.type, reg1=valueLoad.reg1, labelSymbol = load.labelSymbol, symbolOffset = ins.immediate) - chunk.instructions[idx-1] = storem - chunk.instructions.removeAt(idx+1) - chunk.instructions.removeAt(idx) - changed = true - } - } - } - } - } - } - } - - return changed - } - private fun loadfieldsAndStorefields(chunk: IRCodeChunk, indexedInstructions: List>): Boolean { var changed = false indexedInstructions.reversed().forEach { (idx, ins) -> diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 825a21515..5c18cb001 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -72,8 +72,6 @@ IR/VM - if instruction has both integer and float registers, the sequence of the registers is sometimes weird in the .p8ir file (float regs always at the end even when otherwise the target -integer- register is the first one in the list, for example.) 
- rollback this exception?: "LOADI has an exception to allow reg1 and reg2 to be the same" + actual exception check in the check "reg1 must not be same as reg2" - maybe change all branch instructions to have 2 exits (label if branch condition ture, and label if false) instead of 1, and get rid of the implicit "next code chunk" link between chunks. -- in funcPoke() and funcPokemon(): switch address and value evaluation, to allow easier STOREI peephole optimization later -- fix IR: TODO("peephole opt STOREI.float - if float<0 / if word<0 uses sgn or load, but still use a bgt etc instruction after that with a #0 operand even though the sgn and load instructions sets the status bits already, so just use bstneg etc - make multiple classes of registers and maybe also categorize by life time? , to prepare for better register allocation in the future SYSCALL_ARGS, // Reserved for syscall arguments (r99000-99099, r99100-99199) @@ -116,6 +114,8 @@ Libraries Optimizations ------------- +- optimize expression: uwordvar + 256 and variants. (now uses 2 adds, can use 1 inc?) +- optimize successive additions/subtractions and multiplications (with const) into just a single add/sub or mul - more optimized operator handling of different types, for example uword a ^ byte b now does a type cast of b to word first - optimize longEqualsValue() for long const and variable operands to not assign needlessly to R14-R15. - optimize optimizedBitwiseExpr() for long const and variable operands to not assign needlessly to R14-R15. 
diff --git a/examples/test.p8 b/examples/test.p8 index ebac75f70..c7665b7d1 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -10,28 +10,34 @@ main { ubyte @shared @nozp ub,ub2,ub3,ub4 = 111 - txt.print_ub(peek(&ub2 + 1)) + txt.print_ub(peek(&ub2 + 2)) txt.spc() - poke(&ub2+1, 222) - txt.print_ub(peek(&ub2 + 1)) + cx16.r0++ + poke(&ub2+2, 222) + cx16.r0-- + txt.print_ub(peek(&ub2 + 2)) txt.spc() - txt.print_ub(pokemon(&ub2 + 1, 99)) + txt.print_ub(pokemon(&ub2 + 2, 99)) txt.spc() - txt.print_ub(peek(&ub2 + 1)) + txt.print_ub(peek(&ub2 + 2)) txt.nl() txt.nl() txt.print_uw(peekw(&uw2 + 4)) txt.spc() + cx16.r0++ pokew(&uw2+ 4, 9999) + cx16.r0-- txt.print_uw(peekw(&uw2 + 4)) txt.nl() txt.nl() txt.print_f(peekf(&fv + sizeof(float))) txt.spc() + cx16.r0++ pokef(&fv+ sizeof(float), 2.22222) + cx16.r0-- txt.print_f(peekf(&fv + sizeof(float))) txt.nl()