vm: some more peephole optimizations

This commit is contained in:
Irmen de Jong 2022-07-12 18:24:11 +02:00
parent edf12bec71
commit feb5c8be95
8 changed files with 133 additions and 33 deletions

View File

@ -11,7 +11,7 @@ sealed class PtExpression(val type: DataType, position: Position) : PtNode(posit
init {
if(type==DataType.BOOL)
throw java.lang.IllegalArgumentException("bool should have become ubyte @$position")
throw IllegalArgumentException("bool should have become ubyte @$position")
}
override fun printProperties() {
@ -134,7 +134,7 @@ class PtNumber(type: DataType, val number: Double, position: Position) : PtExpre
init {
if(type==DataType.BOOL)
throw java.lang.IllegalArgumentException("bool should have become ubyte @$position")
throw IllegalArgumentException("bool should have become ubyte @$position")
if(type!=DataType.FLOAT) {
val rounded = round(number)
if (rounded != number)

View File

@ -210,7 +210,7 @@ internal class BuiltinFuncGen(private val codeGen: CodeGen, private val exprGen:
DataType.ARRAY_UW -> Syscall.SORT_UWORD
DataType.ARRAY_W -> Syscall.SORT_WORD
DataType.STR -> Syscall.SORT_UBYTE
DataType.ARRAY_F -> throw java.lang.IllegalArgumentException("sorting a floating point array is not supported")
DataType.ARRAY_F -> throw IllegalArgumentException("sorting a floating point array is not supported")
else -> throw IllegalArgumentException("weird type to sort")
}
val code = VmCodeChunk()

View File

@ -2,6 +2,7 @@ package prog8.codegen.virtual
import prog8.vm.Instruction
import prog8.vm.Opcode
import prog8.vm.VmDataType
internal class VmOptimizerException(msg: String): Exception(msg)
@ -14,15 +15,13 @@ class VmPeepholeOptimizer(private val vmprog: AssemblyProgram, private val alloc
.filter { it.value is VmCodeInstruction }
.map { IndexedValue(it.index, (it.value as VmCodeInstruction).ins) }
val changed = removeNops(block, indexedInstructions)
|| removeDoubleLoadsAndStores(block, indexedInstructions)
// || removeUselessArithmetic(block, indexedInstructions) // TODO enable
|| removeDoubleLoadsAndStores(block, indexedInstructions) // TODO not yet implemented
|| removeUselessArithmetic(block, indexedInstructions)
|| removeWeirdBranches(block, indexedInstructions)
|| removeDoubleSecClc(block, indexedInstructions)
|| cleanupPushPop(block, indexedInstructions)
// TODO other optimizations:
// other useless logical?
// conditional set instructions with reg1==reg2
// move complex optimizations such as unused registers, ...
// more complex optimizations such as unused registers
} while(changed)
}
}
@ -79,7 +78,6 @@ class VmPeepholeOptimizer(private val vmprog: AssemblyProgram, private val alloc
private fun removeWeirdBranches(block: VmCodeChunk, indexedInstructions: List<IndexedValue<Instruction>>): Boolean {
// jump/branch to label immediately below
// branch instructions with reg1==reg2
var changed = false
indexedInstructions.reversed().forEach { (idx, ins) ->
if(ins.opcode==Opcode.JUMP && ins.labelSymbol!=null) {
@ -92,41 +90,60 @@ class VmPeepholeOptimizer(private val vmprog: AssemblyProgram, private val alloc
}
}
}
/*
beq reg1, reg2, location - jump to location in program given by location, if reg1 == reg2
bne reg1, reg2, location - jump to location in program given by location, if reg1 != reg2
blt reg1, reg2, location - jump to location in program given by location, if reg1 < reg2 (unsigned)
blts reg1, reg2, location - jump to location in program given by location, if reg1 < reg2 (signed)
ble reg1, reg2, location - jump to location in program given by location, if reg1 <= reg2 (unsigned)
bles reg1, reg2, location - jump to location in program given by location, if reg1 <= reg2 (signed)
bgt reg1, reg2, location - jump to location in program given by location, if reg1 > reg2 (unsigned)
bgts reg1, reg2, location - jump to location in program given by location, if reg1 > reg2 (signed)
bge reg1, reg2, location - jump to location in program given by location, if reg1 >= reg2 (unsigned)
bges reg1, reg2, location - jump to location in program given by location, if reg1 >= reg2 (signed)
*/
}
return changed
}
private fun removeUselessArithmetic(block: VmCodeChunk, indexedInstructions: List<IndexedValue<Instruction>>): Boolean {
// TODO this is hard to solve for the non-immediate instructions atm because the values are loaded into registers first
// note: this is hard to solve for the non-immediate instructions atm because the values are loaded into registers first
var changed = false
indexedInstructions.reversed().forEach { (idx, ins) ->
when (ins.opcode) {
Opcode.DIV, Opcode.DIVS, Opcode.MUL, Opcode.MOD -> {
TODO("remove div/mul by 1")
if (ins.value == 1) {
block.lines.removeAt(idx)
changed = true
}
}
Opcode.ADD, Opcode.SUB -> {
TODO("remove add/sub by 1 -> inc/dec, by 0->remove")
if (ins.value == 1) {
block.lines[idx] = VmCodeInstruction(
if (ins.opcode == Opcode.ADD) Opcode.INC else Opcode.DEC,
ins.type,
ins.reg1
)
changed = true
} else if (ins.value == 0) {
block.lines.removeAt(idx)
changed = true
}
}
Opcode.AND -> {
TODO("and 0 -> 0, and ffff -> remove")
if (ins.value == 0) {
block.lines[idx] = VmCodeInstruction(Opcode.LOAD, ins.type, reg1 = ins.reg1, value = 0)
changed = true
} else if (ins.value == 255 && ins.type == VmDataType.BYTE) {
block.lines.removeAt(idx)
changed = true
} else if (ins.value == 65535 && ins.type == VmDataType.WORD) {
block.lines.removeAt(idx)
changed = true
}
}
Opcode.OR -> {
TODO("or 0 -> remove, of ffff -> ffff")
if (ins.value == 0) {
block.lines.removeAt(idx)
changed = true
} else if ((ins.value == 255 && ins.type == VmDataType.BYTE) || (ins.value == 65535 && ins.type == VmDataType.WORD)) {
block.lines[idx] = VmCodeInstruction(Opcode.LOAD, ins.type, reg1 = ins.reg1, value = ins.value)
changed = true
}
}
Opcode.XOR -> {
TODO("xor 0 -> remove")
if (ins.value == 0) {
block.lines.removeAt(idx)
changed = true
}
}
else -> {}
}

View File

@ -1 +1 @@
8.3-dev
8.3

View File

@ -93,4 +93,78 @@ class TestVmPeepholeOpt: FunSpec({
(lines[0] as VmCodeInstruction).ins.reg1 shouldBe 222
(lines[0] as VmCodeInstruction).ins.reg2 shouldBe 99
}
// Checks that arithmetic with a neutral immediate operand is dropped by the peephole optimizer:
// the DIV/DIVS/MUL/MOD instructions with value 1 and the ADD/SUB instructions with value 0
// are expected to disappear, while the four instructions with value 2 must survive.
// NOTE(review): MOD by 1 is removed together with DIV/MUL by 1, but x mod 1 is 0 rather than x;
// confirm that treating it as a no-op matches the VM's MOD semantics.
test("remove useless div/mul, add/sub") {
val(asm, allocations) = makeVmProgram(listOf(
VmCodeInstruction(Opcode.DIV, VmDataType.BYTE, reg1=42, value = 1),
VmCodeInstruction(Opcode.DIVS, VmDataType.BYTE, reg1=42, value = 1),
VmCodeInstruction(Opcode.MUL, VmDataType.BYTE, reg1=42, value = 1),
VmCodeInstruction(Opcode.MOD, VmDataType.BYTE, reg1=42, value = 1),
VmCodeInstruction(Opcode.DIV, VmDataType.BYTE, reg1=42, value = 2),
VmCodeInstruction(Opcode.DIVS, VmDataType.BYTE, reg1=42, value = 2),
VmCodeInstruction(Opcode.MUL, VmDataType.BYTE, reg1=42, value = 2),
VmCodeInstruction(Opcode.MOD, VmDataType.BYTE, reg1=42, value = 2),
VmCodeInstruction(Opcode.ADD, VmDataType.BYTE, reg1=42, value = 0),
VmCodeInstruction(Opcode.SUB, VmDataType.BYTE, reg1=42, value = 0)
))
// sanity check: all ten instructions are present before optimizing
asm.lines().size shouldBe 10
val opt = VmPeepholeOptimizer(asm, allocations)
opt.optimize()
val lines = asm.lines()
// 10 instructions minus the 6 with a neutral operand leaves the 4 with value 2
lines.size shouldBe 4
}
// Checks that ADD with value 1 is rewritten to INC and SUB with value 1 to DEC.
// The instructions are replaced in place, so the line count must not change.
test("replace add/sub 1 by inc/dec") {
val(asm, allocations) = makeVmProgram(listOf(
VmCodeInstruction(Opcode.ADD, VmDataType.BYTE, reg1=42, value = 1),
VmCodeInstruction(Opcode.SUB, VmDataType.BYTE, reg1=42, value = 1)
))
asm.lines().size shouldBe 2
val opt = VmPeepholeOptimizer(asm, allocations)
opt.optimize()
val lines = asm.lines()
// same number of lines: each instruction was substituted, not removed
lines.size shouldBe 2
(lines[0] as VmCodeInstruction).ins.opcode shouldBe Opcode.INC
(lines[1] as VmCodeInstruction).ins.opcode shouldBe Opcode.DEC
}
// Checks that bitwise operations which cannot change the register are removed:
// AND with an all-ones mask for the instruction's type (255 for BYTE, 65535 for WORD),
// OR with 0 and XOR with 0. The remaining four instructions use other operands and must stay.
test("remove useless and/or/xor") {
val(asm, allocations) = makeVmProgram(listOf(
VmCodeInstruction(Opcode.AND, VmDataType.BYTE, reg1=42, value = 255),
VmCodeInstruction(Opcode.AND, VmDataType.WORD, reg1=42, value = 65535),
VmCodeInstruction(Opcode.OR, VmDataType.BYTE, reg1=42, value = 0),
VmCodeInstruction(Opcode.XOR, VmDataType.BYTE, reg1=42, value = 0),
VmCodeInstruction(Opcode.AND, VmDataType.BYTE, reg1=42, value = 200),
VmCodeInstruction(Opcode.AND, VmDataType.WORD, reg1=42, value = 60000),
VmCodeInstruction(Opcode.OR, VmDataType.BYTE, reg1=42, value = 1),
VmCodeInstruction(Opcode.XOR, VmDataType.BYTE, reg1=42, value = 1)
))
asm.lines().size shouldBe 8
val opt = VmPeepholeOptimizer(asm, allocations)
opt.optimize()
val lines = asm.lines()
// the first four instructions are expected to be removed, the last four kept
lines.size shouldBe 4
}
// Checks that bitwise operations with a fixed outcome are replaced by a LOAD of that constant:
// AND with 0 becomes LOAD 0, and OR with the all-ones value for the instruction's type
// (255 for BYTE, 65535 for WORD) becomes a LOAD of that same value.
test("replace and/or/xor by constant number") {
val(asm, allocations) = makeVmProgram(listOf(
VmCodeInstruction(Opcode.AND, VmDataType.BYTE, reg1=42, value = 0),
VmCodeInstruction(Opcode.AND, VmDataType.WORD, reg1=42, value = 0),
VmCodeInstruction(Opcode.OR, VmDataType.BYTE, reg1=42, value = 255),
VmCodeInstruction(Opcode.OR, VmDataType.WORD, reg1=42, value = 65535)
))
asm.lines().size shouldBe 4
val opt = VmPeepholeOptimizer(asm, allocations)
opt.optimize()
val lines = asm.lines()
// instructions are substituted in place, so the count stays the same
lines.size shouldBe 4
(lines[0] as VmCodeInstruction).ins.opcode shouldBe Opcode.LOAD
(lines[1] as VmCodeInstruction).ins.opcode shouldBe Opcode.LOAD
(lines[2] as VmCodeInstruction).ins.opcode shouldBe Opcode.LOAD
(lines[3] as VmCodeInstruction).ins.opcode shouldBe Opcode.LOAD
// the loaded constants: 0 for the AND cases, the original operand for the OR cases
(lines[0] as VmCodeInstruction).ins.value shouldBe 0
(lines[1] as VmCodeInstruction).ins.value shouldBe 0
(lines[2] as VmCodeInstruction).ins.value shouldBe 255
(lines[3] as VmCodeInstruction).ins.value shouldBe 65535
}
})

View File

@ -3,11 +3,9 @@ TODO
For next release
^^^^^^^^^^^^^^^^
- bool @shared bb = bb2 and true should not add typecast around bb2
...
Need help with
^^^^^^^^^^^^^^
- c128 target: various machine specific things (free zp locations, how banking works, getting the floating point routines working, ...)
@ -19,7 +17,6 @@ Future Things and Ideas
^^^^^^^^^^^^^^^^^^^^^^^
Compiler:
- add some more optimizations in vmPeepholeOptimizer
- on non-cx16 targets: have an option that if zeropage=FULL, moves the cx16 virtual registers to ZP (same location as on x16?)
needs the dynamic base address for the symbols in syslib.p8
also needs a trick to allocate them in ZP like Cx16Zeropage already does
@ -31,6 +28,7 @@ Compiler:
- vm: don't store symbol names in instructions to make optimizing the IR easier? but what about jumps to labels. And it's no longer readable by humans.
- vm: how to remove all unused subroutines? (in the 6502 assembly codegen, we let 64tass solve this for us)
- vm: rather than being able to jump to any 'address' (IPTR), use 'blocks' that have entry and exit points -> even better dead code elimination possible too
- vm: add more optimizations in VmPeepholeOptimizer
- move the vm unit tests to codeGenVirtual module and remove virtualmachine dependency in the compiler module
- when the vm is stable and *if* its language can get promoted to prog8 IL, the variable allocation should be changed.
It's now done before the vm code generation, but the IL should probably not depend on the allocations already performed.

View File

@ -407,7 +407,18 @@ data class Instruction(
if(format.value && (value==null && labelSymbol==null))
throw IllegalArgumentException("$opcode: missing a value or labelsymbol")
if (fpReg1 != null || fpReg2 != null)
throw java.lang.IllegalArgumentException("$opcode: integer point instruction can't use floating point registers")
throw IllegalArgumentException("$opcode: integer point instruction can't use floating point registers")
}
if(opcode in setOf(Opcode.BEQ, Opcode.BNE, Opcode.BLT, Opcode.BLTS,
Opcode.BGT, Opcode.BGTS, Opcode.BLE, Opcode.BLES,
Opcode.BGE, Opcode.BGES,
Opcode.SEQ, Opcode.SNE, Opcode.SLT, Opcode.SLTS,
Opcode.SGT, Opcode.SGTS, Opcode.SLE, Opcode.SLES,
Opcode.SGE, Opcode.SGES)) {
if((type==VmDataType.FLOAT && fpReg1==fpReg2) || reg1==reg2) {
throw IllegalArgumentException("$opcode: reg1 and reg2 should be different")
}
}
}

View File

@ -231,7 +231,7 @@ class VirtualMachine(val memory: Memory, program: List<Instruction>) {
when(type) {
VmDataType.BYTE -> registers.setUB(reg, value.toUByte())
VmDataType.WORD -> registers.setUW(reg, value.toUShort())
VmDataType.FLOAT -> throw java.lang.IllegalArgumentException("attempt to set integer result register but float type")
VmDataType.FLOAT -> throw IllegalArgumentException("attempt to set integer result register but float type")
}
}