From 3681d6ee1cf2e9c33662e934171e7216d606ca27 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Sun, 21 Jul 2024 20:42:48 +0200 Subject: [PATCH] optimize division by powers of 2 better (into bit shifts) --- codeCore/src/prog8/code/core/Conversions.kt | 2 +- .../cpu6502/assignment/AssignmentAsmGen.kt | 6 +- .../assignment/AugmentableAssignmentAsmGen.kt | 62 +++++------ .../prog8/codegen/intermediate/IRCodeGen.kt | 104 +++++++++++------- .../prog8/optimizer/ExpressionSimplifier.kt | 11 +- docs/source/todo.rst | 6 +- examples/test.p8 | 2 +- 7 files changed, 104 insertions(+), 89 deletions(-) diff --git a/codeCore/src/prog8/code/core/Conversions.kt b/codeCore/src/prog8/code/core/Conversions.kt index 995fe2edf..9befdf9e1 100644 --- a/codeCore/src/prog8/code/core/Conversions.kt +++ b/codeCore/src/prog8/code/core/Conversions.kt @@ -3,7 +3,7 @@ package prog8.code.core import kotlin.math.abs import kotlin.math.pow -val powersOfTwoFloat = (1..16).map { (2.0).pow(it) }.toTypedArray() +val powersOfTwoFloat = (0..16).map { (2.0).pow(it) }.toTypedArray() val negativePowersOfTwoFloat = powersOfTwoFloat.map { -it }.toTypedArray() val powersOfTwoInt = (0..16).map { 2.0.pow(it).toInt() }.toTypedArray() diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt index cd60c7b13..8d14d0f0b 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt @@ -933,11 +933,7 @@ internal class AssignmentAsmGen(private val program: PtProgram, } private fun optimizedDivideExpr(expr: PtBinaryExpression, target: AsmAssignTarget): Boolean { - val constDivisor = expr.right.asConstInteger() - if(constDivisor in powersOfTwoInt) { - println("TODO optimize: divide ${expr.type} by power-of-2 ${constDivisor} at ${expr.position}") // TODO - } - + // replacing division by shifting is done in an optimizer step. when(expr.type) { DataType.UBYTE -> { assignExpressionToRegister(expr.left, RegisterOrPair.A, false) diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt index a79af94f8..4f739f438 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt @@ -1477,9 +1477,7 @@ $shortcutLabel:""") asmgen.out(" lda $name | ldy #$value | jsr math.multiply_bytes | sta $name") } "/" -> { - if(value in powersOfTwoInt) { - println("TODO optimize: (u)byte division by power-of-2 $value") // TODO - } + // replacing division by shifting is done in an optimizer step. if (dt == DataType.UBYTE) asmgen.out(" lda $name | ldy #$value | jsr math.divmod_ub_asm | sty $name") else @@ -1828,36 +1826,36 @@ $shortcutLabel:""") } } "/" -> { - if(value==0) + // replacing division by shifting is done in an optimizer step. + if(value==0) { throw AssemblyError("division by zero") - else if (value in powersOfTwoInt) { - println("TODO optimize: (u)word division by power-of-2 $value") // TODO - } - if(dt==DataType.WORD) { - asmgen.out(""" - lda $lsb - ldy $msb - sta P8ZP_SCRATCH_W1 - sty P8ZP_SCRATCH_W1+1 - lda #<$value - ldy #>$value - jsr math.divmod_w_asm - sta $lsb - sty $msb - """) - } - else { - asmgen.out(""" - lda $lsb - ldy $msb - sta P8ZP_SCRATCH_W1 - sty P8ZP_SCRATCH_W1+1 - lda #<$value - ldy #>$value - jsr math.divmod_uw_asm - sta $lsb - sty $msb - """) + } else { + if(dt==DataType.WORD) { + asmgen.out(""" + lda $lsb + ldy $msb + sta P8ZP_SCRATCH_W1 + sty P8ZP_SCRATCH_W1+1 + lda #<$value + ldy #>$value + jsr math.divmod_w_asm + sta $lsb + sty $msb + """) + } + else { + asmgen.out(""" + lda $lsb + ldy $msb + sta P8ZP_SCRATCH_W1 + sty P8ZP_SCRATCH_W1+1 + lda #<$value + ldy #>$value + jsr math.divmod_uw_asm + sta $lsb + sty $msb + """) + } } } "%" -> { diff --git a/codeGenIntermediate/src/prog8/codegen/intermediate/IRCodeGen.kt b/codeGenIntermediate/src/prog8/codegen/intermediate/IRCodeGen.kt index a8bef8875..0198b12e1 100644 --- a/codeGenIntermediate/src/prog8/codegen/intermediate/IRCodeGen.kt +++ b/codeGenIntermediate/src/prog8/codegen/intermediate/IRCodeGen.kt @@ -783,19 +783,31 @@ class IRCodeGen( if(factor==1) return code val pow2 = powersOfTwoInt.indexOf(factor) - // TODO also try to optimize for signed division by powers of 2 - if(pow2==1 && !signed) { - code += IRInstruction(Opcode.LSR, dt, reg1=reg) // simple single bit shift - } - else if(pow2>=1 &&!signed) { - // just shift multiple bits (unsigned) - val pow2reg = registers.nextFree() - code += IRInstruction(Opcode.LOAD, dt, reg1=pow2reg, immediate = pow2) - code += if(signed) - IRInstruction(Opcode.ASRN, dt, reg1=reg, reg2=pow2reg) - else - IRInstruction(Opcode.LSRN, dt, reg1=reg, reg2=pow2reg) + if(pow2>=0) { + if(signed) { + if(pow2==1) { + // simple single bit shift (signed) + code += IRInstruction(Opcode.ASR, dt, reg1=reg) + } else { + // just shift multiple bits (signed) + val pow2reg = registers.nextFree() + code += IRInstruction(Opcode.LOAD, dt, reg1=pow2reg, immediate = pow2) + code += IRInstruction(Opcode.ASRN, dt, reg1=reg, reg2=pow2reg) + } + } else { + if(pow2==1) { + // simple single bit shift (unsigned) + code += IRInstruction(Opcode.LSR, dt, reg1=reg) + } else { + // just shift multiple bits (unsigned) + val pow2reg = registers.nextFree() + code += IRInstruction(Opcode.LOAD, dt, reg1 = pow2reg, immediate = pow2) + code += IRInstruction(Opcode.LSRN, dt, reg1 = reg, reg2 = pow2reg) + } + } + return code } else { + // regular div code += if (factor == 0) { IRInstruction(Opcode.LOAD, dt, reg1=reg, immediate = 0xffff) } else { @@ -804,8 +816,8 @@ class IRCodeGen( else IRInstruction(Opcode.DIV, dt, reg1=reg, immediate = factor) } + return code } - return code } internal fun divideByConstInplace(dt: IRDataType, knownAddress: Int?, symbol: String?, factor: Int, signed: Boolean): IRCodeChunk { @@ -813,31 +825,47 @@ class IRCodeGen( if(factor==1) return code val pow2 = powersOfTwoInt.indexOf(factor) - // TODO also try to optimize for signed division by powers of 2 - if(pow2==1 && !signed) { - // just simple bit shift - code += if(knownAddress!=null) - IRInstruction(Opcode.LSRM, dt, address = knownAddress) - else - IRInstruction(Opcode.LSRM, dt, labelSymbol = symbol) + if(pow2>=0) { + // can do bit shift instead of division + if(signed) { + if(pow2==1) { + // just simple bit shift (signed) + code += if (knownAddress != null) + IRInstruction(Opcode.ASRM, dt, address = knownAddress) + else + IRInstruction(Opcode.ASRM, dt, labelSymbol = symbol) + } else { + // just shift multiple bits (signed) + val pow2reg = registers.nextFree() + code += IRInstruction(Opcode.LOAD, dt, reg1 = pow2reg, immediate = pow2) + code += if (knownAddress != null) + IRInstruction(Opcode.ASRNM, dt, reg1 = pow2reg, address = knownAddress) + else + IRInstruction(Opcode.ASRNM, dt, reg1 = pow2reg, labelSymbol = symbol) + } + } else { + if(pow2==1) { + // just simple bit shift (unsigned) + code += if(knownAddress!=null) + IRInstruction(Opcode.LSRM, dt, address = knownAddress) + else + IRInstruction(Opcode.LSRM, dt, labelSymbol = symbol) + } + else { + // just shift multiple bits (unsigned) + val pow2reg = registers.nextFree() + code += IRInstruction(Opcode.LOAD, dt, reg1=pow2reg, immediate = pow2) + code += if(knownAddress!=null) + IRInstruction(Opcode.LSRNM, dt, reg1 = pow2reg, address = knownAddress) + else + IRInstruction(Opcode.LSRNM, dt, reg1 = pow2reg, labelSymbol = symbol) + } + } + return code } - else if(pow2>=1 && !signed) { - // just shift multiple bits (unsigned) - val pow2reg = registers.nextFree() - code += IRInstruction(Opcode.LOAD, dt, reg1=pow2reg, immediate = pow2) - code += if(signed) { - if(knownAddress!=null) - IRInstruction(Opcode.ASRNM, dt, reg1 = pow2reg, address = knownAddress) - else - IRInstruction(Opcode.ASRNM, dt, reg1 = pow2reg, labelSymbol = symbol) - } - else { - if(knownAddress!=null) - IRInstruction(Opcode.LSRNM, dt, reg1 = pow2reg, address = knownAddress) - else - IRInstruction(Opcode.LSRNM, dt, reg1 = pow2reg, labelSymbol = symbol) - } - } else { + else + { + // regular div if (factor == 0) { val reg = registers.nextFree() code += IRInstruction(Opcode.LOAD, dt, reg1=reg, immediate = 0xffff) @@ -862,8 +890,8 @@ class IRCodeGen( IRInstruction(Opcode.DIVM, dt, reg1 = factorReg, labelSymbol = symbol) } } + return code } - return code } private fun translate(ifElse: PtIfElse): IRCodeChunks { diff --git a/codeOptimizers/src/prog8/optimizer/ExpressionSimplifier.kt b/codeOptimizers/src/prog8/optimizer/ExpressionSimplifier.kt index ef345de54..430f2a329 100644 --- a/codeOptimizers/src/prog8/optimizer/ExpressionSimplifier.kt +++ b/codeOptimizers/src/prog8/optimizer/ExpressionSimplifier.kt @@ -708,6 +708,7 @@ class ExpressionSimplifier(private val program: Program, private val options: Co return null val leftDt = leftIDt.getOr(DataType.UNDEFINED) when (cv) { + 0.0 -> return null // fall through to regular float division to properly deal with division by zero -1.0 -> { // '/' -> -left if (expr.operator == "/") { @@ -736,14 +737,10 @@ class ExpressionSimplifier(private val program: Program, private val options: Co } } in powersOfTwoFloat -> { - if (leftDt==DataType.UBYTE || leftDt==DataType.UWORD) { - // Unsigned number divided by a power of two => shift right - // Signed number can't simply be bitshifted in this case (due to rounding issues for negative values), - // so we leave that as is and let the code generator deal with it. - val numshifts = log2(cv).toInt() + val numshifts = powersOfTwoFloat.indexOf(cv) + if (leftDt in IntegerDatatypes) { + // division by a power of two => shift right (signed and unsigned) return BinaryExpression(expr.left, ">>", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position) - } else { - println("TODO optimize: divide by power-of-2 $cv at ${expr.position}") // TODO } } } diff --git a/docs/source/todo.rst b/docs/source/todo.rst index e6fc06821..d8b453f33 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -5,11 +5,7 @@ See open issues on github. Re-generate the skeletons doc files. -optimize byte/word division by powers of 2 (and shift right?), it's now often still using divmod routine. (also % ?) - see the TODOs in inplacemodificationByteVariableWithLiteralval(), inplacemodificationSomeWordWithLiteralval(), optimizedDivideExpr(), - and finally in optimizeDivision() - and for IR: see divideByConst() / divideByConstInplace() in IRCodeGen - +optimize signed word bit shifting?: 1 shift right of AX signed word: stx P8ZP_SCRATCH_B1 cpx #$80 diff --git a/examples/test.p8 b/examples/test.p8 index 77e2a19ee..9043f958a 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -61,7 +61,7 @@ main { } sub unsigned() { - txt.print("unsigned\n") + txt.print("\nunsigned\n") ubyte @shared ubvalue = 88 uword @shared uwvalue = 8888