added optimized case for signed division by 2

2024-12-24 01:29:28 +00:00 · 2022-07-24 13:20:38 +02:00 · 2022-07-24 13:20:38 +02:00 · 046dceb5c2
commit 046dceb5c2
parent dcc1f00048
6 changed files with 70 additions and 40 deletions
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/ExpressionsAsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/ExpressionsAsmGen.kt
@ -519,14 +519,39 @@ internal class ExpressionsAsmGen(private val program: Program,
                    val rightVal = expr.right.constValue(program)?.number?.toInt()
                    if(rightVal!=null && rightVal==2) {
                        translateExpressionInternal(expr.left)
-                        // shifting only yields the correct rounded result on unsinged numbers
-                        if(leftDt==DataType.UBYTE) {
-                            asmgen.out("  lsr  P8ESTACK_LO+1,x")
-                            return
-                        } else if(leftDt==DataType.UWORD) {
-                            asmgen.out("  lsr  P8ESTACK_HI+1,x |  ror  P8ESTACK_LO+1,x")
-                            return
+                        when (leftDt) {
+                            DataType.UBYTE -> {
+                                asmgen.out("  lsr  P8ESTACK_LO+1,x")
+                            }
+                            DataType.UWORD -> {
+                                asmgen.out("  lsr  P8ESTACK_HI+1,x |  ror  P8ESTACK_LO+1,x")
+                            }
+                            DataType.BYTE -> {
+                                // signed divide by 2 using a shift: a negative value is incremented first, so that the arithmetic shift right rounds towards zero
+                                asmgen.out("""
+                                    lda  P8ESTACK_LO+1,x
+                                    bpl  +
+                                    inc  P8ESTACK_LO+1,x
+                                    lda  P8ESTACK_LO+1,x
++                                   asl  a
+                                    ror  P8ESTACK_LO+1,x""")
+                            }
+                            DataType.WORD -> {
+                                // signed divide by 2 using a shift: a negative value is incremented first (16 bits), so that the arithmetic shift right rounds towards zero
+                                asmgen.out("""
+                                    lda  P8ESTACK_HI+1,x
+                                    bpl  ++
+                                    inc  P8ESTACK_LO+1,x
+                                    bne  +
+                                    inc  P8ESTACK_HI+1,x
++                                   lda  P8ESTACK_HI+1,x
++                                   asl  a
+                                    ror  P8ESTACK_HI+1,x
+                                    ror  P8ESTACK_LO+1,x""")
+                            }
+                            else -> throw AssemblyError("weird dt")
                        }
+                        return
                    }
                }
            }
--- a/codeGenVirtual/src/prog8/codegen/virtual/CodeGen.kt
+++ b/codeGenVirtual/src/prog8/codegen/virtual/CodeGen.kt
@ -512,11 +512,7 @@ class CodeGen(internal val program: PtProgram,
            return code
        val pow2 = powersOfTwo.indexOf(factor)
        if(pow2==1 && !signed) {
-            // just shift 1 bit
-            code += if(signed)
-                VmCodeInstruction(Opcode.ASR, dt, reg1=reg)
-            else
-                VmCodeInstruction(Opcode.LSR, dt, reg1=reg)
+            code += VmCodeInstruction(Opcode.LSR, dt, reg1=reg)     // simple single bit shift
        }
        else if(pow2>=1 &&!signed) {
            // just shift multiple bits
@ -545,11 +541,7 @@ class CodeGen(internal val program: PtProgram,
            return code
        val pow2 = powersOfTwo.indexOf(factor)
        if(pow2==1 && !signed) {
-            // just shift 1 bit
-            code += if(signed)
-                VmCodeInstruction(Opcode.ASRM, dt, value=address)
-            else
-                VmCodeInstruction(Opcode.LSRM, dt, value=address)
+            code += VmCodeInstruction(Opcode.LSRM, dt, value=address)       // just simple bit shift
        }
        else if(pow2>=1 && !signed) {
            // just shift multiple bits
--- a/codeOptimizers/src/prog8/optimizer/ExpressionSimplifier.kt
+++ b/codeOptimizers/src/prog8/optimizer/ExpressionSimplifier.kt
@ -21,6 +21,7 @@ import kotlin.math.pow

 class ExpressionSimplifier(private val program: Program) : AstWalker() {
    private val powersOfTwo = (1..16).map { (2.0).pow(it) }.toSet()
+    private val negativePowersOfTwo = powersOfTwo.map { -it }.toSet()

    override fun after(typecast: TypecastExpression, parent: Node): Iterable<IAstModification> {
        val mods = mutableListOf<IAstModification>()
@ -469,7 +470,9 @@ class ExpressionSimplifier(private val program: Program) : AstWalker() {
                }
                in powersOfTwo -> {
                    if (leftDt==DataType.UBYTE || leftDt==DataType.UWORD) {
-                        // unsigned number divided by a power of two => shift right
+                        // Unsigned number divided by a power of two => shift right
+                        // Signed number can't simply be bitshifted in this case (due to rounding issues for negative values),
+                        // so we leave that as is and let the code generator deal with it.
                        val numshifts = log2(cv).toInt()
                        return BinaryExpression(expr.left, ">>", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
                    }
@ -529,6 +532,14 @@ class ExpressionSimplifier(private val program: Program) : AstWalker() {
                        return BinaryExpression(expr2.left, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
                    }
                }
+                in negativePowersOfTwo -> {
+                    if (leftValue.inferType(program).isInteger) {
+                        // times a negative power of two => negate, then shift
+                        val numshifts = log2(-cv).toInt()
+                        val negation = PrefixExpression("-", expr2.left, expr.position)
+                        return BinaryExpression(negation, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
+                    }
+                }
            }
        }
        // no need to check for left val constant (because of associativity)
--- a/docs/source/programming.rst
+++ b/docs/source/programming.rst
@ -236,6 +236,11 @@ Unsigned integers are in the range 0-255 for unsigned byte types, and 0-65535 fo
 The signed integers integers are in the range -128..127 for bytes,
 and -32768..32767 for words.

+.. caution::
+    Doing math on signed integers can result in code that is a lot larger and slower than
+    when using unsigned integers. Make sure you really need the signed numbers, otherwise
+    stick to unsigned integers for efficiency.
+

 Boolean values
 ^^^^^^^^^^^^^^
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@ -3,16 +3,12 @@ TODO

 For next release
 ^^^^^^^^^^^^^^^^
- Add optimized signed word division for factors of 2 (bit shifting but this time with correct rounding)
-    CodeGen divideByConst() and divideByConstInplace()
-    ExpressionsAsmGen translateExpression()
-    ExpressionSimplifier optimizeDivision() ?
-
 - add item to XyzZeropage that enables an option that if zeropage=FULL or KERNALSAFE, moves the cx16 virtual registers to ZP, same location as on x16
  (can be done on C64 only for now)   Remove those addresses from the ZP free pool = allocate them in ZP like Cx16Zeropage does
  Adapt the code in AstPreprocessor that relocates the registers as well.
 - for uword pointer variables: allow pointer[uword] array indexing >255 , rewrite it to @(pointer+index)
  DO NOT allow this for regular array indexing because normal arrays can never exceed size 256
+
 ...


@ -26,8 +22,7 @@ Need help with
 Future Things and Ideas
 ^^^^^^^^^^^^^^^^^^^^^^^
 Compiler:
- vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is.
-  this info is needed for more advanced optimizations and later code generation steps.
+- vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is. This info is needed for more advanced optimizations and later code generation steps.
 - vm: implement remaining sin/cos functions in math.p8
 - vm: find a solution for the cx16.r0..r15 that "overlap" (r0, r0L, r0H etc) but in the vm each get their own separate variable location now
 - vm: somehow deal with asmsubs otherwise the vm IR can't fully encode all of prog8
@ -36,8 +31,8 @@ Compiler:
 - vm: add ore optimizations in VmPeepholeOptimizer
 - see if we can let for loops skip the loop if end<start, like other programming languages. Without adding a lot of code size/duplicating the loop condition.
  this is documented behavior to now loop around but it's too easy to forget about!
-    Lot of work because of so many special cases in ForLoopsAsmgen.....
-    How is it for the vm target? -> just 2 special cases in CodeGen.
+  Lot of work because of so many special cases in ForLoopsAsmgen.....
+  How is it for the vm target? -> just 2 special cases in CodeGen.
 - when the vm is stable and *if* its language can get promoted to prog8 IL, the variable allocation should be changed.
  It's now done before the vm code generation, but the IL should probably not depend on the allocations already performed.
  So the CodeGen doesn't do VariableAlloc *before* the codegen, but as a last step.
--- a/examples/test.p8
+++ b/examples/test.p8
@ -8,22 +8,24 @@ main {
        txt.nl()
    }

-    ; TODO test with new optimized division routines.
-
    sub start() {
-        byte qq = 1
-        byte bb = -51
-        derp((bb*qq)/-4, 1,2,3,4)
-        bb /= -4
-        txt.print_b(bb)
+        word qq = 1
+        word bb = -5051
+        derp((bb*qq)/-2, 1,2,3,4)
+        bb /= -2
+        txt.print_w(bb)
        txt.nl()
-        bb = 51
-        bb /= -4
-        txt.print_b(bb)
+        bb  = -5051
+        bb = -bb/2
+        txt.print_w(bb)
        txt.nl()
-        ubyte ubb = 51
-        ubb /= 4
-        txt.print_ub(ubb)
+        bb = 5051
+        bb /= -2
+        txt.print_w(bb)
+        txt.nl()
+        uword ubb = 5051
+        ubb /= 2
+        txt.print_uw(ubb)
        txt.nl()
    }
 }