Additional optimization to the Bresenham line routines

2025-03-21 13:29:38 +00:00 · 2021-03-10 18:49:40 +01:00 · 2021-03-10 18:49:40 +01:00 · 1c1da8e38e
commit 1c1da8e38e
parent 50a306f492
3 changed files with 12 additions and 12 deletions
--- a/compiler/res/prog8lib/c64/graphics.p8
+++ b/compiler/res/prog8lib/c64/graphics.p8
@@ -39,22 +39,20 @@ graphics {
            swap(x1, x2)
            swap(y1, y2)
        }
-        word @zp dx = (x2-x1) as word
-        word @zp dy = (y2-y1) as word
+        word @zp dx = (x2 as word)-x1
+        word @zp dy = (y2 as word)-y1

        if dx==0 {
-            vertical_line(x1, y1, abs(dy)+1 as ubyte)
+            vertical_line(x1, y1, abs(dy) as ubyte +1)
            return
        }
        if dy==0 {
            if x1>x2
                x1=x2
-            horizontal_line(x1, y1, abs(dx)+1 as uword)
+            horizontal_line(x1, y1, abs(dx) as uword +1)
            return
        }

-        ; TODO rewrite the rest in optimized assembly
-
        word @zp d = 0
        ubyte positive_ix = true
        if dx < 0 {
--- a/compiler/res/prog8lib/cx16/gfx2.p8
+++ b/compiler/res/prog8lib/cx16/gfx2.p8
@@ -404,7 +404,7 @@ _done
                ; TODO also mostly usable for lores 4c?
                void addr_mul_24_for_highres_4c(y, x)      ; 24 bits result is in r0 and r1L (highest byte)

-                ; TODO optimize the loop in pure assembly
+                ; TODO optimize this vertical line loop in pure assembly
                color &= 3
                color <<= gfx2.plot.shift4c[lsb(x) & 3]
                ubyte mask = gfx2.plot.mask4c[lsb(x) & 3]
@@ -438,21 +438,20 @@ _done
            swap(x1, x2)
            swap(y1, y2)
        }
-        word @zp dx = x2-x1 as word
-        word @zp dy = y2-y1 as word
+        word @zp dx = (x2 as word)-x1
+        word @zp dy = (y2 as word)-y1

        if dx==0 {
-            vertical_line(x1, y1, abs(dy)+1 as uword, color)
+            vertical_line(x1, y1, abs(dy) as uword +1, color)
            return
        }
        if dy==0 {
            if x1>x2
                x1=x2
-            horizontal_line(x1, y1, abs(dx)+1 as uword, color)
+            horizontal_line(x1, y1, abs(dx) as uword +1, color)
            return
        }

-        ; TODO rewrite the rest in optimized assembly (or reuse GRAPH_draw_line if we can get the FB replacement vector layer working)
        word @zp d = 0
        cx16.r13 = true      ; 'positive_ix'
        if dx < 0 {
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -2,6 +2,9 @@
 TODO
 ====

+- add a cmp(x,y) function that returns no value but only sets the status bits based off the comparison (can be used with a conditional jump afterwards)
+- optimize comparisons followed by a conditional jump ; try to not have to jsr to the comparison routines. (so if/while/do-until are faster)
+
 - optimize several inner loops in gfx2
 - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine)
 - optimize swap of two memread values with index, using the same pointer expression/variable, like swap(@(ptr+1), @(ptr+2))