optimized plasma examples even more

2025-08-18 19:33:18 +00:00 · 2023-09-05 00:23:50 +02:00
parent b125901717
commit 9fca978725
3 changed files with 20 additions and 14 deletions
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,6 +1,9 @@
 TODO
 ====
 - fix compiler crash on "unroll for x in 0 to txt.DEFAULT_WIDTH-1"
 - fix compiler error on "unroll txt.DEFAULT_WIDTH"
 - prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm??
 - [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
 - IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction
--- a/examples/c64/plasma.p8
+++ b/examples/c64/plasma.p8
@@ -8,7 +8,7 @@
 ;  Cleanup and porting to C by Ullrich von Bassewitz.
 ;  See https://github.com/cc65/cc65/tree/master/samples/cbm/plasma.c
 ;
-;  Converted to prog8 by Irmen de Jong.
+;  Optimized and converted to prog8 by Irmen de Jong.
 main {
@@ -73,14 +73,14 @@ main {
        ubyte @zp x
        ubyte @zp y
-        for y in 24 downto 0 {
+        for y in 0 to 24 {
            ybuf[y] = math.sin8u(c1a) + math.sin8u(c1b)
            c1a += 4
            c1b += 9
        }
        c1A += 3
        c1B -= 5
-        for x in 39 downto 0 {
+        for x in 0 to 39 {
            xbuf[x] = math.sin8u(c2a) + math.sin8u(c2b)
            c2a += 3
            c2b += 7
@@ -89,17 +89,17 @@ main {
        c2B -= 3
        for y in 0 to 24 {
            ubyte @zp @shared yvalue = ybuf[y]
            for x in 0 to 39 {
-                @(screen+x) = xbuf[x] + ybuf[y]
+                ; @(screen+x) = xbuf[x] + yvalue
 ; max optimized asm is this: (achieving ~21 fps on the C64):
-;                %asm {{
+                %asm {{
-;                     ldy  p8_y
+                     lda  p8_yvalue
-;                     lda  p8_ybuf,y
+                     ldy  p8_x
-;                     ldy  p8_x
+                     clc
-;                     clc
+                     adc  p8_xbuf,y
-;                     adc  p8_xbuf,y
+                     sta  (p8_screen),y
-;                     sta  (p8_screen),y
+                 }}
 ;                 }}
            }
            screen += 40
        }
--- a/examples/cx16/plasma.p8
+++ b/examples/cx16/plasma.p8
@@ -9,7 +9,7 @@
 ;  Cleanup and porting to C by Ullrich von Bassewitz.
 ;  See https://github.com/cc65/cc65/tree/master/samples/cbm/plasma.c
 ;
-;  Converted to prog8 by Irmen de Jong.
+;  Optimized and converted to prog8 by Irmen de Jong.
 main {
@@ -77,11 +77,14 @@ main {
        c2A += 2
        c2B -= 3
        ; sys.waitvsync()    ; if you put this in it will run at 30 fps synced which looks really nice and smooth
        ; use vera auto increment writes to avoid slow txt.setchr(x, y, xbuf[x] + ybuf[y])
        for y in 0 to txt.DEFAULT_HEIGHT-1 {
            cx16.vaddr_autoincr(1, VERA_TXTSCREEN + y*$0100, 0, 2)
            ubyte @zp yvalue = ybuf[y]
            for x in 0 to txt.DEFAULT_WIDTH-1 {
-                cx16.VERA_DATA0 = xbuf[x] + ybuf[y]
+                cx16.VERA_DATA0 = xbuf[x] + yvalue
            }
        }
    }