optimized plasma examples even more

This commit is contained in:
Irmen de Jong
2023-09-05 00:23:50 +02:00
parent b125901717
commit 9fca978725
3 changed files with 20 additions and 14 deletions

View File

@@ -1,6 +1,9 @@
TODO TODO
==== ====
- fix compiler crash on "unroll for x in 0 to txt.DEFAULT_WIDTH-1"
- fix compiler error on "unroll txt.DEFAULT_WIDTH"
- prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm?? - prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm??
- [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 .... - [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
- IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction - IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction

View File

@@ -8,7 +8,7 @@
; Cleanup and porting to C by Ullrich von Bassewitz. ; Cleanup and porting to C by Ullrich von Bassewitz.
; See https://github.com/cc65/cc65/tree/master/samples/cbm/plasma.c ; See https://github.com/cc65/cc65/tree/master/samples/cbm/plasma.c
; ;
; Converted to prog8 by Irmen de Jong. ; Optimized and converted to prog8 by Irmen de Jong.
main { main {
@@ -73,14 +73,14 @@ main {
ubyte @zp x ubyte @zp x
ubyte @zp y ubyte @zp y
for y in 24 downto 0 { for y in 0 to 24 {
ybuf[y] = math.sin8u(c1a) + math.sin8u(c1b) ybuf[y] = math.sin8u(c1a) + math.sin8u(c1b)
c1a += 4 c1a += 4
c1b += 9 c1b += 9
} }
c1A += 3 c1A += 3
c1B -= 5 c1B -= 5
for x in 39 downto 0 { for x in 0 to 39 {
xbuf[x] = math.sin8u(c2a) + math.sin8u(c2b) xbuf[x] = math.sin8u(c2a) + math.sin8u(c2b)
c2a += 3 c2a += 3
c2b += 7 c2b += 7
@@ -89,17 +89,17 @@ main {
c2B -= 3 c2B -= 3
for y in 0 to 24 { for y in 0 to 24 {
ubyte @zp @shared yvalue = ybuf[y]
for x in 0 to 39 { for x in 0 to 39 {
@(screen+x) = xbuf[x] + ybuf[y] ; @(screen+x) = xbuf[x] + yvalue
; the maximally optimized asm is this (achieving ~21 fps on the C64): ; the maximally optimized asm is this (achieving ~21 fps on the C64):
; %asm {{ %asm {{
; ldy p8_y lda p8_yvalue
; lda p8_ybuf,y ldy p8_x
; ldy p8_x clc
; clc adc p8_xbuf,y
; adc p8_xbuf,y sta (p8_screen),y
; sta (p8_screen),y }}
; }}
} }
screen += 40 screen += 40
} }

View File

@@ -9,7 +9,7 @@
; Cleanup and porting to C by Ullrich von Bassewitz. ; Cleanup and porting to C by Ullrich von Bassewitz.
; See https://github.com/cc65/cc65/tree/master/samples/cbm/plasma.c ; See https://github.com/cc65/cc65/tree/master/samples/cbm/plasma.c
; ;
; Converted to prog8 by Irmen de Jong. ; Optimized and converted to prog8 by Irmen de Jong.
main { main {
@@ -77,11 +77,14 @@ main {
c2A += 2 c2A += 2
c2B -= 3 c2B -= 3
; sys.waitvsync() ; if you enable this call, it will run synced at 30 fps, which looks really nice and smooth
; use VERA auto-increment writes to avoid slow txt.setchr(x, y, xbuf[x] + ybuf[y]) ; use VERA auto-increment writes to avoid slow txt.setchr(x, y, xbuf[x] + ybuf[y])
for y in 0 to txt.DEFAULT_HEIGHT-1 { for y in 0 to txt.DEFAULT_HEIGHT-1 {
cx16.vaddr_autoincr(1, VERA_TXTSCREEN + y*$0100, 0, 2) cx16.vaddr_autoincr(1, VERA_TXTSCREEN + y*$0100, 0, 2)
ubyte @zp yvalue = ybuf[y]
for x in 0 to txt.DEFAULT_WIDTH-1 { for x in 0 to txt.DEFAULT_WIDTH-1 {
cx16.VERA_DATA0 = xbuf[x] + ybuf[y] cx16.VERA_DATA0 = xbuf[x] + yvalue
} }
} }
} }