optimized plasma examples even more

This commit is contained in:
Irmen de Jong 2023-09-05 00:23:50 +02:00
parent b125901717
commit 9fca978725
3 changed files with 20 additions and 14 deletions

View File

@ -1,6 +1,9 @@
TODO
====
- fix compiler crash on "unroll for x in 0 to txt.DEFAULT_WIDTH-1"
- fix compiler error on "unroll txt.DEFAULT_WIDTH"
- prefix prog8 subroutines with p8s_ instead of p8_ so that they cannot clash with variables in the asm?
- [on branch: shortcircuit] investigate McCarthy (short-circuit) evaluation again? This may also reduce code size for things like "if a>4 or a<2 ..."
- IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction

View File

@ -8,7 +8,7 @@
; Cleanup and porting to C by Ullrich von Bassewitz.
; See https://github.com/cc65/cc65/tree/master/samples/cbm/plasma.c
;
; Converted to prog8 by Irmen de Jong.
; Optimized and converted to prog8 by Irmen de Jong.
main {
@ -73,14 +73,14 @@ main {
ubyte @zp x
ubyte @zp y
for y in 24 downto 0 {
for y in 0 to 24 {
ybuf[y] = math.sin8u(c1a) + math.sin8u(c1b)
c1a += 4
c1b += 9
}
c1A += 3
c1B -= 5
for x in 39 downto 0 {
for x in 0 to 39 {
xbuf[x] = math.sin8u(c2a) + math.sin8u(c2b)
c2a += 3
c2b += 7
@ -89,17 +89,17 @@ main {
c2B -= 3
for y in 0 to 24 {
ubyte @zp @shared yvalue = ybuf[y]
for x in 0 to 39 {
@(screen+x) = xbuf[x] + ybuf[y]
; @(screen+x) = xbuf[x] + yvalue
; max optimized asm is this: (achieving ~21 fps on the C64):
; %asm {{
; ldy p8_y
; lda p8_ybuf,y
; ldy p8_x
; clc
; adc p8_xbuf,y
; sta (p8_screen),y
; }}
%asm {{
lda p8_yvalue
ldy p8_x
clc
adc p8_xbuf,y
sta (p8_screen),y
}}
}
screen += 40
}

View File

@ -9,7 +9,7 @@
; Cleanup and porting to C by Ullrich von Bassewitz.
; See https://github.com/cc65/cc65/tree/master/samples/cbm/plasma.c
;
; Converted to prog8 by Irmen de Jong.
; Optimized and converted to prog8 by Irmen de Jong.
main {
@ -77,11 +77,14 @@ main {
c2A += 2
c2B -= 3
; sys.waitvsync() ; if you enable this, it will run synced at 30 fps, which looks really nice and smooth
; use VERA auto-increment writes to avoid the slow txt.setchr(x, y, xbuf[x] + ybuf[y])
for y in 0 to txt.DEFAULT_HEIGHT-1 {
cx16.vaddr_autoincr(1, VERA_TXTSCREEN + y*$0100, 0, 2)
ubyte @zp yvalue = ybuf[y]
for x in 0 to txt.DEFAULT_WIDTH-1 {
cx16.VERA_DATA0 = xbuf[x] + ybuf[y]
cx16.VERA_DATA0 = xbuf[x] + yvalue
}
}
}