improve plasma example

This commit is contained in:
Irmen de Jong 2023-09-04 20:19:04 +02:00
parent 525a9b5036
commit 7e5a9474fe
3 changed files with 55 additions and 30 deletions

View File

@ -1,6 +1,9 @@
TODO
====
- add special optimization for @(screen+i) = xbuf[x] + ybuf[y] and @(screen+i) = xbuf[x] - ybuf[y]
(noticeable in plasma.p8 and cube examples?)
- prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm??
- [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
- IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction

View File

@ -1,5 +1,4 @@
%import syslib
%import test_stack
%import textio
%import math
@ -7,10 +6,11 @@
; which is (w)2001 by groepaz/hitmen
;
; Cleanup and porting to C by Ullrich von Bassewitz.
; See https://github.com/cc65/cc65/tree/master/samples/cbm/plasma.c
;
; Converted to prog8 by Irmen de Jong.
main {
const uword SCREEN1 = $E000
const uword SCREEN2 = $E400
@ -22,27 +22,39 @@ main {
sub start() {
txt.color(1)
txt.clear_screen()
txt.print("creating charset...\n")
txt.print("creating charset...\n\nwhile running, press key to stop.\n\n")
makechar()
ubyte block = c64.CIA2PRA
; ubyte v = cbm.VMCSB
ubyte v = c64.VMCSB
c64.CIA2PRA = (block & $FC) | (lsb(SCREEN1 >> 14) ^ $03)
repeat {
uword frames = 0
cbm.SETTIM(0,0,0)
while cbm.GETIN()==0 {
doplasma(SCREEN1)
c64.VMCSB = PAGE1
doplasma(SCREEN2)
c64.VMCSB = PAGE2
frames += 2
}
; restore screen (if you want)
;c64.VMCSB = v
;c64.CIA2PRA = block
;txt.print("done!\n")
;test_stack.test()
;repeat {
;}
uword jiffies = cbm.RDTIM16()
; restore screen and display stats
c64.VMCSB = v
c64.CIA2PRA = block
txt.print("time in jiffies: ")
txt.print_uw(jiffies)
txt.print("\nframes: ")
txt.print_uw(frames)
uword fps = (frames*60)/jiffies
txt.print("\nfps: ")
txt.print_uw(fps)
txt.print("\ndone!\n")
repeat {
}
}
; several variables outside of doplasma to make them retain their value
@ -51,7 +63,7 @@ main {
ubyte c2A
ubyte c2B
sub doplasma(uword screen) {
sub doplasma(uword @zp screen) {
ubyte[40] xbuf
ubyte[25] ybuf
ubyte c1a = c1A
@ -78,17 +90,19 @@ main {
for y in 24 downto 0 {
for x in 39 downto 0 {
; using a temp var here to enable expression optimization that can't be done on a 'problematic' ROM/RAM memory location
ubyte @zp cc = xbuf[x] + ybuf[y]
@(screen+x) = cc
; this is the fastest way to do this inner part:
; split the array expression to avoid a prog8 temporary var inefficiency
; this pure prog8 version achieves ~17 fps
ubyte @zp tmp = ybuf[y]
@(screen+x) = xbuf[x] + tmp
; prog8 at this time needs a temp variable to calculate the above expression.
; in optimized asm, this is the fastest way to do this line (achieving ~21 fps on the C64):
; %asm {{
; ldy y
; lda ybuf,y
; ldy x
; ldy p8_y
; lda p8_ybuf,y
; ldy p8_x
; clc
; adc xbuf,y
; sta (screen),y
; adc p8_xbuf,y
; sta (p8_screen),y
; }}
}
screen += 40
@ -99,16 +113,15 @@ main {
ubyte[8] bittab = [ $01, $02, $04, $08, $10, $20, $40, $80 ]
ubyte c
for c in 0 to 255 {
ubyte @zp s = math.sin8u(c)
ubyte @zp s = math.sin8u(c) ; chance
ubyte i
; for all the pixels in the 8x8 character grid, determine (with a rnd chance) if they should be on or off
for i in 0 to 7 {
ubyte b=0
ubyte @zp ii
for ii in 0 to 7 {
; use 16 bit rng for a bit more randomness instead of the 8-bit rng
if math.rnd() > s {
if math.rnd() > s
b |= bittab[ii]
}
}
@(CHARSET + i + c*$0008) = b
}

View File

@ -4,10 +4,19 @@
main {
sub start() {
byte bb = 20
word ww= 300
ww += bb*3
txt.print_w(ww) ; 360
ubyte[5] xx = [11,22,33,44,55]
ubyte[5] yy = [101,102,103,104,105]
ubyte i=3
ubyte j = 4
uword screen
ubyte result = xx[i] + yy[j] ; TODO optimize to use add addr,y
txt.print_ub(result) ; 149
txt.nl()
result = xx[i] + yy[i] ; TODO optimize to use add addr,y
txt.print_ub(result) ; 148
txt.nl()
@(screen+i) = xx[i] + yy[i] ; TODO why is this using P8ZP_SCRATCH_B1?
; ubyte index = 100
; ubyte[] t_index = [1,2,3,4,5]