From 931e55fb5df1acf7b63e90ef82487cb512d18b14 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Sun, 17 May 2020 00:17:25 -0400 Subject: [PATCH] memories: add fast plotting to sierpinski --- gr-sim/hellmood_memories/hellmood_gr.c | 106 ++++++------- gr-sim/hellmood_memories/hellmood_simple.c | 175 ++++----------------- hellmood_memories/Makefile | 2 +- hellmood_memories/fast_plot.s | 76 ++++----- hellmood_memories/multiply_s8x8.s | 2 - hellmood_memories/multiply_u8x8.s | 3 + hellmood_memories/sier.s | 31 +++- 7 files changed, 150 insertions(+), 245 deletions(-) diff --git a/gr-sim/hellmood_memories/hellmood_gr.c b/gr-sim/hellmood_memories/hellmood_gr.c index a73886d0..13518a6f 100644 --- a/gr-sim/hellmood_memories/hellmood_gr.c +++ b/gr-sim/hellmood_memories/hellmood_gr.c @@ -295,18 +295,37 @@ static int fx2(int xx, int yy, int xprime) { /* parallax checkerboard */ static int fx3(int xx,int yy,int xprime) { - return 0; -} -#if 0 - dx=((yy&0xff)<<8) | (xprime&0xff); + unsigned short ax,bx,cx,dx; + int cf=0,zf=0,result; cx=frame; // mov cx,bp ; set init point to time bx=-16; // mov bx,-16 ; limit to 16 iterations fx3L: - cx=cx+(yy*320)+xx; // add cx, di ; offset by screenpointer - ax=819; // mov ax,819 ; magic, related to Rrrola - imul_16(cx); // imul cx ; get X',Y' in DX + cx=cx+(yy*40)+xx; +// cx=cx+(yy*320)+xx; // add cx, di ; offset by screenpointer +// ax=819; // mov ax,819 ; magic, related to Rrrola + // 819 * 320 = 0x3ffc0 + // 819 * 1 = 0x333 + // want Z*1 = 0x40 + // 0x333 = cccd/40 (/64) + + // 40=$28 48= $30 + // 1 = $0001 + // 39 =$0027 + // 40= $0100 6.4 + + // 65536/40 = 1638.4 / 40 = + // 28f5 + + ax=0x28f5; + + result=ax*cx; +// ax=result&0xffff; + //dx=(result>>16); + dx=(result>>24); + + //imul_16(cx); // imul cx ; get X',Y' in DX cf=dx&1; // ror dx,1 ; set carry flag on "hit" dx=dx>>1; if (cf) { @@ -328,69 +347,46 @@ fx3L: return ax; } -#endif + /* sierpinski rotozoomer */ static int fx4(int xx, int yy, int xprime) { - return 0; -} -#if 0 - 
unsigned char dl,dh,bh,al; - dx=((yy&0xff)<<8) | (xprime&0xff); + unsigned char dh,bh; + unsigned short color,t,xsext; + int temp; - dl=dx&0xff; dh=(dx>>8)&0xff; + t=frame-2048; // lea cx,[bp-2048] ; center time to pass zero + t=t<<3; // sal cx,3 ; speed up by factor of 8! - cx=frame-2048; // lea cx,[bp-2048] ; center time to pass zero - cx=cx<<3; // sal cx,3 ; speed up by factor of 8! - ax=(dh&0xff); // movzx ax,dh ; get X into AL - // movsx dx,dl ; get Y into DL - if (dl&0x80) { - dx|=0xff00; - } - else { - dx&=0x00ff; - } + yy*=4; - bx=ax; // mov bx,ax ; save X in BX - imul_16_bx(cx); // imul bx,cx ; BX=X*T + /* sign extend X */ + xsext=xprime*8; // get X into DL + if (xsext&0x80*8) xsext|=0xff00*8; + else xsext&=0x00ff*8; - /* bl=bx&0xff; */ bh=(bx>>8)&0xff; - dl=dx&0xff; dh=(dx>>8)&0xff; + temp=yy*t; // temp=Y*T + bh=(temp>>8)+xsext; // bh=((y*t)/256)+X - bh=bh+dl; // add bh,dl ; bh=x*t/256+Y + temp=xsext*t; // temp=X*T + dh=(temp>>8)&0xff; // dh=(X*T/256) - imul_16_dx(cx); // imul dx,cx ; dx=Y*T + color=(yy-dh)&bh; // color=(Y-(X*T/256))&(Y*T/256+X) - dl=dx&0xff; dh=(dx>>8)&0xff; + // and al,252 ; thicker sierpinksi - // sub al,dh ; al=X-Y*T/256 - al=ax&0xff; // ah=(ax>>8)&0xff; - al=al-dh; + if ((color&252)==0) { + color=0x9; // otherwise: a nice orange + } + else { + color=0; // leave black if not sierpinksi + } - // and al,bh ; AL=(X-Y*T/256)&(x*T/256+Y) - al=al&bh; - al=al&252; // and al,252 ; thicker sierpinksi - if (al==0) zf=1; - else zf=0; - cf=0; of=0; - // salc ; set pixel value to black - if (cf==0) al=0; - else al=0xff; + return color; -/* NOTE: remove the line below and the background becomes a rainbow */ - ax=al; - // jnz fx4q ; leave black if not sierpinksi - if (zf==0) goto fx4q; - - ax=ax&0xff00; // mov al,0x2a ; otherwise: a nice orange - ax|=0x2a; -fx4q: - ; - return ax; } -#endif /* raycast bent tunnel */ static int fx5(int xx, int yy, int xprime) { @@ -543,7 +539,7 @@ int main(int argc, char **argv) { // frame=0x13; - frame=2*512; + 
frame=3*512; while(1) { for(yy=0;yy<48;yy++) { diff --git a/gr-sim/hellmood_memories/hellmood_simple.c b/gr-sim/hellmood_memories/hellmood_simple.c index 96be8771..828186ba 100644 --- a/gr-sim/hellmood_memories/hellmood_simple.c +++ b/gr-sim/hellmood_memories/hellmood_simple.c @@ -307,87 +307,6 @@ static void imul_16(short value) { } -/* signed multiply */ -static void imul_16_bx(short value) { - - int result; - short src; - - src=bx; - - result=src*value; - -// printf("imul: %d*%d=%d ",src,value,result); - - bx=(result&0xffff); - - if (bx==result) { - cf=0; - of=0; - } - else { - cf=1; - of=1; - } - -} - -/* signed multiply */ -static void imul_16_dx(short value) { - - int result; - short src; - - src=dx; - - result=src*value; - -// printf("imul: %d*%d=%d ",src,value,result); - - dx=(result&0xffff); - - if (dx==result) { - cf=0; - of=0; - } - else { - cf=1; - of=1; - } - -} - - - -#if 0 - -/* unsigned divide */ -static void div_8(unsigned char value) { - - unsigned char r,q; - unsigned int result,remainder; - -// printf("Dividing %d (%x) by %d (%x): ",ax,ax,value,value); - - if (value==0) { - printf("Divide by zero!\n"); - return; - } - - result=ax/value; - remainder=ax%value; - - q=result; - r=remainder; - -// printf("Result: q=%d r=%d\n",q,r); - - ax=(r<<8)|(q&0xff); - -} - -#endif - static void push(int value) { //printf("Pushing %x\n",value); stack[sp]=value; @@ -468,92 +387,62 @@ static int fx2(int xx, int yy, int xprime) { /* parallax checkerboard */ static int fx3(int xx,int yy,int xprime) { - dx=((yy&0xff)<<8) | (xprime&0xff); + unsigned short color; cx=frame; // mov cx,bp ; set init point to time bx=-16; // mov bx,-16 ; limit to 16 iterations fx3L: cx=cx+(yy*320)+xx; // add cx, di ; offset by screenpointer - ax=819; // mov ax,819 ; magic, related to Rrrola +// ax=0x333; // mov ax,819 ; magic, related to Rrrola + ax=0xcccd/64; imul_16(cx); // imul cx ; get X',Y' in DX - cf=dx&1; // ror dx,1 ; set carry flag on "hit" - dx=dx>>1; - if (cf) { - dx|=0x8000; - 
} - else { - dx&=0x7fff; - } bx++; // inc bx ; increment iteration count - if (bx==0) zf=1;// does not affect carry flag - else zf=0; + + // check bottom bit of top word of multiply // ja fx3L ; loop until "hit" or "iter=max" // jump above, if cf==0 and zf==0 - if ((cf==0) && (zf==0)) goto fx3L; - ax=bx+31; // lea ax,[bx+32] ; map value to standard gray scale + if ((bx!=0) && ((dx&1)==0)) goto fx3L; + + color=bx+31; // lea ax,[bx+32] ; map value to standard gray scale //printf("%d %d\n",ax,bx); - return ax; + return color; } /* sierpinski rotozoomer */ static int fx4(int xx, int yy, int xprime) { - unsigned char dl,dh,bh,al; + unsigned char dh,bh; + unsigned short color,t,xsext; + int temp; - dx=((yy&0xff)<<8) | (xprime&0xff); + t=frame-2048; // lea cx,[bp-2048] ; center time to pass zero + t=t<<3; // sal cx,3 ; speed up by factor of 8! - dl=dx&0xff; dh=(dx>>8)&0xff; + /* sign extend X */ + xsext=xprime; // get X into DL + if (xsext&0x80) xsext|=0xff00; + else xsext&=0x00ff; - cx=frame-2048; // lea cx,[bp-2048] ; center time to pass zero - cx=cx<<3; // sal cx,3 ; speed up by factor of 8! 
- ax=(dh&0xff); // movzx ax,dh ; get X into AL - // movsx dx,dl ; get Y into DL - if (dl&0x80) { - dx|=0xff00; + temp=yy*t; // temp=Y*T + bh=(temp>>8)+xsext; // bh=((y*t)/256)+X + + temp=xsext*t; // temp=X*T + dh=(temp>>8)&0xff; // dh=(X*T/256) + + color=(yy-dh)&bh; // color=(Y-(X*T/256))&(Y*T/256+X) + + // and al,252 ; thicker sierpinksi + if ((color&252)==0) { + color=0x2a; // otherwise: a nice orange } else { - dx&=0x00ff; + color=0; // leave black if not sierpinksi } - bx=ax; // mov bx,ax ; save X in BX - imul_16_bx(cx); // imul bx,cx ; BX=X*T - - /* bl=bx&0xff; */ bh=(bx>>8)&0xff; - dl=dx&0xff; dh=(dx>>8)&0xff; - - bh=bh+dl; // add bh,dl ; bh=x*t/256+Y - - imul_16_dx(cx); // imul dx,cx ; dx=Y*T - - dl=dx&0xff; dh=(dx>>8)&0xff; - - // sub al,dh ; al=X-Y*T/256 - al=ax&0xff; // ah=(ax>>8)&0xff; - al=al-dh; - - // and al,bh ; AL=(X-Y*T/256)&(x*T/256+Y) - al=al&bh; - al=al&252; // and al,252 ; thicker sierpinksi - if (al==0) zf=1; - else zf=0; - cf=0; of=0; - // salc ; set pixel value to black - if (cf==0) al=0; - else al=0xff; - -/* NOTE: remove the line below and the background becomes a rainbow */ - ax=al; - // jnz fx4q ; leave black if not sierpinksi - if (zf==0) goto fx4q; - - ax=ax&0xff00; // mov al,0x2a ; otherwise: a nice orange - ax|=0x2a; -fx4q: - ; - return ax; + return color; } @@ -740,7 +629,7 @@ int main(int argc, char **argv) { // frame=0x13; es=0xa000-10; - frame=2*512; + frame=3*512; while(1) { for(yy=0;yy<200;yy++) { diff --git a/hellmood_memories/Makefile b/hellmood_memories/Makefile index a363bba5..7c187ca8 100644 --- a/hellmood_memories/Makefile +++ b/hellmood_memories/Makefile @@ -48,7 +48,7 @@ plane.o: plane.s SIER: sier.o ld65 -o SIER sier.o -C ../linker_scripts/apple2_1000.inc -sier.o: sier.s +sier.o: sier.s fast_plot.s ca65 -o sier.o sier.s -l sier.lst ### diff --git a/hellmood_memories/fast_plot.s b/hellmood_memories/fast_plot.s index f600eaac..4598d4f5 100644 --- a/hellmood_memories/fast_plot.s +++ b/hellmood_memories/fast_plot.s @@ 
-2,62 +2,64 @@ ; roughly $4D (77) cycles ; note: ROM plot routine takes $5D (93) cycles + +; original, with SMC code = 77 cycles +; using indirect-Y/GBASL = 69 cycles +; split gr_offsets table = 61 cycles + + ; color in COLOR ; horiz=y, vert=A (A trashed, XY Saved) - - fast_plot: stx TEMPX ; 3 - ror ; get low bit in carry ; 2 - php ; store for later ; 3 - asl ; 2 + lsr ; get low bit in carry ; 2 tax ; 2 - lda gr_offsets,X ; 4+ - sta plot_smc+1 ; 4 - sta load_old_smc+1 ; 4 - lda gr_offsets+1,X ; 4+ - sta plot_smc+2 ; 4 - sta load_old_smc+2 ; 4 + lda gr_offsets_l,X ; 4+ + sta GBASL ; 3 + lda gr_offsets_h,X ; 4+ + sta GBASH ; 3 ;=== - ; 36 + ; 21 -;4->3,4->3,4->0,4->0, 4->5, 5->6 - -load_old_smc: - lda $400,Y ; 4+ - plp ; 4 - bcs plot_bottom ; 2/3t - ;===== - ; 11 -plot_top: - and #$f0 ; 2 - sta TEMP ; 3 lda COLOR ; 3 - and #$0f ; 2 - ora TEMP ; 3 - jmp plot_smc ; 3 - ;==== - ; 16 + bcs plot_top ; 2/3t plot_bottom: - and #$0f ; 2 + and #$0f ; 3 sta TEMP ; 3 - lda COLOR ; 3 - and #$f0 ; 2 + lda #$f0 ; 2 + and (GBASL),Y ; 5 + jmp plot_common ; 3 +plot_top: + and #$f0 ; 3 + sta TEMP ; 3 + lda #$0f ; 2 + and (GBASL),Y ; 5 + +plot_common: ora TEMP ; 3 -plot_smc: - sta $400,Y ; 5 + sta (GBASL),Y ; 6 + ;==== + ; 31 ldx TEMPX ; 3 rts ; 6 ;==== - ; 14 + ; 9 ;.align $100 gr_offsets: - .word $400,$480,$500,$580,$600,$680,$700,$780 - .word $428,$4a8,$528,$5a8,$628,$6a8,$728,$7a8 - .word $450,$4d0,$550,$5d0,$650,$6d0,$750,$7d0 + +gr_offsets_l: + .byte $00,$80,$00,$80,$00,$80,$00,$80 + .byte $28,$a8,$28,$a8,$28,$a8,$28,$a8 + .byte $50,$d0,$50,$d0,$50,$d0,$50,$d0 + +gr_offsets_h: + .byte $4,$4,$5,$5,$6,$6,$7,$7 + .byte $4,$4,$5,$5,$6,$6,$7,$7 + .byte $4,$4,$5,$5,$6,$6,$7,$7 + gr_offsets_end: ;.assert >gr_offsets = >gr_offsets_end, error, "gr_offsets crosses page" diff --git a/hellmood_memories/multiply_s8x8.s b/hellmood_memories/multiply_s8x8.s index bed139fe..c04f449e 100644 --- a/hellmood_memories/multiply_s8x8.s +++ b/hellmood_memories/multiply_s8x8.s @@ -1,7 +1,6 @@ 
;================================================= ; M1 * M2 multiply_s8x8: - stx TEMP ; save as we trash it lda M2 eor M1 ; calc if we need to adjust at end @@ -50,6 +49,5 @@ negate_result: done_result: sta M2 - ldx TEMP rts diff --git a/hellmood_memories/multiply_u8x8.s b/hellmood_memories/multiply_u8x8.s index 15645500..562da868 100644 --- a/hellmood_memories/multiply_u8x8.s +++ b/hellmood_memories/multiply_u8x8.s @@ -33,6 +33,8 @@ ; Result: M2:M1 ; multiply_u8x8: + stx TEMP + lda M1 sta sm1a+1 ; 3 sta sm3a+1 ; 3 @@ -53,4 +55,5 @@ sm4a: sbc square2_hi,X sta M2 + ldx TEMP rts diff --git a/hellmood_memories/sier.s b/hellmood_memories/sier.s index ea465ba5..969dd264 100644 --- a/hellmood_memories/sier.s +++ b/hellmood_memories/sier.s @@ -3,9 +3,10 @@ ; by deater (Vince Weaver) ; Zero Page -BASL = $28 -BASH = $29 +GBASL = $26 +GBASH = $27 H2 = $2C +MASK = $2F COLOR = $30 BH = $EF @@ -165,7 +166,8 @@ color_done: sta COLOR txa ; A==Y1 ; 1 - jsr PLOT ; (X2,Y1) ; 3 +; jsr PLOT ; (X2,Y1) ; 3 + jsr fast_plot dey ; 1 bpl xloop ; 2 @@ -176,6 +178,13 @@ color_done: bmi sierpinski_forever ; 2 +; for grabbing screenshot +;check_key: +; bit KEYPRESS +; bpl check_key +; jmp sierpinski_forever ; 2 + + @@ -260,18 +269,26 @@ genxloop: +.include "fast_plot.s" .include "multiply_tables.s" ;.include "multiply_s16x16.s" .include "multiply_u16x16_mod.s" ; original -; + plot is $57 cycles (87), so 167040 to draw screen +; + plot is $5d cycles (93), so 178560 to draw screen ; + $169 (361) to calculate -; + 693120+167040 = 860k to draw screen (~1fps) +; + 693120+178560 = 872k to draw screen (~1fps) ; with lookup table: -; + plot is $57 cycles (87), so 167040 to draw screen +; + plot is $5d cycles (93), so 178560 to draw screen ; + $3a (58) to calculate ; + $2e5c (11,868) to setup tables -; + 111360+167040+11868=290k (~3fps) +; + 111360+178560+11868=302k (~3fps) + +; with lookup table+inline fast_plot +; + plot is $31 cycles (49), so 94080 to draw screen +; + $3a (58) to calculate +; +
$2e5c (11,868) to setup tables +; + 111360+94080+11868=217k (~4fps) +