memories: add fast plotting to sierpinski

This commit is contained in:
Vince Weaver 2020-05-17 00:17:25 -04:00
parent 749850b960
commit 931e55fb5d
7 changed files with 150 additions and 245 deletions

View File

@ -295,18 +295,37 @@ static int fx2(int xx, int yy, int xprime) {
/* parallax checkerboard */
static int fx3(int xx,int yy,int xprime) {
return 0;
}
#if 0
dx=((yy&0xff)<<8) | (xprime&0xff);
unsigned short ax,bx,cx,dx;
int cf=0,zf=0,result;
cx=frame; // mov cx,bp ; set init point to time
bx=-16; // mov bx,-16 ; limit to 16 iterations
fx3L:
cx=cx+(yy*320)+xx; // add cx, di ; offset by screenpointer
ax=819; // mov ax,819 ; magic, related to Rrrola
imul_16(cx); // imul cx ; get X',Y' in DX
cx=cx+(yy*40)+xx;
// cx=cx+(yy*320)+xx; // add cx, di ; offset by screenpointer
// ax=819; // mov ax,819 ; magic, related to Rrrola
// 819 * 320 = 0x3ffc0
// 819 * 1 = 0x333
// want Z*1 = 0x40
// 0x333 = cccd/40 (/64)
// 40=$28 48= $30
// 1 = $0001
// 39 =$0027
// 40= $0100 6.4
// 65536/40 = 1638.4 / 40 =
// 28f5
ax=0x28f5;
result=ax*cx;
// ax=result&0xffff;
//dx=(result>>16);
dx=(result>>24);
//imul_16(cx); // imul cx ; get X',Y' in DX
cf=dx&1; // ror dx,1 ; set carry flag on "hit"
dx=dx>>1;
if (cf) {
@ -328,69 +347,46 @@ fx3L:
return ax;
}
#endif
/* sierpinski rotozoomer */
static int fx4(int xx, int yy, int xprime) {
return 0;
}
#if 0
unsigned char dl,dh,bh,al;
dx=((yy&0xff)<<8) | (xprime&0xff);
unsigned char dh,bh;
unsigned short color,t,xsext;
int temp;
dl=dx&0xff; dh=(dx>>8)&0xff;
t=frame-2048; // lea cx,[bp-2048] ; center time to pass zero
t=t<<3; // sal cx,3 ; speed up by factor of 8!
cx=frame-2048; // lea cx,[bp-2048] ; center time to pass zero
cx=cx<<3; // sal cx,3 ; speed up by factor of 8!
ax=(dh&0xff); // movzx ax,dh ; get X into AL
// movsx dx,dl ; get Y into DL
if (dl&0x80) {
dx|=0xff00;
}
else {
dx&=0x00ff;
}
yy*=4;
bx=ax; // mov bx,ax ; save X in BX
imul_16_bx(cx); // imul bx,cx ; BX=X*T
/* sign extend X */
xsext=xprime*8; // get X into DL
if (xsext&0x80*8) xsext|=0xff00*8;
else xsext&=0x00ff*8;
/* bl=bx&0xff; */ bh=(bx>>8)&0xff;
dl=dx&0xff; dh=(dx>>8)&0xff;
temp=yy*t; // temp=Y*T
bh=(temp>>8)+xsext; // bh=((y*t)/256)+X
bh=bh+dl; // add bh,dl ; bh=x*t/256+Y
temp=xsext*t; // temp=X*T
dh=(temp>>8)&0xff; // dh=(X*T/256)
imul_16_dx(cx); // imul dx,cx ; dx=Y*T
color=(yy-dh)&bh; // color=(Y-(X*T/256))&(Y*T/256+X)
dl=dx&0xff; dh=(dx>>8)&0xff;
// and al,252 ; thicker sierpinski
// sub al,dh ; al=X-Y*T/256
al=ax&0xff; // ah=(ax>>8)&0xff;
al=al-dh;
if ((color&252)==0) {
color=0x9; // otherwise: a nice orange
}
else {
color=0; // leave black if not sierpinksi
}
// and al,bh ; AL=(X-Y*T/256)&(x*T/256+Y)
al=al&bh;
al=al&252; // and al,252 ; thicker sierpinksi
if (al==0) zf=1;
else zf=0;
cf=0; of=0;
// salc ; set pixel value to black
if (cf==0) al=0;
else al=0xff;
return color;
/* NOTE: remove the line below and the background becomes a rainbow */
ax=al;
// jnz fx4q ; leave black if not sierpinski
if (zf==0) goto fx4q;
ax=ax&0xff00; // mov al,0x2a ; otherwise: a nice orange
ax|=0x2a;
fx4q:
;
return ax;
}
#endif
/* raycast bent tunnel */
static int fx5(int xx, int yy, int xprime) {
@ -543,7 +539,7 @@ int main(int argc, char **argv) {
// frame=0x13;
frame=2*512;
frame=3*512;
while(1) {
for(yy=0;yy<48;yy++) {

View File

@ -307,87 +307,6 @@ static void imul_16(short value) {
}
/*
 * Signed 16-bit multiply into bx (used for "imul bx,reg" style operations).
 *
 * Reads the global bx as a signed 16-bit value, multiplies it by value,
 * and stores the low 16 bits of the product back into bx.
 *
 * Flags: cf and of are both cleared when the full product fits in a
 * signed 16-bit value, and both set to 1 otherwise.
 */
static void imul_16_bx(short value) {

	int result;
	short src;

	src=bx;
	result=src*value;	/* operands promote to int, so no overflow here */

	bx=(result&0xffff);

	/* Range-check the full product rather than comparing it with bx. */
	/* NOTE(review): bx is declared outside this block; if it is an   */
	/* unsigned 16-bit type, the previous test (bx==result) could     */
	/* never match a negative product and treated 32768..65535 as     */
	/* fitting.  This form is correct whatever the signedness of bx.  */
	if ((result>=-32768) && (result<=32767)) {
		cf=0;
		of=0;
	}
	else {
		cf=1;
		of=1;
	}
}
/*
 * Signed 16-bit multiply into dx (used for "imul dx,reg" style operations).
 *
 * Reads the global dx as a signed 16-bit value, multiplies it by value,
 * and stores the low 16 bits of the product back into dx.
 *
 * Flags: cf and of are both cleared when the full product fits in a
 * signed 16-bit value, and both set to 1 otherwise.
 */
static void imul_16_dx(short value) {

	int result;
	short src;

	src=dx;
	result=src*value;	/* operands promote to int, so no overflow here */

	dx=(result&0xffff);

	/* Range-check the full product rather than comparing it with dx. */
	/* NOTE(review): dx is declared outside this block; if it is an   */
	/* unsigned 16-bit type, the previous test (dx==result) could     */
	/* never match a negative product and treated 32768..65535 as     */
	/* fitting.  This form is correct whatever the signedness of dx.  */
	if ((result>=-32768) && (result<=32767)) {
		cf=0;
		of=0;
	}
	else {
		cf=1;
		of=1;
	}
}
#if 0
/*
 * Unsigned divide of the global ax by an 8-bit divisor.
 *
 * On return the low byte of ax holds the quotient (truncated to 8 bits)
 * and the high byte holds the remainder.  A zero divisor prints a
 * message and leaves ax unchanged.
 *
 * Currently compiled out.
 */
static void div_8(unsigned char value) {

	unsigned char quotient,rem;

	if (!value) {
		printf("Divide by zero!\n");
		return;
	}

	/* compute both parts before ax is overwritten */
	quotient=ax/value;
	rem=ax%value;

	ax=(rem<<8)|quotient;
}
#endif
static void push(int value) {
//printf("Pushing %x\n",value);
stack[sp]=value;
@ -468,92 +387,62 @@ static int fx2(int xx, int yy, int xprime) {
/* parallax checkerboard */
static int fx3(int xx,int yy,int xprime) {
dx=((yy&0xff)<<8) | (xprime&0xff);
unsigned short color;
cx=frame; // mov cx,bp ; set init point to time
bx=-16; // mov bx,-16 ; limit to 16 iterations
fx3L:
cx=cx+(yy*320)+xx; // add cx, di ; offset by screenpointer
ax=819; // mov ax,819 ; magic, related to Rrrola
// ax=0x333; // mov ax,819 ; magic, related to Rrrola
ax=0xcccd/64;
imul_16(cx); // imul cx ; get X',Y' in DX
cf=dx&1; // ror dx,1 ; set carry flag on "hit"
dx=dx>>1;
if (cf) {
dx|=0x8000;
}
else {
dx&=0x7fff;
}
bx++; // inc bx ; increment iteration count
if (bx==0) zf=1;// does not affect carry flag
else zf=0;
// check bottom bit of top word of multiply
// ja fx3L ; loop until "hit" or "iter=max"
// jump above, if cf==0 and zf==0
if ((cf==0) && (zf==0)) goto fx3L;
ax=bx+31; // lea ax,[bx+32] ; map value to standard gray scale
if ((bx!=0) && ((dx&1)==0)) goto fx3L;
color=bx+31; // lea ax,[bx+32] ; map value to standard gray scale
//printf("%d %d\n",ax,bx);
return ax;
return color;
}
/* sierpinski rotozoomer */
static int fx4(int xx, int yy, int xprime) {
unsigned char dl,dh,bh,al;
unsigned char dh,bh;
unsigned short color,t,xsext;
int temp;
dx=((yy&0xff)<<8) | (xprime&0xff);
t=frame-2048; // lea cx,[bp-2048] ; center time to pass zero
t=t<<3; // sal cx,3 ; speed up by factor of 8!
dl=dx&0xff; dh=(dx>>8)&0xff;
/* sign extend X */
xsext=xprime; // get X into DL
if (xsext&0x80) xsext|=0xff00;
else xsext&=0x00ff;
cx=frame-2048; // lea cx,[bp-2048] ; center time to pass zero
cx=cx<<3; // sal cx,3 ; speed up by factor of 8!
ax=(dh&0xff); // movzx ax,dh ; get X into AL
// movsx dx,dl ; get Y into DL
if (dl&0x80) {
dx|=0xff00;
temp=yy*t; // temp=Y*T
bh=(temp>>8)+xsext; // bh=((y*t)/256)+X
temp=xsext*t; // temp=X*T
dh=(temp>>8)&0xff; // dh=(X*T/256)
color=(yy-dh)&bh; // color=(Y-(X*T/256))&(Y*T/256+X)
// and al,252 ; thicker sierpinski
if ((color&252)==0) {
color=0x2a; // otherwise: a nice orange
}
else {
dx&=0x00ff;
color=0; // leave black if not sierpinksi
}
bx=ax; // mov bx,ax ; save X in BX
imul_16_bx(cx); // imul bx,cx ; BX=X*T
/* bl=bx&0xff; */ bh=(bx>>8)&0xff;
dl=dx&0xff; dh=(dx>>8)&0xff;
bh=bh+dl; // add bh,dl ; bh=x*t/256+Y
imul_16_dx(cx); // imul dx,cx ; dx=Y*T
dl=dx&0xff; dh=(dx>>8)&0xff;
// sub al,dh ; al=X-Y*T/256
al=ax&0xff; // ah=(ax>>8)&0xff;
al=al-dh;
// and al,bh ; AL=(X-Y*T/256)&(x*T/256+Y)
al=al&bh;
al=al&252; // and al,252 ; thicker sierpinksi
if (al==0) zf=1;
else zf=0;
cf=0; of=0;
// salc ; set pixel value to black
if (cf==0) al=0;
else al=0xff;
/* NOTE: remove the line below and the background becomes a rainbow */
ax=al;
// jnz fx4q ; leave black if not sierpinski
if (zf==0) goto fx4q;
ax=ax&0xff00; // mov al,0x2a ; otherwise: a nice orange
ax|=0x2a;
fx4q:
;
return ax;
return color;
}
@ -740,7 +629,7 @@ int main(int argc, char **argv) {
// frame=0x13;
es=0xa000-10;
frame=2*512;
frame=3*512;
while(1) {
for(yy=0;yy<200;yy++) {

View File

@ -48,7 +48,7 @@ plane.o: plane.s
SIER: sier.o
ld65 -o SIER sier.o -C ../linker_scripts/apple2_1000.inc
sier.o: sier.s
sier.o: sier.s fast_plot.s
ca65 -o sier.o sier.s -l sier.lst
###

View File

@ -2,62 +2,64 @@
; roughly $4D (77) cycles
; note: ROM plot routine takes $5D (93) cycles
; original, with SMC code = 77 cycles
; using indirect-Y/GBASL = 69 cycles
; split gr_offsets table = 61 cycles
; color in COLOR
; horiz=y, vert=A (A trashed, XY Saved)
fast_plot:
stx TEMPX ; 3
ror ; get low bit in carry ; 2
php ; store for later ; 3
asl ; 2
lsr ; get low bit in carry ; 2
tax ; 2
lda gr_offsets,X ; 4+
sta plot_smc+1 ; 4
sta load_old_smc+1 ; 4
lda gr_offsets+1,X ; 4+
sta plot_smc+2 ; 4
sta load_old_smc+2 ; 4
lda gr_offsets_l,X ; 4+
sta GBASL ; 3
lda gr_offsets_h,X ; 4+
sta GBASH ; 3
;===
; 36
; 21
;4->3,4->3,4->0,4->0, 4->5, 5->6
load_old_smc:
lda $400,Y ; 4+
plp ; 4
bcs plot_bottom ; 2/3t
;=====
; 11
plot_top:
and #$f0 ; 2
sta TEMP ; 3
lda COLOR ; 3
and #$0f ; 2
ora TEMP ; 3
jmp plot_smc ; 3
;====
; 16
bcs plot_top ; 2/3t
plot_bottom:
and #$0f ; 2
and #$0f ; 3
sta TEMP ; 3
lda COLOR ; 3
and #$f0 ; 2
lda #$f0 ; 2
and (GBASL),Y ; 5
jmp plot_common ; 3
plot_top:
and #$f0 ; 3
sta TEMP ; 3
lda #$0f ; 2
and (GBASL),Y ; 5
plot_common:
ora TEMP ; 3
plot_smc:
sta $400,Y ; 5
sta (GBASL),Y ; 6
;====
; 31
ldx TEMPX ; 3
rts ; 6
;====
; 14
; 9
;.align $100
gr_offsets:
.word $400,$480,$500,$580,$600,$680,$700,$780
.word $428,$4a8,$528,$5a8,$628,$6a8,$728,$7a8
.word $450,$4d0,$550,$5d0,$650,$6d0,$750,$7d0
gr_offsets_l:
.byte $00,$80,$00,$80,$00,$80,$00,$80
.byte $28,$a8,$28,$a8,$28,$a8,$28,$a8
.byte $50,$d0,$50,$d0,$50,$d0,$50,$d0
gr_offsets_h:
.byte $4,$4,$5,$5,$6,$6,$7,$7
.byte $4,$4,$5,$5,$6,$6,$7,$7
.byte $4,$4,$5,$5,$6,$6,$7,$7
gr_offsets_end:
;.assert >gr_offsets = >gr_offsets_end, error, "gr_offsets crosses page"

View File

@ -1,7 +1,6 @@
;=================================================
; M1 * M2
multiply_s8x8:
stx TEMP ; save as we trash it
lda M2
eor M1 ; calc if we need to adjust at end
@ -50,6 +49,5 @@ negate_result:
done_result:
sta M2
ldx TEMP
rts

View File

@ -33,6 +33,8 @@
; Result: M2:M1
;
multiply_u8x8:
stx TEMP
lda M1
sta sm1a+1 ; 3
sta sm3a+1 ; 3
@ -53,4 +55,5 @@ sm4a:
sbc square2_hi,X
sta M2
ldx TEMP
rts

View File

@ -3,9 +3,10 @@
; by deater (Vince Weaver) <vince@deater.net>
; Zero Page
BASL = $28
BASH = $29
GBASL = $26
GBASH = $27
H2 = $2C
MASK = $2F
COLOR = $30
BH = $EF
@ -165,7 +166,8 @@ color_done:
sta COLOR
txa ; A==Y1 ; 1
jsr PLOT ; (X2,Y1) ; 3
; jsr PLOT ; (X2,Y1) ; 3
jsr fast_plot
dey ; 1
bpl xloop ; 2
@ -176,6 +178,13 @@ color_done:
bmi sierpinski_forever ; 2
; for grabbing screenshot
;check_key:
; bit KEYPRESS
; bpl check_key
; jmp sierpinski_forever ; 2
@ -260,18 +269,26 @@ genxloop:
.include "fast_plot.s"
.include "multiply_tables.s"
;.include "multiply_s16x16.s"
.include "multiply_u16x16_mod.s"
; original
; + plot is $57 cycles (87), so 167040 to draw screen
; + plot is $5d cycles (93), so 178560 to draw screen
; + $169 (361) to calculate
; + 693120+167040 = 860k to draw screen (~1fps)
; + 693120+178560 = 872k to draw screen (~1fps)
; with lookup table:
; + plot is $57 cycles (87), so 167040 to draw screen
; + plot is $5d cycles (93), so 178560 to draw screen
; + $3a (58) to calculate
; + $2e5c (11,868) to setup tables
; + 111360+167040+11868=290k (~3fps)
; + 111360+178560+11868=302k (~3fps)
; with lookup table+inline fast_plot
; + plot is $31 cycles (49), so 94080 to draw screen
; + $3a (58) to calculate
; + $2e5c (11,868) to setup tables
; + 111360+94080+11868=217k (~4fps)