mirror of
https://github.com/deater/dos33fsprogs.git
synced 2024-12-27 02:31:00 +00:00
memories: add fast plotting to sierpinski
This commit is contained in:
parent
749850b960
commit
931e55fb5d
@ -295,18 +295,37 @@ static int fx2(int xx, int yy, int xprime) {
|
||||
|
||||
/* parallax checkerboard */
|
||||
/* Placeholder effect routine: ignores xx, yy and xprime and always returns 0. */
static int fx3(int xx,int yy,int xprime) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
dx=((yy&0xff)<<8) | (xprime&0xff);
|
||||
unsigned short ax,bx,cx,dx;
|
||||
int cf=0,zf=0,result;
|
||||
|
||||
cx=frame; // mov cx,bp ; set init point to time
|
||||
bx=-16; // mov bx,-16 ; limit to 16 iterations
|
||||
fx3L:
|
||||
cx=cx+(yy*320)+xx; // add cx, di ; offset by screenpointer
|
||||
ax=819; // mov ax,819 ; magic, related to Rrrola
|
||||
imul_16(cx); // imul cx ; get X',Y' in DX
|
||||
cx=cx+(yy*40)+xx;
|
||||
// cx=cx+(yy*320)+xx; // add cx, di ; offset by screenpointer
|
||||
// ax=819; // mov ax,819 ; magic, related to Rrrola
|
||||
// 819 * 320 = 0x3ffc0
|
||||
// 819 * 1 = 0x333
|
||||
// want Z*1 = 0x40
|
||||
// 0x333 = cccd/40 (/64)
|
||||
|
||||
// 40=$28 48= $30
|
||||
// 1 = $0001
|
||||
// 39 =$0027
|
||||
// 40= $0100 6.4
|
||||
|
||||
// 65536/40 = 1638.4 / 40 =
|
||||
// 28f5
|
||||
|
||||
ax=0x28f5;
|
||||
|
||||
result=ax*cx;
|
||||
// ax=result&0xffff;
|
||||
//dx=(result>>16);
|
||||
dx=(result>>24);
|
||||
|
||||
//imul_16(cx); // imul cx ; get X',Y' in DX
|
||||
cf=dx&1; // ror dx,1 ; set carry flag on "hit"
|
||||
dx=dx>>1;
|
||||
if (cf) {
|
||||
@ -328,69 +347,46 @@ fx3L:
|
||||
|
||||
return ax;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* sierpinski rotozoomer */
|
||||
/* Placeholder effect routine: ignores xx, yy and xprime and always returns 0. */
static int fx4(int xx, int yy, int xprime) {
|
||||
return 0;
|
||||
}
|
||||
#if 0
|
||||
unsigned char dl,dh,bh,al;
|
||||
|
||||
dx=((yy&0xff)<<8) | (xprime&0xff);
|
||||
unsigned char dh,bh;
|
||||
unsigned short color,t,xsext;
|
||||
int temp;
|
||||
|
||||
dl=dx&0xff; dh=(dx>>8)&0xff;
|
||||
t=frame-2048; // lea cx,[bp-2048] ; center time to pass zero
|
||||
t=t<<3; // sal cx,3 ; speed up by factor of 8!
|
||||
|
||||
cx=frame-2048; // lea cx,[bp-2048] ; center time to pass zero
|
||||
cx=cx<<3; // sal cx,3 ; speed up by factor of 8!
|
||||
ax=(dh&0xff); // movzx ax,dh ; get X into AL
|
||||
// movsx dx,dl ; get Y into DL
|
||||
if (dl&0x80) {
|
||||
dx|=0xff00;
|
||||
}
|
||||
else {
|
||||
dx&=0x00ff;
|
||||
}
|
||||
yy*=4;
|
||||
|
||||
bx=ax; // mov bx,ax ; save X in BX
|
||||
imul_16_bx(cx); // imul bx,cx ; BX=X*T
|
||||
/* sign extend X */
|
||||
xsext=xprime*8; // get X into DL
|
||||
if (xsext&0x80*8) xsext|=0xff00*8;
|
||||
else xsext&=0x00ff*8;
|
||||
|
||||
/* bl=bx&0xff; */ bh=(bx>>8)&0xff;
|
||||
dl=dx&0xff; dh=(dx>>8)&0xff;
|
||||
temp=yy*t; // temp=Y*T
|
||||
bh=(temp>>8)+xsext; // bh=((y*t)/256)+X
|
||||
|
||||
bh=bh+dl; // add bh,dl ; bh=x*t/256+Y
|
||||
temp=xsext*t; // temp=X*T
|
||||
dh=(temp>>8)&0xff; // dh=(X*T/256)
|
||||
|
||||
imul_16_dx(cx); // imul dx,cx ; dx=Y*T
|
||||
color=(yy-dh)&bh; // color=(Y-(X*T/256))&(Y*T/256+X)
|
||||
|
||||
dl=dx&0xff; dh=(dx>>8)&0xff;
|
||||
// and al,252 ; thicker sierpinski
|
||||
|
||||
// sub al,dh ; al=X-Y*T/256
|
||||
al=ax&0xff; // ah=(ax>>8)&0xff;
|
||||
al=al-dh;
|
||||
if ((color&252)==0) {
|
||||
color=0x9; // otherwise: a nice orange
|
||||
}
|
||||
else {
|
||||
color=0; // leave black if not sierpinksi
|
||||
}
|
||||
|
||||
// and al,bh ; AL=(X-Y*T/256)&(x*T/256+Y)
|
||||
al=al&bh;
|
||||
al=al&252; // and al,252 ; thicker sierpinksi
|
||||
if (al==0) zf=1;
|
||||
else zf=0;
|
||||
cf=0; of=0;
|
||||
// salc ; set pixel value to black
|
||||
if (cf==0) al=0;
|
||||
else al=0xff;
|
||||
return color;
|
||||
|
||||
/* NOTE: remove the line below and the background becomes a rainbow */
|
||||
ax=al;
|
||||
// jnz fx4q ; leave black if not sierpinski
|
||||
if (zf==0) goto fx4q;
|
||||
|
||||
ax=ax&0xff00; // mov al,0x2a ; otherwise: a nice orange
|
||||
ax|=0x2a;
|
||||
fx4q:
|
||||
;
|
||||
return ax;
|
||||
}
|
||||
|
||||
#endif
|
||||
/* raycast bent tunnel */
|
||||
static int fx5(int xx, int yy, int xprime) {
|
||||
|
||||
@ -543,7 +539,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
// frame=0x13;
|
||||
|
||||
frame=2*512;
|
||||
frame=3*512;
|
||||
|
||||
while(1) {
|
||||
for(yy=0;yy<48;yy++) {
|
||||
|
@ -307,87 +307,6 @@ static void imul_16(short value) {
|
||||
|
||||
}
|
||||
|
||||
/* signed multiply */
|
||||
static void imul_16_bx(short value) {
|
||||
|
||||
int result;
|
||||
short src;
|
||||
|
||||
src=bx;
|
||||
|
||||
result=src*value;
|
||||
|
||||
// printf("imul: %d*%d=%d ",src,value,result);
|
||||
|
||||
bx=(result&0xffff);
|
||||
|
||||
if (bx==result) {
|
||||
cf=0;
|
||||
of=0;
|
||||
}
|
||||
else {
|
||||
cf=1;
|
||||
of=1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* signed multiply */
|
||||
static void imul_16_dx(short value) {
|
||||
|
||||
int result;
|
||||
short src;
|
||||
|
||||
src=dx;
|
||||
|
||||
result=src*value;
|
||||
|
||||
// printf("imul: %d*%d=%d ",src,value,result);
|
||||
|
||||
dx=(result&0xffff);
|
||||
|
||||
if (dx==result) {
|
||||
cf=0;
|
||||
of=0;
|
||||
}
|
||||
else {
|
||||
cf=1;
|
||||
of=1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
/* unsigned divide */
|
||||
static void div_8(unsigned char value) {
|
||||
|
||||
unsigned char r,q;
|
||||
unsigned int result,remainder;
|
||||
|
||||
// printf("Dividing %d (%x) by %d (%x): ",ax,ax,value,value);
|
||||
|
||||
if (value==0) {
|
||||
printf("Divide by zero!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
result=ax/value;
|
||||
remainder=ax%value;
|
||||
|
||||
q=result;
|
||||
r=remainder;
|
||||
|
||||
// printf("Result: q=%d r=%d\n",q,r);
|
||||
|
||||
ax=(r<<8)|(q&0xff);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void push(int value) {
|
||||
//printf("Pushing %x\n",value);
|
||||
stack[sp]=value;
|
||||
@ -468,92 +387,62 @@ static int fx2(int xx, int yy, int xprime) {
|
||||
/* parallax checkerboard */
|
||||
static int fx3(int xx,int yy,int xprime) {
|
||||
|
||||
dx=((yy&0xff)<<8) | (xprime&0xff);
|
||||
unsigned short color;
|
||||
|
||||
cx=frame; // mov cx,bp ; set init point to time
|
||||
bx=-16; // mov bx,-16 ; limit to 16 iterations
|
||||
fx3L:
|
||||
cx=cx+(yy*320)+xx; // add cx, di ; offset by screenpointer
|
||||
ax=819; // mov ax,819 ; magic, related to Rrrola
|
||||
// ax=0x333; // mov ax,819 ; magic, related to Rrrola
|
||||
ax=0xcccd/64;
|
||||
imul_16(cx); // imul cx ; get X',Y' in DX
|
||||
cf=dx&1; // ror dx,1 ; set carry flag on "hit"
|
||||
dx=dx>>1;
|
||||
if (cf) {
|
||||
dx|=0x8000;
|
||||
}
|
||||
else {
|
||||
dx&=0x7fff;
|
||||
}
|
||||
|
||||
bx++; // inc bx ; increment iteration count
|
||||
if (bx==0) zf=1;// does not affect carry flag
|
||||
else zf=0;
|
||||
|
||||
// check bottom bit of top word of multiply
|
||||
// ja fx3L ; loop until "hit" or "iter=max"
|
||||
// jump above, if cf==0 and zf==0
|
||||
if ((cf==0) && (zf==0)) goto fx3L;
|
||||
|
||||
ax=bx+31; // lea ax,[bx+32] ; map value to standard gray scale
|
||||
if ((bx!=0) && ((dx&1)==0)) goto fx3L;
|
||||
|
||||
color=bx+31; // lea ax,[bx+32] ; map value to standard gray scale
|
||||
//printf("%d %d\n",ax,bx);
|
||||
|
||||
return ax;
|
||||
return color;
|
||||
}
|
||||
|
||||
/* sierpinski rotozoomer */
|
||||
static int fx4(int xx, int yy, int xprime) {
|
||||
|
||||
unsigned char dl,dh,bh,al;
|
||||
unsigned char dh,bh;
|
||||
unsigned short color,t,xsext;
|
||||
int temp;
|
||||
|
||||
dx=((yy&0xff)<<8) | (xprime&0xff);
|
||||
t=frame-2048; // lea cx,[bp-2048] ; center time to pass zero
|
||||
t=t<<3; // sal cx,3 ; speed up by factor of 8!
|
||||
|
||||
dl=dx&0xff; dh=(dx>>8)&0xff;
|
||||
/* sign extend X */
|
||||
xsext=xprime; // get X into DL
|
||||
if (xsext&0x80) xsext|=0xff00;
|
||||
else xsext&=0x00ff;
|
||||
|
||||
cx=frame-2048; // lea cx,[bp-2048] ; center time to pass zero
|
||||
cx=cx<<3; // sal cx,3 ; speed up by factor of 8!
|
||||
ax=(dh&0xff); // movzx ax,dh ; get X into AL
|
||||
// movsx dx,dl ; get Y into DL
|
||||
if (dl&0x80) {
|
||||
dx|=0xff00;
|
||||
temp=yy*t; // temp=Y*T
|
||||
bh=(temp>>8)+xsext; // bh=((y*t)/256)+X
|
||||
|
||||
temp=xsext*t; // temp=X*T
|
||||
dh=(temp>>8)&0xff; // dh=(X*T/256)
|
||||
|
||||
color=(yy-dh)&bh; // color=(Y-(X*T/256))&(Y*T/256+X)
|
||||
|
||||
// and al,252 ; thicker sierpinski
|
||||
if ((color&252)==0) {
|
||||
color=0x2a; // otherwise: a nice orange
|
||||
}
|
||||
else {
|
||||
dx&=0x00ff;
|
||||
color=0; // leave black if not sierpinksi
|
||||
}
|
||||
|
||||
bx=ax; // mov bx,ax ; save X in BX
|
||||
imul_16_bx(cx); // imul bx,cx ; BX=X*T
|
||||
|
||||
/* bl=bx&0xff; */ bh=(bx>>8)&0xff;
|
||||
dl=dx&0xff; dh=(dx>>8)&0xff;
|
||||
|
||||
bh=bh+dl; // add bh,dl ; bh=x*t/256+Y
|
||||
|
||||
imul_16_dx(cx); // imul dx,cx ; dx=Y*T
|
||||
|
||||
dl=dx&0xff; dh=(dx>>8)&0xff;
|
||||
|
||||
// sub al,dh ; al=X-Y*T/256
|
||||
al=ax&0xff; // ah=(ax>>8)&0xff;
|
||||
al=al-dh;
|
||||
|
||||
// and al,bh ; AL=(X-Y*T/256)&(x*T/256+Y)
|
||||
al=al&bh;
|
||||
al=al&252; // and al,252 ; thicker sierpinksi
|
||||
if (al==0) zf=1;
|
||||
else zf=0;
|
||||
cf=0; of=0;
|
||||
// salc ; set pixel value to black
|
||||
if (cf==0) al=0;
|
||||
else al=0xff;
|
||||
|
||||
/* NOTE: remove the line below and the background becomes a rainbow */
|
||||
ax=al;
|
||||
// jnz fx4q ; leave black if not sierpinski
|
||||
if (zf==0) goto fx4q;
|
||||
|
||||
ax=ax&0xff00; // mov al,0x2a ; otherwise: a nice orange
|
||||
ax|=0x2a;
|
||||
fx4q:
|
||||
;
|
||||
return ax;
|
||||
return color;
|
||||
}
|
||||
|
||||
|
||||
@ -740,7 +629,7 @@ int main(int argc, char **argv) {
|
||||
// frame=0x13;
|
||||
es=0xa000-10;
|
||||
|
||||
frame=2*512;
|
||||
frame=3*512;
|
||||
|
||||
while(1) {
|
||||
for(yy=0;yy<200;yy++) {
|
||||
|
@ -48,7 +48,7 @@ plane.o: plane.s
|
||||
SIER: sier.o
|
||||
ld65 -o SIER sier.o -C ../linker_scripts/apple2_1000.inc
|
||||
|
||||
sier.o: sier.s
|
||||
sier.o: sier.s fast_plot.s
|
||||
ca65 -o sier.o sier.s -l sier.lst
|
||||
|
||||
###
|
||||
|
@ -2,62 +2,64 @@
|
||||
; roughly $4D (77) cycles
|
||||
; note: ROM plot routine takes $5D (93) cycles
|
||||
|
||||
|
||||
; original, with SMC code = 77 cycles
|
||||
; using indirect-Y/GBASL = 69 cycles
|
||||
; split gr_offsets table = 61 cycles
|
||||
|
||||
|
||||
; color in COLOR
|
||||
; horiz=y, vert=A (A trashed, XY Saved)
|
||||
|
||||
|
||||
fast_plot:
|
||||
stx TEMPX ; 3
|
||||
|
||||
ror ; get low bit in carry ; 2
|
||||
php ; store for later ; 3
|
||||
asl ; 2
|
||||
lsr ; get low bit in carry ; 2
|
||||
tax ; 2
|
||||
lda gr_offsets,X ; 4+
|
||||
sta plot_smc+1 ; 4
|
||||
sta load_old_smc+1 ; 4
|
||||
lda gr_offsets+1,X ; 4+
|
||||
sta plot_smc+2 ; 4
|
||||
sta load_old_smc+2 ; 4
|
||||
lda gr_offsets_l,X ; 4+
|
||||
sta GBASL ; 3
|
||||
lda gr_offsets_h,X ; 4+
|
||||
sta GBASH ; 3
|
||||
;===
|
||||
; 36
|
||||
; 21
|
||||
|
||||
;4->3,4->3,4->0,4->0, 4->5, 5->6
|
||||
|
||||
load_old_smc:
|
||||
lda $400,Y ; 4+
|
||||
plp ; 4
|
||||
bcs plot_bottom ; 2/3t
|
||||
;=====
|
||||
; 11
|
||||
plot_top:
|
||||
and #$f0 ; 2
|
||||
sta TEMP ; 3
|
||||
lda COLOR ; 3
|
||||
and #$0f ; 2
|
||||
ora TEMP ; 3
|
||||
jmp plot_smc ; 3
|
||||
;====
|
||||
; 16
|
||||
bcs plot_top ; 2/3t
|
||||
plot_bottom:
|
||||
and #$0f ; 2
|
||||
and #$0f ; 3
|
||||
sta TEMP ; 3
|
||||
lda COLOR ; 3
|
||||
and #$f0 ; 2
|
||||
lda #$f0 ; 2
|
||||
and (GBASL),Y ; 5
|
||||
jmp plot_common ; 3
|
||||
plot_top:
|
||||
and #$f0 ; 3
|
||||
sta TEMP ; 3
|
||||
lda #$0f ; 2
|
||||
and (GBASL),Y ; 5
|
||||
|
||||
plot_common:
|
||||
ora TEMP ; 3
|
||||
plot_smc:
|
||||
sta $400,Y ; 5
|
||||
sta (GBASL),Y ; 6
|
||||
;====
|
||||
; 31
|
||||
|
||||
ldx TEMPX ; 3
|
||||
rts ; 6
|
||||
;====
|
||||
; 14
|
||||
; 9
|
||||
;.align $100
|
||||
|
||||
gr_offsets:
|
||||
.word $400,$480,$500,$580,$600,$680,$700,$780
|
||||
.word $428,$4a8,$528,$5a8,$628,$6a8,$728,$7a8
|
||||
.word $450,$4d0,$550,$5d0,$650,$6d0,$750,$7d0
|
||||
|
||||
gr_offsets_l:
|
||||
.byte $00,$80,$00,$80,$00,$80,$00,$80
|
||||
.byte $28,$a8,$28,$a8,$28,$a8,$28,$a8
|
||||
.byte $50,$d0,$50,$d0,$50,$d0,$50,$d0
|
||||
|
||||
gr_offsets_h:
|
||||
.byte $4,$4,$5,$5,$6,$6,$7,$7
|
||||
.byte $4,$4,$5,$5,$6,$6,$7,$7
|
||||
.byte $4,$4,$5,$5,$6,$6,$7,$7
|
||||
|
||||
gr_offsets_end:
|
||||
|
||||
;.assert >gr_offsets = >gr_offsets_end, error, "gr_offsets crosses page"
|
||||
|
@ -1,7 +1,6 @@
|
||||
;=================================================
|
||||
; M1 * M2
|
||||
multiply_s8x8:
|
||||
stx TEMP ; save as we trash it
|
||||
|
||||
lda M2
|
||||
eor M1 ; calc if we need to adjust at end
|
||||
@ -50,6 +49,5 @@ negate_result:
|
||||
done_result:
|
||||
sta M2
|
||||
|
||||
ldx TEMP
|
||||
rts
|
||||
|
||||
|
@ -33,6 +33,8 @@
|
||||
; Result: M2:M1
|
||||
;
|
||||
multiply_u8x8:
|
||||
stx TEMP
|
||||
|
||||
lda M1
|
||||
sta sm1a+1 ; 3
|
||||
sta sm3a+1 ; 3
|
||||
@ -53,4 +55,5 @@ sm4a:
|
||||
sbc square2_hi,X
|
||||
sta M2
|
||||
|
||||
ldx TEMP
|
||||
rts
|
||||
|
@ -3,9 +3,10 @@
|
||||
; by deater (Vince Weaver) <vince@deater.net>
|
||||
|
||||
; Zero Page
|
||||
BASL = $28
|
||||
BASH = $29
|
||||
GBASL = $26
|
||||
GBASH = $27
|
||||
H2 = $2C
|
||||
MASK = $2F
|
||||
COLOR = $30
|
||||
|
||||
BH = $EF
|
||||
@ -165,7 +166,8 @@ color_done:
|
||||
sta COLOR
|
||||
|
||||
txa ; A==Y1 ; 1
|
||||
jsr PLOT ; (X2,Y1) ; 3
|
||||
; jsr PLOT ; (X2,Y1) ; 3
|
||||
jsr fast_plot
|
||||
|
||||
dey ; 1
|
||||
bpl xloop ; 2
|
||||
@ -176,6 +178,13 @@ color_done:
|
||||
bmi sierpinski_forever ; 2
|
||||
|
||||
|
||||
; for grabbing screenshot
|
||||
;check_key:
|
||||
; bit KEYPRESS
|
||||
; bpl check_key
|
||||
; jmp sierpinski_forever ; 2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -260,18 +269,26 @@ genxloop:
|
||||
|
||||
|
||||
|
||||
.include "fast_plot.s"
|
||||
|
||||
.include "multiply_tables.s"
|
||||
;.include "multiply_s16x16.s"
|
||||
.include "multiply_u16x16_mod.s"
|
||||
|
||||
; original
|
||||
; + plot is $57 cycles (87), so 167040 to draw screen
|
||||
; + plot is $5d cycles (93), so 178560 to draw screen
|
||||
; + $169 (361) to calculate
|
||||
; + 693120+167040 = 860k to draw screen (~1fps)
|
||||
; + 693120+178560 = 872k to draw screen (~1fps)
|
||||
|
||||
; with lookup table:
|
||||
; + plot is $57 cycles (87), so 167040 to draw screen
|
||||
; + plot is $5d cycles (93), so 178560 to draw screen
|
||||
; + $3a (58) to calculate
|
||||
; + $2e5c (11,868) to setup tables
|
||||
; + 111360+167040+11868=290k (~3fps)
|
||||
; + 111360+178560+11868=302k (~3fps)
|
||||
|
||||
; with lookup table+inline fast_plot
|
||||
; + plot is $31 cycles (49), so 94080 to draw screen
|
||||
; + $3a (58) to calculate
|
||||
; + $2e5c (11,868) to setup tables
|
||||
; + 111360+94080+11868=217k (~4fps)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user