memories: working on tilted plane

This commit is contained in:
Vince Weaver 2020-05-12 14:58:40 -04:00
parent 25436f888a
commit d6aed2067d
8 changed files with 463 additions and 127 deletions

View File

@ -233,43 +233,25 @@ static short pop(void) {
/* tilted plane */
/* DH=Y, DL=X */
static int fx0(int xx, int yy, int xprime) {
return 0;
}
#if 0
char ah,al,dh,dl;
unsigned short temp;
unsigned short scaled;
int color;
ax=0x1329; // mov ax,0x1329 init
// 0x1329 = 4905? 200*24.5 40*24.5=981=3d5
al=ax&0xff; ah=(ax>>8)&0xff;
dl=xprime; dh=yy;
yy=yy+0x10;//0x29; // add dh,al ; prevent divide overflow
scaled=((0x3d5/yy)&0xff); // div dh ; reverse divide AL=C/Y'
color=((signed char)((xprime-20)&0xff))*((signed char)(scaled&0xff));
dh=dh+al; // add dh,al ; prevent divide overflow
div_8(dh); // div dh ; reverse divide AL=C/Y'
scaled-=frame;
dx=((dh&0xff)<<8)|dl;
color=(color>>6)&0xff;
color^=(scaled&0xff);
color&=0x1c; // map colors
temp=ax;
ax=dx; dx=temp; // xchg dx,ax ; DL=C/Y' AL=X
dl=dx&0xff; dh=(dx>>8)&0xff;
imul_8(dl); // imul dl
dx=dx-frame; // sub dx,bp
dl=dx&0xff;
ah=(ax>>8)&0xff;
ah=ah^dl; // xor ah,dl
al=ah; // mov al,ah
ax=((ah&0xff)<<8)|(al&0xff);
ax&=0xff1c; // and al,4+8+16
return ax;
return color;
}
#endif
/* circles? */
/* DH=Y, DL=X */
static int fx1(int xx, int yy, int xprime) {
@ -559,7 +541,9 @@ int main(int argc, char **argv) {
ram[DRAW_PAGE]=0;
frame=0x13;
// frame=0x13;
frame=2*512;
while(1) {
for(yy=0;yy<48;yy++) {
@ -571,12 +555,12 @@ int main(int argc, char **argv) {
which=frame/512;
switch (which&0xff) {
case 0: color=fx5(xx,yy,xprime); break;
case 0: color=fx2(xx,yy,xprime); break;
case 1: color=fx1(xx,yy,xprime); break;
case 2: color=fx0(xx,yy,xprime); break;
case 3: color=fx3(xx,yy,xprime); break;
case 4: color=fx4(xx,yy,xprime); break;
case 5: color=fx2(xx,yy,xprime); break;
case 5: color=fx5(xx,yy,xprime); break;
case 6: color=fx6(xx,yy,xprime); break;
case 7: return 0;
default: printf("Trying effect %d\n",which);

View File

@ -210,6 +210,8 @@ static void write_framebuffer(int address, int value) {
}
#if 0
/* unsigned multiply */
static void mul_16(unsigned short value) {
unsigned int result;
@ -238,7 +240,9 @@ static void mul_16(unsigned short value) {
}
/*
#endif
/*
static void imul(short value) {
int result;
@ -355,7 +359,7 @@ static void imul_16_dx(short value) {
#if 0
/* unsigned divide */
static void div_8(unsigned char value) {
@ -382,7 +386,7 @@ static void div_8(unsigned char value) {
}
#endif
static void push(int value) {
//printf("Pushing %x\n",value);
@ -407,35 +411,19 @@ static short pop(void) {
/* DH=Y, DL=X */
static int fx0(int xx, int yy, int xprime) {
char ah,al,dh,dl;
unsigned short temp;
unsigned short scaled;
int color;
ax=0x1329; // mov ax,0x1329 init
al=0x29; ah=0x13;
dl=xprime; dh=yy;
yy=yy+0x29; // add dh,al ; prevent divide overflow
scaled=((0x1329/yy)&0xff); // div dh ; reverse divide AL=C/Y'
color=((signed char)(xprime&0xff))*((signed char)(scaled&0xff));
dh=dh+al; // add dh,al ; prevent divide overflow
div_8(dh); // div dh ; reverse divide AL=C/Y'
scaled-=frame;
dx=((dh&0xff)<<8)|dl;
temp=ax;
ax=dx; dx=temp; // xchg dx,ax ; DL=C/Y' AL=X
dl=dx&0xff; dh=(dx>>8)&0xff;
imul_8(dl); // imul dl
dx=dx-frame; // sub dx,bp
dl=dx&0xff;
ah=(ax>>8)&0xff;
ah=ah^dl; // xor ah,dl
al=ah; // mov al,ah
color=((ah&0xff)<<8)|(al&0xff);
color&=0x1c; // and al,4+8+16
color=(color>>8)&0xff;
color^=(scaled&0xff);
color&=0x1c; // map colors
return color;
}
@ -679,6 +667,68 @@ fx6q: ;
}
/* raycast bent tunnel */
/* no multiply */
/*
 * Work-in-progress port of the assembly "bent tunnel" effect.
 *
 * The register-by-register translation below is compiled out with #if 0,
 * so at present the function ignores its arguments and simply returns the
 * current value of ax (declared outside this function).
 *
 * xx, yy, xprime: pixel coordinates supplied by the caller (unused while
 *                 the body is disabled).
 *
 * NOTE(review): the disabled body still references ch, cf, zf, bx and
 * frame, none of which are declared here, and bl is read before being
 * set -- presumably they are file-level emulated registers; confirm
 * before enabling.
 */
static int fx7(int xx, int yy, int xprime) {
#if 0
	unsigned char al,ah,bl,bh,cl,dl,dh,tb=0;
	unsigned short bp;
	// dx=((yy&0xff)<<8) | (xprime&0xff);
	// dx=y
	// bp=x
	dh=0;
	dl=yy; // xor dx,dx
	bp=xprime;
	cl=80; // mov cl,80
	ch=0; // mov ch,0
	ah=0; // xor ax,ax
	al=0;
	bh=0; // xor bx,bx
	bx=0;
L:
	ch=ch-dh; // sub ch,dh ah/ch = x
	ah=ah-0-cf; // sbb ah,0
	ch=ch+cl; // add ch,cl bend with depth
	ah=ah+0+cf; // adc ah,0
	bl=bl-dl; // sub bl,dl bh/bl=y
	bh=bh-0-cf; // sbb bh,0
	bl=bl+cl; // add bl,cl bend with depth
	bh=bh+0+cf; // adc bh,0
	bl=bl+cl; // add bl,cl bend with depth
	bh=bh+0+cf; // adc bh,0
	al=bh; // mov al,bh leave ah,bh untouched
	al=al^ah; // xor al,ah geometry check
	al+=4; // add al,4 geometry check
	// test al,8 geometry check
	// jnz Q
	/* parenthesized: != binds tighter than &, so the old  */
	/* "al&8!=0" tested bit 0 rather than bit 3            */
	if ((al&8)!=0) goto Q;
	cl--; // dec cl
	if (cl!=0) goto L;
	if ((cl!=0) && (zf==1)) goto L; // loopz L
Q:
	cl=cl-frame; // probably the timer sub cl,[0x46c]
	al=al^cl; // xor al,cl
	// aam 6
	al=al+20; // add al,20
	// stosb
#endif
	return ax;
}
int main(int argc, char **argv) {
int color=0,which,xx,yy,xprime;
@ -687,9 +737,11 @@ int main(int argc, char **argv) {
mode13h_graphics_init();
frame=0x13;
// frame=0x13;
es=0xa000-10;
frame=2*512;
while(1) {
for(yy=0;yy<200;yy++) {
for(xx=0;xx<320;xx++) {
@ -698,7 +750,7 @@ int main(int argc, char **argv) {
/* rrolla multiply by 0xcccd trick */
which=frame/512;
switch (which&0xff) {
switch (which&0x7) {
case 0: color=fx2(xx,yy,xprime); break;
case 1: color=fx1(xx,yy,xprime); break;
case 2: color=fx0(xx,yy,xprime); break;
@ -706,7 +758,7 @@ int main(int argc, char **argv) {
case 4: color=fx4(xx,yy,xprime); break;
case 5: color=fx5(xx,yy,xprime); break;
case 6: color=fx6(xx,yy,xprime); break;
case 7: return 0;
case 7: color=fx7(xx,yy,xprime); break;
default: printf("Trying effect %d\n",which);
}
write_framebuffer((es<<4)+((yy*320)+xx), color);

View File

@ -8,14 +8,17 @@ B2D = ../bmp2dhr/b2d
all: memories.dsk
memories.dsk: HELLO CIRCLES.BAS CIRCLES CHECKERS CHECKERS_SMALL TUNNEL
memories.dsk: HELLO CIRCLES.BAS CIRCLES CHECKERS CHECKERS_SMALL PLANE \
TUNNEL TNM
cp empty.dsk memories.dsk
$(DOS33) -y memories.dsk SAVE A HELLO
$(DOS33) -y memories.dsk SAVE A CIRCLES.BAS
$(DOS33) -y memories.dsk BSAVE -a 0x070 CIRCLES
$(DOS33) -y memories.dsk BSAVE -a 0x070 CHECKERS
$(DOS33) -y memories.dsk BSAVE -a 0x070 CHECKERS_SMALL
$(DOS33) -y memories.dsk BSAVE -a 0x1000 PLANE
$(DOS33) -y memories.dsk BSAVE -a 0x1000 TUNNEL
$(DOS33) -y memories.dsk BSAVE -a 0x1000 TNM
CIRCLES: circles.o
ld65 -o CIRCLES circles.o -C ../linker_scripts/apple2_70_zp.inc
@ -33,12 +36,29 @@ checkers.o: checkers.s
###
PLANE: plane.o
ld65 -o PLANE plane.o -C ../linker_scripts/apple2_1000.inc
plane.o: plane.s
ca65 -o plane.o plane.s -l plane.lst
###
TUNNEL: tunnel.o
ld65 -o TUNNEL tunnel.o -C ../linker_scripts/apple2_1000.inc
tunnel.o: tunnel.s
ca65 -o tunnel.o tunnel.s -l tunnel.lst
###
TNM: tunnel_nm.o
ld65 -o TNM tunnel_nm.o -C ../linker_scripts/apple2_1000.inc
tunnel_nm.o: tunnel_nm.s
ca65 -o tunnel_nm.o tunnel_nm.s -l tunnel_nm.lst
###
@ -62,5 +82,5 @@ CIRCLES.BAS: zooming_circles.bas
clean:
rm -f *~ *.o *.lst HELLO CIRCLES.BAS CIRCLES CHECKERS \
CHECKERS_SMALL TUNNEL
CHECKERS_SMALL PLANE TUNNEL TNM

View File

@ -0,0 +1,55 @@
;=================================================
; M1 * M2
multiply_s8x8:
	; Signed 8x8 multiply built on top of multiply_u8x8.
	;   In:  M1, M2 = signed 8-bit factors
	;   Out: M2:M1  = signed 16-bit product (M2 high, M1 low)
	;        A      = high byte (same value as M2)
	;        X preserved (via TEMP); M1/M2 inputs are overwritten

	stx	TEMP		; save as we trash it

	lda	M2
	eor	M1		; calc if we need to adjust at end
				; (++ vs +- vs -+ vs --)
				; N set when the signs differ
	php			; save status on stack

	; if M1 negative, negate it
	; two's complement negate = invert all bits, then add 1
	lda	M1
	bpl	m1_positive
	eor	#$ff
	clc
	adc	#1
m1_positive:
	sta	M1

	; if M2 negative, negate it
	lda	M2
	bpl	m2_positive
	eor	#$ff
	clc
	adc	#1
m2_positive:
	sta	M2

	;==================
	; unsigned multiply
	jsr	multiply_u8x8

	; done, high result in factor2, low result in factor1
	; (A still holds the high byte on return from multiply_u8x8)

	; adjust to be signed
	; if m1 and m2 positive, good
	; if m1 and m2 negative, good
	; otherwise, negate result
	plp			; restore saved pos/neg value
	bpl	done_result

negate_result:
	; 16-bit negate: M2:M1 = 0 - M2:M1
	sec
	lda	#0
	sbc	M1
	sta	M1		; keep the negated low byte (sta leaves carry alone)
	lda	#0
	sbc	M2		; borrow from the low byte carries through

done_result:
	sta	M2
	ldx	TEMP
	rts

View File

@ -0,0 +1,90 @@
; Fast multiply -- setup tables
; Note for our purposes we only care about 8.8 x 8.8 fixed point
; with 8.8 result, which means we only care about the middle two bytes
; of the 32 bit result. So we disable generation of the high and low byte
; to save some cycles.
;
; The old routine took around 700 cycles for a 16bitx16bit=32bit multiply
; This routine, at an expense of 2kB of lookup tables, takes around 250
; If you reuse a term the next time this drops closer to 200
; This routine was described by Stephen Judd and found
; in The Fridge and in the C=Hacking magazine
; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
; The key thing to note is that
;         (a+b)^2   (a-b)^2
;  a*b =  ------- - -------
;            4         4
; So if you have tables of the squares of 0..511 you can lookup and subtract
; instead of multiplying.
; Table generation: I:0..511
; square1_lo = <((I*I)/4)
; square1_hi = >((I*I)/4)
; square2_lo = <(((I-255)*(I-255))/4)
; square2_hi = >(((I-255)*(I-255))/4)
.ifndef square1_lo
square1_lo = $2000
square1_hi = $2200
square2_lo = $2400
square2_hi = $2600
.endif
; for(i=0;i<512;i++) {
; square1_lo[i]=((i*i)/4)&0xff;
; square1_hi[i]=(((i*i)/4)>>8)&0xff;
; square2_lo[i]=( ((i-255)*(i-255))/4)&0xff;
; square2_hi[i]=(( ((i-255)*(i-255))/4)>>8)&0xff;
; }
init_multiply_tables:
; Fills the four 512-byte quarter-square tables (square1_lo/hi,
; square2_lo/hi) used by multiply_u8x8.  Takes no inputs; trashes A, X, Y.
; The code is self-modifying: it patches the operand of the adc at ml9
; and the high address bytes of the stores at ml0 and ml1.
; NOTE(review): those patched address bytes are never reset, so this
; looks safe to call only once per load -- confirm before calling twice.
; Build the add tables
ldx #$00
txa
.byte $c9 ; CMP #immediate - swallows the TYA below as its operand on the first pass, and clears carry
lb1: tya
adc #$00 ; high byte = Y plus the carry out of the low-byte add
ml1: sta square1_hi,x ; store high byte for entry X (first pass: square1_hi[0]=0)
tay ; keep running high byte in Y
cmp #$40 ; carry = (high byte >= $40); only the carry is used
txa ; A = X
ror ; A = X/2 with that carry rotated into bit 7; carry = low bit of X
ml9: adc #$00 ; add to the running low byte (operand is patched below)
sta ml9+1 ; self-modify: result becomes the adc operand for the next pass
inx ; advance to next entry
ml0: sta square1_lo,x ; store low byte for the entry just advanced to
bne lb1 ; loop until X wraps (256 entries)
inc ml0+2 ; bump high address byte: low-byte stores move up one page
inc ml1+2 ; bump high address byte: high-byte stores move up one page
clc ; c=0
iny ; account for the page crossing in the high byte
bne lb1 ; loop again; falls through once Y wraps to zero
; Build the subtract tables based on the existing one
; square2[$100+x] = square1[x+1] and square2[$ff-x] = square1[x],
; i.e. square2 is square1 mirrored around index 255
ldx #$00
ldy #$ff
second_table:
lda square1_hi+1,x
sta square2_hi+$100,x
lda square1_hi,x
sta square2_hi,y
lda square1_lo+1,x
sta square2_lo+$100,x
lda square1_lo,x
sta square2_lo,y
dey ; Y counts down 255..0
inx ; X counts up 0..255
bne second_table
rts

View File

@ -0,0 +1,56 @@
; Fast multiply
; Note for our purposes we only care about 8.8 x 8.8 fixed point
; with 8.8 result, which means we only care about the middle two bytes
; of the 32 bit result. So we disable generation of the high and low byte
; to save some cycles.
;
; The old routine took around 700 cycles for a 16bitx16bit=32bit multiply
; This routine, at an expense of 2kB of lookup tables, takes around 250
; If you reuse a term the next time this drops closer to 200
; This routine was described by Stephen Judd and found
; in The Fridge and in the C=Hacking magazine
; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
; The key thing to note is that
;         (a+b)^2   (a-b)^2
;  a*b =  ------- - -------
;            4         4
; So if you have tables of the squares of 0..511 you can lookup and subtract
; instead of multiplying.
; Table generation: I:0..511
; square1_lo = <((I*I)/4)
; square1_hi = >((I*I)/4)
; square2_lo = <(((I-255)*(I-255))/4)
; square2_hi = >(((I-255)*(I-255))/4)
; Fast 8x8 bit unsigned multiplication, 16-bit result
; Input: M1xM2
; Result: M2:M1
;
multiply_u8x8:
; Unsigned 8x8 -> 16 multiply by table lookup:
;   M2:M1 = square1[M1+M2] - square2[(255-M1)+M2]
; In:  M1, M2 = unsigned factors
; Out: M1 = low byte, M2 = high byte (also left in A); X is trashed
; Self-modifying: M1 is written into the low address byte of the four
; table accesses below, so the tables must sit on page boundaries
; (the defaults in multiply_tables.s are $2000/$2200/$2400/$2600).
lda M1
sta sm1a+1 ; 3 ; square1_lo + M1
sta sm3a+1 ; 3 ; square1_hi + M1
eor #$ff ; invert the bits for subtracting ; 2
sta sm2a+1 ; 3 ; square2_lo + (255-M1)
sta sm4a+1 ; 3 ; square2_hi + (255-M1)
ldx M2 ; X = second factor, used as the table index
sec ; no borrow going into the 16-bit subtract
sm1a:
lda square1_lo,X
sm2a:
sbc square2_lo,X
sta M1 ; low byte of product
sm3a:
lda square1_hi,X
sm4a:
sbc square2_hi,X ; borrow from the low byte carries through
sta M2 ; high byte of product
rts

128
hellmood_memories/plane.s Normal file
View File

@ -0,0 +1,128 @@
; Tilted Plane, based on the code in Hellmood's Memories
; by deater (Vince Weaver) <vince@deater.net>
; Zero Page
BASL = $28
BASH = $29
H2 = $2C
COLOR = $30
X1 = $F0
X2 = $F1
Y1 = $F2
Y2 = $F3
M1 = $F7
M2 = $F8
TEMP = $FA
TEMPY = $FB
FRAME = $FC
TEMPX = $FD
SCALED = $FE
; Soft Switches
KEYPRESS= $C000
KEYRESET= $C010
SET_GR = $C050 ; Enable graphics
FULLGR = $C052 ; Full screen, no text
PAGE0 = $C054 ; Page0
PAGE1 = $C055 ; Page1
LORES = $C056 ; Enable LORES graphics
; ROM routines
PLOT = $F800 ; plot, horiz=y, vert=A (A trashed, XY Saved)
SETCOL = $F864
TEXT = $FB36 ;; Set text mode
BASCALC = $FBC1
SETGR = $FB40
HOME = $FC58 ;; Clear the text screen
WAIT = $FCA8 ;; delay 1/2(26+27A+5A^2) us
HLINE = $F819
tilted_plane:
; Entry point: draws the tilted-plane effect forever, never returns.
; For every cell (xx=Y register 39..0, yy=X register 47..0):
;   scaled = division[yy]
;   color  = ((xx-20) * scaled) >> 6
;   plot color ((scaled-FRAME) ^ color) & $1C at (xx,yy)
;===================
; init screen
jsr SETGR ; 3
bit FULLGR ; 3
jsr init_multiply_tables ; build the lookup tables multiply_u8x8 reads
plane_forever:
inc FRAME ; 2 ; one step per full-screen redraw (FRAME is never explicitly initialized here)
ldx #47 ; yy ; 2
yloop:
ldy #39 ; xx ; 2
xloop:
; clc
; adc #$10 ; adjust top of screen
lda division,X ; scaled=((0x3d5/yy)&0xff);
; reverse divide AL=C/Y'
sta M1 ; first multiply factor
sta SCALED ; keep a copy, the multiply overwrites M1
; color=((signed char)((xprime-20)&0xff))*((signed char)(scaled&0xff));
tya
sec
sbc #20 ; center horizontally: xx-20 is signed, -20..19
sta M2 ; second multiply factor
jsr multiply_s8x8 ; product returned in M2:M1
lda M1 ; shift the 16-bit product left twice so M2 = product>>6
rol ; bit 7 of low byte -> carry
rol M2 ; carry -> bottom of high byte
rol ; bit 6 of low byte -> carry
rol M2
lda M2
sta COLOR
; fedcba9876543210
; dcba9876
; color=(color>>6)&0xff;
sec
lda SCALED
sbc FRAME ; scaled-=frame;
eor COLOR ; color^=(scaled&0xff);
and #$1C ; color&=0x1c; // map colors
jsr SETCOL ; hand the value to the ROM color routine
txa ; A = yy (row) ; 1
jsr PLOT ; plot at horiz=Y (xx), vert=A (yy) ; 3
dey ; 1
bpl xloop ; 2
dex ; 1
bpl yloop ; 2
bmi plane_forever ; 2 ; X is now $FF so N is set: always taken
; Precomputed "reverse divide" table, one byte per row yy = 0..47:
;   division[yy] = 981 / (yy + 10), truncated      (981 = $3D5)
; e.g. yy=0 -> 98 ($62), yy=47 -> 17 ($11)
division:
.byte $62,$59,$51,$4B,$46,$41,$3D,$39,$36,$33
.byte $31,$2E,$2C,$2A,$28,$27,$25,$24,$23,$21
.byte $20,$1F,$1E,$1D,$1C,$1C,$1B,$1A,$19,$19
.byte $18,$17,$17,$16,$16,$15,$15,$14,$14,$14
.byte $13,$13,$12,$12,$12,$11,$11,$11
.include "multiply_tables.s"
.include "multiply_s8x8.s"
.include "multiply_u8x8.s"

View File

@ -2,6 +2,10 @@
; by deater (Vince Weaver) <vince@deater.net>
; first try (shift/add multiply) = 160 bytes, 14 seconds/frame
; second (lookup table mul) = 251 bytes, 9 seconds/frame
; Zero Page
COLOR = $30
@ -32,6 +36,8 @@ tunnel:
jsr SETGR ; 3
bit FULLGR ; 3
jsr init_multiply_tables ; 3
tunnel_forever:
inc FRAME ; 2
@ -71,8 +77,8 @@ fx5_loop:
;sta M1 ; 2
jsr imul ; 3
lda M2 ; 2
sta VALUE ; 2
; lda M2 ; 2
sta VALUE ; high result in A ; 2
; get xcoord
lda XCOORD ; 2
@ -129,70 +135,15 @@ putpixel:
; A = M1
; DEPTH (preserve) is M2
imul:
stx TEMP ; save as we trash it
sta M1 ; get values in right place
lda DEPTH
sta M2
eor M1 ; calc if we need to adjust at end
; (++ vs +- vs -+ vs --)
php ; save status on stack
jsr multiply_s8x8
; if M1 negative, negate it
lda M1
bpl m1_positive
eor #$ff
clc
adc #0
m1_positive:
sta M1
; if M2 negative, naegate it
lda M2
bpl m2_positive
eor #$ff
clc
adc #0
m2_positive:
sta M2
;==================
; unsigned multiply
; factors in M1 and M2
lda #0
ldx #$8
lsr M1
clc
imul_loop:
bcc no_add
clc
adc M2
no_add:
ror
ror M1
dex
bne imul_loop
sta M2
; done, high result in factor2, low result in factor1
; adjust to be signed
; if m1 and m2 positive, good
; if m1 and m2 negative, good
; otherwise, negate result
plp ; restore saved pos/neg value
bpl done_result
negate_result:
sec
lda #0
sbc M1
lda #0
sbc M2
done_result:
sta M2
ldx TEMP
rts
.include "multiply_tables.s"
.include "multiply_s8x8.s"
.include "multiply_u8x8.s"