diff --git a/gr-sim/hellmood_memories/hellmood_gr.c b/gr-sim/hellmood_memories/hellmood_gr.c index 5f9bb0aa..a73886d0 100644 --- a/gr-sim/hellmood_memories/hellmood_gr.c +++ b/gr-sim/hellmood_memories/hellmood_gr.c @@ -233,43 +233,25 @@ static short pop(void) { /* tilted plane */ /* DH=Y, DL=X */ static int fx0(int xx, int yy, int xprime) { - return 0; -} -#if 0 - char ah,al,dh,dl; - unsigned short temp; + unsigned short scaled; + int color; - ax=0x1329; // mov ax,0x1329 init + // 0x1329 = 4905, roughly 200*24.5; scaled down to 40*24.5 = 980, and 0x3d5 = 981 is the constant used below - al=ax&0xff; ah=(ax>>8)&0xff; - dl=xprime; dh=yy; + yy=yy+0x10;//0x29; // add dh,al ; prevent divide overflow + scaled=((0x3d5/yy)&0xff); // div dh ; reverse divide AL=C/Y' + color=((signed char)((xprime-20)&0xff))*((signed char)(scaled&0xff)); // imul dl ; signed 8x8 product of (xprime-20) and scaled - dh=dh+al; // add dh,al ; prevent divide overflow - div_8(dh); // div dh ; reverse divide AL=C/Y' + scaled-=frame; // sub dx,bp ; subtract the frame counter - dx=((dh&0xff)<<8)|dl; + color=(color>>6)&0xff; // keep bits 6..13 of the product + color^=(scaled&0xff); // xor ah,dl + color&=0x1c; // map colors - temp=ax; - ax=dx; dx=temp; // xchg dx,ax ; DL=C/Y' AL=X - - dl=dx&0xff; dh=(dx>>8)&0xff; - - imul_8(dl); // imul dl - dx=dx-frame; // sub dx,bp - dl=dx&0xff; - - ah=(ax>>8)&0xff; - ah=ah^dl; // xor ah,dl - al=ah; // mov al,ah - ax=((ah&0xff)<<8)|(al&0xff); - - ax&=0xff1c; // and al,4+8+16 - - return ax; + return color; } -#endif /* circles? 
*/ /* DH=Y, DL=X */ static int fx1(int xx, int yy, int xprime) { @@ -559,7 +541,9 @@ int main(int argc, char **argv) { ram[DRAW_PAGE]=0; - frame=0x13; +// frame=0x13; + + frame=2*512; // initial frame/512 is 2, which selects the case 2 effect (fx0) below while(1) { for(yy=0;yy<48;yy++) { @@ -571,12 +555,12 @@ int main(int argc, char **argv) { which=frame/512; switch (which&0xff) { - case 0: color=fx5(xx,yy,xprime); break; + case 0: color=fx2(xx,yy,xprime); break; case 1: color=fx1(xx,yy,xprime); break; case 2: color=fx0(xx,yy,xprime); break; case 3: color=fx3(xx,yy,xprime); break; case 4: color=fx4(xx,yy,xprime); break; - case 5: color=fx2(xx,yy,xprime); break; + case 5: color=fx5(xx,yy,xprime); break; case 6: color=fx6(xx,yy,xprime); break; case 7: return 0; default: printf("Trying effect %d\n",which); diff --git a/gr-sim/hellmood_memories/hellmood_simple.c b/gr-sim/hellmood_memories/hellmood_simple.c index 68ed494e..96be8771 100644 --- a/gr-sim/hellmood_memories/hellmood_simple.c +++ b/gr-sim/hellmood_memories/hellmood_simple.c @@ -210,6 +210,8 @@ static void write_framebuffer(int address, int value) { } +#if 0 + /* unsigned multiply */ static void mul_16(unsigned short value) { unsigned int result; @@ -238,7 +240,9 @@ static void mul_16(unsigned short value) { } -/* +#endif + +/* static void imul(short value) { int result; @@ -355,7 +359,7 @@ static void imul_16_dx(short value) { - +#if 0 /* unsigned divide */ static void div_8(unsigned char value) { @@ -382,7 +386,7 @@ static void div_8(unsigned char value) { } - +#endif static void push(int value) { //printf("Pushing %x\n",value); @@ -407,35 +411,19 @@ static short pop(void) { /* DH=Y, DL=X */ static int fx0(int xx, int yy, int xprime) { - char ah,al,dh,dl; - unsigned short temp; + unsigned short scaled; int color; - ax=0x1329; // mov ax,0x1329 init - al=0x29; ah=0x13; - dl=xprime; dh=yy; + yy=yy+0x29; // add dh,al ; prevent divide overflow + scaled=((0x1329/yy)&0xff); // div dh ; reverse divide AL=C/Y' + color=((signed char)(xprime&0xff))*((signed char)(scaled&0xff)); // imul dl ; signed 8-bit multiply of X by scaled - 
dh=dh+al; // add dh,al ; prevent divide overflow - div_8(dh); // div dh ; reverse divide AL=C/Y' + scaled-=frame; - dx=((dh&0xff)<<8)|dl; - - temp=ax; - ax=dx; dx=temp; // xchg dx,ax ; DL=C/Y' AL=X - - dl=dx&0xff; dh=(dx>>8)&0xff; - - imul_8(dl); // imul dl - dx=dx-frame; // sub dx,bp - dl=dx&0xff; - - ah=(ax>>8)&0xff; - ah=ah^dl; // xor ah,dl - al=ah; // mov al,ah - color=((ah&0xff)<<8)|(al&0xff); - - color&=0x1c; // and al,4+8+16 + color=(color>>8)&0xff; + color^=(scaled&0xff); + color&=0x1c; // map colors return color; } @@ -679,6 +667,68 @@ fx6q: ; } + + +/* raycast bent tunnel */ +/* no multiply; the port below is still under #if 0, so for now this only returns ax */ +static int fx7(int xx, int yy, int xprime) { + +#if 0 + unsigned char al,ah,bl,bh,cl,dl,dh,tb=0; + unsigned short bp; + +// dx=((yy&0xff)<<8) | (xprime&0xff); + + // dx=y + // bp=x + + dh=0; + dl=yy; // xor dx,dx + bp=xprime; + + cl=80; // mov cl,80 + ch=0; // mov ch,0 + ah=0; // xor ax,ax + al=0; + bh=0; // xor bx,bx + bx=0; + +L: + ch=ch-dh; // sub ch,dh ah/ch = x + ah=ah-0-cf; // sbb ah,0 + ch=ch+cl; // add ch,cl bend with depth + ah=ah+0+cf; // adc ah,0 + + bl=bl-dl; // sub bl,dl bh/bl=y + bh=bh-0-cf; // sbb bh,0 + bl=bl+cl; // add bl,cl bend with depth + bh=bh+0+cf; // adc bh,0 + bl=bl+cl; // add bl,cl bend with depth + bh=bh+0+cf; // adc bh,0 + + al=bh; // mov al,bh leave ah,bh untouched + al=al^ah; // xor al,ah geometry check + al+=4; // add al,4 geometry check + // test al,8 geometry check + // jnz Q + if ((al&8)!=0) goto Q; // parenthesized: != binds tighter than &, so al&8!=0 tested bit 0 + + cl--; // dec cl + if (cl!=0) goto L; + + if ((cl!=0) && (zf==1)) goto L; // loopz L + +Q: + cl=cl-frame; // probably the timer sub cl,[0x46c] + al=al^cl; // xor al,cl + // aam 6 + al=al+20; // add al,20 + // stosb +#endif + + return ax; +} + int main(int argc, char **argv) { int color=0,which,xx,yy,xprime; @@ -687,9 +737,11 @@ int main(int argc, char **argv) { mode13h_graphics_init(); - frame=0x13; +// frame=0x13; es=0xa000-10; + frame=2*512; + while(1) { for(yy=0;yy<200;yy++) { for(xx=0;xx<320;xx++) { @@ -698,7 +750,7 @@ int main(int 
argc, char **argv) { /* rrolla multiply by 0xcccd trick */ which=frame/512; - switch (which&0xff) { + switch (which&0x7) { // effect index wraps modulo 8 case 0: color=fx2(xx,yy,xprime); break; case 1: color=fx1(xx,yy,xprime); break; case 2: color=fx0(xx,yy,xprime); break; @@ -706,7 +758,7 @@ int main(int argc, char **argv) { case 4: color=fx4(xx,yy,xprime); break; case 5: color=fx5(xx,yy,xprime); break; case 6: color=fx6(xx,yy,xprime); break; - case 7: return 0; + case 7: color=fx7(xx,yy,xprime); break; default: printf("Trying effect %d\n",which); } write_framebuffer((es<<4)+((yy*320)+xx), color); diff --git a/hellmood_memories/Makefile b/hellmood_memories/Makefile index 0904b5b2..4daca9f0 100644 --- a/hellmood_memories/Makefile +++ b/hellmood_memories/Makefile @@ -8,14 +8,17 @@ B2D = ../bmp2dhr/b2d all: memories.dsk -memories.dsk: HELLO CIRCLES.BAS CIRCLES CHECKERS CHECKERS_SMALL TUNNEL +memories.dsk: HELLO CIRCLES.BAS CIRCLES CHECKERS CHECKERS_SMALL PLANE \ + TUNNEL TNM cp empty.dsk memories.dsk $(DOS33) -y memories.dsk SAVE A HELLO $(DOS33) -y memories.dsk SAVE A CIRCLES.BAS $(DOS33) -y memories.dsk BSAVE -a 0x070 CIRCLES $(DOS33) -y memories.dsk BSAVE -a 0x070 CHECKERS $(DOS33) -y memories.dsk BSAVE -a 0x070 CHECKERS_SMALL + $(DOS33) -y memories.dsk BSAVE -a 0x1000 PLANE $(DOS33) -y memories.dsk BSAVE -a 0x1000 TUNNEL + $(DOS33) -y memories.dsk BSAVE -a 0x1000 TNM CIRCLES: circles.o ld65 -o CIRCLES circles.o -C ../linker_scripts/apple2_70_zp.inc @@ -33,12 +36,29 @@ checkers.o: checkers.s ### +PLANE: plane.o + ld65 -o PLANE plane.o -C ../linker_scripts/apple2_1000.inc + +plane.o: plane.s multiply_tables.s multiply_s8x8.s multiply_u8x8.s + ca65 -o plane.o plane.s -l plane.lst + +### + TUNNEL: tunnel.o ld65 -o TUNNEL tunnel.o -C ../linker_scripts/apple2_1000.inc -tunnel.o: tunnel.s +tunnel.o: tunnel.s multiply_tables.s multiply_s8x8.s multiply_u8x8.s ca65 -o tunnel.o tunnel.s -l tunnel.lst +### + +TNM: tunnel_nm.o + ld65 -o TNM tunnel_nm.o -C ../linker_scripts/apple2_1000.inc + +tunnel_nm.o: tunnel_nm.s + ca65 -o tunnel_nm.o tunnel_nm.s -l tunnel_nm.lst + + ### @@ -62,5 +82,5 @@ CIRCLES.BAS: zooming_circles.bas 
clean: rm -f *~ *.o *.lst HELLO CIRCLES.BAS CIRCLES CHECKERS \ - CHECKERS_SMALL TUNNEL + CHECKERS_SMALL PLANE TUNNEL TNM diff --git a/hellmood_memories/multiply_s8x8.s b/hellmood_memories/multiply_s8x8.s new file mode 100644 index 00000000..bed139fe --- /dev/null +++ b/hellmood_memories/multiply_s8x8.s @@ -0,0 +1,56 @@ + ;================================================= + ; signed 8x8 multiply: M1 * M2 -> 16-bit result in M2:M1 (high byte also left in A), X preserved via TEMP +multiply_s8x8: + stx TEMP ; save as we trash it + + lda M2 + eor M1 ; calc if we need to adjust at end + ; (++ vs +- vs -+ vs --) + php ; save status on stack + + ; if M1 negative, negate it + lda M1 + bpl m1_positive + eor #$ff + clc + adc #1 ; two's complement negate: invert, then add one +m1_positive: + sta M1 + + ; if M2 negative, negate it + lda M2 + bpl m2_positive + eor #$ff + clc + adc #1 ; two's complement negate: invert, then add one +m2_positive: + sta M2 + + ;================== + ; unsigned multiply + + jsr multiply_u8x8 + + + ; done, high result in factor2, low result in factor1 + + ; adjust to be signed + ; if m1 and m2 positive, good + ; if m1 and m2 negative, good + ; otherwise, negate result + + plp ; restore saved pos/neg value + bpl done_result +negate_result: + sec + lda #0 + sbc M1 + sta M1 ; store the negated low byte too, callers may read M1 + lda #0 + sbc M2 +done_result: + sta M2 + + ldx TEMP + rts + diff --git a/hellmood_memories/multiply_tables.s b/hellmood_memories/multiply_tables.s new file mode 100644 index 00000000..a19beec7 --- /dev/null +++ b/hellmood_memories/multiply_tables.s @@ -0,0 +1,90 @@ +; Fast multiply -- setup tables + + +; Note for our purposes we only care about 8.8 x 8.8 fixed point +; with 8.8 result, which means we only care about the middle two bytes +; of the 32 bit result. So we disable generation of the high and low byte +; to save some cycles. 
+ +; +; The old routine took around 700 cycles for a 16bitx16bit=32bit multiply +; This routine, at an expense of 2kB of lookup tables, takes around 250 +; If you reuse a term the next time this drops closer to 200 + +; This routine was described by Stephen Judd and found +; in The Fridge and in the C=Hacking magazine +; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication + +; The key thing to note is that +; (a+b)^2 (a-b)^2 +; a*b = ------- - -------- +; 4 4 +; So if you have tables of the squares of 0..511 you can lookup and subtract +; instead of multiplying. + +; Table generation: I:0..511 +; square1_lo = <((I*I)/4) +; square1_hi = >((I*I)/4) +; square2_lo = <(((I-255)*(I-255))/4) +; square2_hi = >(((I-255)*(I-255))/4) + + +.ifndef square1_lo +square1_lo = $2000 +square1_hi = $2200 +square2_lo = $2400 +square2_hi = $2600 +.endif + +; for(i=0;i<512;i++) { +; square1_lo[i]=((i*i)/4)&0xff; +; square1_hi[i]=(((i*i)/4)>>8)&0xff; +; square2_lo[i]=( ((i-255)*(i-255))/4)&0xff; +; square2_hi[i]=(( ((i-255)*(i-255))/4)>>8)&0xff; +; } + +init_multiply_tables: ; self-modifying: the ml0/ml1 store addresses are advanced below and not reset + + ; Build the add tables + + ldx #$00 + txa + .byte $c9 ; CMP #immediate - skip TYA and clear carry flag +lb1: tya + adc #$00 ; 0 +ml1: sta square1_hi,x ; square1_hi[0]=0 + tay ; y=0 + cmp #$40 ; subtract 64 and update flags (c=0) + txa ; a=0 + ror ; rotate +ml9: adc #$00 ; add 0 + sta ml9+1 ; update add value + inx ; x=1 +ml0: sta square1_lo,x ; square1_lo[0]=1 + bne lb1 ; if not zero, loop + inc ml0+2 ; self-modify: bump high byte of the square1_lo store address + inc ml1+2 ; self-modify: bump high byte of the square1_hi store address + clc ; c=0 + iny ; y=1 + bne lb1 ; loop + + ; Build the subtract tables based on the existing one + + ldx #$00 + ldy #$ff +second_table: + lda square1_hi+1,x + sta square2_hi+$100,x + lda square1_hi,x + sta square2_hi,y + lda square1_lo+1,x + sta square2_lo+$100,x + lda square1_lo,x + sta square2_lo,y + dey + inx + bne second_table + + + rts + diff --git a/hellmood_memories/multiply_u8x8.s b/hellmood_memories/multiply_u8x8.s new file mode 100644 index 
00000000..15645500 --- /dev/null +++ b/hellmood_memories/multiply_u8x8.s @@ -0,0 +1,56 @@ +; Fast multiply + +; Note for our purposes we only care about 8.8 x 8.8 fixed point +; with 8.8 result, which means we only care about the middle two bytes +; of the 32 bit result. So we disable generation of the high and low byte +; to save some cycles. + +; +; The old routine took around 700 cycles for a 16bitx16bit=32bit multiply +; This routine, at an expense of 2kB of lookup tables, takes around 250 +; If you reuse a term the next time this drops closer to 200 + +; This routine was described by Stephen Judd and found +; in The Fridge and in the C=Hacking magazine +; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication + +; The key thing to note is that +; (a+b)^2 (a-b)^2 +; a*b = ------- - -------- +; 4 4 +; So if you have tables of the squares of 0..511 you can lookup and subtract +; instead of multiplying. + +; Table generation: I:0..511 +; square1_lo = <((I*I)/4) +; square1_hi = >((I*I)/4) +; square2_lo = <(((I-255)*(I-255))/4) +; square2_hi = >(((I-255)*(I-255))/4) + + +; Fast 8x8 bit unsigned multiplication, 16-bit result +; Input: M1xM2 +; Result: M2:M1 (high:low), high byte also left in A; X is overwritten with the input M2 +; +multiply_u8x8: + lda M1 + sta sm1a+1 ; overwrite operand low byte with M1 (assumes page-aligned tables) ; 3 + sta sm3a+1 ; 3 + eor #$ff ; invert the bits for subtracting ; 2 + sta sm2a+1 ; 3 + sta sm4a+1 ; 3 + + ldx M2 + sec +sm1a: + lda square1_lo,X +sm2a: + sbc square2_lo,X + sta M1 +sm3a: + lda square1_hi,X +sm4a: + sbc square2_hi,X + sta M2 + + rts diff --git a/hellmood_memories/plane.s b/hellmood_memories/plane.s new file mode 100644 index 00000000..e4e30811 --- /dev/null +++ b/hellmood_memories/plane.s @@ -0,0 +1,128 @@ +; Tilted Plane, based on the code in Hellmood's Memories + +; by deater (Vince Weaver) + +; Zero Page +BASL = $28 +BASH = $29 +H2 = $2C +COLOR = $30 + +X1 = $F0 +X2 = $F1 +Y1 = $F2 +Y2 = $F3 + + +M1 = $F7 +M2 = $F8 + +TEMP = $FA +TEMPY = $FB +FRAME = $FC +TEMPX = $FD +SCALED = $FE + + +; Soft Switches +KEYPRESS= $C000 +KEYRESET= $C010 +SET_GR 
= $C050 ; Enable graphics +FULLGR = $C052 ; Full screen, no text +PAGE0 = $C054 ; Page0 +PAGE1 = $C055 ; Page1 +LORES = $C056 ; Enable LORES graphics + +; ROM routines + +PLOT = $F800 ; plot, horiz=y, vert=A (A trashed, XY Saved) +SETCOL = $F864 +TEXT = $FB36 ;; Set text mode +BASCALC = $FBC1 +SETGR = $FB40 +HOME = $FC58 ;; Clear the text screen +WAIT = $FCA8 ;; delay 1/2(26+27A+5A^2) us +HLINE = $F819 + +tilted_plane: + + + + ;=================== + ; init screen + jsr SETGR ; 3 + bit FULLGR ; 3 + + jsr init_multiply_tables + +plane_forever: + + inc FRAME ; 2 + + ldx #47 ; yy ; 2 +yloop: + ldy #39 ; xx ; 2 +xloop: + +; clc +; adc #$10 ; adjust top of screen + lda division,X ; scaled=((0x3d5/yy)&0xff); looked up from the division table, X=yy + ; reverse divide AL=C/Y' + + sta M1 + sta SCALED + + ; color=((signed char)((xprime-20)&0xff))*((signed char)(scaled&0xff)); + tya + sec + sbc #20 + sta M2 + + jsr multiply_s8x8 ; signed product of M1 and M2, result in M2:M1 + + lda M1 ; shift the 16-bit M2:M1 left twice so M2 ends up holding product>>6 + + rol + rol M2 + rol + rol M2 + + lda M2 + sta COLOR + +; fedcba9876543210 +; dcba9876 + + +; color=(color>>6)&0xff; + + sec + lda SCALED + sbc FRAME ; scaled-=frame; + eor COLOR ; color^=(scaled&0xff); + and #$1C ; color&=0x1c; // map colors + + jsr SETCOL + + txa ; A==Y1 ; 1 + jsr PLOT ; (X2,Y1) ; 3 + + dey ; 1 + bpl xloop ; 2 + + dex ; 1 + bpl yloop ; 2 + + bmi plane_forever ; X is $FF here, so always taken ; 2 + + +division: ; division[yy] = $3D5/(yy+10) for yy=0..47; note the C simulator fx0 adds 0x10 (16), not 10 + .byte $62,$59,$51,$4B,$46,$41,$3D,$39,$36,$33 + .byte $31,$2E,$2C,$2A,$28,$27,$25,$24,$23,$21 + .byte $20,$1F,$1E,$1D,$1C,$1C,$1B,$1A,$19,$19 + .byte $18,$17,$17,$16,$16,$15,$15,$14,$14,$14 + .byte $13,$13,$12,$12,$12,$11,$11,$11 + +.include "multiply_tables.s" +.include "multiply_s8x8.s" +.include "multiply_u8x8.s" diff --git a/hellmood_memories/tunnel.s b/hellmood_memories/tunnel.s index a5630cae..bf0ed61c 100644 --- a/hellmood_memories/tunnel.s +++ b/hellmood_memories/tunnel.s @@ -2,6 +2,10 @@ ; by deater (Vince Weaver) + +; first try (shift/add multiply) = 160 bytes, 14 seconds/frame +; second (lookup table mul) = 251 bytes, 9 seconds/frame + ; Zero Page COLOR = $30 @@ -32,6 +36,8 
@@ tunnel: jsr SETGR ; 3 bit FULLGR ; 3 + jsr init_multiply_tables ; build the square tables used by the multiply ; 3 + tunnel_forever: inc FRAME ; 2 @@ -71,8 +77,8 @@ fx5_loop: ;sta M1 ; 2 jsr imul ; 3 - lda M2 ; 2 - sta VALUE ; 2 +; lda M2 ; not needed, imul returns with the high byte in A ; 2 + sta VALUE ; high result in A ; 2 ; get xcoord lda XCOORD ; 2 @@ -129,70 +135,15 @@ putpixel: ; A = M1 ; DEPTH (preserve) is M2 imul: - stx TEMP ; save as we trash it - sta M1 ; get values in right place lda DEPTH sta M2 - eor M1 ; calc if we need to adjust at end - ; (++ vs +- vs -+ vs --) - php ; save status on stack + jsr multiply_s8x8 ; signed M1*M2, high byte of the result left in A and M2 - ; if M1 negative, negate it - lda M1 - bpl m1_positive - eor #$ff - clc - adc #0 -m1_positive: - sta M1 - - ; if M2 negative, naegate it - lda M2 - bpl m2_positive - eor #$ff - clc - adc #0 -m2_positive: - sta M2 - - ;================== - ; unsigned multiply - - ; factors in M1 and M2 - lda #0 - ldx #$8 - lsr M1 - clc -imul_loop: - bcc no_add - clc - adc M2 -no_add: - ror - ror M1 - dex - bne imul_loop - - sta M2 - ; done, high result in factor2, low result in factor1 - - ; adjust to be signed - ; if m1 and m2 positive, good - ; if m1 and m2 negative, good - ; otherwise, negate result - - plp ; restore saved pos/neg value - bpl done_result -negate_result: - sec - lda #0 - sbc M1 - lda #0 - sbc M2 -done_result: - sta M2 - - ldx TEMP rts + + +.include "multiply_tables.s" +.include "multiply_s8x8.s" +.include "multiply_u8x8.s"