memories: working on tilted plane

2025-02-18 16:30:59 +00:00 · 2020-05-12 14:58:40 -04:00 · 2020-05-12 14:58:40 -04:00 · d6aed2067d
commit d6aed2067d
parent 25436f888a
8 changed files with 463 additions and 127 deletions
--- a/gr-sim/hellmood_memories/hellmood_gr.c
+++ b/gr-sim/hellmood_memories/hellmood_gr.c
@ -233,43 +233,25 @@ static short pop(void) {
 	/* tilted plane */
 	/* DH=Y, DL=X */
 static int fx0(int xx, int yy, int xprime) {
-	return 0;
-}
-#if 0
-	char ah,al,dh,dl;
-	unsigned short temp;
+	unsigned short scaled;
+	int color;

-	ax=0x1329;	// mov ax,0x1329	init
+	// 0x1329 = 4905?  200*24.5  40*24.5=981=3d5

-	al=ax&0xff; ah=(ax>>8)&0xff;
-	dl=xprime; dh=yy;
+	yy=yy+0x10;//0x29;     // add dh,al    ; prevent divide overflow
+	scaled=((0x3d5/yy)&0xff);      // div dh       ; reverse divide AL=C/Y'

+	color=((signed char)((xprime-20)&0xff))*((signed char)(scaled&0xff));

-	dh=dh+al;	// add dh,al	; prevent divide overflow
-	div_8(dh);	// div dh	; reverse divide AL=C/Y'
+	scaled-=frame;

-	dx=((dh&0xff)<<8)|dl;
+	color=(color>>6)&0xff;
+	color^=(scaled&0xff);
+	color&=0x1c;                    // map colors

-	temp=ax;
-	ax=dx; dx=temp;	// xchg dx,ax	; DL=C/Y' AL=X
-
-	dl=dx&0xff; dh=(dx>>8)&0xff;
-
-	imul_8(dl);	// imul dl
-	dx=dx-frame;	// sub dx,bp
-	dl=dx&0xff;
-
-	ah=(ax>>8)&0xff;
-	ah=ah^dl;	// xor ah,dl
-	al=ah;		// mov al,ah
-	ax=((ah&0xff)<<8)|(al&0xff);
-
-	ax&=0xff1c;	// and al,4+8+16
-
-	return ax;
+	return color;
 }

-#endif
 /* circles? */
 /* DH=Y, DL=X */
 static int fx1(int xx, int yy, int xprime) {
@ -559,7 +541,9 @@ int main(int argc, char **argv) {

 	ram[DRAW_PAGE]=0;

-	frame=0x13;
+//	frame=0x13;
+
+	frame=2*512;

 	while(1) {
 		for(yy=0;yy<48;yy++) {
@ -571,12 +555,12 @@ int main(int argc, char **argv) {

 			which=frame/512;
 			switch (which&0xff) {
-				case 0:	color=fx5(xx,yy,xprime); break;
+				case 0:	color=fx2(xx,yy,xprime); break;
 				case 1:	color=fx1(xx,yy,xprime); break;
 				case 2: color=fx0(xx,yy,xprime); break;
 				case 3: color=fx3(xx,yy,xprime); break;
 				case 4: color=fx4(xx,yy,xprime); break;
-				case 5: color=fx2(xx,yy,xprime); break;
+				case 5: color=fx5(xx,yy,xprime); break;
 				case 6: color=fx6(xx,yy,xprime); break;
 				case 7: return 0;
 				default: printf("Trying effect %d\n",which);
--- a/gr-sim/hellmood_memories/hellmood_simple.c
+++ b/gr-sim/hellmood_memories/hellmood_simple.c
@ -210,6 +210,8 @@ static void write_framebuffer(int address, int value) {

 }

+#if 0
+
 /* unsigned multiply */
 static void mul_16(unsigned short value) {
 	unsigned int result;
@ -238,7 +240,9 @@ static void mul_16(unsigned short value) {

 }

-/* 
+#endif
+
+/*
 static void imul(short value) {
 	int result;

@ -355,7 +359,7 @@ static void imul_16_dx(short value) {



-
+#if 0

 /* unsigned divide */
 static void div_8(unsigned char value) {
@ -382,7 +386,7 @@ static void div_8(unsigned char value) {

 }

-
+#endif

 static void push(int value) {
 	//printf("Pushing %x\n",value);
@ -407,35 +411,19 @@ static short pop(void) {
 	/* DH=Y, DL=X */
 static int fx0(int xx, int yy, int xprime) {

-	char ah,al,dh,dl;
-	unsigned short temp;
+	unsigned short scaled;
 	int color;

-	ax=0x1329;	// mov ax,0x1329	init
-	al=0x29; ah=0x13;
-	dl=xprime; dh=yy;
+	yy=yy+0x29;	// add dh,al	; prevent divide overflow
+	scaled=((0x1329/yy)&0xff);	// div dh	; reverse divide AL=C/Y'

+	color=((signed char)(xprime&0xff))*((signed char)(scaled&0xff));

-	dh=dh+al;	// add dh,al	; prevent divide overflow
-	div_8(dh);	// div dh	; reverse divide AL=C/Y'
+	scaled-=frame;

-	dx=((dh&0xff)<<8)|dl;
-
-	temp=ax;
-	ax=dx; dx=temp;	// xchg dx,ax	; DL=C/Y' AL=X
-
-	dl=dx&0xff; dh=(dx>>8)&0xff;
-
-	imul_8(dl);	// imul dl
-	dx=dx-frame;	// sub dx,bp
-	dl=dx&0xff;
-
-	ah=(ax>>8)&0xff;
-	ah=ah^dl;	// xor ah,dl
-	al=ah;		// mov al,ah
-	color=((ah&0xff)<<8)|(al&0xff);
-
-	color&=0x1c;	// and al,4+8+16
+	color=(color>>8)&0xff;
+	color^=(scaled&0xff);
+	color&=0x1c;			// map colors

 	return color;
 }
@ -679,6 +667,68 @@ fx6q:	;
 }


+
+
+/* raycast bent tunnel, no multiply (work in progress)
+ * xx is unused; yy and xprime are only read by the disabled code.
+ * The port of the x86 loop is compiled out with #if 0, so for now the
+ * function just returns ax, which is not declared inside this function.
+ * NOTE(review): ch, cf and zf are not declared here either -- presumably
+ * file-level emulator state; confirm before enabling the block. */
+static int fx7(int xx, int yy, int xprime) {

+#if 0
+	unsigned char al,ah,bl,bh,cl,dl,dh,tb=0;
+	unsigned short bp;
+//	dx=((yy&0xff)<<8) | (xprime&0xff);
+	// dx=y
+	// bp=x
+	dh=0;
+	dl=yy;		// xor dx,dx
+	bp=xprime;

+	cl=80;		// mov cl,80
+	ch=0;		// mov ch,0
+	ah=0;		// xor ax,ax
+	al=0;
+	bh=0;		// xor bx,bx
+	bl=0;		// low half of bx; bl is read by "sub bl,dl" below

+L:
+	ch=ch-dh;	// sub ch,dh 		ah/ch = x
+	ah=ah-0-cf;	// sbb ah,0
+	ch=ch+cl;	// add ch,cl		bend with depth
+	ah=ah+0+cf;	// adc ah,0

+	bl=bl-dl;	// sub bl,dl		bh/bl=y
+	bh=bh-0-cf;	// sbb bh,0
+	bl=bl+cl;	// add bl,cl		bend with depth
+	bh=bh+0+cf;	// adc bh,0
+	bl=bl+cl;	// add bl,cl		bend with depth
+	bh=bh+0+cf;	// adc bh,0

+	al=bh;		// mov al,bh		leave ah,bh untouched
+	al=al^ah;	// xor al,ah		geometry check
+	al+=4;		// add al,4		geometry check
+			// test al,8		geometry check
+			// jnz Q
+	if ((al&8)!=0) goto Q;	// parenthesised: != binds tighter than &

+	cl--;		// dec cl
+	if (cl!=0) goto L;
+	if ((cl!=0) && (zf==1)) goto L; // loopz L

+Q:
+	cl=cl-frame;	// probably the timer sub cl,[0x46c]
+	al=al^cl;	// xor al,cl
+			// aam 6
+	al=al+20;	// add al,20
+			// stosb
+#endif

+	return ax;
+}
+
 int main(int argc, char **argv) {

 	int color=0,which,xx,yy,xprime;
@ -687,9 +737,11 @@ int main(int argc, char **argv) {

 	mode13h_graphics_init();

-	frame=0x13;
+//	frame=0x13;
 	es=0xa000-10;

+	frame=2*512;
+
 	while(1) {
 		for(yy=0;yy<200;yy++) {
 		for(xx=0;xx<320;xx++) {
@ -698,7 +750,7 @@ int main(int argc, char **argv) {
 			/* rrolla multiply by 0xcccd trick */

 			which=frame/512;
-			switch (which&0xff) {
+			switch (which&0x7) {
 				case 0:	color=fx2(xx,yy,xprime); break;
 				case 1:	color=fx1(xx,yy,xprime); break;
 				case 2: color=fx0(xx,yy,xprime); break;
@ -706,7 +758,7 @@ int main(int argc, char **argv) {
 				case 4: color=fx4(xx,yy,xprime); break;
 				case 5: color=fx5(xx,yy,xprime); break;
 				case 6: color=fx6(xx,yy,xprime); break;
-				case 7: return 0;
+				case 7: color=fx7(xx,yy,xprime); break;
 				default: printf("Trying effect %d\n",which);
 			}
 			write_framebuffer((es<<4)+((yy*320)+xx), color);
--- a/hellmood_memories/Makefile
+++ b/hellmood_memories/Makefile
@ -8,14 +8,17 @@ B2D = ../bmp2dhr/b2d

 all:	memories.dsk

-memories.dsk:	HELLO CIRCLES.BAS CIRCLES CHECKERS CHECKERS_SMALL TUNNEL
+memories.dsk:	HELLO CIRCLES.BAS CIRCLES CHECKERS CHECKERS_SMALL PLANE \
+		TUNNEL TNM
 	cp empty.dsk memories.dsk
 	$(DOS33) -y memories.dsk SAVE A HELLO
 	$(DOS33) -y memories.dsk SAVE A CIRCLES.BAS
 	$(DOS33) -y memories.dsk BSAVE -a 0x070 CIRCLES
 	$(DOS33) -y memories.dsk BSAVE -a 0x070 CHECKERS
 	$(DOS33) -y memories.dsk BSAVE -a 0x070 CHECKERS_SMALL
+	$(DOS33) -y memories.dsk BSAVE -a 0x1000 PLANE
 	$(DOS33) -y memories.dsk BSAVE -a 0x1000 TUNNEL
+	$(DOS33) -y memories.dsk BSAVE -a 0x1000 TNM

 CIRCLES: circles.o
 	ld65 -o CIRCLES circles.o -C ../linker_scripts/apple2_70_zp.inc
@ -33,12 +36,29 @@ checkers.o:	checkers.s

 ###

+PLANE: plane.o
+	ld65 -o PLANE plane.o -C ../linker_scripts/apple2_1000.inc
+
+plane.o:	plane.s
+	ca65 -o plane.o plane.s -l plane.lst
+
+###
+
 TUNNEL: tunnel.o
 	ld65 -o TUNNEL tunnel.o -C ../linker_scripts/apple2_1000.inc

 tunnel.o:	tunnel.s
 	ca65 -o tunnel.o tunnel.s -l tunnel.lst

+###
+
+TNM: tunnel_nm.o
+	ld65 -o TNM tunnel_nm.o -C ../linker_scripts/apple2_1000.inc
+
+tunnel_nm.o:	tunnel_nm.s
+	ca65 -o tunnel_nm.o tunnel_nm.s -l tunnel_nm.lst
+
+

 ###

@ -62,5 +82,5 @@ CIRCLES.BAS:	zooming_circles.bas

 clean:		
 		rm -f *~ *.o *.lst HELLO CIRCLES.BAS CIRCLES CHECKERS \
-			CHECKERS_SMALL TUNNEL
+			CHECKERS_SMALL PLANE TUNNEL TNM

--- a/hellmood_memories/multiply_s8x8.s
+++ b/hellmood_memories/multiply_s8x8.s
@ -0,0 +1,55 @@
+	;=================================================
+	; multiply_s8x8: signed 8x8 -> 16 bit multiply, M1 * M2
+	;	in:  M1, M2 = signed 8-bit factors
+	;	out: M2 = high byte (also left in A), M1 = low byte
+	;	X is saved in TEMP and restored before returning
+multiply_s8x8:
+	stx	TEMP		; save as we trash it

+	lda	M2
+	eor	M1		; calc if we need to adjust at end
+				; (++ vs +- vs -+ vs --)
+	php			; save status on stack

+	; if M1 negative, negate it (two's complement: invert, then add 1)
+	lda	M1
+	bpl	m1_positive
+	eor	#$ff
+	clc
+	adc	#1
+m1_positive:
+	sta	M1

+	; if M2 negative, negate it
+	lda	M2
+	bpl	m2_positive
+	eor	#$ff
+	clc
+	adc	#1
+m2_positive:
+	sta	M2

+	;==================
+	; unsigned multiply
+	jsr	multiply_u8x8
+	; done, high result in M2 (and still in A), low result in M1

+	; adjust to be signed
+	; if m1 and m2 positive, good
+	; if m1 and m2 negative, good
+	; otherwise, negate the 16-bit result (0 - M2:M1)
+	plp			; restore saved pos/neg value
+	bpl	done_result
+negate_result:
+	sec
+	lda	#0
+	sbc	M1
+	sta	M1		; keep negated low byte; sta/lda leave carry alone
+	lda	#0
+	sbc	M2
+done_result:
+	sta	M2

+	ldx	TEMP
+	rts
+
--- a/hellmood_memories/multiply_tables.s
+++ b/hellmood_memories/multiply_tables.s
@ -0,0 +1,90 @@
+; Fast multiply -- setup tables
+
+
+; Note for our purposes we only care about 8.8 x 8.8 fixed point
+; with 8.8 result, which means we only care about the middle two bytes
+; of the 32 bit result.  So we disable generation of the high and low byte
+; to save some cycles.
+
+;
+; The old routine took around 700 cycles for a 16bitx16bit=32bit multiply
+; This routine, at an expense of 2kB of lookup tables, takes around 250
+;	If you reuse a term the next time this drops closer to 200
+
+; This routine was described by Stephen Judd and found
+;	in The Fridge and in the C=Hacking magazine
+; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
+
+; The key thing to note is that
+;	       (a+b)^2     (a-b)^2
+;       a*b =  -------  -  --------
+;                 4           4
+; So if you have tables of the squares of 0..511 you can lookup and subtract
+; instead of multiplying.
+
+; Table generation: I:0..511
+;                   square1_lo = <((I*I)/4)
+;                   square1_hi = >((I*I)/4)
+;                   square2_lo = <(((I-255)*(I-255))/4)
+;                   square2_hi = >(((I-255)*(I-255))/4)
+
+
+.ifndef square1_lo
+square1_lo	=	$2000
+square1_hi	=	$2200
+square2_lo	=	$2400
+square2_hi	=	$2600
+.endif
+
+;	for(i=0;i<512;i++) {
+;		square1_lo[i]=((i*i)/4)&0xff;
+;		square1_hi[i]=(((i*i)/4)>>8)&0xff;
+;		square2_lo[i]=( ((i-255)*(i-255))/4)&0xff;
+;		square2_hi[i]=(( ((i-255)*(i-255))/4)>>8)&0xff;
+;	}
+
+init_multiply_tables:
+	; Fill square1_lo/hi, then derive square2_lo/hi from them.
+	; Build the add tables
+	; (self-modifying: the operands at ml0, ml1 and ml9 are patched below)
+	ldx	#$00
+	txa
+	.byte $c9   ; CMP #immediate - skip TYA and clear carry flag
+lb1:	tya
+	adc	#$00			; 0
+ml1:	sta	square1_hi,x		; square1_hi[0]=0
+	tay				; y=0
+	cmp	#$40			; subtract 64 and update flags (c=0)
+	txa				; a=0
+	ror				; rotate
+ml9:	adc	#$00			; add 0
+	sta	ml9+1			; update add value (patches the adc operand)
+	inx				; x=1
+ml0:	sta	square1_lo,x		; first pass: square1_lo[1]=0
+	bne	lb1			; if not zero, loop
+	inc	ml0+2			; bump high byte of ml0's store address
+	inc	ml1+2			; bump high byte of ml1's store address
+	clc				; c=0
+	iny				; y=1
+	bne	lb1			; loop
+	; NOTE(review): patched operands are never restored -- looks call-once, confirm
+	; Build the subtract tables based on the existing one
+	; X counts up, Y counts down: sq2[$ff-k]=sq1[k], sq2[$100+k]=sq1[k+1]
+	ldx	#$00
+	ldy	#$ff
+second_table:
+	lda	square1_hi+1,x
+	sta	square2_hi+$100,x
+	lda	square1_hi,x
+	sta	square2_hi,y
+	lda	square1_lo+1,x
+	sta	square2_lo+$100,x
+	lda	square1_lo,x
+	sta	square2_lo,y
+	dey
+	inx
+	bne second_table


+	rts
+
--- a/hellmood_memories/multiply_u8x8.s
+++ b/hellmood_memories/multiply_u8x8.s
@ -0,0 +1,56 @@
+; Fast multiply
+
+; Note for our purposes we only care about 8.8 x 8.8 fixed point
+; with 8.8 result, which means we only care about the middle two bytes
+; of the 32 bit result.  So we disable generation of the high and low byte
+; to save some cycles.
+
+;
+; The old routine took around 700 cycles for a 16bitx16bit=32bit multiply
+; This routine, at an expense of 2kB of lookup tables, takes around 250
+;	If you reuse a term the next time this drops closer to 200
+
+; This routine was described by Stephen Judd and found
+;	in The Fridge and in the C=Hacking magazine
+; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
+
+; The key thing to note is that
+;	       (a+b)^2     (a-b)^2
+;       a*b =  -------  -  --------
+;                 4           4
+; So if you have tables of the squares of 0..511 you can lookup and subtract
+; instead of multiplying.
+
+; Table generation: I:0..511
+;                   square1_lo = <((I*I)/4)
+;                   square1_hi = >((I*I)/4)
+;                   square2_lo = <(((I-255)*(I-255))/4)
+;                   square2_hi = >(((I-255)*(I-255))/4)
+
+
+; Fast 8x8 bit unsigned multiplication, 16-bit result
+; Input: unsigned factors in M1 and M2
+; Result: M2:M1 (high byte also left in A); X is overwritten with M2
+; Self-modifying: M1 is patched into the low operand byte of the 4 table reads
+multiply_u8x8:
+	lda     M1
+        sta     sm1a+1                                                  ; 3
+        sta     sm3a+1                                                  ; 3
+        eor     #$ff    ; invert the bits for subtracting               ; 2
+        sta     sm2a+1                                                  ; 3
+        sta     sm4a+1                                                  ; 3
+	; assumes each table base has low byte $00 (true for the $2000.. defaults) -- confirm if relocated
+	ldx	M2
+	sec				; no borrow going into the 16-bit subtract
+sm1a:
+	lda	square1_lo,X		; square1[M1+M2], low byte
+sm2a:
+	sbc	square2_lo,X		; minus square2[(M1^$ff)+M2], low byte
+	sta	M1
+sm3a:
+	lda	square1_hi,X		; same pair of entries, high bytes
+sm4a:
+	sbc	square2_hi,X		; borrow from the low byte carries through
+	sta	M2

+	rts
--- a/hellmood_memories/plane.s
+++ b/hellmood_memories/plane.s
@ -0,0 +1,128 @@
+; Tilted Plane, based on the code in Hellmood's Memories
+
+; by deater (Vince Weaver) <vince@deater.net>
+
+; Zero Page
+BASL		= $28
+BASH		= $29
+H2		= $2C
+COLOR		= $30
+
+X1		= $F0
+X2		= $F1
+Y1		= $F2
+Y2		= $F3
+
+
+M1		= $F7
+M2		= $F8
+
+TEMP		= $FA
+TEMPY		= $FB
+FRAME		= $FC
+TEMPX		= $FD
+SCALED		= $FE
+
+
+; Soft Switches
+KEYPRESS= $C000
+KEYRESET= $C010
+SET_GR	= $C050 ; Enable graphics
+FULLGR	= $C052	; Full screen, no text
+PAGE0	= $C054 ; Page0
+PAGE1	= $C055 ; Page1
+LORES	= $C056	; Enable LORES graphics
+
+; ROM routines
+
+PLOT	= $F800	; plot, horiz=y, vert=A (A trashed, XY Saved)
+SETCOL	= $F864
+TEXT	= $FB36				;; Set text mode
+BASCALC	= $FBC1
+SETGR	= $FB40
+HOME	= $FC58				;; Clear the text screen
+WAIT	= $FCA8				;; delay 1/2(26+27A+5A^2) us
+HLINE	= $F819
+
+tilted_plane:
+	; Draw the tilted plane on the 40x48 lo-res screen, one PLOT per
+	; pixel, and repeat forever.  FRAME is only incremented, never
+	; initialised in this file, so it starts at whatever $FC holds.
+	;===================
+	; init screen
+	jsr	SETGR				; 3
+	bit	FULLGR				; 3

+	jsr	init_multiply_tables		; square tables read by multiply_u8x8

+plane_forever:

+	inc	FRAME				; 2

+	ldx	#47		; yy		; 2
+yloop:
+	ldy	#39		; xx		; 2
+xloop:

+;	clc
+;	adc	#$10				; adjust top of screen
+	lda	division,X			; scaled=0x3d5/(yy+10) via table
+						; reverse divide AL=C/Y'

+	sta	M1
+	sta	SCALED

+	; color=((signed char)((xprime-20)&0xff))*((signed char)(scaled&0xff));
+	tya
+	sec
+	sbc	#20
+	sta	M2

+	jsr	multiply_s8x8

+	lda	M1
+	; shift M2:A left twice so M2 = bits 13..6 of the product
+	rol
+	rol	M2
+	rol
+	rol	M2

+	lda	M2
+	sta	COLOR

+;	fedcba9876543210
+;	        dcba9876


+;        color=(color>>6)&0xff;

+	sec
+	lda	SCALED
+	sbc	FRAME			; scaled-=frame;
+	eor	COLOR			; color^=(scaled&0xff);
+	and	#$1C			; color&=0x1c;   // map colors

+	jsr	SETCOL			; ROM: presumably keeps only the low nibble of A -- confirm

+	txa		; A==Y1			; 1
+	jsr	PLOT	; (X2,Y1)		; 3

+	dey					; 1
+	bpl	xloop				; 2

+	dex					; 1
+	bpl	yloop				; 2

+	bmi	plane_forever			; 2 (always taken: dex just left N set)

+	; division[yy] = $3D5/(yy+10) for yy=0..47 ($3D5/10=$62 ... $3D5/57=$11)
+	; NOTE(review): the C sim adds 0x10 to yy, this table matches +10 decimal -- confirm
+division:
+	.byte $62,$59,$51,$4B,$46,$41,$3D,$39,$36,$33
+	.byte $31,$2E,$2C,$2A,$28,$27,$25,$24,$23,$21
+	.byte $20,$1F,$1E,$1D,$1C,$1C,$1B,$1A,$19,$19
+	.byte $18,$17,$17,$16,$16,$15,$15,$14,$14,$14
+	.byte $13,$13,$12,$12,$12,$11,$11,$11
+
+.include "multiply_tables.s"
+.include "multiply_s8x8.s"
+.include "multiply_u8x8.s"
--- a/hellmood_memories/tunnel.s
+++ b/hellmood_memories/tunnel.s
@ -2,6 +2,10 @@

 ; by deater (Vince Weaver) <vince@deater.net>

+
+; first try (shift/add multiply) = 160 bytes, 14 seconds/frame
+; second    (lookup table mul)    = 251 bytes,  9 seconds/frame
+
 ; Zero Page
 COLOR		= $30

@ -32,6 +36,8 @@ tunnel:
 	jsr	SETGR				; 3
 	bit	FULLGR				; 3

+	jsr	init_multiply_tables		; 3
+
 tunnel_forever:

 	inc	FRAME				; 2
@ -71,8 +77,8 @@ fx5_loop:
 	;sta	M1				; 2
 	jsr	imul				; 3

-	lda	M2				; 2
-	sta	VALUE				; 2
+;	lda	M2				; 2
+	sta	VALUE	; high result in A	; 2

 	; get xcoord
 	lda	XCOORD				; 2
@ -129,70 +135,15 @@ putpixel:
 	; A = M1
 	; DEPTH (preserve) is M2
 imul:
-	stx	TEMP		; save as we trash it
-
 	sta	M1		; get values in right place
 	lda	DEPTH
 	sta	M2

-	eor	M1		; calc if we need to adjust at end
-				; (++ vs +- vs -+ vs --)
-	php			; save status on stack
+	jsr	multiply_s8x8

-	; if M1 negative, negate it
-	lda	M1
-	bpl	m1_positive
-	eor	#$ff
-	clc
-	adc	#0
-m1_positive:
-	sta	M1
-
-	; if M2 negative, naegate it
-	lda	M2
-	bpl	m2_positive
-	eor	#$ff
-	clc
-	adc	#0
-m2_positive:
-	sta	M2
-
-	;==================
-	; unsigned multiply
-
-	; factors in M1 and M2
-	lda	#0
-	ldx	#$8
-	lsr	M1
-	clc
-imul_loop:
-	bcc	no_add
-	clc
-	adc	M2
-no_add:
-	ror
-	ror	M1
-	dex
-	bne	imul_loop
-
-	sta	M2
-	; done, high result in factor2, low result in factor1
-
-	; adjust to be signed
-	; if m1 and m2 positive, good
-	; if m1 and m2 negative, good
-	; otherwise, negate result
-
-	plp			; restore saved pos/neg value
-	bpl	done_result
-negate_result:
-	sec
-	lda	#0
-	sbc	M1
-	lda	#0
-	sbc	M2
-done_result:
-	sta	M2
-
-	ldx	TEMP
 	rts
+
+
+.include "multiply_tables.s"
+.include "multiply_s8x8.s"
+.include "multiply_u8x8.s"