second: forgot some more files

This commit is contained in:
Vince Weaver
2023-08-15 16:02:53 -04:00
parent fb72b9032c
commit 7df21a6912
7 changed files with 1380 additions and 11 deletions

View File

@@ -0,0 +1,16 @@
; Row start-address lookup tables: 24 entries each, one per text/lo-res row.
;
; common_offsets_l holds the low byte of each row's start address.  The low
; bytes are identical for the $40xx rows and the $04xx rows, so this single
; table is paired with either of the high-byte tables below.
common_offsets_l:
.byte <$4000,<$4080,<$4100,<$4180,<$4200,<$4280,<$4300,<$4380
.byte <$4028,<$40a8,<$4128,<$41a8,<$4228,<$42a8,<$4328,<$43a8
.byte <$4050,<$40d0,<$4150,<$41d0,<$4250,<$42d0,<$4350,<$43d0
; High bytes of the row addresses for the image stored at $4000-$43ff.
; NOTE(review): the label says "c00" but the values are $40xx; presumably the
; name is left over from an earlier layout -- confirm before renaming.
scrn_c00_offsets_h:
.byte >$4000,>$4080,>$4100,>$4180,>$4200,>$4280,>$4300,>$4380
.byte >$4028,>$40a8,>$4128,>$41a8,>$4228,>$42a8,>$4328,>$43a8
.byte >$4050,>$40d0,>$4150,>$41d0,>$4250,>$42d0,>$4350,>$43d0
; High bytes of the row addresses for the lo-res page at $400-$7ff.
gr_400_offsets_h:
.byte >$400,>$480,>$500,>$580,>$600,>$680,>$700,>$780
.byte >$428,>$4a8,>$528,>$5a8,>$628,>$6a8,>$728,>$7a8
.byte >$450,>$4d0,>$550,>$5d0,>$650,>$6d0,>$750,>$7d0

161
demos/second/gr_copy.s Normal file
View File

@@ -0,0 +1,161 @@
;=========================================================
; gr_copy_to_current, 40x48 version
;=========================================================
; Copies the lo-res image held at $4000-$43ff to the page selected
; by DRAW_PAGE.
;
; The eight stores in the unrolled loop are self-modified: their
; address high bytes are patched to DRAW_PAGE+4 .. DRAW_PAGE+7
; before the copy begins.  Only the stores are patched; the loads
; always read from $40xx.
;
; Offsets 119..0 of every 128-byte group are copied (120 bytes each).
;
; Clobbers: A, Y, flags
;
; Cycles: 45 + 2 + 120*(8*9 + 5) -1 + 6 = 9292
;.align $100
gr_copy_to_current:
	lda	DRAW_PAGE			; 3
	clc					; 2
	adc	#$4				; 2	A = DRAW_PAGE+4
	sta	gr_copy_st_400+2		; 4
	sta	gr_copy_st_480+2		; 4
	adc	#$1				; 2	(relies on the previous add not carrying)
	sta	gr_copy_st_500+2		; 4
	sta	gr_copy_st_580+2		; 4
	adc	#$1				; 2
	sta	gr_copy_st_600+2		; 4
	sta	gr_copy_st_680+2		; 4
	adc	#$1				; 2
	sta	gr_copy_st_700+2		; 4
	sta	gr_copy_st_780+2		; 4
						;===========
						; 45

	ldy	#119				; 2	Y counts down 119..0

gr_copy_line:
	lda	$4000,Y				; 4	fetch from the source image
gr_copy_st_400:
	sta	$400,Y				; 5	high byte patched above
	lda	$4080,Y				; 4
gr_copy_st_480:
	sta	$480,Y				; 5	high byte patched above
	lda	$4100,Y				; 4
gr_copy_st_500:
	sta	$500,Y				; 5	high byte patched above
	lda	$4180,Y				; 4
gr_copy_st_580:
	sta	$580,Y				; 5	high byte patched above
	lda	$4200,Y				; 4
gr_copy_st_600:
	sta	$600,Y				; 5	high byte patched above
	lda	$4280,Y				; 4
gr_copy_st_680:
	sta	$680,Y				; 5	high byte patched above
	lda	$4300,Y				; 4
gr_copy_st_700:
	sta	$700,Y				; 5	high byte patched above
	lda	$4380,Y				; 4
gr_copy_st_780:
	sta	$780,Y				; 5	high byte patched above

	dey					; 2	next byte down
	bpl	gr_copy_line			; 2nt/3	until Y wraps below zero

	rts					; 6
;=========================================================
; gr_copy_to_current_1000, 40x48 version
;=========================================================
; Copies the lo-res image held at $1000-$13ff to the page selected
; by DRAW_PAGE.
;
; Same scheme as the $4000 copy: the address high bytes of the eight
; stores are patched to DRAW_PAGE+4 .. DRAW_PAGE+7, then offsets
; 119..0 of every 128-byte group are copied.
;
; Clobbers: A, Y, flags
gr_copy_to_current_1000:
	lda	DRAW_PAGE			; 3
	clc					; 2
	adc	#$4				; 2	A = DRAW_PAGE+4
	sta	gr_copy_40_st_400+2		; 4
	sta	gr_copy_40_st_480+2		; 4
	adc	#$1				; 2	(relies on the previous add not carrying)
	sta	gr_copy_40_st_500+2		; 4
	sta	gr_copy_40_st_580+2		; 4
	adc	#$1				; 2
	sta	gr_copy_40_st_600+2		; 4
	sta	gr_copy_40_st_680+2		; 4
	adc	#$1				; 2
	sta	gr_copy_40_st_700+2		; 4
	sta	gr_copy_40_st_780+2		; 4
						;===========
						; 45

	ldy	#119				; 2	Y counts down 119..0

gr_copy_line_40:
	lda	$1000,Y				; 4	fetch from the source image
gr_copy_40_st_400:
	sta	$400,Y				; 5	high byte patched above
	lda	$1080,Y				; 4
gr_copy_40_st_480:
	sta	$480,Y				; 5	high byte patched above
	lda	$1100,Y				; 4
gr_copy_40_st_500:
	sta	$500,Y				; 5	high byte patched above
	lda	$1180,Y				; 4
gr_copy_40_st_580:
	sta	$580,Y				; 5	high byte patched above
	lda	$1200,Y				; 4
gr_copy_40_st_600:
	sta	$600,Y				; 5	high byte patched above
	lda	$1280,Y				; 4
gr_copy_40_st_680:
	sta	$680,Y				; 5	high byte patched above
	lda	$1300,Y				; 4
gr_copy_40_st_700:
	sta	$700,Y				; 5	high byte patched above
	lda	$1380,Y				; 4
gr_copy_40_st_780:
	sta	$780,Y				; 5	high byte patched above

	dey					; 2	next byte down
	bpl	gr_copy_line_40			; 2nt/3	until Y wraps below zero

	rts					; 6
;=========================================================
; gr_copy_to_current_40x40
;=========================================================
; Copies the first 20 rows (40 bytes each) listed in gr_offsets
; from a source image to the page selected by DRAW_PAGE.
;
; Source pointer      = row address with $08 added to the high byte
; Destination pointer = row address with DRAW_PAGE added to the high byte
;
; 20 text rows are lo-res lines 0..39, so the bottom 4 text lines
; of the destination are left alone.
;
; NOTE(review): gr_offsets is read as lo/hi byte pairs (X steps by 2);
; presumably its rows start at $400, which would put the source
; image at $c00 -- confirm against gr_offsets.s.
;
; Uses:     INL/INH (source pointer), OUTL/OUTH (destination pointer)
; Clobbers: A, X, Y, flags
gr_copy_to_current_40x40:
	ldx	#0			; X = byte offset into gr_offsets

gc_40x40_loop:
	; both pointers share the row's low byte
	lda	gr_offsets,x
	sta	INL
	sta	OUTL

	; source high byte = row high byte + $08
	lda	gr_offsets+1,x
	clc
	adc	#$8
	sta	INH

	; destination high byte = row high byte + DRAW_PAGE
	lda	gr_offsets+1,x
	clc
	adc	DRAW_PAGE
	sta	OUTH

	ldy	#39			; copy columns 39..0 of this row
gc_40x40_inner:
	lda	(INL),Y
	sta	(OUTL),Y
	dey
	bpl	gc_40x40_inner

	inx				; advance to the next lo/hi pair
	inx
	cpx	#40			; 20 rows * 2 bytes per entry
	bne	gc_40x40_loop

	rts				; 6

View File

@@ -0,0 +1,24 @@
;==========
; page_flip
;==========
; Swap the displayed page and the drawing page.
;
; Reads DISP_PAGE.  If it is 0, page 1 becomes the displayed page and
; DRAW_PAGE is set to 0; otherwise page 0 becomes the displayed page and
; DRAW_PAGE is set to 4.  DRAW_PAGE is used elsewhere as an amount added
; to a page-1 address high byte, so 0 draws at $400 and 4 draws at $800.
;
; Clobbers: A, flags
page_flip:
lda DISP_PAGE ; 3
beq page_flip_show_1 ; 2nt/3
page_flip_show_0:
bit PAGE0 ; display page 0 ; 4
lda #4 ; 2
sta DRAW_PAGE ; draw to the other page (offset 4) ; 3
lda #0 ; 2
sta DISP_PAGE ; DISP_PAGE=0 ; 3
rts ; 6
page_flip_show_1:
bit PAGE1 ; display page 1 ; 4
sta DRAW_PAGE ; A is still 0 here: DRAW_PAGE=0 ; 3
lda #1 ; 2
sta DISP_PAGE ; DISP_PAGE=1 ; 3
rts ; 6
;====================
; Totals from the per-line counts above:
; via page_flip_show_0 (DISP_PAGE was nonzero) 3+2+4+2+3+2+3+6 = 25
; via page_flip_show_1 (DISP_PAGE was 0) 3+3+4+3+2+3+6 = 24

View File

@@ -0,0 +1,351 @@
; Fast multiply
; Note for our purposes we only care about 8.8 x 8.8 fixed point
; with 8.8 result, which means we only care about the middle two bytes
; of the 32 bit result. So we disable generation of the high and low byte
; to save some cycles.
;
; The old routine took around 700 cycles for a 16bitx16bit=32bit multiply
; This routine, at an expense of 2kB of lookup tables, takes around 250
; If you reuse a term the next time this drops closer to 200
; This routine was described by Stephen Judd and found
; in The Fridge and in the C=Hacking magazine
; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
; The key thing to note is that
; (a+b)^2 (a-b)^2
; a*b = ------- - --------
; 4 4
; So if you have tables of the squares of 0..511 you can lookup and subtract
; instead of multiplying.
; Table generation: I:0..511
; square1_lo = <((I*I)/4)
; square1_hi = >((I*I)/4)
; square2_lo = <(((I-255)*(I-255))/4)
; square2_hi = >(((I-255)*(I-255))/4)
; Note: DOS3.3 starts at $9600
; Base addresses of the four 512-byte square tables ($200 apart).
; They are only defined here if square1_lo has not already been defined.
.ifndef square1_lo
square1_lo = $5400
square1_hi = $5600
square2_lo = $5800
square2_hi = $5A00
.endif
; C equivalent of the table contents:
; for(i=0;i<512;i++) {
; square1_lo[i]=((i*i)/4)&0xff;
; square1_hi[i]=(((i*i)/4)>>8)&0xff;
; square2_lo[i]=( ((i-255)*(i-255))/4)&0xff;
; square2_hi[i]=(( ((i-255)*(i-255))/4)>>8)&0xff;
; }
;
; init_multiply_tables
; Fills square1_lo/hi incrementally, then derives square2_lo/hi from them.
; Clobbers A, X, Y and flags.
;
; The first loop is self-modifying: the page bytes at ml0+2 and ml1+2 are
; incremented and the operand at ml9+1 is rewritten on every pass, and
; nothing here restores them afterwards.
; NOTE(review): a second call would therefore presumably start from the
; wrong addresses/values -- confirm before calling this more than once.
init_multiply_tables:
; Build the add tables
ldx #$00
txa
.byte $c9 ; CMP #immediate opcode: swallows the TYA below as its operand, so TYA is skipped on entry and carry ends up clear
lb1: tya ; A = high byte stored on the previous pass
adc #$00 ; add the carry left by the low-byte add at ml9
ml1: sta square1_hi,x ; store high byte at index X (first pass: square1_hi[0]=0)
tay ; keep the high byte in Y (first pass: y=0)
cmp #$40 ; carry=1 once the high byte has reached $40, else carry=0
txa ; A = X (first pass: a=0)
ror ; A = X/2 with the carry from the cmp moved into bit 7; carry = bit 0 of X
ml9: adc #$00 ; add the running low byte (operand is patched by the next line)
sta ml9+1 ; remember the new low byte for the next pass
inx ; advance the index (first pass: x=1)
ml0: sta square1_lo,x ; store low byte at the incremented index (first pass: square1_lo[1]=0)
bne lb1 ; flags are still those of the inx: loop until X wraps to 0
inc ml0+2 ; X wrapped: move both store addresses to the next 256-byte page
inc ml1+2 ; (see the note above about these not being restored)
clc ; c=0
iny ; bump the saved high byte; it becomes 0 only after the second wrap
bne lb1 ; loop until that happens
; Build the subtract tables based on the existing one
; For X=0..255 (Y=255..0):
; square2[256+X] = square1[X+1]
; square2[255-X] = square1[X]
ldx #$00
ldy #$ff
second_table:
lda square1_hi+1,x
sta square2_hi+$100,x
lda square1_hi,x
sta square2_hi,y
lda square1_lo+1,x
sta square2_lo+$100,x
lda square1_lo,x
sta square2_lo,y
dey
inx
bne second_table
rts
; Fast 16x16 bit multiplication using the square tables
; Input: NUM1H:NUM1L * NUM2H:NUM2L
; Full result would be: RESULT3:RESULT2:RESULT1:RESULT0
; In this version the code that produces RESULT+0 and RESULT+3 is commented
; out, so only RESULT+1 and RESULT+2 (the middle two bytes) are written.
; After the unsigned multiply a signed correction is applied, and the
; routine returns with X = RESULT+2 and A = RESULT+1.
;
; Does self-modifying code to hard-code NUM1H:NUM1L into the code
; carry=0: re-use previous NUM1H:NUM1L
; carry=1: reload NUM1H:NUM1L (the patching below is tallied at 52 cycles)
;
; clobbered: RESULT, X, A, C
; Allocation setup: T1,T2 and RESULT preferably on Zero-page.
;
; NUM1H (x_i), NUM1L (x_f)
; NUM2H (y_i), NUM2L (y_f)
; NUM1L * NUM2L = AAaa
; NUM1L * NUM2H = BBbb
; NUM1H * NUM2L = CCcc
; NUM1H * NUM2H = DDdd
;
; AAaa
; BBbb
; CCcc
; + DDdd
; ----------
; RESULT
;fixed_16x16_mul_unsigned:
multiply:
bcc num1_same_as_last_time ; carry clear: keep the operands patched last time ; 2nt/3
;============================
; Set up self-modifying code
; this changes the code to be hard-coded to multiply by NUM1H:NUM1L
; Each store below rewrites the low address byte of one table access, so
; that indexing with X reads table[NUM1 byte + X] (square1) or
; table[(NUM1 byte ^ $ff) + X] (square2).
; NOTE(review): the counts charge 3 cycles per patching store; these targets
; are code labels, and a 6502 STA absolute costs 4, so the 52 total may be
; an underestimate -- confirm.
;============================
lda NUM1L ; load the low byte ; 3
sta sm1a+1 ; 3
sta sm3a+1 ; 3
sta sm5a+1 ; 3
sta sm7a+1 ; 3
eor #$ff ; invert the bits for subtracting ; 2
sta sm2a+1 ; 3
sta sm4a+1 ; 3
sta sm6a+1 ; 3
sta sm8a+1 ; 3
lda NUM1H ; load the high byte ; 3
sta sm1b+1 ; 3
sta sm3b+1 ; 3
sta sm5b+1 ; 3
; sta sm7b+1 ; (not needed: the DD high byte is never computed)
eor #$ff ; invert the bits for subtracting ; 2
sta sm2b+1 ; 3
sta sm4b+1 ; 3
sta sm6b+1 ; 3
; sta sm8b+1 ; (not needed: the DD high byte is never computed)
;===========
; 52
; Compute the partial products by table lookup and subtraction.
; Every lda/sbc pair here has had its low address byte patched with a byte
; of NUM1 (or its complement), and X supplies the byte of NUM2.
; Each result byte is stored straight into the immediate operand of an
; instruction in the summation code (_AA+1, _bb+1, ...).
num1_same_as_last_time:
;==========================
; Perform NUM1L * NUM2L = AAaa
;==========================
ldx NUM2L ; X = NUM2L, index into the square tables ; 3
sec ; 2
sm1a:
lda square1_lo,x ; 4
sm2a:
sbc square2_lo,x ; low byte is not stored, but its borrow feeds the high byte ; 4
; a is _aa
; sta RESULT+0 ;
sm3a:
lda square1_hi,x ; 4
sm4a:
sbc square2_hi,x ; 4
; a is _AA
sta _AA+1 ; 3
;===========
; 24
; Perform NUM1H * NUM2L = CCcc
; (X still holds NUM2L)
sec ; 2
sm1b:
lda square1_lo,x ; 4
sm2b:
sbc square2_lo,x ; 4
; a is _cc
sta _cc+1 ; 3
sm3b:
lda square1_hi,x ; 4
sm4b:
sbc square2_hi,x ; 4
; a is _CC
sta _CC+1 ; 3
;===========
; 24
;==========================
; Perform NUM1L * NUM2H = BBbb
;==========================
ldx NUM2H ; X = NUM2H for the remaining two products ; 3
sec ; 2
sm5a:
lda square1_lo,x ; 4
sm6a:
sbc square2_lo,x ; 4
; a is _bb
sta _bb+1 ; 3
sm7a:
lda square1_hi,x ; 4
sm8a:
sbc square2_hi,x ; 4
; a is _BB
sta _BB+1 ; 3
;===========
; 27
;==========================
; Perform NUM1H * NUM2H = DDdd
; Only the low byte _dd is computed; the _DD code is commented out.
;==========================
sec ; 2
sm5b:
lda square1_lo,x ; 4
sm6b:
sbc square2_lo,x ; 4
; a is _dd
sta _dd+1 ; 3
;sm7b:
; lda square1_hi,x ;
;sm8b:
; sbc square2_hi,x ;
; a = _DD
; sta RESULT+3 ;
;===========
; 13
;===========================================
; Add the separate multiplications together
; The #0 immediates below are placeholders: the partial-product code
; overwrites them (_AA+1, _bb+1, _BB+1, _CC+1, _cc+1, _dd+1) before this runs.
; Only RESULT+1 and RESULT+2 are produced; carries that would go into
; RESULT+3 are dropped.
;===========================================
clc ; 2
_AA:
lda #0 ; loading _AA ; 2
_bb:
adc #0 ; adding in _bb ; 2
sta RESULT+1 ; 3
;==========
; 9
; product[2]=_BB+_CC+c
; (carry from the _AA+_bb add above is included)
_BB:
lda #0 ; loading _BB ; 2
_CC:
adc #0 ; adding in _CC ; 2
sta RESULT+2 ; 3
;===========
; 7
; product[3]=_DD+c
; bcc dd_no_carry1 ;
; inc RESULT+3 ;
clc ; discard the carry that would have gone into product[3] ; 2
;=============
; 2
dd_no_carry1:
; product[1]=_AA+_bb+_cc
_cc:
lda #0 ; load _cc ; 2
adc RESULT+1 ; 3
sta RESULT+1 ; 3
; product[2]=_BB+_CC+_dd+c
_dd:
lda #0 ; load _dd ; 2
adc RESULT+2 ; includes the carry from the product[1] add ; 3
sta RESULT+2 ; 3
;===========
; 16
; product[3]=_DD+c
; bcc dd_no_carry2 ;
; inc RESULT+3 ;
;=============
; 0
dd_no_carry2:
; *z_i=product[1];
; *z_f=product[0];
; rts ; 6
;=================
; Signed multiply
; The product above treated both operands as unsigned.  For each operand
; whose high byte has bit 7 set, the other operand is subtracted from the
; upper half of the result.  Only RESULT+2 is kept here, so only the low
; byte of the other operand is subtracted; the RESULT+3 part is commented out.
;=================
;multiply:
; jsr fixed_16x16_mul_unsigned ; 6
lda NUM1H ; x_i ; 3
;===========
; 12
bpl x_positive ;^3/2nt
sec ; 2
lda RESULT+2 ; 3
sbc NUM2L ; 3
sta RESULT+2 ; 3
; lda RESULT+3 ;
; sbc NUM2H ;
; sta RESULT+3 ;
;============
; 10
x_positive:
lda NUM2H ; y_i ; 3
;============
; ; 6
bpl y_positive ;^3/2nt
sec ; 2
lda RESULT+2 ; 3
sbc NUM1L ; 3
sta RESULT+2 ; 3
; lda RESULT+3 ;
; sbc NUM1H ;
; sta RESULT+3 ;
;===========
; 10
y_positive:
; Return the 8.8 result in registers: X = integer byte, A = fraction byte
ldx RESULT+2 ; *z_i=product[2]; ; 3
lda RESULT+1 ; *z_f=product[1]; ; 3
rts ; 6
;==========
; 12

187
demos/second/roto.s Normal file
View File

@@ -0,0 +1,187 @@
; do a (hopefully fast) roto-zoom
;
; do_rotozoom: sets up lo-res graphics, builds the multiply tables,
; decompresses the picture, shows it on both pages, waits for a key, then
; animates the rotozoom until another key is pressed.
do_rotozoom:
;================================
; Clear screen and setup graphics
;================================
bit PAGE0 ; set page 0
bit LORES ; Lo-res graphics
lda #0
sta DISP_PAGE
lda #4
sta DRAW_PAGE
;===================================
; Clear top/bottom of page 0 and 1
;===================================
jsr clear_screens
;===================================
; init the multiply tables
;===================================
jsr init_multiply_tables
;======================
; show the title screen
;======================
; Title Screen
title_screen:
load_background:
;===========================
; Clear both bottoms
; jsr clear_bottoms
;=============================
; Load title
; The compressed picture address is patched into zx_src_l+1 / zx_src_h+1.
; NOTE(review): A=$40 is presumably the destination page ($4000), which is
; where gr_copy_to_current reads from -- confirm against zx02_full_decomp.
lda #<lens_zx02
sta zx_src_l+1
lda #>lens_zx02
sta zx_src_h+1
lda #$40
jsr zx02_full_decomp
;=================================
; copy to both pages
jsr gr_copy_to_current
jsr page_flip
jsr gr_copy_to_current
jsr wait_until_keypress
;=================================
; main loop
; start at ANGLE=0, scale 1.0 (SCALE_I=1, SCALE_F=0), frame counter 0
lda #0
sta ANGLE
sta SCALE_F
sta FRAMEL
lda #1
sta SCALE_I
main_loop:
jsr rotozoom
jsr page_flip
lda KEYPRESS ; 4
bpl no_keypress ; bit 7 clear: no key waiting
bit KEYRESET ; clear the keyboard buffer
rts
no_keypress:
; FRAMEL += direction; reverse when it reaches $f8 or 33
clc
lda FRAMEL
adc direction
sta FRAMEL
cmp #$f8
beq back_at_zero
cmp #33
beq at_far_end
bne done_reverse
back_at_zero:
at_far_end:
; change bg color
; These patch the immediate operands of the "default color" loads inside
; rotozoom: the even-line color steps through the low nibble, the odd-line
; color through the high nibble.
lda roto_color_even_smc+1
clc
adc #$01
and #$0f
sta roto_color_even_smc+1
lda roto_color_odd_smc+1
clc
adc #$10
and #$f0
sta roto_color_odd_smc+1
; reverse direction
; direction = -direction (8-bit two's complement)
lda direction
eor #$ff
clc
adc #1
sta direction
; scaleaddh:scaleaddl = -(scaleaddh:scaleaddl), 16-bit two's complement;
; the carry from the low-byte add flows into the high-byte add
lda scaleaddl
eor #$ff
clc
adc #1
sta scaleaddl
lda scaleaddh
eor #$ff
adc #0
sta scaleaddh
done_reverse:
; ANGLE = (ANGLE + direction) & $1f
clc
lda ANGLE
adc direction
and #$1f
sta ANGLE
; SCALE_I:SCALE_F += scaleaddh:scaleaddl (8.8 fixed point)
clc
lda SCALE_F
adc scaleaddl
sta SCALE_F
lda SCALE_I
adc scaleaddh
sta SCALE_I
jmp main_loop
; Animation state.  These are modified at run time by the reversal code
; above and are not re-initialised on entry to do_rotozoom.
direction: .byte $01 ; per-frame step for FRAMEL and ANGLE
scaleaddl: .byte $10 ; per-frame scale step, fraction byte
scaleaddh: .byte $00 ; per-frame scale step, integer byte
;===============================================
; External modules
;===============================================
.include "rotozoom.s"
.include "gr_pageflip.s"
;.include "gr_fast_clear.s"
.include "gr_copy.s"
.include "gr_offsets.s"
.include "c00_scrn_offsets.s"
.include "multiply_fast.s"
;===============================================
; Data
;===============================================
; ZX02-compressed picture passed to zx02_full_decomp above
lens_zx02:
.incbin "graphics/lenspic.gr.zx02"

629
demos/second/rotozoom.s Normal file
View File

@@ -0,0 +1,629 @@
; rotozoomer!
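; takes a lores-formatted image and rotozooms it
; (the source rows come from scrn_c00_offsets_h; despite the "c00" in that
; name, the table used in this build points at $4000)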
; by ANGLE and SCALE_I/SCALE_F and draws it to the
; lo-res page in DRAW_PAGE
; ANGLE in our case is 0..31
; SCALE_I/SCALE_F is 8.8 fixed point scale multiplier
; optimization (cycles measured at ANGLE=0)
; $6BD76=441,718=2.26fps initial code with external plot and scrn
; $62776=403,318=2.48fps inline plot
; $597b6=366,518=2.73fps inline scrn
; $4F496=324,758=3.08fps move plot line calc outside of inner loop
; $49d16=302,358=3.31fps do color*17 ourselves
; $4645e=287,838=3.47fps move XX into X
; $3ef7e=257,918=3.87fps optimize plot
; $3c9fe=248,318=4.03fps optimize scrn
; $39e3e=237,118=4.22fps add scrn address lookup table
; $39fdf=237,535 add two scale multiplies
; $39e17=237,079=4.22fps change the init to also use multiply
; $39dc9=237,001= change to use common lookup table (outside inner loop)
; $3399f=211,359=4.73fps unroll the Y loop by one
; $2BA83=178,819=5.59fps optimize unrolled loop
; $2B14B=176,459=5.66fps avoid extra jump (qkumba)
; Zero-page working variables for rotozoom.
; Each pair is one 8.8 fixed-point value: xxL = fraction byte, xxH = integer byte.
CAL = $B0 ; ca = cos(theta)*scale
CAH = $B1
SAL = $B2 ; sa = sin(theta)*scale
SAH = $B3
YPL = $B4 ; yp = current source Y coordinate
YPH = $B5
XPL = $B6 ; xp = current source X coordinate
XPH = $B7
;YY
;XX
CCAL = $B8 ; cca = -20*ca
CCAH = $B9
CSAL = $BA ; csa = -20*sa
CSAH = $BB
YCAL = $BC ; yca = cca+ycenter, then += ca after each output line
YCAH = $BD
YSAL = $BE ; ysa = csa+xcenter, then += sa after each output line
YSAH = $BF
; rotozoom: draw one rotated and scaled frame to the page selected by DRAW_PAGE.
; Inputs: ANGLE (index into fixed_sin, 0..31), SCALE_I/SCALE_F (8.8 scale).
; This first part computes the per-frame constants with four calls to
; multiply, which returns X = integer byte and A = fraction byte.
rotozoom:
; setup scale for multiply
lda SCALE_I ; 3
sta NUM1H ; 3
lda SCALE_F ; 3
sta NUM1L ; 3
; ca = cos(theta)*scale;
; (we use equiv ca=fixed_sin[(theta+8)&0x1f] )
lda ANGLE ; 3
clc ; 2
adc #8 ; quarter turn ahead: cos from the sine table ; 2
and #$1f ; wrap to the 32 table entries ; 2
asl ; two bytes per entry ; 2
tay ; 2
lda fixed_sin,Y ; load integer half ; 4
sta NUM2H ; 3
lda fixed_sin+1,Y ; load float half ; 4
sta NUM2L ; 3
;===========
; 27
sec ; reload NUM1H/NUM1L ; 2
jsr multiply ; 6+???
stx CAH ; 3
sta CAL ; 3
; sa = sin(theta)*scale;
lda ANGLE ; 3
asl ; two bytes per entry ; 2
tay ; 2
lda fixed_sin,Y ; load integer half ; 4
sta NUM2H ; 3
lda fixed_sin+1,Y ; load float half ; 4
sta NUM2L ; 3
;==========
; 21
clc ; NUM1H/NUM1L same as last time ; 2
jsr multiply ; 6+???
stx SAH ; 3
sta SAL ; 3
; cca = -20*ca;
; NUM1 = -20.0 in 8.8 fixed point ($EC.00)
lda #<(-20) ; 2
sta NUM1H ; 3
lda #0 ; 2
sta NUM1L ; 3
lda CAL ; 3
sta NUM2L ; 3
lda CAH ; 3
sta NUM2H ; 3
sec ; reload NUM1H/NUM1L ; 2
jsr multiply ; 6+???
stx CCAH ; 3
sta CCAL ; 3
; csa = -20*sa;
lda SAL ; 3
sta NUM2L ; 3
lda SAH ; 3
sta NUM2H ; 3
clc ; same NUM1H/NUM1L as last time ; 2
jsr multiply ; 6+???
stx CSAH ; 3
sta CSAL ; 3
; yca=cca+ycenter;
; (center is 20; only the integer byte changes)
lda CCAL ; 3
sta YCAL ; 3
clc ; 2
lda CCAH ; 3
adc #20 ; 2
sta YCAH ; 3
;===========
; 16
; ysa=csa+xcenter;
lda CSAL ; 3
sta YSAL ; 3
clc ; 2
lda CSAH ; 3
adc #20 ; 2
sta YSAH ; 3
;===========
; 16
; yloop, unrolled once
;===================================================================
; for(yy=0;yy<40;yy++) {
; (as coded, YY counts text rows 0..23 and each pass draws two lines)
;===================================================================
ldy #0 ; 2
sty YY ; 3
; One pass of the row loop; this part draws the even line of text row YY.
; On entry Y holds YY.
rotozoom_yloop:
; setup self-modifying code for plot
; YY already in Y from end of loop
; ldy YY ; 3
; Patch the destination row address into the three plot instructions
; (the even-line store plus the odd-line ora/sta pair).
lda common_offsets_l,Y ; lookup low-res memory address ; 4
sta rplot2_smc+1 ; 4
sta rplot12_smc+1 ; 4
sta rplot22_smc+1 ; 4
clc ; 2
lda gr_400_offsets_h,Y ; 4
adc DRAW_PAGE ; add in draw page offset ; 3
sta rplot2_smc+2 ; 4
sta rplot12_smc+2 ; 4
sta rplot22_smc+2 ; 4
;=====================
; unroll 0, even line
;=====================
; xp=cca+ysa; 8.8 fixed point
clc ; 2
lda YSAL ; 3
adc CCAL ; 3
sta XPL ; 3
lda YSAH ; 3
adc CCAH ; 3
sta XPH ; 3
;==========
; 20
; yp=yca-csa; 8.8 fixed point
sec ; 2
lda YCAL ; 3
sbc CSAL ; 3
sta YPL ; 3
lda YCAH ; 3
sbc CSAH ; 3
sta YPH ; 3
;===========
; 20
; for(xx=0;xx<40;xx++) {
ldx #0 ; 2
rotozoom_xloop:
;===================================================================
;===================================================================
; note: every cycle saved below here
; saves 1600 cycles
; (with the current 24-pass row loop this body runs 40*24 = 960 times
; per frame, and the odd-line copy of it another 960)
;===================================================================
;===================================================================
; if ((xp<0) || (xp>39)) color=0;
; else if ((yp<0) || (yp>39)) color=0;
; else color=scrn_page(xp,yp,PAGE2);
; (as coded, the yp limit checked below is 48, not 40)
; we know it's never going to go *that* far out of bounds
; so we could avoid the Y check by just having "0"
; on the edges of the screen? Tricky due to Apple II
; interlacing
roto_color_even_smc:
lda #0 ; default color (operand is patched by the caller's color cycling) ; 2
ldy XPH ; 3
bmi rplot ; xp negative: out of bounds ; 2nt/3
cpy #40 ; 2
bcs rplot ; xp >= 40: out of bounds ; 2nt/3
ldy YPH ; 3
bmi rplot ; yp negative: out of bounds ; 2nt/3
cpy #48 ; 2
bcs rplot ; yp >= 48: out of bounds ; 2nt/3
scrn1:
;==================================================
; scrn(xp,yp)
tya ; YPH ; 2
lsr ; divide to get index, also low bit in carry ; 2
tay ; 2
; TODO: put these in zero page?
; also we can share low bytes with other lookup
lda common_offsets_l,Y ; lookup low-res memory address ; 4
sta BASL ; 3
lda scrn_c00_offsets_h,Y ; 4
sta BASH ; 3
ldy XPH ; 3
lda (BASL),Y ; top/bottom color ; 5+
; carry was set a bit before to low bit of YPH
; hopefully nothing has cleared it
; (none of the loads/stores since the lsr change the carry flag)
bcc rscrn_adjust_even ; 2nt/3
rscrn_adjust_odd:
; YP was odd so want top nibble
lsr ; 2
lsr ; 2
lsr ; 2
lsr ; 2
; fall through
rscrn_adjust_even:
; YP was even so want bottom nibble
and #$f ; 2
rscrn_done:
;=============================================
; always even, want A in bottom of nibble
; so we are all set
rotozoom_set_color:
; want same color in top and bottom nibbles
;==========
; 0
;=================================================
rplot:
; plot(xx,yy); (color is in A)
; we are in loop unroll0 so always even line here
; meaning we want to load old color, save top nibble, and over-write
; bottom nibble with our value
; but! we don't need to save old as we are re-drawing whole screen!
rplot_even:
rplot2_smc:
sta $400,X ; address patched at the top of rotozoom_yloop ; 5
;============
; 5
;=======================
; xp=xp+ca; fixed point 8.8
clc ; 2
lda CAL ; 3
adc XPL ; 3
sta XPL ; 3
lda CAH ; 3
adc XPH ; 3
sta XPH ; 3
; yp=yp-sa; fixed point 8.8
sec ; 2
lda YPL ; 3
sbc SAL ; 3
sta YPL ; 3
lda YPH ; 3
sbc SAH ; 3
sta YPH ; 3
rotozoom_end_xloop:
inx ; 2
cpx #40 ; 2
bne rotozoom_xloop ; 2nt/3
rotozoom_xloop_done:
; yca+=ca; 8.8 fixed point
clc ; 2
lda YCAL ; 3
adc CAL ; 3
sta YCAL ; 3
lda YCAH ; 3
adc CAH ; 3
sta YCAH ; 3
;===========
; 20
; ysa+=sa; 8.8 fixed point
clc ; 2
lda YSAL ; 3
adc SAL ; 3
sta YSAL ; 3
lda YSAH ; 3
adc SAH ; 3
sta YSAH ; 3
;==========
; 20
;===============
; loop unroll 1
; Odd line of the same text row: the sampled color is placed in the top
; nibble and ORed into the byte that the even-line pass just stored.
;===============
;rotozoom_yloop:
; xp=cca+ysa; 8.8 fixed point
clc ; 2
lda YSAL ; 3
adc CCAL ; 3
sta XPL ; 3
lda YSAH ; 3
adc CCAH ; 3
sta XPH ; 3
;==========
; 20
; yp=yca-csa; 8.8 fixed point
sec ; 2
lda YCAL ; 3
sbc CSAL ; 3
sta YPL ; 3
lda YCAH ; 3
sbc CSAH ; 3
sta YPH ; 3
;===========
; 20
; for(xx=0;xx<40;xx++) {
ldx #0 ; 2
rotozoom_xloop2:
;===================================================================
;===================================================================
; note: every cycle saved below here
; saves 1600 cycles
; (with the current 24-pass row loop this body runs 40*24 = 960 times
; per frame)
;===================================================================
;===================================================================
; if ((xp<0) || (xp>39)) color=0;
; else if ((yp<0) || (yp>39)) color=0;
; else color=scrn_page(xp,yp,PAGE2);
; (as coded, the yp limit checked below is 48, not 40)
; we know it's never going to go *that* far out of bounds
; so we could avoid the Y check by just having "0"
; on the edges of the screen? Tricky due to Apple II
; interlacing
roto_color_odd_smc:
lda #0 ; default color (operand is patched by the caller's color cycling) ; 2
ldy XPH ; 3
bmi rplot2 ; xp negative: out of bounds ; 2nt/3
cpy #40 ; 2
bcs rplot2 ; xp >= 40: out of bounds ; 2nt/3
ldy YPH ; 3
bmi rplot2 ; yp negative: out of bounds ; 2nt/3
cpy #48 ; 2
bcs rplot2 ; yp >= 48: out of bounds ; 2nt/3
;==================================================
; scrn(xp,yp)
tya ; YPH ; 2
lsr ; divide to get index, also low bit in carry ; 2
tay ; 2
; TODO: put these in zero page?
; also we can share low bytes with other lookup
lda common_offsets_l,Y ; lookup low-res memory address ; 4
sta BASL ; 3
lda scrn_c00_offsets_h,Y ; 4
sta BASH ; 3
ldy XPH ; 3
lda (BASL),Y ; top/bottom color ; 5+
; carry was set a bit before to low bit of YPH
; hopefully nothing has cleared it
; (none of the loads/stores since the lsr change the carry flag)
bcs rscrn_adjust_odd2 ; 3
rscrn_adjust_even2:
; want bottom color, but put it in top of A
asl ; 2
asl ; 2
asl ; 2
asl ; 2
jmp rscrn_done2 ; 3
rscrn_adjust_odd2:
; want top color alone
and #$f0 ; 2
rscrn_done2:
;=============================================
rotozoom_set_color2:
; always odd
; want color in top, which it is from above
;==========
; 0
;=================================================
rplot2:
; plot(xx,yy); (color is in A)
; always odd, so place color in top
; note! since we are drawing whole screen, we know the top of
; the value is already clear from loop=0 so we don't have to mask
rplot_odd:
rplot12_smc:
ora $400,X ; merge with the even-line nibble (address patched per row) ; 4
rplot22_smc:
sta $400,X ; address patched per row ; 5
;============
; 9
;=======================
; xp=xp+ca; 8.8 fixed point
clc ; 2
lda CAL ; 3
adc XPL ; 3
sta XPL ; 3
lda CAH ; 3
adc XPH ; 3
sta XPH ; 3
; yp=yp-sa; 8.8 fixed point
sec ; 2
lda YPL ; 3
sbc SAL ; 3
sta YPL ; 3
lda YPH ; 3
sbc SAH ; 3
sta YPH ; 3
rotozoom_end_xloop2:
inx ; 2
cpx #40 ; 2
bne rotozoom_xloop2 ; 3
rotozoom_xloop_done2:
; yca+=ca; 8.8 fixed point
clc ; 2
lda YCAL ; 3
adc CAL ; 3
sta YCAL ; 3
lda YCAH ; 3
adc CAH ; 3
sta YCAH ; 3
;===========
; 20
; ysa+=sa; 8.8 fixed point
clc ; 2
lda YSAL ; 3
adc SAL ; 3
sta YSAL ; 3
lda YSAH ; 3
adc SAH ; 3
sta YSAH ; 3
;==========
; 20
; Advance to the next text row; stop after 24 rows (48 lo-res lines).
; Y is left holding YY for the top of rotozoom_yloop.
rotozoom_end_yloop:
inc YY ; 5
ldy YY ; 3
cpy #24 ; 2
beq done_rotozoom ; 2nt/3
jmp rotozoom_yloop ; too far for a relative branch ; 3
done_rotozoom:
rts ; 6
; fixed_sin: sine table, 32 entries covering one full turn.
; Each entry is two bytes of signed 8.8 fixed point: integer byte first,
; then fraction byte (rotozoom reads entry N at fixed_sin+2*N and +1).
; The commented-out block is an earlier 16-entry version of the table.
fixed_sin:
; .byte $00,$00 ; 0.000000=00.00
; .byte $00,$61 ; 0.382683=00.61
; .byte $00,$b5 ; 0.707107=00.b5
; .byte $00,$ec ; 0.923880=00.ec
; .byte $01,$00 ; 1.000000=01.00
; .byte $00,$ec ; 0.923880=00.ec
; .byte $00,$b5 ; 0.707107=00.b5
; .byte $00,$61 ; 0.382683=00.61
; .byte $00,$00 ; 0.000000=00.00
; .byte $ff,$9f ; -0.382683=ff.9f
; .byte $ff,$4b ; -0.707107=ff.4b
; .byte $ff,$14 ; -0.923880=ff.14
; .byte $ff,$00 ; -1.000000=ff.00
; .byte $ff,$14 ; -0.923880=ff.14
; .byte $ff,$4b ; -0.707107=ff.4b
; .byte $ff,$9f ; -0.382683=ff.9f
.byte $00,$00 ; 0.000000
.byte $00,$31 ; 0.195090
.byte $00,$61 ; 0.382683
.byte $00,$8E ; 0.555570
.byte $00,$B5 ; 0.707107
.byte $00,$D4 ; 0.831470
.byte $00,$EC ; 0.923880
.byte $00,$FB ; 0.980785
.byte $01,$00 ; 1.000000
.byte $00,$FB ; 0.980785
.byte $00,$EC ; 0.923880
.byte $00,$D4 ; 0.831470
.byte $00,$B5 ; 0.707107
.byte $00,$8E ; 0.555570
.byte $00,$61 ; 0.382683
.byte $00,$31 ; 0.195090
.byte $00,$00 ; 0.000000
.byte $FF,$CF ; -0.195090
.byte $FF,$9F ; -0.382683
.byte $FF,$72 ; -0.555570
.byte $FF,$4B ; -0.707107
.byte $FF,$2C ; -0.831470
.byte $FF,$14 ; -0.923880
.byte $FF,$05 ; -0.980785
.byte $FF,$00 ; -1.000000
.byte $FF,$05 ; -0.980785
.byte $FF,$14 ; -0.923880
.byte $FF,$2C ; -0.831470
.byte $FF,$4B ; -0.707107
.byte $FF,$72 ; -0.555570
.byte $FF,$9F ; -0.382683
.byte $FF,$CF ; -0.195090

View File

@@ -259,17 +259,6 @@ load_loop:
jsr wait_until_keypress
;================================
; ROTO
;================================
jsr do_rotozoom
bit HIRES
bit FULLGR
bit PAGE0
;================================
; MNTSCRL3 (gorilla looking guy)
;================================
@@ -289,6 +278,18 @@ load_loop:
jsr wait_until_keypress
;================================
; ROTO
;================================
jsr do_rotozoom
bit HIRES
bit FULLGR
bit PAGE0
;================================
; FINAL3 (leaves)
;================================