mode7_demo: move all code into one dir

if I mess around with common asm_routines it will mess with the
size optimized demo.  So lock in place the files and make a copy here.
Should have done this once when I was prepping the PoC||GTFO code.
This commit is contained in:
Vince Weaver 2018-05-21 11:18:33 -04:00
parent 517a7600a2
commit 60c3137e62
16 changed files with 1519 additions and 18 deletions

View File

@ -36,12 +36,12 @@ MODE7_DEMO: mode7_demo.o
mode7_demo.o: mode7_demo.s mode7_demo_backgrounds.inc sprites.inc \
a2.scrolltext deater.scrolltext wave.krg \
../asm_routines/gr_unrle.s \
../asm_routines/gr_fast_clear.s \
gr_unrle.s \
gr_fast_clear.s \
gr_setpage.s \
../asm_routines/gr_fade.s \
../asm_routines/gr_copy.s \
../asm_routines/gr_scroll.s \
gr_fade.s \
gr_copy.s \
gr_scroll.s \
mockingboard.s \
credits.s mode7.s rasterbars.s starfield_demo.s \
interrupt_handler.s \

79
mode7_demo/bg_scroll.s Normal file
View File

@ -0,0 +1,79 @@
;==================
; scroll background
;==================
; background already loaded
; ANGLE 0-15 sets angle
; CV is Y position to display at
; 182/220... 220/16 = 13.75
; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
; 0 11 22 34 45 56 68 79 91 102 113 125 136 147 159 170 182
;
; Copies a 40-column by 4-row window out of scroll_row1..scroll_row4
; onto the draw page.  The first source column is scroll_offsets[ANGLE];
; the destination rows are the four consecutive gr_offsets entries
; starting at byte index CV.
; CV is used directly as a byte index into the word table gr_offsets,
; so it has to be even (2 * text row).
; Writes OFFSET and the operands of bgsm1..bgsm4.  A, X and Y are
; overwritten.
;
; Start column for each ANGLE value.  Only the 16 entries for 0..15 are
; stored; the 17th value listed above (182) is not in the table.
scroll_offsets:
.byte 0,11,22,34,45,56,68,79,91,102,113,125,136,147,159,170
scroll_background:
ldy ANGLE ; use angle
lda scroll_offsets,Y ; lookup in table
sta OFFSET ; calculate offset
ldx #0
ldy CV ; lookup Y co-ord
; NOTE(review): the LSR below has no lasting effect -- A is reloaded by
; the next instruction and carry is cleared (clc) before it is used.
; Removing it would change the code size, so it is only flagged here.
lsr ; and set up self-modify code
; patch the low address byte of the four stores
lda gr_offsets,Y ; get position
sta bgsm1+1
lda gr_offsets+2,Y ; get position
sta bgsm2+1
lda gr_offsets+4,Y ; get position
sta bgsm3+1
lda gr_offsets+6,Y ; get position
sta bgsm4+1
; Y+1 selects the high byte of each table entry
iny
; one clc serves all four adds: this assumes high byte + DRAW_PAGE never
; carries (table high bytes are $04..$07; DRAW_PAGE is presumably 0 or 4)
clc
lda gr_offsets,Y ; get position
adc DRAW_PAGE
sta bgsm1+2
lda gr_offsets+2,Y ; get position
adc DRAW_PAGE
sta bgsm2+2
lda gr_offsets+4,Y ; get position
adc DRAW_PAGE
sta bgsm3+2
lda gr_offsets+6,Y ; get position
adc DRAW_PAGE
sta bgsm4+2
; Y = source column, X = destination column (0..39)
ldy OFFSET
bgdraw_loop:
lda scroll_row1,Y
bgsm1:
sta $400,X
lda scroll_row2,Y
bgsm2:
sta $480,X
lda scroll_row3,Y
bgsm3:
sta $500,X
lda scroll_row4,Y
bgsm4:
sta $580,X
iny
inx
cpx #40
bne bgdraw_loop
rts

53
mode7_demo/gr_copy.s Normal file
View File

@ -0,0 +1,53 @@
;=========================================================
; gr_copy_to_current
;=========================================================
; copy 0xc00 to DRAW_PAGE
; ORIGINAL: 2 + 8*38 + 4*80*23 + 4*120*26 + 13 = 20,159 = 20ms = 50Hz
;
; OPTIMIZED: 2+ 8*38 + 9*4 + 7*4 + 14*120*4 + 14*80*4 + 9*8 + 6 =
; 11,648 = 11ms = 90Hz
;
; Walks the first eight gr_offsets entries ($400..$780).  For each one
; the load at gr_copy_line is patched to (entry + $800), i.e. the image
; at $C00, and the store is patched to (entry + DRAW_PAGE*$100).
; The first four entries copy 120 bytes each, the last four copy only
; the first 80 bytes.
; A, X and Y are overwritten.
gr_copy_to_current:
ldx #0 ; set ypos to zero ; 2
gr_copy_loop:
lda gr_offsets,X ; lookup low byte for line addr ; 4+
sta gr_copy_line+1 ; out and in are the same ; 4
sta gr_copy_line+4 ; 4
lda gr_offsets+1,X ; lookup high byte for line addr ; 4+
clc ; 2
adc DRAW_PAGE ; 3
sta gr_copy_line+5 ; 4
lda gr_offsets+1,X ; lookup high byte for line addr ; 4+
; no clc here: relies on the previous adc leaving carry clear
adc #$8 ; for now, fixed 0xc ; 2
sta gr_copy_line+2 ; 4
; NOTE(review): this ldy #0 is overwritten on both paths below before Y
; is used; it is left in place so code size and timing stay unchanged.
ldy #0 ; set xpos counter to 0 ; 2
cpx #$8 ; don't want to copy bottom 4*40 ; 2
bcs gr_copy_above4 ; 2nt/3
gr_copy_below4:
ldy #119 ; for early ones, copy 120 bytes ; 2
; carry is still clear from the cpx, so this branch is always taken
bcc gr_copy_line ; ; 3
gr_copy_above4: ; for last four, just copy 80 bytes
ldy #79 ; 2
; copy from Y down to 0; both operands below are patched above
gr_copy_line:
lda $ffff,Y ; load a byte (self modified) ; 4+
sta $ffff,Y ; store a byte (self modified) ; 5
dey ; decrement pointer ; 2
bpl gr_copy_line ; ; 2nt/3
gr_copy_line_done:
inx ; increment ypos value ; 2
inx ; twice, as address is 2 bytes ; 2
cpx #16 ; there are 8*2 of them ; 2
bne gr_copy_loop ; if not, loop ; 3
rts ; 6

191
mode7_demo/gr_fade.s Normal file
View File

@ -0,0 +1,191 @@
;============================================
; fade_out: crude fake of a palette fade-out
;============================================
; The image to fade must already be at $C00 (gr_fade reads it there).
;
; Four passes.  Each pass points GBASL/GBASH at the next 16-byte row of
; fade_lookup (+0, +16, +32, +48), lets gr_fade draw the remapped image
; to the draw page, flips pages, then calls WAIT with A=200.
fade_out:
        ;--- pass 1: fade_lookup+0 ---
        lda     #>(fade_lookup+0)
        sta     GBASH
        lda     #<(fade_lookup+0)
        sta     GBASL
        jsr     gr_fade
        jsr     page_flip
        lda     #200
        jsr     WAIT

        ;--- pass 2: fade_lookup+16 ---
        lda     #>(fade_lookup+16)
        sta     GBASH
        lda     #<(fade_lookup+16)
        sta     GBASL
        jsr     gr_fade
        jsr     page_flip
        lda     #200
        jsr     WAIT

        ;--- pass 3: fade_lookup+32 ---
        lda     #>(fade_lookup+32)
        sta     GBASH
        lda     #<(fade_lookup+32)
        sta     GBASL
        jsr     gr_fade
        jsr     page_flip
        lda     #200
        jsr     WAIT

        ;--- pass 4: fade_lookup+48 ---
        lda     #>(fade_lookup+48)
        sta     GBASH
        lda     #<(fade_lookup+48)
        sta     GBASL
        jsr     gr_fade
        jsr     page_flip
        lda     #200
        jsr     WAIT

        rts
;===========================================
; fade_in: crude fake of a palette fade-in
;===========================================
; The image to fade must already be at $C00 (gr_fade reads it there).
;
; Same four passes as the fade-out, but the rows of fade_lookup are
; visited in the opposite order (+48, +32, +16, +0).  Each pass runs
; gr_fade, flips pages, then calls WAIT with A=200.
fade_in:
        ;--- pass 1: fade_lookup+48 ---
        lda     #>(fade_lookup+48)
        sta     GBASH
        lda     #<(fade_lookup+48)
        sta     GBASL
        jsr     gr_fade
        jsr     page_flip
        lda     #200
        jsr     WAIT

        ;--- pass 2: fade_lookup+32 ---
        lda     #>(fade_lookup+32)
        sta     GBASH
        lda     #<(fade_lookup+32)
        sta     GBASL
        jsr     gr_fade
        jsr     page_flip
        lda     #200
        jsr     WAIT

        ;--- pass 3: fade_lookup+16 ---
        lda     #>(fade_lookup+16)
        sta     GBASH
        lda     #<(fade_lookup+16)
        sta     GBASL
        jsr     gr_fade
        jsr     page_flip
        lda     #200
        jsr     WAIT

        ;--- pass 4: fade_lookup+0 ---
        lda     #>(fade_lookup+0)
        sta     GBASH
        lda     #<(fade_lookup+0)
        sta     GBASL
        jsr     gr_fade
        jsr     page_flip
        lda     #200
        jsr     WAIT

        rts
;================================================
; Fade in/out lowres graphics
; GR image should be in $C00
; pointer to fade table in GBASL/GBASH
;
; Same line walk as a plain copy from $C00 to the draw page (first eight
; gr_offsets entries; 120 bytes for the first four, 80 for the last
; four), except every byte is remapped on the way: each nibble is used
; as an index into the 16-byte table at (GBASL) and the matching nibble
; of the table entry is kept.
; Uses TEMP and TEMPY as scratch and one byte of stack per pixel byte.
; A, X and Y are overwritten.
gr_fade:
ldx #0 ; set ypos to zero ; 2
gr_fade_loop:
lda gr_offsets,X ; lookup low byte for line addr ; 4+
sta gr_fade_line1+1 ; out and in are the same ; 4
sta gr_fade_line2+1 ; 4
lda gr_offsets+1,X ; lookup high byte for line addr ; 4+
clc ; 2
adc DRAW_PAGE ; 3
sta gr_fade_line2+2 ; 4
lda gr_offsets+1,X ; lookup high byte for line addr ; 4+
; no clc here: relies on the previous adc leaving carry clear
adc #$8 ; for now, fixed 0xc ; 2
sta gr_fade_line1+2 ; 4
; NOTE(review): this ldy #0 is overwritten on both paths below before Y
; is used.
ldy #0 ; set xpos counter to 0 ; 2
cpx #$8 ; don't want to copy bottom 4*40 ; 2
bcs gr_fade_above4 ; 2nt/3
gr_fade_below4:
ldy #119 ; for early ones, copy 120 bytes ; 2
; carry is still clear from the cpx, so this branch is always taken
bcc gr_fade_line1 ; ; 3
gr_fade_above4: ; for last four, just copy 80 bytes
ldy #79 ; 2
gr_fade_line1:
lda $ffff,Y ; load a byte (self modified) ; 4+
; keep the original byte on the stack for the low-nibble pass
pha
sty TEMPY ; save Y
; do high nibble
and #$f0
lsr
lsr
lsr
lsr
tay
lda (GBASL),Y
and #$f0
sta TEMP
; do low nibble
pla
and #$0f
tay
lda (GBASL),Y
and #$0f
ora TEMP
ldy TEMPY ; restore Y
gr_fade_line2:
sta $ffff,Y ; store a byte (self modified) ; 5
dey ; decrement pointer ; 2
bpl gr_fade_line1 ; ; 2nt/3
gr_fade_line_done:
inx ; increment ypos value ; 2
inx ; twice, as address is 2 bytes ; 2
cpx #16 ; there are 8*2 of them ; 2
bne gr_fade_loop ; if not, loop ; 3
rts ; 6
; Fade parameters
; Four remap tables of 16 bytes each, indexed by colour nibble (0..15).
; Every entry carries the replacement colour in both nibbles, so the
; same table serves the high-nibble and the low-nibble lookup.
; Row +0 maps every colour to itself; row +48 maps every colour to 0.
fade_lookup:
.byte $00,$11,$22,$33,$44,$55,$66,$77, $88,$99,$aa,$bb,$cc,$dd,$ee,$ff
.byte $00,$00,$00,$22,$00,$00,$22,$55, $55,$88,$55,$33,$44,$88,$44,$77
.byte $00,$00,$00,$00,$00,$00,$55,$00, $00,$00,$00,$00,$88,$00,$00,$55
.byte $00,$00,$00,$00,$00,$00,$00,$00, $00,$00,$00,$00,$00,$00,$00,$00

192
mode7_demo/gr_fast_clear.s Normal file
View File

@ -0,0 +1,192 @@
; clear_screens
; Runs clear_top followed by clear_bottom twice, once with DRAW_PAGE=0
; and once with DRAW_PAGE=4.
; DRAW_PAGE is left at 4 on return; it is not restored.
clear_screens:
;===================================
; Clear top/bottom of page 0
;===================================
lda #$0
sta DRAW_PAGE
jsr clear_top
jsr clear_bottom
;===================================
; Clear top/bottom of page 1
;===================================
lda #$4
sta DRAW_PAGE
jsr clear_top
jsr clear_bottom
rts
; clear_bottoms
; Runs clear_bottom twice, once with DRAW_PAGE=0 and once with
; DRAW_PAGE=4.
; DRAW_PAGE is left at 4 on return; it is not restored.
clear_bottoms:
;===================================
; Clear bottom of page 0
;===================================
lda #$0
sta DRAW_PAGE
jsr clear_bottom
;===================================
; Clear bottom of page 1
;===================================
lda #$4
sta DRAW_PAGE
jsr clear_bottom
rts
;=========================================================
; clear_top
;=========================================================
; clear DRAW_PAGE
; original = 14,558 cycles(?) 15ms, 70Hz
; OPTIMIZED MAX (page0,48rows): 45*120+4+6 = 5410 = 5.4ms 185Hz
; (pageX,40rows): 50*120+4+6 = 6010 = 6.0ms 166Hz
; 50*120+4+6+37 = 6055 = 6.0ms 166Hz
;
; Two entry points:
;   clear_top    fill with 0
;   clear_top_a  fill with the byte passed in A
;
; Fills offsets 0..119 of the $400/$480/$500/$580 groups and offsets
; 0..79 of the $600/$680/$700/$780 groups, relative to DRAW_PAGE.
; Offsets 80..119 of the last four groups are left alone.
;
; The high bytes of the eight stores are patched from DRAW_PAGE, so
; the three-byte layout of the instructions at __ctf and __ctf2 must
; not change.
;
; The loop index starts at 119, not 120: each group holds 120 visible
; bytes (0..119) and offset 120 is the first of the eight unused
; "screen hole" bytes that follow.  Starting at 120 would overwrite
; that byte in four groups.
;
; Writes COLOR.  A holds the fill byte and Y is $FF on return.
clear_top:
        lda     #0                      ; 2
clear_top_a:
        sta     COLOR                   ; 3
        clc                             ; 2
        lda     DRAW_PAGE               ; 3
        adc     #4                      ; 2
        sta     __ctf+2                 ; 3
        sta     __ctf+5                 ; 3
        adc     #1                      ; 2
        sta     __ctf+8                 ; 3
        sta     __ctf+11                ; 3
        adc     #1                      ; 2
        sta     __ctf2+2                ; 3
        sta     __ctf2+5                ; 3
        adc     #1                      ; 2
        sta     __ctf2+8                ; 3
        sta     __ctf2+11               ; 3
        ldy     #119    ; last visible offset   ; 2
        lda     COLOR                   ; 3
clear_top_fast_loop:
__ctf:
        sta     $400,Y                  ; 5
        sta     $480,Y                  ; 5
        sta     $500,Y                  ; 5
        sta     $580,Y                  ; 5
        cpy     #80                     ; 2
        bpl     no_draw_bottom  ; Y>=80: skip lower groups ; 2nt/3
__ctf2:
        sta     $600,Y                  ; 5
        sta     $680,Y                  ; 5
        sta     $700,Y                  ; 5
        sta     $780,Y                  ; 5
no_draw_bottom:
        dey                             ; 2
        bpl     clear_top_fast_loop     ; 2nt/3
        rts                             ; 6
;=========================================================
; clear_bottom
;=========================================================
; clear bottom of draw page
;
; Stores $A0 (normal-video space) to offsets 80..119 of the
; $600/$680/$700/$780 groups, relative to DRAW_PAGE.
;
; The high bytes of the four stores are patched from DRAW_PAGE, so
; the three-byte layout of the instructions at __cbf2 must not change.
;
; The loop index starts at 119, not 120: offset 120 is the first of
; the eight unused "screen hole" bytes after the 120 visible bytes of
; each group ($678/$6F8/$778/$7F8 on page 0) and must not be written.
;
; A is $A0 and Y is 79 on return.
clear_bottom:
        clc                             ; 2
        lda     DRAW_PAGE               ; 3
        adc     #6                      ; 2
        sta     __cbf2+2                ; 3
        sta     __cbf2+5                ; 3
        adc     #1                      ; 2
        sta     __cbf2+8                ; 3
        sta     __cbf2+11               ; 3
        ldy     #119    ; last visible offset   ; 2
        lda     #$a0    ; Normal Space  ; 2
clear_bottom_fast_loop:
__cbf2:
        sta     $600,Y                  ; 5
        sta     $680,Y                  ; 5
        sta     $700,Y                  ; 5
        sta     $780,Y                  ; 5
        dey                             ; 2
        cpy     #80                     ; 2
        bpl     clear_bottom_fast_loop  ; loop while Y>=80 ; 2nt/3
        rts                             ; 6
; clear_screens_notext
; Runs clear_all twice, once with DRAW_PAGE=0 and once with
; DRAW_PAGE=4.
; DRAW_PAGE is left at 4 on return; it is not restored.
clear_screens_notext:
;===================================
; Clear top/bottom of page 0
;===================================
lda #$0
sta DRAW_PAGE
jsr clear_all
;===================================
; Clear top/bottom of page 1
;===================================
lda #$4
sta DRAW_PAGE
jsr clear_all
rts
;=========================================================
; clear_all
;=========================================================
; clear 48 rows
;
; Fills offsets 0..119 of all eight groups ($400..$780, relative to
; DRAW_PAGE) with the immediate value at clear_all_color (0 as
; assembled).
; NOTE(review): the clear_all_color label presumably exists so other
; code can patch that immediate -- confirm against callers.
;
; The high bytes of the eight stores are patched from DRAW_PAGE, so
; the three-byte layout of the instructions at __caf and __caf2 must
; not change.
;
; The loop index starts at 119, not 120: offset 120 is the first of
; the eight unused "screen hole" bytes after the 120 visible bytes of
; each group and must not be written.
;
; A holds the fill byte and Y is $FF on return.
clear_all:
        clc                             ; 2
        lda     DRAW_PAGE               ; 3
        adc     #4                      ; 2
        sta     __caf+2                 ; 3
        sta     __caf+5                 ; 3
        adc     #1                      ; 2
        sta     __caf+8                 ; 3
        sta     __caf+11                ; 3
        adc     #1                      ; 2
        sta     __caf2+2                ; 3
        sta     __caf2+5                ; 3
        adc     #1                      ; 2
        sta     __caf2+8                ; 3
        sta     __caf2+11               ; 3
        ldy     #119    ; last visible offset   ; 2
clear_all_color:
        lda     #0                      ; 2
clear_all_fast_loop:
__caf:
        sta     $400,Y                  ; 5
        sta     $480,Y                  ; 5
        sta     $500,Y                  ; 5
        sta     $580,Y                  ; 5
__caf2:
        sta     $600,Y                  ; 5
        sta     $680,Y                  ; 5
        sta     $700,Y                  ; 5
        sta     $780,Y                  ; 5
        dey                             ; 2
        bpl     clear_all_fast_loop     ; 2nt/3
        rts                             ; 6

View File

@ -0,0 +1,67 @@
;=====================================================================
;= ROUTINES
;=====================================================================
;================================
; hlin_setup
;================================
; put address in GBASL/GBASH
; Ycoord in A, Xcoord in Y
;
; A is used directly as a byte index into the word table gr_offsets,
; so it has to be even (2 * text row).
; On return GBASL/GBASH = row address + Xcoord (+ DRAW_PAGE in the high
; byte) and TEMPY holds the Xcoord.  A and Y are overwritten.
hlin_setup:
sty TEMPY ; 3
tay ; y=A ; 2
lda gr_offsets,Y ; lookup low-res memory address ; 4
clc ; 2
adc TEMPY ; 3
sta GBASL ; 3
iny ; 2
lda gr_offsets,Y ; 4
; carry from the low-byte add above flows into this add
adc DRAW_PAGE ; add in draw page offset ; 3
sta GBASH ; 3
rts ; 6
;===========
; 35
;================================
; hlin_double:
;================================
; HLIN Y, V2 AT A
; Y, X, A trashed
; start at Y, draw up to and including X
; NOTE(review): the code below reads the right-hand end from V2, not
; from the X register; X is only used as the width counter.
; Fills V2 - Y + 1 bytes with COLOR; each byte covers two stacked
; lo-res pixels.
hlin_double:
;int hlin_double(int page, int x1, int x2, int at) {
jsr hlin_setup ; 41
; width = V2 - start column + 1
sec ; 2
lda V2 ; 3
sbc TEMPY ; 3
tax ; 2
inx ; 2
;===========
; 53
; fallthrough
;=================================
; hlin_double_continue: width
;=================================
; GBASL has correct offset for row/col
; width in X
; Advances GBASL by the width; GBASH is never adjusted, so the run must
; not cross a 256-byte boundary.
; A width of 0 is not special-cased: dex wraps to $FF before the bne,
; so 256 bytes are written.
hlin_double_continue:
ldy #0 ; 2
lda COLOR ; 3
hlin_double_loop:
sta (GBASL),Y ; 6
inc GBASL ; 5
dex ; 2
bne hlin_double_loop ; 2nt/3
rts ; 6
;=============
; 53+5+X*16+5

5
mode7_demo/gr_offsets.s Normal file
View File

@ -0,0 +1,5 @@
; move these to zero page for slight speed increase?
; Start address of each of the 24 40-byte screen rows, for the page at
; $400.  Entries are 16-bit words, so users index with (row * 2) for the
; low byte and (row * 2) + 1 for the high byte, then add DRAW_PAGE to
; the high byte.
gr_offsets:
.word $400,$480,$500,$580,$600,$680,$700,$780
.word $428,$4a8,$528,$5a8,$628,$6a8,$728,$7a8
.word $450,$4d0,$550,$5d0,$650,$6d0,$750,$7d0

48
mode7_demo/gr_plot.s Normal file
View File

@ -0,0 +1,48 @@
;================================
; plot routine
;================================
; Xcoord in XPOS
; Ycoord in YPOS
; color in COLOR
;
; Sets one lo-res pixel.  Two pixels share a byte: an even YPOS selects
; the low nibble (MASK=$0f), an odd YPOS the high nibble (MASK=$f0).
; The other nibble of the screen byte is preserved.
; COLOR is ANDed with MASK, so the colour must be present in whichever
; nibble is being written.
; Writes MASK, COLOR_MASK, GBASL and GBASH.  A, X and Y are overwritten.
plot:
lda YPOS ; 2
lsr ; shift bottom bit into carry ; 2
bcc plot_even ; 2nt/3
plot_odd:
ldx #$f0 ; 2
; carry is known set on this path, so the branch is always taken
bcs plot_c_done ; 2nt/3
plot_even:
ldx #$0f ; 2
plot_c_done:
stx MASK ; 3
; A = YPOS with bit 0 cleared = byte index into the word table
asl ; shift back (now even) ; 2
tay
lda gr_offsets,Y ; lookup low-res memory address ; 4
clc ; 2
adc XPOS ; 3
sta GBASL ; 3
iny ; 2
lda gr_offsets,Y ; 4
adc DRAW_PAGE ; add in draw page offset ; 3
sta GBASH ; 3
ldy #0 ; 2
plot_write:
; COLOR_MASK = screen byte with the target nibble cleared
lda MASK ; 3
eor #$ff ; 2
and (GBASL),Y ; 5
sta COLOR_MASK ; 3
; merge the selected nibble of COLOR into it
lda COLOR ; 3
and MASK ; 3
ora COLOR_MASK ; 3
sta (GBASL),Y ; 5
rts ; 6

101
mode7_demo/gr_putsprite.s Normal file
View File

@ -0,0 +1,101 @@
;=============================================
; put_sprite
;=============================================
; Sprite to display in INH,INL
; Location is XPOS,YPOS
; Note, only works if YPOS is multiple of two?
;
; Sprite format: byte 0 = width in bytes, byte 1 = height in rows,
; then width*height bytes of pixel data, row by row.  Each data byte
; holds two stacked pixels; a zero nibble is treated as transparent.
; YPOS is used directly as a byte index into the word table gr_offsets,
; which is why it has to be even.
; The sprite data index lives in Y (8 bits), so header plus data has to
; fit in 256 bytes.
; Writes CH, CV (counted down to 0), TEMP, TEMPY, COLOR, MASK, OUTL and
; OUTH.  A, X and Y are overwritten.
put_sprite:
ldy #0 ; byte 0 is xsize ; 2
lda (INL),Y ; 5
sta CH ; xsize is in CH ; 3
iny ; 2
lda (INL),Y ; byte 1 is ysize ; 5
sta CV ; ysize is in CV ; 3
iny ; 2
lda YPOS ; make a copy of ypos ; 3
sta TEMPY ; as we modify it ; 3
;===========
; 28
; --- start of one sprite row: compute the screen address ---
put_sprite_loop:
sty TEMP ; save sprite pointer ; 3
ldy TEMPY ; 3
lda gr_offsets,Y ; lookup low-res memory address ; 5
clc ; 2
adc XPOS ; add in xpos ; 3
sta OUTL ; store out low byte of addy ; 3
lda gr_offsets+1,Y ; look up high byte ; 5
adc DRAW_PAGE ; ; 3
sta OUTH ; and store it out ; 3
ldy TEMP ; restore sprite pointer ; 3
; OUTH:OUTL now points at right place
ldx CH ; load xsize into x ; 3
;===========
; 36
; --- one sprite byte (two stacked pixels) ---
put_sprite_pixel:
lda (INL),Y ; get sprite colors ; 5
iny ; increment sprite pointer ; 2
sty TEMP ; save sprite pointer ; 3
ldy #$0 ; 2
; check if completely transparent
; if so, skip
cmp #$0 ; if all zero, transparent ; 2
beq put_sprite_done_draw ; don't draw it ; 2nt/3
; FIXME: use BIT? ;==============
; 17
sta COLOR ; save color for later ; 3
; check if top pixel transparent
and #$f0 ; check if top nibble zero ; 2
bne put_sprite_bottom ; if not skip ahead ; 2nt/3
; high nibble is transparent: keep the screen's high nibble
lda #$f0 ; setup mask ; 2
sta MASK ; 3
; N is set by the lda #$f0 above (sta leaves flags alone), so this
; branch is always taken
bmi put_sprite_mask ; 2nt/3
put_sprite_bottom:
lda COLOR ; re-load color ; 3
and #$0f ; check if bottom nibble zero ; 2
bne put_sprite_all ; if not, skip ahead ; 2nt/3
; low nibble is transparent: keep the screen's low nibble
lda #$0f ; 2
sta MASK ; setup mask ; 3
put_sprite_mask:
lda (OUTL),Y ; get color at output ; 5
and MASK ; mask off unneeded part ; 3
ora COLOR ; or the color in ; 3
sta (OUTL),Y ; store it back ; 5
jmp put_sprite_done_draw ; we are done ; 3
; both nibbles opaque: overwrite the whole byte
put_sprite_all:
lda COLOR ; load color ; 3
sta (OUTL),Y ; and write it out ; 5
put_sprite_done_draw:
ldy TEMP ; restore sprite pointer ; 3
; only the low byte advances, so a row must not cross a 256-byte boundary
inc OUTL ; increment output pointer ; 5
dex ; decrement x counter ; 2
bne put_sprite_pixel ; if not done, keep looping ; 2nt/3
inc TEMPY ; each line has two y vars ; 5
inc TEMPY ; 5
dec CV ; decrement total y count ; 5
bne put_sprite_loop ; loop if not done ; 2nt/3
rts ; return ; 6

195
mode7_demo/gr_scroll.s Normal file
View File

@ -0,0 +1,195 @@
; Buffers holding the four rows of scroll data, one 256-byte page each.
; The defaults below apply only if scroll_row1 has not already been
; defined before this point.
.ifndef scroll_row1
scroll_row1 EQU $8A00
scroll_row2 EQU $8B00
scroll_row3 EQU $8C00
scroll_row4 EQU $8D00
.endif
; SCROLL_LENGTH: first byte of the compressed scroll data, used as the
;                end column of the scroll
; OFFSET:        current left-hand column within the scroll rows
SCROLL_LENGTH EQU $61
OFFSET EQU $62
;========================
; scroll some text
;========================
; RLE compressed data in INL/INH
; CV is Y position to display at
;
; Decompresses the scroll data into the scroll_row buffers, then shows
; a 40-column, 4-row window of it on the draw page, flipping pages and
; waiting between frames, with the window moving one column per frame.
; Returns once the right-hand edge of the window (OFFSET+40) reaches
; SCROLL_LENGTH.
;
; CV is used directly as a byte index into the word table gr_offsets,
; so it has to be even (2 * text row).
; Writes OFFSET, SCROLL_LENGTH, OUTL/OUTH, INL/INH and the operands of
; sm1..sm4.  A, X and Y are overwritten.
gr_scroll:
        lda     #0
        sta     OFFSET

        ;=======================
        ; decompress scroll text
        ;=======================
        jsr     decompress_scroll

scroll_loop:
        ldx     #0
        ldy     CV
        ; The LSR below has no lasting effect (A is reloaded next and
        ; carry is cleared before use); it is kept so the size of this
        ; routine does not change.
        lsr

        ; patch the low address byte of the four stores
        lda     gr_offsets,Y            ; get position
        sta     sm1+1
        lda     gr_offsets+2,Y          ; get position
        sta     sm2+1
        lda     gr_offsets+4,Y          ; get position
        sta     sm3+1
        lda     gr_offsets+6,Y          ; get position
        sta     sm4+1

        ; Y+1 selects the high bytes; one clc serves all four adds
        ; because high byte + DRAW_PAGE is assumed never to carry
        iny
        clc
        lda     gr_offsets,Y            ; get position
        adc     DRAW_PAGE
        sta     sm1+2
        lda     gr_offsets+2,Y          ; get position
        adc     DRAW_PAGE
        sta     sm2+2
        lda     gr_offsets+4,Y          ; get position
        adc     DRAW_PAGE
        sta     sm3+2
        lda     gr_offsets+6,Y          ; get position
        adc     DRAW_PAGE
        sta     sm4+2

        ; Y = source column, X = destination column (0..39)
        ldy     OFFSET
draw_loop:
        lda     scroll_row1,Y
sm1:
        sta     $400,X
        lda     scroll_row2,Y
sm2:
        sta     $480,X
        lda     scroll_row3,Y
sm3:
        sta     $500,X
        lda     scroll_row4,Y
sm4:
        sta     $580,X
        iny
        inx
        cpx     #40
        bne     draw_loop

        ;==================
        ; flip pages
        ;==================
        jsr     page_flip               ; 6

        ;==================
        ; delay
        ;==================
        lda     #125
        jsr     WAIT

        ;=====================================
        ; advance one column, stop at the end
        ;=====================================
        ; Stop as soon as OFFSET+40 >= SCROLL_LENGTH.  For lengths of
        ; 40 or more this triggers on the same frame as an equality
        ; test would; for shorter lengths it stops after one frame
        ; instead of never matching.
        clc
        lda     OFFSET
        adc     #40
        cmp     SCROLL_LENGTH
        bcs     done_scrolling
        inc     OFFSET
        jmp     scroll_loop             ; 3
done_scrolling:
        rts
;=======================
; decompress scroll
;=======================
; Input pointer in INL/INH, output written linearly from scroll_row1.
; Stream format as decoded below:
;   first byte      stored in SCROLL_LENGTH
;   $A1             end of data
;   $A0, len, val   write val len times
;   $A2..$AF, val   write val (low nibble) times
;   anything else   write that byte once
; A run length of 0 is not special-cased: dex wraps before the bne, so
; 256 bytes are written.
; Writes SCROLL_LENGTH, OUTL/OUTH and INL/INH.  A and X are overwritten;
; Y is 0 on return.
decompress_scroll:
ldy #0
jsr scroll_load_and_increment
sta SCROLL_LENGTH
lda #<scroll_row1
sta OUTL
lda #>scroll_row1
sta OUTH
decompress_scroll_loop:
jsr scroll_load_and_increment ; load compressed value
cmp #$A1 ; EOF marker
beq done_decompress_scroll ; if EOF, exit
pha ; save
and #$f0 ; mask
cmp #$a0 ; see if special AX
beq decompress_scroll_special
pla ; note, PLA sets flags!
ldx #$1 ; only want to print 1
; X is non-zero, so this branch is always taken
bne decompress_scroll_run
decompress_scroll_special:
pla
and #$0f ; check if was A0
; non-zero low nibble: A already holds the run length, go read the colour
; zero low nibble ($A0): fall through and read the run length first
bne decompress_scroll_color ; if A0 need to read run, color
decompress_scroll_large:
jsr scroll_load_and_increment ; get run length
decompress_scroll_color:
tax ; put runlen into X
jsr scroll_load_and_increment ; get color
decompress_scroll_run:
sta (OUTL),Y
; A is saved around the pointer increment
pha
clc ; increment 16-bit pointer
lda OUTL
adc #$1
sta OUTL
lda OUTH
adc #$0
sta OUTH
pla
dex ; repeat for X times
bne decompress_scroll_run
; Z is set here (X reached 0), so this branch is always taken
beq decompress_scroll_loop ; get next run
done_decompress_scroll:
rts
; scroll_load_and_increment
; Returns the byte at (INL),Y in A, then adds 1 to the 16-bit pointer
; INL/INH.  The callers in this file enter with Y=0.
; X and Y are preserved.  Carry is left as the carry-out of the
; high-byte add.
scroll_load_and_increment:
lda (INL),Y ; load and increment 16-bit pointer
; keep the loaded byte on the stack while A is used for the pointer add
pha
clc
lda INL
adc #$1
sta INL
lda INH
adc #$0
sta INH
pla
rts

115
mode7_demo/gr_unrle.s Normal file
View File

@ -0,0 +1,115 @@
;=================
; load RLE image
;=================
; Output is BASH/BASL
; Input is in GBASH/GBASL
;
; Stream format as decoded below:
;   first byte      image width in bytes, stored in CH
;   $A1             end of data
;   $A0, len, val   write val len times
;   $A2..$AF, val   write val (low nibble) times
;   anything else   write that byte once
; Output follows the interleaved screen layout: at the end of each row
; the pointer skips to the next row, and after eight rows it steps back
; $3d8 to start the next group of eight.
; NOTE(review): the $58 skip and the $a7 carry test look tuned for a
; row width of 40 bytes -- confirm before using other widths.
; Writes TEMP (current column), CH, CV (left at $15), BASL/BASH and
; GBASL/GBASH.  A and X are overwritten; Y is 0 on return.
load_rle_gr:
lda #$0
tay ; init Y to 0
sta TEMP ; stores the xcoord
sta CV ; ycoord=0
jsr load_and_increment ; load xsize
sta CH
rle_loop:
jsr load_and_increment
cmp #$A1 ; if 0xa1
beq rle_done ; we are done
pha
and #$f0 ; mask
cmp #$a0 ; see if special AX
beq decompress_special
pla ; note, PLA sets flags!
ldx #$1 ; only want to print 1
; X is non-zero, so this branch is always taken
bne decompress_run
decompress_special:
pla
and #$0f ; check if was A0
; non-zero low nibble: A already holds the run length, go read the colour
; zero low nibble ($A0): fall through and read the run length first
bne decompress_color ; if A0 need to read run, color
decompress_large:
jsr load_and_increment ; get run length
decompress_color:
tax ; put runlen into X
jsr load_and_increment ; get color
decompress_run:
rle_run_loop:
sta (BASL),y ; write out the value
inc BASL ; increment the pointer
bne rle_skip3 ; if wrapped
inc BASH ; then increment the high value
rle_skip3:
pha ; store color for later
inc TEMP ; increment the X value
lda TEMP
cmp CH ; compare against the image width
bcc rle_not_eol ; if less than, keep going
; --- end of row: move the output pointer to the next row ---
lda BASL ; cheat to avoid a 16-bit add
cmp #$a7 ; we are adding 0x58 to get
bcc rle_add_skip ; to the next line
inc BASH
rle_add_skip:
clc
adc #$58 ; actually do the 0x58 add
sta BASL ; and store it back
inc CV ; add 2 to ypos
inc CV ; each "line" is two high
lda CV ; load value
cmp #15 ; if it's greater than 14 it wraps
bcc rle_no_wrap ; Thanks Woz
lda #$0 ; we wrapped, so set to zero
sta CV
; when wrapping have to sub 0x3d8
sec ; this is a 16-bit subtract routine
lda BASL
sbc #$d8 ; LSB
sta BASL
lda BASH ; MSB
sbc #$3 ;
sta BASH
rle_no_wrap:
lda #$0 ; set X value back to zero
sta TEMP
rle_not_eol:
pla ; restore color
dex
bne rle_run_loop ; if not zero, keep looping
beq rle_loop ; and branch always
rle_done:
lda #$15 ; move the cursor somewhere sane
sta CV
rts
; load_and_increment
; Returns the byte at (GBASL),Y in A, then adds 1 to the 16-bit pointer
; GBASL/GBASH.  The caller in this file enters with Y=0.
; A, X and Y are untouched by the increment; N and Z reflect the
; incremented pointer byte, not the loaded value.
load_and_increment:
lda (GBASL),y ; load value ; 5?
inc GBASL ; 5?
bne lskip2 ; 2nt/3
inc GBASH ; 5?
lskip2:
rts ; 6

View File

@ -1160,8 +1160,8 @@ lookup_checkerboard_map:
; External modules
;===============================================
.include "../asm_routines/gr_putsprite.s"
.include "../asm_routines/bg_scroll.s"
.include "gr_putsprite.s"
.include "bg_scroll.s"
.include "sprites.inc"
@ -1172,7 +1172,7 @@ lookup_checkerboard_map:
.include "island_map.inc"
.include "starry_sky.scroll"
.include "../asm_routines/multiply_fast.s"
.include "multiply_fast.s"
; 8.8 fixed point

View File

@ -330,11 +330,11 @@ title_routine:
.byte 0,0,0,0
.byte $A0,$55,$26,$55,$81 ; at $4800
.include "../asm_routines/pageflip.s"
.include "pageflip.s"
.include "rasterbars.s"
.include "starfield_demo.s"
.include "../asm_routines/gr_unrle.s"
.include "../asm_routines/gr_offsets.s"
.include "gr_unrle.s"
.include "gr_offsets.s"
.include "gr_setpage.s"
.byte 0,0,0,0,0,0,0,0
.byte 0,0,0,0,0,0,0,0
@ -345,13 +345,13 @@ title_routine:
;===============================================
.include "../asm_routines/gr_fast_clear.s"
.include "../asm_routines/gr_hlin_double.s"
.include "../asm_routines/text_print.s"
.include "../asm_routines/gr_fade.s"
.include "../asm_routines/gr_plot.s"
.include "../asm_routines/gr_copy.s"
.include "../asm_routines/gr_scroll.s"
.include "gr_fast_clear.s"
.include "gr_hlin_double.s"
.include "text_print.s"
.include "gr_fade.s"
.include "gr_plot.s"
.include "gr_copy.s"
.include "gr_scroll.s"
.include "mode7.s"

351
mode7_demo/multiply_fast.s Normal file
View File

@ -0,0 +1,351 @@
; Fast multiply
; Note for our purposes we only care about 8.8 x 8.8 fixed point
; with 8.8 result, which means we only care about the middle two bytes
; of the 32 bit result. So we disable generation of the high and low byte
; to save some cycles.
;
; The old routine took around 700 cycles for a 16bitx16bit=32bit multiply.
; This routine, at an expense of 2kB of lookup tables, takes around 250.
; If you reuse a term the next time, this drops closer to 200.
; This routine was described by Stephen Judd and found
; in The Fridge and in the C=Hacking magazine
; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
; The key thing to note is that
;
;        (a+b)^2     (a-b)^2
; a*b =  -------  -  -------
;           4           4
;
; So if you have tables of the squares of 0..511 you can lookup and subtract
; instead of multiplying.
; Table generation: I:0..511
; square1_lo = <((I*I)/4)
; square1_hi = >((I*I)/4)
; square2_lo = <(((I-255)*(I-255))/4)
; square2_hi = >(((I-255)*(I-255))/4)
; Note: DOS3.3 starts at $9600
; Locations of the four 512-byte lookup tables.  The defaults below
; apply only if square1_lo has not already been defined.
.ifndef square1_lo
square1_lo EQU $8E00
square1_hi EQU $9000
square2_lo EQU $9200
square2_hi EQU $9400
.endif
; for(i=0;i<512;i++) {
; square1_lo[i]=((i*i)/4)&0xff;
; square1_hi[i]=(((i*i)/4)>>8)&0xff;
; square2_lo[i]=( ((i-255)*(i-255))/4)&0xff;
; square2_hi[i]=(( ((i-255)*(i-255))/4)>>8)&0xff;
; }
;
; init_multiply_tables
; Fills the square1 tables with a running-sum generator, then derives
; the square2 tables from them by copying.
; The generator patches its own operands (the immediate at ml9 and the
; high address bytes at ml0 and ml1) and does not put them back.
; NOTE(review): that suggests the routine is only safe to run once per
; program load -- confirm.
; A, X and Y are overwritten.
init_multiply_tables:
; Build the add tables
ldx #$00
txa
.byte $c9 ; CMP #immediate - skip TYA and clear carry flag
lb1: tya
adc #$00 ; 0
ml1: sta square1_hi,x ; square1_hi[0]=0
tay ; y=0
cmp #$40 ; subtract 64 and update flags (c=0)
txa ; a=0
ror ; rotate
ml9: adc #$00 ; add 0
sta ml9+1 ; update add value
inx ; x=1
ml0: sta square1_lo,x ; square1_lo[0]=1
bne lb1 ; if not zero, loop
; X wrapped: move both store targets on to the next 256-byte page
inc ml0+2 ; increment values
inc ml1+2 ; increment values
clc ; c=0
iny ; y=1
bne lb1 ; loop
; Build the subtract tables based on the existing one
; X counts up and Y counts down, so each half of square2 is filled from
; the first 257 entries of square1: the upper half as a straight copy
; shifted by one, the lower half mirrored.
ldx #$00
ldy #$ff
second_table:
lda square1_hi+1,x
sta square2_hi+$100,x
lda square1_hi,x
sta square2_hi,y
lda square1_lo+1,x
sta square2_lo+$100,x
lda square1_lo,x
sta square2_lo,y
dey
inx
bne second_table
rts
; Fast 16x16 bit unsigned multiplication, 32-bit result
; Input: NUM1H:NUM1L * NUM2H:NUM2L
; Result: RESULT3:RESULT2:RESULT1:RESULT0
;
; Does self-modifying code to hard-code NUM1H:NUM1L into the code
; carry=0: re-use previous NUM1H:NUM1L
; carry=1: reload NUM1H:NUM1L (58 cycles slower)
;
; clobbered: RESULT, X, A, C
; Allocation setup: T1,T2 and RESULT preferably on Zero-page.
;
; NUM1H (x_i), NUM1L (x_f)
; NUM2H (y_i), NUM2L (y_f)
; NUM1L * NUM2L = AAaa
; NUM1L * NUM2H = BBbb
; NUM1H * NUM2L = CCcc
; NUM1H * NUM2H = DDdd
;
; AAaa
; BBbb
; CCcc
; + DDdd
; ----------
; RESULT
;
; As assembled here, only the two middle bytes are produced: RESULT+0
; and RESULT+3 are never written (their code is commented out).
; The unsigned product falls straight through into the signed
; correction at the end, so this entry point is the signed 8.8 x 8.8
; multiply.
; On return X = RESULT+2 (integer part), A = RESULT+1 (fraction).
; With carry clear on entry, the operands patched in by the most
; recent carry-set call are used; the sign fix-up still reads the
; current NUM1H/NUM1L.
; NOTE(review): the header above quotes 58 cycles for the reload path,
; the tally below says 52 -- one of the two is stale.
;fixed_16x16_mul_unsigned:
multiply:
bcc num1_same_as_last_time ; 2nt/3
;============================
; Set up self-modifying code
; this changes the code to be hard-coded to multiply by NUM1H:NUM1L
;============================
; Each patched byte is the low address byte of a table lookup, so the
; lookup reads table[NUM1 + x] (square1) or table[~NUM1 + x] (square2).
lda NUM1L ; load the low byte ; 3
sta sm1a+1 ; 3
sta sm3a+1 ; 3
sta sm5a+1 ; 3
sta sm7a+1 ; 3
eor #$ff ; invert the bits for subtracting ; 2
sta sm2a+1 ; 3
sta sm4a+1 ; 3
sta sm6a+1 ; 3
sta sm8a+1 ; 3
lda NUM1H ; load the high byte ; 3
sta sm1b+1 ; 3
sta sm3b+1 ; 3
sta sm5b+1 ; 3
; sta sm7b+1 ;
eor #$ff ; invert the bits for subtracting ; 2
sta sm2b+1 ; 3
sta sm4b+1 ; 3
sta sm6b+1 ; 3
; sta sm8b+1 ;
;===========
; 52
num1_same_as_last_time:
;==========================
; Perform NUM1L * NUM2L = AAaa
;==========================
ldx NUM2L ; (low le) ; 3
sec ; 2
sm1a:
lda square1_lo,x ; 4
sm2a:
sbc square2_lo,x ; 4
; a is _aa
; the low byte is not stored; only its borrow feeds the next subtract
; sta RESULT+0 ;
sm3a:
lda square1_hi,x ; 4
sm4a:
sbc square2_hi,x ; 4
; a is _AA
; partial products are stored straight into the immediate operands of
; the add chain further down (_AA, _bb, _BB, _CC, _cc, _dd)
sta _AA+1 ; 3
;===========
; 24
; Perform NUM1H * NUM2L = CCcc
sec ; 2
sm1b:
lda square1_lo,x ; 4
sm2b:
sbc square2_lo,x ; 4
; a is _cc
sta _cc+1 ; 3
sm3b:
lda square1_hi,x ; 4
sm4b:
sbc square2_hi,x ; 4
; a is _CC
sta _CC+1 ; 3
;===========
; 24
;==========================
; Perform NUM1L * NUM2H = BBbb
;==========================
ldx NUM2H ; 3
sec ; 2
sm5a:
lda square1_lo,x ; 4
sm6a:
sbc square2_lo,x ; 4
; a is _bb
sta _bb+1 ; 3
sm7a:
lda square1_hi,x ; 4
sm8a:
sbc square2_hi,x ; 4
; a is _BB
sta _BB+1 ; 3
;===========
; 27
;==========================
; Perform NUM1H * NUM2H = DDdd
;==========================
sec ; 2
sm5b:
lda square1_lo,x ; 4
sm6b:
sbc square2_lo,x ; 4
; a is _dd
sta _dd+1 ; 3
;sm7b:
; lda square1_hi,x ;
;sm8b:
; sbc square2_hi,x ;
; a = _DD
; sta RESULT+3 ;
;===========
; 13
;===========================================
; Add the separate multiplications together
;===========================================
; The #0 immediates below are placeholders overwritten by the stores
; above.
clc ; 2
_AA:
lda #0 ; loading _AA ; 2
_bb:
adc #0 ; adding in _bb ; 2
sta RESULT+1 ; 3
;==========
; 9
; product[2]=_BB+_CC+c
_BB:
lda #0 ; loading _BB ; 2
_CC:
adc #0 ; adding in _CC ; 2
sta RESULT+2 ; 3
;===========
; 7
; product[3]=_DD+c
; bcc dd_no_carry1 ;
; inc RESULT+3 ;
; the carry into byte 3 is dropped because byte 3 is not kept
clc ; 2
;=============
; 2
dd_no_carry1:
; product[1]=_AA+_bb+_cc
_cc:
lda #0 ; load _cc ; 2
adc RESULT+1 ; 3
sta RESULT+1 ; 3
; product[2]=_BB+_CC+_dd+c
_dd:
lda #0 ; load _dd ; 2
adc RESULT+2 ; 3
sta RESULT+2 ; 3
;===========
; 16
; product[3]=_DD+c
; bcc dd_no_carry2 ;
; inc RESULT+3 ;
;=============
; 0
dd_no_carry2:
; *z_i=product[1];
; *z_f=product[0];
; rts ; 6
;=================
; Signed multiply
;=================
; Sign fix-up on the unsigned product: if NUM1 is negative subtract
; NUM2 from the upper half of the result, and vice versa.  Only the
; RESULT+2 part of that subtraction is performed here.
;multiply:
; jsr fixed_16x16_mul_unsigned ; 6
lda NUM1H ; x_i ; 3
;===========
; 12
bpl x_positive ;^3/2nt
sec ; 2
lda RESULT+2 ; 3
sbc NUM2L ; 3
sta RESULT+2 ; 3
; lda RESULT+3 ;
; sbc NUM2H ;
; sta RESULT+3 ;
;============
; 10
x_positive:
lda NUM2H ; y_i ; 3
;============
; ; 6
bpl y_positive ;^3/2nt
sec ; 2
lda RESULT+2 ; 3
sbc NUM1L ; 3
sta RESULT+2 ; 3
; lda RESULT+3 ;
; sbc NUM1H ;
; sta RESULT+3 ;
;===========
; 10
y_positive:
ldx RESULT+2 ; *z_i=product[2]; ; 3
lda RESULT+1 ; *z_f=product[1]; ; 3
rts ; 6
;==========
; 12

24
mode7_demo/pageflip.s Normal file
View File

@ -0,0 +1,24 @@
;==========
; page_flip
;==========
; Swap the displayed page and the page being drawn to.
;   DISP_PAGE = 0 on entry: touch PAGE1, DRAW_PAGE <- 0, DISP_PAGE <- 1
;   DISP_PAGE != 0 on entry: touch PAGE0, DRAW_PAGE <- 4, DISP_PAGE <- 0
; DRAW_PAGE holds the value added to the high byte of a screen address
; (0 for the page at $400, 4 for the page at $800).
; A returns holding the new DISP_PAGE value.
page_flip:
        lda     DISP_PAGE                       ; 3
        beq     page_flip_show_1                ; 2nt/3

        ; page 1 was showing: draw there next, show page 0
page_flip_show_0:
        lda     #4                              ; 2
        sta     DRAW_PAGE       ; DRAW_PAGE=1   ; 3
        bit     PAGE0                           ; 4
        lda     #0                              ; 2
        sta     DISP_PAGE       ; DISP_PAGE=0   ; 3
        rts                                     ; 6

        ; page 0 was showing: draw there next, show page 1
        ; (A is still 0 from the DISP_PAGE load above)
page_flip_show_1:
        sta     DRAW_PAGE       ; DRAW_PAGE=0   ; 3
        bit     PAGE1                           ; 4
        lda     #1                              ; 2
        sta     DISP_PAGE       ; DISP_PAGE=1   ; 3
        rts                                     ; 6
        ;====================
        ; DISP_PAGE=0 26
        ; DISP_PAGE=1 24

80
mode7_demo/text_print.s Normal file
View File

@ -0,0 +1,80 @@
;================================
; htab_vtab
;================================
; move to CH/CV
;
; Sets BASL/BASH to the draw-page address of text column CH on text row
; CV.  Here CV is a row number: it is doubled before indexing the word
; table gr_offsets.
; A and Y are overwritten.
htab_vtab:
lda CV
asl
tay
lda gr_offsets,Y ; lookup low-res memory address
clc
adc CH ; add in xpos
sta BASL ; store out low byte of addy
lda gr_offsets+1,Y ; look up high byte
; carry from the low-byte add above flows into this add
adc DRAW_PAGE ;
sta BASH ; and store it out
; BASH:BASL now points at right place
rts
;================================
; move_and_print
;================================
; move to CH/CV
; Positions BASL/BASH via htab_vtab, then falls through into
; print_string.
move_and_print:
jsr htab_vtab
;================================
; print_string
;================================
; Copies the zero-terminated string at (OUTL) to (BASL), setting bit 7
; of every character.  The terminator is not written.
; On return Y is the index of the terminator.  If no terminator is
; found within 256 bytes, Y wraps to 0 and the loop exits.
print_string:
ldy #0
print_string_loop:
lda (OUTL),Y
beq done_print_string
ora #$80
sta (BASL),Y
iny
bne print_string_loop
done_print_string:
rts
;====================
; point_to_end_string
;====================
; Adds Y+1 to the 16-bit pointer OUTL/OUTH.  Called with Y as left by
; print_string (index of the terminator), this moves the pointer to
; the byte just past the string.
; A and Y are overwritten.
point_to_end_string:
iny
tya
clc
adc OUTL
sta OUTL
; propagate the carry into the high byte
lda #0
adc OUTH
sta OUTH
rts
;================================
; print_both_pages
;================================
; Prints the string at (OUTL) at CH/CV twice, once with DRAW_PAGE=0 and
; once with DRAW_PAGE=4.  The caller's DRAW_PAGE is saved on the stack
; and restored before returning.
print_both_pages:
lda DRAW_PAGE
pha
lda #0
sta DRAW_PAGE
jsr move_and_print
lda #4
sta DRAW_PAGE
jsr move_and_print
pla
sta DRAW_PAGE
rts ; oops forgot this initially
; explains the weird vertical stripes on the screen