diff --git a/mode7_demo/Makefile b/mode7_demo/Makefile index 47b822ef..02f91190 100644 --- a/mode7_demo/Makefile +++ b/mode7_demo/Makefile @@ -36,12 +36,12 @@ MODE7_DEMO: mode7_demo.o mode7_demo.o: mode7_demo.s mode7_demo_backgrounds.inc sprites.inc \ a2.scrolltext deater.scrolltext wave.krg \ - ../asm_routines/gr_unrle.s \ - ../asm_routines/gr_fast_clear.s \ + gr_unrle.s \ + gr_fast_clear.s \ gr_setpage.s \ - ../asm_routines/gr_fade.s \ - ../asm_routines/gr_copy.s \ - ../asm_routines/gr_scroll.s \ + gr_fade.s \ + gr_copy.s \ + gr_scroll.s \ mockingboard.s \ credits.s mode7.s rasterbars.s starfield_demo.s \ interrupt_handler.s \ diff --git a/mode7_demo/bg_scroll.s b/mode7_demo/bg_scroll.s new file mode 100644 index 00000000..64444f24 --- /dev/null +++ b/mode7_demo/bg_scroll.s @@ -0,0 +1,79 @@ + ;================== + ; scroll background + ;================== + ; background already loaded + ; ANGLE 0-15 sets angle + ; CV is Y position to display at + ; 182/220... 220/16 = 13.75 + ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + ; 0 11 22 34 45 56 68 79 91 102 113 125 136 147 159 170 182 + +scroll_offsets: + .byte 0,11,22,34,45,56,68,79,91,102,113,125,136,147,159,170 + +scroll_background: + ldy ANGLE ; use angle + lda scroll_offsets,Y ; lookup in table + sta OFFSET ; calculate offset + + ldx #0 + + ldy CV ; lookup Y co-ord + lsr ; and set up self-modify code + + lda gr_offsets,Y ; get position + sta bgsm1+1 + lda gr_offsets+2,Y ; get position + sta bgsm2+1 + lda gr_offsets+4,Y ; get position + sta bgsm3+1 + lda gr_offsets+6,Y ; get position + sta bgsm4+1 + + + iny + + clc + + lda gr_offsets,Y ; get position + adc DRAW_PAGE + sta bgsm1+2 + lda gr_offsets+2,Y ; get position + adc DRAW_PAGE + sta bgsm2+2 + lda gr_offsets+4,Y ; get position + adc DRAW_PAGE + sta bgsm3+2 + lda gr_offsets+6,Y ; get position + adc DRAW_PAGE + sta bgsm4+2 + + ldy OFFSET + + +bgdraw_loop: + + lda scroll_row1,Y +bgsm1: + sta $400,X + + lda scroll_row2,Y +bgsm2: + sta $480,X + + lda scroll_row3,Y 
+bgsm3:
+	sta	$500,X			; operand patched by scroll_background setup
+
+	lda	scroll_row4,Y
+bgsm4:
+	sta	$580,X			; operand patched by scroll_background setup
+
+	iny				; next source column
+	inx				; next screen column
+	cpx	#40			; 40 bytes per screen row
+	bne	bgdraw_loop
+
+	rts
+
+
diff --git a/mode7_demo/gr_copy.s b/mode7_demo/gr_copy.s
new file mode 100644
index 00000000..3d270f89
--- /dev/null
+++ b/mode7_demo/gr_copy.s
@@ -0,0 +1,53 @@
+	;=========================================================
+	; gr_copy_to_current
+	;=========================================================
+	; copy the image at $C00 to the page selected by DRAW_PAGE
+	; ORIGINAL: 2 + 8*38 + 4*80*23 + 4*120*26 + 13 = 20,159 = 20ms = 50Hz
+	;
+	; OPTIMIZED: 2+ 8*38 + 9*4 + 7*4 + 14*120*4 + 14*80*4 + 9*8 + 6 =
+	;		11,648 = 11ms = 90Hz
+gr_copy_to_current:
+
+	ldx	#0		; X indexes gr_offsets (2 bytes per entry)	; 2
+
+gr_copy_loop:
+	lda	gr_offsets,X	; lookup low byte for line addr		; 4+
+
+	sta	gr_copy_line+1	; low byte is the same for load...	; 4
+	sta	gr_copy_line+4	; ...and for store			; 4
+
+	lda	gr_offsets+1,X	; lookup high byte for line addr	; 4+
+	clc								; 2
+	adc	DRAW_PAGE	; destination = line addr + DRAW_PAGE	; 3
+	sta	gr_copy_line+5	; high byte of the store operand	; 4
+
+	lda	gr_offsets+1,X	; lookup high byte for line addr	; 4+
+	adc	#$8		; source is fixed at $C00 (line addr + $800)	; 2
+	sta	gr_copy_line+2	; high byte of the load operand		; 4
+
+	ldy	#0		; overwritten below on both paths	; 2
+
+
+	cpx	#$8		; entries 4-7 ($600-$780): skip last 40 bytes	; 2
+	bcs	gr_copy_above4						; 2nt/3
+
+gr_copy_below4:
+	ldy	#119		; for early ones, copy 120 bytes	; 2
+	bcc	gr_copy_line	; always taken (carry clear after cpx)	; 3
+
+gr_copy_above4:			; for last four, just copy 80 bytes
+	ldy	#79							; 2
+
+gr_copy_line:
+	lda	$ffff,Y		; load a byte (self modified)		; 4+
+	sta	$ffff,Y		; store a byte (self modified)		; 5
+	dey			; count down to offset 0		; 2
+	bpl	gr_copy_line	; loop while Y >= 0			; 2nt/3
+
+gr_copy_line_done:
+	inx			; increment ypos value			; 2
+	inx			; twice, as address is 2 bytes		; 2
+	cpx	#16		; there are 8*2 of them			; 2
+	bne	gr_copy_loop	; if not, loop				; 3
+	rts								; 6
+
diff --git a/mode7_demo/gr_fade.s b/mode7_demo/gr_fade.s
new file mode 100644
index 00000000..cc4c5887
--- /dev/null
+++ b/mode7_demo/gr_fade.s
@@ -0,0 +1,191 @@
+
+	;============================================
+	; gr, "fade" out.
Badly fake a palette fade
+	;============================================
+	; Image to fade out should be in $C00
+fade_out:
+
+	lda	#<(fade_lookup+0)	; step 0: identity table
+	sta	GBASL
+	lda	#>(fade_lookup+0)
+	sta	GBASH
+
+	jsr	gr_fade
+	jsr	page_flip
+
+	lda	#200
+	jsr	WAIT
+
+	lda	#<(fade_lookup+16)	; step 1
+	sta	GBASL
+	lda	#>(fade_lookup+16)
+	sta	GBASH
+
+	jsr	gr_fade
+	jsr	page_flip
+
+	lda	#200
+	jsr	WAIT
+
+	lda	#<(fade_lookup+32)	; step 2
+	sta	GBASL
+	lda	#>(fade_lookup+32)
+	sta	GBASH
+
+	jsr	gr_fade
+	jsr	page_flip
+
+	lda	#200
+	jsr	WAIT
+
+	lda	#<(fade_lookup+48)	; step 3: every colour maps to 0
+	sta	GBASL
+	lda	#>(fade_lookup+48)
+	sta	GBASH
+
+	jsr	gr_fade
+	jsr	page_flip
+
+	lda	#200
+	jsr	WAIT
+
+	rts
+
+	;===========================================
+	; gr, "fade" in.  Badly fake a palette fade
+	;===========================================
+	; Image to fade in should be in $C00
+fade_in:
+
+	lda	#<(fade_lookup+48)	; same tables as fade_out, reverse order
+	sta	GBASL
+	lda	#>(fade_lookup+48)
+	sta	GBASH
+
+	jsr	gr_fade
+	jsr	page_flip
+
+	lda	#200
+	jsr	WAIT
+
+	lda	#<(fade_lookup+32)
+	sta	GBASL
+	lda	#>(fade_lookup+32)
+	sta	GBASH
+
+	jsr	gr_fade
+	jsr	page_flip
+
+	lda	#200
+	jsr	WAIT
+
+	lda	#<(fade_lookup+16)
+	sta	GBASL
+	lda	#>(fade_lookup+16)
+	sta	GBASH
+
+	jsr	gr_fade
+	jsr	page_flip
+
+	lda	#200
+	jsr	WAIT
+
+	lda	#<(fade_lookup+0)
+	sta	GBASL
+	lda	#>(fade_lookup+0)
+	sta	GBASH
+
+	jsr	gr_fade
+	jsr	page_flip
+
+	lda	#200
+	jsr	WAIT
+
+	rts
+
+	;================================================
+	; Fade in/out lowres graphics
+	; GR image should be in $C00, result goes to DRAW_PAGE
+	; pointer to 16-entry fade table in GBASL/GBASH
+
+gr_fade:
+
+	ldx	#0		; X indexes gr_offsets (2 bytes per entry)	; 2
+
+gr_fade_loop:
+	lda	gr_offsets,X	; lookup low byte for line addr		; 4+
+
+	sta	gr_fade_line1+1	; low byte is the same for load...	; 4
+	sta	gr_fade_line2+1	; ...and for store			; 4
+
+	lda	gr_offsets+1,X	; lookup high byte for line addr	; 4+
+	clc								; 2
+	adc	DRAW_PAGE	; destination = line addr + DRAW_PAGE	; 3
+	sta	gr_fade_line2+2						; 4
+
+	lda	gr_offsets+1,X	; lookup high byte for line addr	; 4+
+	adc	#$8		; source is fixed at $C00 (line addr + $800)	; 2
+	sta	gr_fade_line1+2						; 4
+
+	ldy	#0		; overwritten below on both paths	; 2
+
+
+	cpx	#$8		; entries 4-7 ($600-$780): skip last 40 bytes	; 2
+	bcs	gr_fade_above4						; 2nt/3
+
+gr_fade_below4:
+	ldy	#119		; for early ones, copy 120 bytes	; 2
+	bcc	gr_fade_line1	; always taken (carry clear after cpx)	; 3
+
+gr_fade_above4:			; for last four, just copy 80 bytes
+	ldy	#79							; 2
+
+gr_fade_line1:
+	lda	$ffff,Y		; load a byte (self modified)		; 4+
+	pha			; keep a copy for the low nibble
+
+	sty	TEMPY		; save Y
+
+	; do high nibble
+	and	#$f0
+	lsr
+	lsr
+	lsr
+	lsr			; A = high nibble as table index 0..15
+
+	tay
+	lda	(GBASL),Y	; faded colour for that index
+	and	#$f0		; keep only its high nibble
+	sta	TEMP
+
+	; do low nibble
+	pla
+	and	#$0f		; A = low nibble as table index 0..15
+
+	tay
+	lda	(GBASL),Y
+	and	#$0f		; keep only its low nibble
+	ora	TEMP		; recombine both pixels
+
+	ldy	TEMPY		; restore Y
+
+gr_fade_line2:
+	sta	$ffff,Y		; store a byte (self modified)		; 5
+	dey			; count down to offset 0		; 2
+	bpl	gr_fade_line1	; loop while Y >= 0			; 2nt/3
+
+gr_fade_line_done:
+	inx			; increment ypos value			; 2
+	inx			; twice, as address is 2 bytes		; 2
+	cpx	#16		; there are 8*2 of them			; 2
+	bne	gr_fade_loop	; if not, loop				; 3
+	rts								; 6
+
+
+
+; Fade parameters: four 16-entry tables, indexed by colour nibble
+fade_lookup:
+.byte $00,$11,$22,$33,$44,$55,$66,$77, $88,$99,$aa,$bb,$cc,$dd,$ee,$ff
+.byte $00,$00,$00,$22,$00,$00,$22,$55, $55,$88,$55,$33,$44,$88,$44,$77
+.byte $00,$00,$00,$00,$00,$00,$55,$00, $00,$00,$00,$00,$88,$00,$00,$55
+.byte $00,$00,$00,$00,$00,$00,$00,$00, $00,$00,$00,$00,$00,$00,$00,$00
diff --git a/mode7_demo/gr_fast_clear.s b/mode7_demo/gr_fast_clear.s
new file mode 100644
index 00000000..796b985a
--- /dev/null
+++ b/mode7_demo/gr_fast_clear.s
@@ -0,0 +1,192 @@
+clear_screens:
+	;===================================
+	; Clear top/bottom of page 0
+	;===================================
+
+	lda	#$0
+	sta	DRAW_PAGE
+	jsr	clear_top
+	jsr	clear_bottom
+
+	;===================================
+	; Clear top/bottom of page 1
+	;===================================
+
+	lda	#$4
+	sta	DRAW_PAGE
+	jsr	clear_top
+	jsr	clear_bottom
+
+	rts			; note: leaves DRAW_PAGE = 4
+
+clear_bottoms:
+	;===================================
+	; Clear bottom of page 0
+	;===================================
+
+	lda	#$0
+	sta	DRAW_PAGE
+	jsr	clear_bottom
+
+	;===================================
+	; Clear bottom of page 1
+	;===================================
+
+	lda	#$4
+	sta	DRAW_PAGE
+	jsr	clear_bottom
+
+	rts			; note: leaves DRAW_PAGE = 4
+
+
+
+	;=========================================================
+	; clear_top
+	;=========================================================
+	; clear DRAW_PAGE; enter at clear_top_a with fill value in A
+	; original = 14,558 cycles(?) 15ms, 70Hz
+	; OPTIMIZED MAX (page0,48rows): 45*120+4+6 = 5410 = 5.4ms 185Hz
+	;		(pageX,40rows): 50*120+4+6 = 6010 = 6.0ms 166Hz
+	;				50*120+4+6+37 = 6055 = 6.0ms 166Hz
+clear_top:
+	lda	#0							; 2
+clear_top_a:
+	sta	COLOR							; 3
+	clc								; 2
+	lda	DRAW_PAGE						; 3
+
+	adc	#4		; patch high bytes of the 8 stores	; 2
+	sta	__ctf+2							; 3
+	sta	__ctf+5							; 3
+	adc	#1							; 2
+	sta	__ctf+8							; 3
+	sta	__ctf+11						; 3
+	adc	#1							; 2
+	sta	__ctf2+2						; 3
+	sta	__ctf2+5						; 3
+	adc	#1							; 2
+	sta	__ctf2+8						; 3
+	sta	__ctf2+11						; 3
+
+
+	ldy	#119		; 120 bytes: 119..0 (120 is past the row data)	; 2
+	lda	COLOR							; 3
+clear_top_fast_loop:
+__ctf:
+	sta	$400,Y							; 5
+	sta	$480,Y							; 5
+	sta	$500,Y							; 5
+	sta	$580,Y							; 5
+
+	cpy	#80		; offsets 80..119 of $600-$780 are left alone	; 2
+	bpl	no_draw_bottom						; 2nt/3
+__ctf2:
+	sta	$600,Y							; 5
+	sta	$680,Y							; 5
+	sta	$700,Y							; 5
+	sta	$780,Y							; 5
+no_draw_bottom:
+
+	dey								; 2
+	bpl	clear_top_fast_loop					; 2nt/3
+
+	rts								; 6
+
+
+
+
+	;=========================================================
+	; clear_bottom
+	;=========================================================
+	; fill offsets 80..119 of $600-$780 on DRAW_PAGE with $A0
+
+clear_bottom:
+	clc								; 2
+	lda	DRAW_PAGE						; 3
+
+	adc	#6		; patch high bytes of the 4 stores	; 2
+	sta	__cbf2+2						; 3
+	sta	__cbf2+5						; 3
+	adc	#1							; 2
+	sta	__cbf2+8						; 3
+	sta	__cbf2+11						; 3
+
+
+	ldy	#119		; 40 bytes: 119..80 (120 is past the row data)	; 2
+	lda	#$a0	; Normal Space					; 2
+clear_bottom_fast_loop:
+__cbf2:
+	sta	$600,Y							; 5
+	sta	$680,Y							; 5
+	sta	$700,Y							; 5
+	sta	$780,Y							; 5
+
+	dey								; 2
+	cpy	#80		; stop once Y drops below 80		; 2
+	bpl	clear_bottom_fast_loop					; 2nt/3
+
+	rts								; 6
+
+
+clear_screens_notext:
+	;===================================
+	; Clear top/bottom of page 0
+	;===================================
+
+	lda	#$0
+	sta	DRAW_PAGE
+	jsr	clear_all
+
+	;===================================
+	; Clear top/bottom of page 1
+	;===================================
+
+	lda	#$4
+	sta	DRAW_PAGE
+	jsr	clear_all
+
+	rts			; note: leaves DRAW_PAGE = 4
+
+
+	;=========================================================
+	; 
clear_all
+	;=========================================================
+	; clear 48 rows of DRAW_PAGE (fill value at clear_all_color+1)
+
+clear_all:
+	clc								; 2
+	lda	DRAW_PAGE						; 3
+
+	adc	#4		; patch high bytes of the 8 stores	; 2
+	sta	__caf+2							; 3
+	sta	__caf+5							; 3
+	adc	#1							; 2
+	sta	__caf+8							; 3
+	sta	__caf+11						; 3
+	adc	#1							; 2
+	sta	__caf2+2						; 3
+	sta	__caf2+5						; 3
+	adc	#1							; 2
+	sta	__caf2+8						; 3
+	sta	__caf2+11						; 3
+
+
+	ldy	#119		; 120 bytes: 119..0 (120 is past the row data)	; 2
+clear_all_color:
+	lda	#0							; 2
+clear_all_fast_loop:
+__caf:
+	sta	$400,Y							; 5
+	sta	$480,Y							; 5
+	sta	$500,Y							; 5
+	sta	$580,Y							; 5
+__caf2:
+	sta	$600,Y							; 5
+	sta	$680,Y							; 5
+	sta	$700,Y							; 5
+	sta	$780,Y							; 5
+
+	dey								; 2
+	bpl	clear_all_fast_loop					; 2nt/3
+
+	rts								; 6
diff --git a/mode7_demo/gr_hlin_double.s b/mode7_demo/gr_hlin_double.s
new file mode 100644
index 00000000..127dd30e
--- /dev/null
+++ b/mode7_demo/gr_hlin_double.s
@@ -0,0 +1,67 @@
+;=====================================================================
+;= ROUTINES
+;=====================================================================
+
+	;================================
+	; hlin_setup
+	;================================
+	; put address in GBASL/GBASH
+	; Ycoord in A, Xcoord in Y (Xcoord is also left in TEMPY)
+hlin_setup:
+	sty	TEMPY		; remember Xcoord			; 3
+	tay			; y=A					; 2
+	lda	gr_offsets,Y	; lookup low-res memory address		; 4
+	clc								; 2
+	adc	TEMPY		; add Xcoord to the line's low byte	; 3
+	sta	GBASL							; 3
+	iny			; point at the entry's high byte	; 2
+
+	lda	gr_offsets,Y						; 4
+	adc	DRAW_PAGE	; add in draw page offset		; 3
+	sta	GBASH							; 3
+	rts								; 6
+								;===========
+								;	35
+	;================================
+	; hlin_double:
+	;================================
+	; HLIN Y, V2 AT A
+	; Y, X, A trashed
+	; start at Y, draw up to and including V2
+hlin_double:
+;int hlin_double(int page, int x1, int x2, int at) {
+
+	jsr	hlin_setup						; 41
+
+	sec								; 2
+	lda	V2							; 3
+	sbc	TEMPY		; A = V2 - start			; 3
+
+	tax								; 2
+	inx			; width = V2 - start + 1		; 2
+								;===========
+								;	53
+	; fallthrough
+
+	;=================================
+	; hlin_double_continue:  width
+	;=================================
+	; GBASL has correct offset for row/col
+	; width in X
+
+hlin_double_continue:
+
+	ldy	#0							; 2
+	lda	COLOR							; 3
+hlin_double_loop:
+	sta	(GBASL),Y						; 6
+	inc	GBASL		; low byte only; GBASH is not carried	; 5
+	dex								; 2
+	bne	hlin_double_loop					; 2nt/3
+
+	rts								; 6
+								;=============
+								; 53+5+X*16+5
+
+
+
diff --git a/mode7_demo/gr_offsets.s b/mode7_demo/gr_offsets.s
new file mode 100644
index 00000000..f9f575be
--- /dev/null
+++ b/mode7_demo/gr_offsets.s
@@ -0,0 +1,5 @@
+	; move these to zero page for slight speed increase?
+gr_offsets:
+	.word	$400,$480,$500,$580,$600,$680,$700,$780
+	.word	$428,$4a8,$528,$5a8,$628,$6a8,$728,$7a8
+	.word	$450,$4d0,$550,$5d0,$650,$6d0,$750,$7d0
diff --git a/mode7_demo/gr_plot.s b/mode7_demo/gr_plot.s
new file mode 100644
index 00000000..1ad15895
--- /dev/null
+++ b/mode7_demo/gr_plot.s
@@ -0,0 +1,48 @@
+	;================================
+	; plot routine
+	;================================
+	; Xcoord in XPOS
+	; Ycoord in YPOS
+	; color in COLOR
+plot:
+	lda	YPOS							; 2
+
+	lsr			; shift bottom bit into carry		; 2
+
+	bcc	plot_even						; 2nt/3
+plot_odd:
+	ldx	#$f0		; odd row: pixel is the high nibble	; 2
+	bcs	plot_c_done	; always taken (carry still set)	; 2nt/3
+plot_even:
+	ldx	#$0f		; even row: pixel is the low nibble	; 2
+plot_c_done:
+	stx	MASK							; 3
+
+	asl			; shift back (now even)			; 2
+	tay			; Y = byte offset into gr_offsets
+
+	lda	gr_offsets,Y	; lookup low-res memory address		; 4
+	clc								; 2
+	adc	XPOS							; 3
+	sta	GBASL							; 3
+	iny			; point at the entry's high byte	; 2
+
+	lda	gr_offsets,Y						; 4
+	adc	DRAW_PAGE	; add in draw page offset		; 3
+	sta	GBASH							; 3
+
+	ldy	#0							; 2
+
+plot_write:
+	lda	MASK							; 3
+	eor	#$ff		; inverse mask selects the other pixel	; 2
+
+	and	(GBASL),Y	; keep the other pixel from the screen	; 5
+	sta	COLOR_MASK						; 3
+
+	lda	COLOR							; 3
+	and	MASK		; keep only our nibble of COLOR		; 3
+	ora	COLOR_MASK	; merge with the preserved pixel	; 3
+	sta	(GBASL),Y						; 5
+
+	rts								; 6
diff --git a/mode7_demo/gr_putsprite.s b/mode7_demo/gr_putsprite.s
new file mode 100644
index 00000000..6aecf782
--- /dev/null
+++ b/mode7_demo/gr_putsprite.s
@@ -0,0 +1,101 @@
+	;=============================================
+	; put_sprite
+	;=============================================
+	; Sprite to display in INH,INL
+	; Location is XPOS,YPOS
+	; Note, only works if YPOS is multiple of two?
+
+put_sprite:
+
+	ldy	#0		; byte 0 is xsize			; 2
+	lda	(INL),Y							; 5
+	sta	CH		; xsize is in CH			; 3
+	iny								; 2
+
+	lda	(INL),Y		; byte 1 is ysize			; 5
+	sta	CV		; ysize is in CV			; 3
+	iny								; 2
+
+	lda	YPOS		; make a copy of ypos			; 3
+	sta	TEMPY		; as we modify it			; 3
+								;===========
+								;	28
+put_sprite_loop:
+	sty	TEMP		; save sprite pointer			; 3
+
+	ldy	TEMPY		; YPOS is used directly as table offset	; 3
+	lda	gr_offsets,Y	; lookup low-res memory address		; 5
+	clc								; 2
+	adc	XPOS		; add in xpos				; 3
+	sta	OUTL		; store out low byte of addy		; 3
+	lda	gr_offsets+1,Y	; look up high byte			; 5
+	adc	DRAW_PAGE	; add in draw page offset		; 3
+	sta	OUTH		; and store it out			; 3
+	ldy	TEMP		; restore sprite pointer		; 3
+
+	; OUTH:OUTL now points at right place
+
+	ldx	CH		; load xsize into x			; 3
+								;===========
+								;	36
+put_sprite_pixel:
+	lda	(INL),Y		; get sprite colors			; 5
+	iny			; increment sprite pointer		; 2
+
+	sty	TEMP		; save sprite pointer			; 3
+	ldy	#$0							; 2
+
+	; check if completely transparent
+	; if so, skip
+
+	cmp	#$0		; if all zero, transparent		; 2
+	beq	put_sprite_done_draw	; don't draw it			; 2nt/3
+	; FIXME: use BIT?					;==============
+								;	17
+
+	sta	COLOR		; save color for later			; 3
+
+	; check if top pixel transparent
+
+	and	#$f0		; check if top nibble zero		; 2
+	bne	put_sprite_bottom	; if not skip ahead		; 2nt/3
+
+	lda	#$f0		; setup mask				; 2
+	sta	MASK							; 3
+	bmi	put_sprite_mask	; always taken (A=$f0 is negative)	; 2nt/3
+
+put_sprite_bottom:
+	lda	COLOR		; re-load color				; 3
+	and	#$0f		; check if bottom nibble zero		; 2
+	bne	put_sprite_all	; if not, skip ahead			; 2nt/3
+	lda	#$0f							; 2
+	sta	MASK		; setup mask				; 3
+
+put_sprite_mask:
+	lda	(OUTL),Y	; get color at output			; 5
+	and	MASK		; mask off unneeded part		; 3
+	ora	COLOR		; or the color in			; 3
+	sta	(OUTL),Y	; store it back				; 5
+
+	jmp	put_sprite_done_draw	; we are done			; 3
+
+put_sprite_all:
+	lda	COLOR		; load color				; 3
+	sta	(OUTL),Y	; and write it out			; 5
+
+
+put_sprite_done_draw:
+
+	ldy	TEMP		; restore sprite pointer		; 3
+
+	inc	OUTL		; increment output pointer		; 5
+	dex			; decrement x counter			; 2
+	bne	put_sprite_pixel	; if not done, keep looping	; 2nt/3
+
+	inc	TEMPY		; each line has two y vars		; 5
+	inc	TEMPY							; 5
+	dec	CV		; decrement total y count		; 5
+	bne	put_sprite_loop	; loop if not done			; 2nt/3
+
+	rts			; return				; 6
+
diff --git a/mode7_demo/gr_scroll.s b/mode7_demo/gr_scroll.s
new file mode 100644
index 00000000..d92d27ec
--- /dev/null
+++ b/mode7_demo/gr_scroll.s
@@ -0,0 +1,195 @@
+.ifndef scroll_row1
+scroll_row1	EQU	$8A00
+scroll_row2	EQU	$8B00
+scroll_row3	EQU	$8C00
+scroll_row4	EQU	$8D00
+.endif
+
+SCROLL_LENGTH	EQU	$61
+OFFSET		EQU	$62
+
+
+	;========================
+	; scroll some text
+	;========================
+	; RLE compressed data in INL/INH
+	; CV is Y position to display at
+
+gr_scroll:
+	lda	#0
+	sta	OFFSET
+
+	;=======================
+	; decompress scroll text
+	;=======================
+
+	jsr	decompress_scroll
+
+
+scroll_loop:
+
+	ldx	#0
+
+	ldy	CV
+	lsr			; no effect: A is reloaded on the next line
+
+	lda	gr_offsets,Y	; low bytes of four consecutive rows
+	sta	sm1+1
+	lda	gr_offsets+2,Y	; get position
+	sta	sm2+1
+	lda	gr_offsets+4,Y	; get position
+	sta	sm3+1
+	lda	gr_offsets+6,Y	; get position
+	sta	sm4+1
+
+
+	iny			; now index the high bytes
+
+	clc
+
+	lda	gr_offsets,Y	; high bytes, offset by DRAW_PAGE
+	adc	DRAW_PAGE
+	sta	sm1+2
+	lda	gr_offsets+2,Y	; get position
+	adc	DRAW_PAGE
+	sta	sm2+2
+	lda	gr_offsets+4,Y	; get position
+	adc	DRAW_PAGE
+	sta	sm3+2
+	lda	gr_offsets+6,Y	; get position
+	adc	DRAW_PAGE
+	sta	sm4+2
+
+	ldy	OFFSET		; first source column to show
+
+draw_loop:
+
+	lda	scroll_row1,Y
+sm1:
+	sta	$400,X		; operand patched above
+
+	lda	scroll_row2,Y
+sm2:
+	sta	$480,X		; operand patched above
+
+	lda	scroll_row3,Y
+sm3:
+	sta	$500,X		; operand patched above
+
+	lda	scroll_row4,Y
+sm4:
+	sta	$580,X		; operand patched above
+
+	iny
+	inx
+	cpx	#40		; 40 bytes per screen row
+	bne	draw_loop
+
+	;==================
+	; flip pages
+	;==================
+
+	jsr	page_flip						; 6
+
+	;==================
+	; delay
+	;==================
+
+	lda	#125
+	jsr	WAIT
+
+
+	;==================
+	; loop until OFFSET+40 reaches SCROLL_LENGTH
+	;==================
+	clc
+	lda	OFFSET
+	adc	#40
+	cmp	SCROLL_LENGTH
+	beq	done_scrolling
+	inc	OFFSET
+	jmp	scroll_loop						; 3
+
+done_scrolling:
+	rts
+
+	;=======================
+	; decompress scroll
+	;=======================
+decompress_scroll:
+	ldy	#0
+	jsr	scroll_load_and_increment	; first byte is the length
+	sta	SCROLL_LENGTH
+
+	lda	#<(scroll_row1)		; OUTL/OUTH = start of scroll_row1
+	sta	OUTL
+	lda	#>(scroll_row1)
+	sta	OUTH
+
+decompress_scroll_loop:
+	jsr	scroll_load_and_increment	; load compressed value
+
+	cmp	#$A1				; EOF marker
+	beq	done_decompress_scroll		; if EOF, exit
+
+	pha			; save
+
+	and	#$f0		; mask
+	cmp	#$a0		; see if special AX
+	beq	decompress_scroll_special
+
+	pla			; note, PLA sets flags!
+
+	ldx	#$1		; only want to print 1
+	bne	decompress_scroll_run	; always taken (X is non-zero)
+
+decompress_scroll_special:
+	pla
+
+	and	#$0f		; check if was A0
+
+	bne	decompress_scroll_color	; A2-AF: low nibble is the run length
+
+decompress_scroll_large:
+	jsr	scroll_load_and_increment	; A0: run length is next byte
+
+decompress_scroll_color:
+	tax			; put runlen into X
+	jsr	scroll_load_and_increment	; get color
+
+decompress_scroll_run:
+	sta	(OUTL),Y
+	pha
+
+	clc			; increment 16-bit pointer
+	lda	OUTL
+	adc	#$1
+	sta	OUTL
+	lda	OUTH
+	adc	#$0
+	sta	OUTH
+
+	pla
+
+	dex			; repeat for X times
+	bne	decompress_scroll_run
+
+	beq	decompress_scroll_loop		; get next run
+
+done_decompress_scroll:
+	rts
+
+
+scroll_load_and_increment:
+	lda	(INL),Y		; load and increment 16-bit pointer
+	pha			; keep the loaded byte across the add
+	clc
+	lda	INL
+	adc	#$1
+	sta	INL
+	lda	INH
+	adc	#$0
+	sta	INH
+	pla
+	rts
+
diff --git a/mode7_demo/gr_unrle.s b/mode7_demo/gr_unrle.s
new file mode 100644
index 00000000..51fbf6ce
--- /dev/null
+++ b/mode7_demo/gr_unrle.s
@@ -0,0 +1,115 @@
+	;=================
+	; load RLE image
+	;=================
+	; Output is BASH/BASL
+	; Input is in GBASH/GBASL
+load_rle_gr:
+	lda	#$0
+	tay			; init Y to 0
+	sta	TEMP		; stores the xcoord
+
+	sta	CV		; ycoord=0
+
+	jsr	load_and_increment	; load xsize
+	sta	CH
+
+rle_loop:
+	jsr	load_and_increment
+
+	cmp	#$A1		; if 0xa1
+	beq	rle_done	; we are done
+
+	pha
+
+	and	#$f0		; mask
+	cmp	#$a0		; see if special AX
+	beq	decompress_special
+
+	pla			; note, PLA sets flags!
+
+	ldx	#$1		; only want to print 1
+	bne	decompress_run	; always taken (X is non-zero)
+
+decompress_special:
+	pla
+
+	and	#$0f		; check if was A0
+
+	bne	decompress_color	; A2-AF: low nibble is the run length
+
+decompress_large:
+	jsr	load_and_increment	; A0: run length is next byte
+
+decompress_color:
+	tax			; put runlen into X
+	jsr	load_and_increment	; get color
+
+decompress_run:
+rle_run_loop:
+	sta	(BASL),y	; write out the value
+	inc	BASL		; increment the pointer
+	bne	rle_skip3	; if wrapped
+	inc	BASH		; then increment the high value
+
+rle_skip3:
+	pha			; store color for later
+
+	inc	TEMP		; increment the X value
+	lda	TEMP
+	cmp	CH		; compare against the image width
+	bcc	rle_not_eol	; if less then keep going
+
+	lda	BASL		; cheat to avoid a 16-bit add
+	cmp	#$a7		; we are adding 0x58 to get
+	bcc	rle_add_skip	; to the next line
+	inc	BASH
+rle_add_skip:
+	clc
+	adc	#$58		; actually do the 0x58 add
+	sta	BASL		; and store it back
+
+	inc	CV		; add 2 to ypos
+	inc	CV		; each "line" is two high
+
+	lda	CV		; load value
+	cmp	#15		; if it's greater than 14 it wraps
+	bcc	rle_no_wrap	; Thanks Woz
+
+	lda	#$0		; we wrapped, so set to zero
+	sta	CV
+
+	; when wrapping have to sub 0x3d8
+	sec			; this is a 16-bit subtract routine
+	lda	BASL
+	sbc	#$d8		; LSB
+	sta	BASL
+	lda	BASH		; MSB
+	sbc	#$3		; borrow from the LSB propagates here
+	sta	BASH
+
+rle_no_wrap:
+	lda	#$0		; set X value back to zero
+	sta	TEMP
+
+rle_not_eol:
+	pla			; restore color
+	dex
+	bne	rle_run_loop	; if not zero, keep looping
+	beq	rle_loop	; and branch always
+
+rle_done:
+	lda	#$15		; move the cursor somewhere sane
+	sta	CV
+	rts
+
+
+load_and_increment:
+	lda	(GBASL),y	; load value				; 5?
+	inc	GBASL							; 5?
+	bne	lskip2							; 2nt/3
+	inc	GBASH							; 5?
+lskip2: + rts ; 6 + + + diff --git a/mode7_demo/mode7.s b/mode7_demo/mode7.s index f4bfa772..32035a07 100644 --- a/mode7_demo/mode7.s +++ b/mode7_demo/mode7.s @@ -1160,8 +1160,8 @@ lookup_checkerboard_map: ; External modules ;=============================================== -.include "../asm_routines/gr_putsprite.s" -.include "../asm_routines/bg_scroll.s" +.include "gr_putsprite.s" +.include "bg_scroll.s" .include "sprites.inc" @@ -1172,7 +1172,7 @@ lookup_checkerboard_map: .include "island_map.inc" .include "starry_sky.scroll" -.include "../asm_routines/multiply_fast.s" +.include "multiply_fast.s" ; 8.8 fixed point diff --git a/mode7_demo/mode7_demo.s b/mode7_demo/mode7_demo.s index f8500b46..448fd0ea 100644 --- a/mode7_demo/mode7_demo.s +++ b/mode7_demo/mode7_demo.s @@ -330,11 +330,11 @@ title_routine: .byte 0,0,0,0 .byte $A0,$55,$26,$55,$81 ; at $4800 -.include "../asm_routines/pageflip.s" +.include "pageflip.s" .include "rasterbars.s" .include "starfield_demo.s" -.include "../asm_routines/gr_unrle.s" -.include "../asm_routines/gr_offsets.s" +.include "gr_unrle.s" +.include "gr_offsets.s" .include "gr_setpage.s" .byte 0,0,0,0,0,0,0,0 .byte 0,0,0,0,0,0,0,0 @@ -345,13 +345,13 @@ title_routine: ;=============================================== -.include "../asm_routines/gr_fast_clear.s" -.include "../asm_routines/gr_hlin_double.s" -.include "../asm_routines/text_print.s" -.include "../asm_routines/gr_fade.s" -.include "../asm_routines/gr_plot.s" -.include "../asm_routines/gr_copy.s" -.include "../asm_routines/gr_scroll.s" +.include "gr_fast_clear.s" +.include "gr_hlin_double.s" +.include "text_print.s" +.include "gr_fade.s" +.include "gr_plot.s" +.include "gr_copy.s" +.include "gr_scroll.s" .include "mode7.s" diff --git a/mode7_demo/multiply_fast.s b/mode7_demo/multiply_fast.s new file mode 100644 index 00000000..f59736cc --- /dev/null +++ b/mode7_demo/multiply_fast.s @@ -0,0 +1,351 @@ +; Fast mutiply + + +; Note for our purposes we only care about 8.8 x 8.8 fixed 
point
+; with 8.8 result, which means we only care about the middle two bytes
+; of the 32 bit result.  So we disable generation of the high and low byte
+; to save some cycles.
+
+;
+; The old routine took around 700 cycles for a 16bitx16bit=32bit multiply
+; This routine, at an expense of 2kB of lookup tables, takes around 250
+;	If you reuse a term the next time this drops closer to 200
+
+; This routine was described by Stephen Judd and found
+;	in The Fridge and in the C=Hacking magazine
+; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
+
+; The key thing to note is that
+;        (a+b)^2     (a-b)^2
+; a*b =  -------  -  -------
+;           4           4
+; So if you have tables of the squares of 0..511 you can lookup and subtract
+; instead of multiplying.
+
+; Table generation: I:0..511
+;                   square1_lo = <((I*I)/4)
+;                   square1_hi = >((I*I)/4)
+;                   square2_lo = <(((I-255)*(I-255))/4)
+;                   square2_hi = >(((I-255)*(I-255))/4)
+
+; Note: DOS3.3 starts at $9600
+
+.ifndef square1_lo
+square1_lo	EQU	$8E00
+square1_hi	EQU	$9000
+square2_lo	EQU	$9200
+square2_hi	EQU	$9400
+.endif
+
+; for(i=0;i<512;i++) {
+;	square1_lo[i]=((i*i)/4)&0xff;
+;	square1_hi[i]=(((i*i)/4)>>8)&0xff;
+;	square2_lo[i]=( ((i-255)*(i-255))/4)&0xff;
+;	square2_hi[i]=(( ((i-255)*(i-255))/4)>>8)&0xff;
+; }
+
+init_multiply_tables:
+
+	; Build the add tables
+
+	ldx	#$00
+	txa
+	.byte	$c9		; CMP #immediate - skip TYA and clear carry flag
+lb1:	tya
+	adc	#$00		; 0
+ml1:	sta	square1_hi,x	; square1_hi[0]=0
+	tay			; y=0
+	cmp	#$40		; subtract 64 and update flags (c=0)
+	txa			; a=0
+	ror			; rotate
+ml9:	adc	#$00		; add 0
+	sta	ml9+1		; update add value
+	inx			; x=1
+ml0:	sta	square1_lo,x	; square1_lo[0]=1
+	bne	lb1		; if not zero, loop
+	inc	ml0+2		; increment values
+	inc	ml1+2		; increment values
+	clc			; c=0
+	iny			; y=1
+	bne	lb1		; loop
+
+	; Build the subtract tables based on the existing one
+
+	ldx	#$00
+	ldy	#$ff
+second_table:
+	lda	square1_hi+1,x
+	sta	square2_hi+$100,x
+	lda	square1_hi,x
+	sta	square2_hi,y
+	lda	square1_lo+1,x
+	sta	square2_lo+$100,x
+	lda	square1_lo,x
+	sta	square2_lo,y
+	dey
+	inx
+	bne	second_table
+
+
+	rts
+
+
+; Fast 16x16 bit unsigned multiplication, 32-bit result
+; Input: NUM1H:NUM1L * NUM2H:NUM2L
+; Result: RESULT3:RESULT2:RESULT1:RESULT0
+;
+; Does self-modifying code to hard-code NUM1H:NUM1L into the code
+;	carry=0: re-use previous NUM1H:NUM1L
+;	carry=1: reload NUM1H:NUM1L (58 cycles slower)
+;
+; clobbered: RESULT, X, A, C
+; Allocation setup: T1,T2 and RESULT preferably on Zero-page.
+;
+; NUM1H (x_i), NUM1L (x_f)
+; NUM2H (y_i), NUM2L (y_f)
+
+; NUM1L * NUM2L = AAaa
+; NUM1L * NUM2H = BBbb
+; NUM1H * NUM2L = CCcc
+; NUM1H * NUM2H = DDdd
+;
+;       AAaa
+;     BBbb
+;     CCcc
+; + DDdd
+; ----------
+;   RESULT
+
+;fixed_16x16_mul_unsigned:
+
+multiply:
+
+	bcc	num1_same_as_last_time					; 2nt/3
+
+	;============================
+	; Set up self-modifying code
+	; this changes the code to be hard-coded to multiply by NUM1H:NUM1L
+	;============================
+
+	lda	NUM1L		; load the low byte			; 3
+	sta	sm1a+1							; 3
+	sta	sm3a+1							; 3
+	sta	sm5a+1							; 3
+	sta	sm7a+1							; 3
+	eor	#$ff		; invert the bits for subtracting	; 2
+	sta	sm2a+1							; 3
+	sta	sm4a+1							; 3
+	sta	sm6a+1							; 3
+	sta	sm8a+1							; 3
+	lda	NUM1H		; load the high byte			; 3
+	sta	sm1b+1							; 3
+	sta	sm3b+1							; 3
+	sta	sm5b+1							; 3
+;	sta	sm7b+1							;
+	eor	#$ff		; invert the bits for subtracting	; 2
+	sta	sm2b+1							; 3
+	sta	sm4b+1							; 3
+	sta	sm6b+1							; 3
+;	sta	sm8b+1							;
+								;===========
+								;	52
+
+num1_same_as_last_time:
+
+	;==========================
+	; Perform NUM1L * NUM2L = AAaa
+	;==========================
+
+	ldx	NUM2L		; (low le)				; 3
+	sec								; 2
+sm1a:
+	lda	square1_lo,x						; 4
+sm2a:
+	sbc	square2_lo,x						; 4
+
+	; a is _aa (not stored: low byte of the result is unused)
+
+;	sta	RESULT+0						;
+
+sm3a:
+	lda	square1_hi,x						; 4
+sm4a:
+	sbc	square2_hi,x						; 4
+	; a is _AA
+	sta	_AA+1		; patch operand of "lda #" at _AA	; 3
+								;===========
+								;	24
+
+	; Perform NUM1H * NUM2L = CCcc
+	sec								; 2
+sm1b:
+	lda	square1_lo,x						; 4
+sm2b:
+	sbc	square2_lo,x						; 4
+	; a is _cc
+	sta	_cc+1		; patch operand of "lda #" at _cc	; 3
+sm3b:
+	lda	square1_hi,x						; 4
+sm4b:
+	sbc	square2_hi,x						; 4
+	; a is _CC
+	sta	_CC+1		; patch operand of "adc #" at _CC	; 3
+								;===========
+								;	24
+
+	;==========================
+	; Perform NUM1L * NUM2H = BBbb
+	;==========================
+	ldx	NUM2H							; 3
+	sec								; 2
+sm5a:
+	lda	square1_lo,x						; 4
+sm6a:
+	sbc	square2_lo,x						; 4
+	; a is _bb
+	sta	_bb+1		; patch operand of "adc #" at _bb	; 3
+
+sm7a:
+	lda	square1_hi,x						; 4
+sm8a:
+	sbc	square2_hi,x						; 4
+	; a is _BB
+	sta	_BB+1		; patch operand of "lda #" at _BB	; 3
+								;===========
+								;	27
+
+	;==========================
+	; Perform NUM1H * NUM2H = DDdd
+	;==========================
+	sec								; 2
+sm5b:
+	lda	square1_lo,x						; 4
+sm6b:
+	sbc	square2_lo,x						; 4
+	; a is _dd
+	sta	_dd+1		; patch operand of "lda #" at _dd	; 3
+;sm7b:
+;	lda	square1_hi,x						;
+;sm8b:
+;	sbc	square2_hi,x						;
+	; a = _DD
+;	sta	RESULT+3						;
+								;===========
+								;	13
+
+	;===========================================
+	; Add the separate multiplications together
+	;===========================================
+
+	clc								; 2
+_AA:
+	lda	#0		; loading _AA				; 2
+_bb:
+	adc	#0		; adding in _bb				; 2
+	sta	RESULT+1						; 3
+								;==========
+								;	9
+	; product[2]=_BB+_CC+c
+
+_BB:
+	lda	#0		; loading _BB				; 2
+_CC:
+	adc	#0		; adding in _CC				; 2
+	sta	RESULT+2						; 3
+								;===========
+								;	7
+
+	; product[3]=_DD+c
+
+;	bcc	dd_no_carry1						;
+;	inc	RESULT+3						;
+	clc			; carry into product[3] is discarded	; 2
+								;=============
+								;	2
+dd_no_carry1:
+
+	; product[1]=_AA+_bb+_cc
+
+_cc:
+	lda	#0		; load _cc				; 2
+	adc	RESULT+1						; 3
+	sta	RESULT+1						; 3
+
+	; product[2]=_BB+_CC+_dd+c
+
+_dd:
+	lda	#0		; load _dd				; 2
+	adc	RESULT+2						; 3
+	sta	RESULT+2						; 3
+
+								;===========
+								;	16
+	; product[3]=_DD+c
+
+
+;	bcc	dd_no_carry2						;
+;	inc	RESULT+3						;
+
+								;=============
+								;	0
+
+dd_no_carry2:
+
+;	*z_i=product[1];
+;	*z_f=product[0];
+
+;	rts								; 6
+
+
+	;=================
+	; Signed multiply
+	;=================
+
+;multiply:
+
+;	jsr	fixed_16x16_mul_unsigned				; 6
+
+	lda	NUM1H		; x_i					; 3
+								;===========
+								;	12
+
+
+	bpl	x_positive						;^3/2nt
+
+	sec			; NUM1 negative: subtract NUM2L		; 2
+	lda	RESULT+2						; 3
+	sbc	NUM2L							; 3
+	sta	RESULT+2						; 3
+;	lda	RESULT+3						;
+;	sbc	NUM2H							;
+;	sta	RESULT+3						;
+								;============
+								;	10
+
+x_positive:
+
+	lda	NUM2H		; y_i					; 3
+								;============
+								;	; 6
+
+	bpl	y_positive						;^3/2nt
+
+
+	sec			; NUM2 negative: subtract NUM1L		; 2
+	lda	RESULT+2						; 3
+	sbc	NUM1L							; 3
+	sta	RESULT+2						; 3
+;	lda	RESULT+3						;
+;	sbc	NUM1H							;
+;	sta	RESULT+3						;
+								;===========
+								;	10
+
+y_positive:
+	ldx	RESULT+2	; *z_i=product[2];			; 3
+	lda	RESULT+1	; *z_f=product[1];			; 3
+
+	rts								; 6
+								;==========
+								;	12
+
diff --git a/mode7_demo/pageflip.s b/mode7_demo/pageflip.s
new file mode 100644
index 00000000..f199f5be
--- /dev/null
+++ b/mode7_demo/pageflip.s
@@ -0,0 +1,24 @@
+	;==========
+	; page_flip
+	;==========
+
+page_flip:
+	lda	DISP_PAGE						; 3
+	beq	page_flip_show_1					; 2nt/3
+page_flip_show_0:
+	bit	PAGE0							; 4
+	lda	#4							; 2
+	sta	DRAW_PAGE	; DRAW_PAGE=4 (draw to page 1)		; 3
+	lda	#0							; 2
+	sta	DISP_PAGE	; DISP_PAGE=0				; 3
+	rts								; 6
+page_flip_show_1:
+	bit	PAGE1							; 4
+	sta	DRAW_PAGE	; DRAW_PAGE=0 (A is still 0 here)	; 3
+	lda	#1							; 2
+	sta	DISP_PAGE	; DISP_PAGE=1				; 3
+	rts								; 6
+								;====================
+								; DISP_PAGE=0	26
+								; DISP_PAGE=1	24
+
diff --git a/mode7_demo/text_print.s b/mode7_demo/text_print.s
new file mode 100644
index 00000000..a39506f7
--- /dev/null
+++ b/mode7_demo/text_print.s
@@ -0,0 +1,80 @@
+	;================================
+	; htab_vtab
+	;================================
+	; point BASL/BASH at column CH, row CV of DRAW_PAGE
+htab_vtab:
+	lda	CV
+	asl			; gr_offsets entries are 2 bytes each
+	tay
+	lda	gr_offsets,Y	; lookup low-res memory address
+	clc
+	adc	CH		; add in xpos
+	sta	BASL		; store out low byte of addy
+
+	lda	gr_offsets+1,Y	; look up high byte
+	adc	DRAW_PAGE	; add in draw page offset
+	sta	BASH		; and store it out
+				; BASH:BASL now points at right place
+
+	rts
+
+	;================================
+	; move_and_print
+	;================================
+	; move to CH/CV, then fall through into print_string
+move_and_print:
+	jsr	htab_vtab
+
+	;================================
+	; print_string
+	;================================
+
+print_string:
+	ldy	#0
+print_string_loop:
+	lda	(OUTL),Y	; next character of the string at OUTL/OUTH
+	beq	done_print_string	; a zero byte ends the string
+	ora	#$80		; set the high bit before storing
+	sta	(BASL),Y
+	iny
+	bne	print_string_loop
+done_print_string:
+	rts
+
+	;====================
+	; point_to_end_string
+	;====================
+point_to_end_string:
+	iny			; Y+1 ...
+	tya
+	clc
+	adc	OUTL		; ... is added to the 16-bit OUTL/OUTH
+	sta	OUTL
+	lda	#0
+	adc	OUTH
+	sta	OUTH
+
+	rts
+
+
+	;================================
+	; print_both_pages
+	;================================
+print_both_pages:
+	lda	DRAW_PAGE	; save caller's DRAW_PAGE
+	pha
+
+	lda	#0
+	sta	DRAW_PAGE
+	jsr	move_and_print
+
+	lda	#4
+	sta	DRAW_PAGE
+	jsr	move_and_print
+
+	pla			; restore caller's DRAW_PAGE
+	sta	DRAW_PAGE
+
+	rts	; oops forgot this initially
+		; explains the weird vertical stripes on the screen
+