asm_routines: optimize gr_copy.s

sped it up by nearly a factor of 2
This commit is contained in:
Vince Weaver 2017-12-17 00:35:32 -05:00
parent d5f906218a
commit 7d61a84afc
3 changed files with 108 additions and 34 deletions

View File

@ -2,49 +2,52 @@
; gr_copy_to_current
;=========================================================
; copy 0xc00 to DRAW_PAGE
; 2 + 8*38 + 4*80*23 + 4*120*26 + 13 = 20,159 = 20ms = 50Hz
; ORIGINAL: 2 + 8*38 + 4*80*23 + 4*120*26 + 13 = 20,159 = 20ms = 50Hz
;
; OPTIMIZED: 2+ 8*38 + 9*4 + 7*4 + 14*120*4 + 14*80*4 + 9*8 + 6 =
; 11,648 = 11ms = 90Hz
;
; The trailing number on each instruction line is its cycle cost
; ("4+" = one more on a page crossing, "2nt/3" = not taken / taken).
;
; NOTE(review): the body below holds two instruction sequences back to
; back -- one that goes through TEMP/INL/OUTL with (zp),Y addressing and
; counts Y upward, and one that patches the operand bytes of an
; absolute,Y lda/sta pair and counts Y downward.  gr_copy_line is
; defined twice, so this does not assemble as shown; presumably only
; the self-modifying (OPTIMIZED) sequence is meant to remain -- TODO
; confirm against the committed gr_copy.s.
gr_copy_to_current:
ldx #0 ; set y to zero ; 2
ldx #0 ; set ypos to zero ; 2
gr_copy_loop:
; --- (zp),Y variant: X counts entries, Y = X*2 indexes gr_offsets ---
stx TEMP ; save y ; 3
txa ; move to A ; 2
asl ; mult by 2 ; 2
tay ; put into Y ; 2
lda gr_offsets,Y ; lookup low byte for line addr ; 5
sta OUTL ; out and in are the same ; 3
sta INL ; 3
lda gr_offsets+1,Y ; lookup high byte for line addr ; 5
adc DRAW_PAGE
sta OUTH ; 3
lda gr_offsets+1,Y ; lookup high byte for line addr ; 5
; --- self-modifying variant: X itself is the byte offset into gr_offsets.
; gr_copy_line+1/+2 are the low/high operand bytes of the lda,
; gr_copy_line+4/+5 are the low/high operand bytes of the sta.
lda gr_offsets,X ; lookup low byte for line addr ; 4+
sta gr_copy_line+1 ; out and in are the same ; 4
sta gr_copy_line+4 ; 4
lda gr_offsets+1,X ; lookup high byte for line addr ; 4+
clc ; 2
adc DRAW_PAGE ; 3
sta gr_copy_line+5 ; 4
lda gr_offsets+1,X ; lookup high byte for line addr ; 4+
; no clc before this adc: it uses whatever carry the preceding adc left
adc #$8 ; for now, fixed 0xc ; 2
sta INH ; 3
ldx TEMP ; restore y ; 3
sta gr_copy_line+2 ; 4
ldy #0 ; set X counter to 0 ; 2
gr_copy_line:
lda (INL),Y ; load a byte ; 5
sta (OUTL),Y ; store a byte ; 6
iny ; increment pointer ; 2
; NOTE(review): in the down-counting variant this Y value is replaced by
; ldy #119 / ldy #79 before it is ever used
ldy #0 ; set xpos counter to 0 ; 2
cpx #$4 ; don't want to copy bottom 4*40 ; 2
bcs gr_copy_above4 ; 3
cpx #$8 ; don't want to copy bottom 4*40 ; 2
bcs gr_copy_above4 ; 2nt/3
gr_copy_below4:
cpy #120 ; for early ones, copy 120 bytes ; 2
bne gr_copy_line ; 3
beq gr_copy_line_done ; 3
ldy #119 ; for early ones, copy 120 bytes ; 2
; carry is clear here (the bcs above fell through), so bcc always branches
bcc gr_copy_line ; ; 3
gr_copy_above4: ; for last four, just copy 80 bytes
cpy #80 ; 2
bne gr_copy_line ; 3
ldy #79 ; 2
gr_copy_line:
; $ffff operands are placeholders; the sta gr_copy_line+N stores above
; overwrite them with the real source/destination addresses each pass
lda $ffff,Y ; load a byte (self modified) ; 4+
sta $ffff,Y ; store a byte (self modified) ; 5
dey ; decrement pointer ; 2
; loop while Y >= 0; exits when dey wraps 0 -> $ff (N flag set)
bpl gr_copy_line ; ; 2nt/3
gr_copy_line_done:
inx ; increment y value ; 2
cpx #8 ; there are 8 of them ; 2
inx ; increment ypos value ; 2
inx ; twice, as address is 2 bytes ; 2
cpx #16 ; there are 8*2 of them ; 2
bne gr_copy_loop ; if not, loop ; 3
rts ; 6

View File

@ -20,7 +20,7 @@ MODE7_DEMO: mode7_demo.o
ld65 -o MODE7_DEMO mode7_demo.o -C ./apple2_1000.inc
mode7_demo.o: mode7_demo.s mode7_demo_backgrounds.inc \
../asm_routines/unrle_gr.s \
../asm_routines/gr_unrle.s \
../asm_routines/hlin_clearscreen.s \
../asm_routines/gr_setpage.s
ca65 -o mode7_demo.o mode7_demo.s -l mode7_demo.lst

View File

@ -7,28 +7,99 @@
jsr clear_screens ; clear top/bottom of page 0/1
jsr set_gr_page0
lda #$0
sta DRAW_PAGE
lda #<demo_rle
sta GBASL
lda #>demo_rle
sta GBASH
lda #<$400
; Load offscreen
lda #<$c00
sta BASL
lda #>$400
lda #>$c00
sta BASH
jsr load_rle_gr
lda #<fade_lookup
sta GBASL
lda #>fade_lookup
sta GBASH
; Main loop: call gr_fade over and over; the jmp back to the label
; means execution never continues past this point.
loop_forever:
jsr gr_fade
jmp loop_forever
;================================================
; gr_fade
;================================================
; Fade in/out lowres graphics
; GR image should be in $C00
;
; As written this is a plain block copy: for each of the first eight
; 2-byte entries of gr_offsets it copies a run of bytes from
; (entry + $0800) to (entry + DRAW_PAGE*256), using an absolute,Y
; lda/sta pair whose operands are patched in place on every pass.
;
;   In:       DRAW_PAGE = value added to the destination high byte
;   Reads:    gr_offsets (low byte at +0, high byte at +1 per entry)
;   Out:      nothing in registers
;   Clobbers: A, X, Y, flags; rewrites the 4 operand bytes at
;             gr_copy_line+1, +2, +4, +5 (self-modifying code)
;
; NOTE(review): fade_lookup is not read anywhere in this routine yet;
; presumably the per-byte color remap is still to be added -- TODO confirm.
gr_fade:
	ldx	#0		; X = byte offset into gr_offsets	; 2
gr_copy_loop:
	lda	gr_offsets,X	; low byte of this line's address	; 4+
	sta	gr_copy_line+1	; source (lda operand) low byte		; 4
	sta	gr_copy_line+4	; dest   (sta operand) low byte		; 4
	lda	gr_offsets+1,X	; high byte of this line's address	; 4+
	clc			;					; 2
	adc	DRAW_PAGE	; dest high = line high + DRAW_PAGE	; 3
	sta	gr_copy_line+5	; dest   (sta operand) high byte	; 4
	lda	gr_offsets+1,X	; high byte again, for the source	; 4+
				; No clc here: this assumes the adc above
				; did not carry out (line high + DRAW_PAGE
				; stays below $100).
	adc	#$8		; for now, fixed 0xc			; 2
	sta	gr_copy_line+2	; source (lda operand) high byte	; 4
				; (The old "ldy #0" here was dead: Y is
				; loaded on both paths below and cpx
				; replaces the flags it set.)
	cpx	#$8		; don't want to copy bottom 4*40	; 2
	bcs	gr_copy_above4	; X >= 8: entries 4-7			; 2nt/3
gr_copy_below4:
	ldy	#119		; for early ones, copy 120 bytes	; 2
	bcc	gr_copy_line	; always taken: cpx left carry clear	; 3
gr_copy_above4:			; for last four, just copy 80 bytes
	ldy	#79		;					; 2
gr_copy_line:
	lda	$ffff,Y		; load a byte (self modified)		; 4+
	sta	$ffff,Y		; store a byte (self modified)		; 5
	dey			; walk backwards through the run	; 2
	bpl	gr_copy_line	; until Y wraps 0 -> $ff		; 2nt/3
gr_copy_line_done:
	inx			; increment ypos value			; 2
	inx			; twice, as address is 2 bytes		; 2
	cpx	#16		; there are 8*2 of them			; 2
	bne	gr_copy_loop	; if not, loop				; 3
	rts			;					; 6
;===============================================
; External modules
;===============================================
.include "../asm_routines/unrle_gr.s"
.include "../asm_routines/gr_unrle.s"
.include "../asm_routines/hlin_clearscreen.s"
.include "../asm_routines/gr_setpage.s"
.include "mode7_demo_backgrounds.inc"
; Fade parameters
; Four rows of 16 bytes each.  The first row is the identity mapping
; ($0..$f) and the last row is all zeros.
; NOTE(review): presumably each row maps a lo-res color index (0-15) to
; the color shown at one fade step, going from full image to black --
; TODO confirm; the gr_fade code visible in this file does not read it.
fade_lookup:
.byte $0,$1,$2,$3,$4,$5,$6,$7, $8,$9,$a,$b,$c,$d,$e,$f
.byte $0,$0,$0,$2,$0,$0,$2,$5, $5,$8,$5,$3,$4,$8,$4,$7
.byte $0,$0,$0,$0,$0,$0,$5,$0, $0,$0,$0,$0,$8,$0,$0,$5
.byte $0,$0,$0,$0,$0,$0,$0,$0, $0,$0,$0,$0,$0,$0,$0,$0