mirror of
https://github.com/deater/dos33fsprogs.git
synced 2025-03-04 04:30:18 +00:00
xmas2018: more scrolling optimization
This commit is contained in:
parent
cd6d5e9f4c
commit
5353c8814b
@ -196,18 +196,19 @@ OFFSCREEN = $05
|
|||||||
; Timing
|
; Timing
|
||||||
; scroll_hgr_left: 8
|
; scroll_hgr_left: 8
|
||||||
; 140* scroll_hgr_loop: 10 setup
|
; 140* scroll_hgr_loop: 10 setup
|
||||||
; 64*left_one_loop 6+3225
|
; 64*left_one_loop 6+3145
|
||||||
; 23 (increments)
|
; 23 (increments)
|
||||||
; 29 increment counts
|
; 29 increment counts
|
||||||
; 10 check and loop
|
; 10 check and loop
|
||||||
; 6 return
|
; 6 return
|
||||||
|
|
||||||
; total time = 14 + 140*(10+29+10+64*(41+23+(3225)))
|
; total time = 14 + 140*(10+29+10+64*(41+23+(3151)))
|
||||||
; 67,431,293 cycles = roughly 67s -- original
|
; 67,431,293 cycles = roughly 67s -- original
|
||||||
; 64,564,093 cycles = roughly 64s -- optimize inner loop a bit
|
; 64,564,093 cycles = roughly 64s -- optimize inner loop a bit
|
||||||
; 33,347,034 cycles = roughly 33s -- don't shift hidden page
|
; 33,347,034 cycles = roughly 33s -- don't shift hidden page
|
||||||
; 30,569,434 cycles = roughly 30s -- unroll 4 times
|
; 30,569,434 cycles = roughly 30s -- unroll 4 times
|
||||||
; 29,476,314 cycles = roughly 29s -- add back INH for +1 address
|
; 29,476,314 cycles = roughly 29s -- add back INH for +1 address
|
||||||
|
; 28,813,247 cycles = roughly 29s -- use X register for NEXT
|
||||||
scroll_hgr_left:
|
scroll_hgr_left:
|
||||||
|
|
||||||
lda #$0 ; 2
|
lda #$0 ; 2
|
||||||
@ -284,14 +285,14 @@ scroll_done:
|
|||||||
;
|
;
|
||||||
; 86 init
|
; 86 init
|
||||||
; 10* (unrolled)
|
; 10* (unrolled)
|
||||||
; 3* hgr_scroll_line_loop: 16
|
; 3* hgr_scroll_line_loop: 15
|
||||||
; high bit 20
|
; high bit 20
|
||||||
; prepare bits: 16
|
; prepare bits: 18
|
||||||
; output new byte: 23
|
; output new byte: 20
|
||||||
; 1* hgr_scroll_line_loop: 21
|
; 1* hgr_scroll_line_loop: 20
|
||||||
; high bit 20
|
; high bit 20
|
||||||
; prepare bits: 16
|
; prepare bits: 18
|
||||||
; output new byte: 23
|
; output new byte: 20
|
||||||
; 1*
|
; 1*
|
||||||
; increment and loop: 7
|
; increment and loop: 7
|
||||||
; 5 return
|
; 5 return
|
||||||
@ -300,8 +301,9 @@ scroll_done:
|
|||||||
; (91*40)+7=3647 -- remove branch in highbit code
|
; (91*40)+7=3647 -- remove branch in highbit code
|
||||||
; (89*40)+7=3567 -- convert 5 asl to 4 ror
|
; (89*40)+7=3567 -- convert 5 asl to 4 ror
|
||||||
; (89*40)+91=3651 -- re-write with col40 pre-calculated
|
; (89*40)+91=3651 -- re-write with col40 pre-calculated
|
||||||
; (79*3 + 84*1 + 7)*10+91 = 3341
|
; (79*3 + 84*1 + 7)*10+91 = 3341 -- unroll 4 times
|
||||||
; (75*3 + 80*1 + 7)*10+105= 3225
|
; (75*3 + 80*1 + 7)*10+105= 3225 -- move to INL=OUTL+1
|
||||||
|
; (73*3 + 78*1 + 7)*10+105= 3145 -- use X register for next
|
||||||
hgr_scroll_line:
|
hgr_scroll_line:
|
||||||
|
|
||||||
setup_column_40:
|
setup_column_40:
|
||||||
@ -407,9 +409,9 @@ hgr_scroll_line_loop:
|
|||||||
sta CURRENT ; 3
|
sta CURRENT ; 3
|
||||||
|
|
||||||
lda (INL),Y ; get subsequent pixel block ; 5
|
lda (INL),Y ; get subsequent pixel block ; 5
|
||||||
sta NEXT ; 3
|
tax ; NEXT ; 2
|
||||||
;===================
|
;===================
|
||||||
; 20
|
; 15
|
||||||
|
|
||||||
; if in bit 2 or 6 of horiz scroll, shift the color bit over
|
; if in bit 2 or 6 of horiz scroll, shift the color bit over
|
||||||
; makes some color flicker, is there a better way?
|
; makes some color flicker, is there a better way?
|
||||||
@ -420,7 +422,7 @@ high_bit0:
|
|||||||
bne keep_high_bit0 ; 3
|
bne keep_high_bit0 ; 3
|
||||||
move_high_bit0:
|
move_high_bit0:
|
||||||
; -1
|
; -1
|
||||||
lda NEXT ; 3
|
txa ; NEXT ; 2
|
||||||
jmp done_high_bit0 ; 3
|
jmp done_high_bit0 ; 3
|
||||||
keep_high_bit0:
|
keep_high_bit0:
|
||||||
lda CURRENT ; 3
|
lda CURRENT ; 3
|
||||||
@ -432,19 +434,20 @@ done_high_bit0:
|
|||||||
; else: 18
|
; else: 18
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
prepare_bits0:
|
prepare_bits0:
|
||||||
; get right byte, bottom 2 bits, shifted left to be in 6+5
|
; get right byte, bottom 2 bits, shifted left to be in 6+5
|
||||||
lda NEXT ; 3
|
txa ; NEXT ; 2
|
||||||
; this method 2 cycles faster than asl x 5
|
; this method 2 cycles faster than asl x 5
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
and #$60 ; 2
|
and #$60 ; 2
|
||||||
|
ora HIGH ; 3
|
||||||
sta NEXT ; 3
|
sta HIGH ; 3
|
||||||
;==========
|
;==========
|
||||||
; 16
|
; 18
|
||||||
|
|
||||||
output_new0:
|
output_new0:
|
||||||
; get current, mask off bottom 2 bits (no longer needed)
|
; get current, mask off bottom 2 bits (no longer needed)
|
||||||
@ -454,12 +457,11 @@ output_new0:
|
|||||||
lsr ; 2
|
lsr ; 2
|
||||||
and #$1f ; 2
|
and #$1f ; 2
|
||||||
ora HIGH ; 3
|
ora HIGH ; 3
|
||||||
ora NEXT ; 3
|
|
||||||
sta (OUTL),Y ; 6
|
sta (OUTL),Y ; 6
|
||||||
|
|
||||||
iny ; 2
|
iny ; 2
|
||||||
;===========
|
;===========
|
||||||
; 23
|
; 20
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -469,9 +471,9 @@ output_new0:
|
|||||||
sta CURRENT ; 3
|
sta CURRENT ; 3
|
||||||
|
|
||||||
lda (INL),Y ; get subsequent pixel block ; 5
|
lda (INL),Y ; get subsequent pixel block ; 5
|
||||||
sta NEXT ; 3
|
tax ; NEXT ; 2
|
||||||
;===================
|
;===================
|
||||||
; 20
|
; 15
|
||||||
|
|
||||||
; if in bit 2 or 6 of horiz scroll, shift the color bit over
|
; if in bit 2 or 6 of horiz scroll, shift the color bit over
|
||||||
; makes some color flicker, is there a better way?
|
; makes some color flicker, is there a better way?
|
||||||
@ -482,7 +484,7 @@ high_bit1:
|
|||||||
bne keep_high_bit1 ; 3
|
bne keep_high_bit1 ; 3
|
||||||
move_high_bit1:
|
move_high_bit1:
|
||||||
; -1
|
; -1
|
||||||
lda NEXT ; 3
|
txa ; NEXT ; 2
|
||||||
jmp done_high_bit1 ; 3
|
jmp done_high_bit1 ; 3
|
||||||
keep_high_bit1:
|
keep_high_bit1:
|
||||||
lda CURRENT ; 3
|
lda CURRENT ; 3
|
||||||
@ -496,17 +498,17 @@ done_high_bit1:
|
|||||||
|
|
||||||
prepare_bits1:
|
prepare_bits1:
|
||||||
; get right byte, bottom 2 bits, shifted left to be in 6+5
|
; get right byte, bottom 2 bits, shifted left to be in 6+5
|
||||||
lda NEXT ; 3
|
txa ; NEXT ; 2
|
||||||
; this method 2 cycles faster than asl x 5
|
; this method 2 cycles faster than asl x 5
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
and #$60 ; 2
|
and #$60 ; 2
|
||||||
|
ora HIGH ; 3
|
||||||
sta NEXT ; 3
|
sta HIGH ; 3
|
||||||
;==========
|
;==========
|
||||||
; 16
|
; 18
|
||||||
|
|
||||||
output_new1:
|
output_new1:
|
||||||
; get current, mask off bottom 2 bits (no longer needed)
|
; get current, mask off bottom 2 bits (no longer needed)
|
||||||
@ -516,12 +518,11 @@ output_new1:
|
|||||||
lsr ; 2
|
lsr ; 2
|
||||||
and #$1f ; 2
|
and #$1f ; 2
|
||||||
ora HIGH ; 3
|
ora HIGH ; 3
|
||||||
ora NEXT ; 3
|
|
||||||
sta (OUTL),Y ; 6
|
sta (OUTL),Y ; 6
|
||||||
|
|
||||||
iny ; 2
|
iny ; 2
|
||||||
;===========
|
;===========
|
||||||
; 23
|
; 20
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -531,9 +532,9 @@ output_new1:
|
|||||||
sta CURRENT ; 3
|
sta CURRENT ; 3
|
||||||
|
|
||||||
lda (INL),Y ; get subsequent pixel block ; 5
|
lda (INL),Y ; get subsequent pixel block ; 5
|
||||||
sta NEXT ; 3
|
tax ; NEXT ; 2
|
||||||
;===================
|
;===================
|
||||||
; 16
|
; 15
|
||||||
|
|
||||||
; if in bit 2 or 6 of horiz scroll, shift the color bit over
|
; if in bit 2 or 6 of horiz scroll, shift the color bit over
|
||||||
; makes some color flicker, is there a better way?
|
; makes some color flicker, is there a better way?
|
||||||
@ -544,7 +545,7 @@ high_bit2:
|
|||||||
bne keep_high_bit2 ; 3
|
bne keep_high_bit2 ; 3
|
||||||
move_high_bit2:
|
move_high_bit2:
|
||||||
; -1
|
; -1
|
||||||
lda NEXT ; 3
|
txa ; NEXT ; 2
|
||||||
jmp done_high_bit2 ; 3
|
jmp done_high_bit2 ; 3
|
||||||
keep_high_bit2:
|
keep_high_bit2:
|
||||||
lda CURRENT ; 3
|
lda CURRENT ; 3
|
||||||
@ -558,17 +559,17 @@ done_high_bit2:
|
|||||||
|
|
||||||
prepare_bits2:
|
prepare_bits2:
|
||||||
; get right byte, bottom 2 bits, shifted left to be in 6+5
|
; get right byte, bottom 2 bits, shifted left to be in 6+5
|
||||||
lda NEXT ; 3
|
txa ; NEXT ; 2
|
||||||
; this method 2 cycles faster than asl x 5
|
; this method 2 cycles faster than asl x 5
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
and #$60 ; 2
|
and #$60 ; 2
|
||||||
|
ora HIGH ; 3
|
||||||
sta NEXT ; 3
|
sta HIGH ; 3
|
||||||
;==========
|
;==========
|
||||||
; 16
|
; 18
|
||||||
|
|
||||||
output_new2:
|
output_new2:
|
||||||
; get current, mask off bottom 2 bits (no longer needed)
|
; get current, mask off bottom 2 bits (no longer needed)
|
||||||
@ -577,13 +578,13 @@ output_new2:
|
|||||||
lsr ; 2
|
lsr ; 2
|
||||||
lsr ; 2
|
lsr ; 2
|
||||||
and #$1f ; 2
|
and #$1f ; 2
|
||||||
|
|
||||||
ora HIGH ; 3
|
ora HIGH ; 3
|
||||||
ora NEXT ; 3
|
|
||||||
sta (OUTL),Y ; 6
|
sta (OUTL),Y ; 6
|
||||||
|
|
||||||
iny ; 2
|
iny ; 2
|
||||||
;===========
|
;===========
|
||||||
; 23
|
; 20
|
||||||
|
|
||||||
|
|
||||||
;============= Unroll 3
|
;============= Unroll 3
|
||||||
@ -600,9 +601,9 @@ thirtynine:
|
|||||||
not_thirtynine:
|
not_thirtynine:
|
||||||
lda (INL),Y ; get subsequent pixel block ; 5
|
lda (INL),Y ; get subsequent pixel block ; 5
|
||||||
done_thirtynine:
|
done_thirtynine:
|
||||||
sta NEXT ; 3
|
tax ; NEXT ; 2
|
||||||
;===================
|
;===================
|
||||||
; usually: 21
|
; usually: 20
|
||||||
; rarely: 18
|
; rarely: 18
|
||||||
|
|
||||||
; if in bit 2 or 6 of horiz scroll, shift the color bit over
|
; if in bit 2 or 6 of horiz scroll, shift the color bit over
|
||||||
@ -614,7 +615,7 @@ high_bit:
|
|||||||
bne keep_high_bit ; 3
|
bne keep_high_bit ; 3
|
||||||
move_high_bit:
|
move_high_bit:
|
||||||
; -1
|
; -1
|
||||||
lda NEXT ; 3
|
txa ; NEXT ; 2
|
||||||
jmp done_high_bit ; 3
|
jmp done_high_bit ; 3
|
||||||
keep_high_bit:
|
keep_high_bit:
|
||||||
lda CURRENT ; 3
|
lda CURRENT ; 3
|
||||||
@ -628,17 +629,17 @@ done_high_bit:
|
|||||||
|
|
||||||
prepare_bits:
|
prepare_bits:
|
||||||
; get right byte, bottom 2 bits, shifted left to be in 6+5
|
; get right byte, bottom 2 bits, shifted left to be in 6+5
|
||||||
lda NEXT ; 3
|
txa ; NEXT ; 2
|
||||||
; this method 2 cycles faster than asl x 5
|
; this method 2 cycles faster than asl x 5
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
ror ; 2
|
ror ; 2
|
||||||
and #$60 ; 2
|
and #$60 ; 2
|
||||||
|
ora HIGH ; 3
|
||||||
sta NEXT ; 3
|
sta HIGH ; 3
|
||||||
;==========
|
;==========
|
||||||
; 16
|
; 18
|
||||||
|
|
||||||
output_new:
|
output_new:
|
||||||
; get current, mask off bottom 2 bits (no longer needed)
|
; get current, mask off bottom 2 bits (no longer needed)
|
||||||
@ -648,12 +649,11 @@ output_new:
|
|||||||
lsr ; 2
|
lsr ; 2
|
||||||
and #$1f ; 2
|
and #$1f ; 2
|
||||||
ora HIGH ; 3
|
ora HIGH ; 3
|
||||||
ora NEXT ; 3
|
|
||||||
sta (OUTL),Y ; 6
|
sta (OUTL),Y ; 6
|
||||||
|
|
||||||
iny ; 2
|
iny ; 2
|
||||||
;===========
|
;===========
|
||||||
; 23
|
; 20
|
||||||
|
|
||||||
|
|
||||||
cpy #40 ; 2
|
cpy #40 ; 2
|
||||||
|
Loading…
x
Reference in New Issue
Block a user