dos33fsprogs/xmas_2018/merry.s.orig

619 lines
13 KiB
ArmAsm

;=====================================
; XMAS2018 -- Merry Christmas Part
;=====================================
merry:
;====================================================
; ensure we have proper graphics mode (page1 visible)
bit HIRES ; 4
bit SET_GR ; 4
bit FULLGR ; 4
bit PAGE1 ; 4
;===================
; init vars
lda #0
sta FRAME
sta FRAMEH
;=============================
; Load graphic hgr -- already loaded at $6000
;================================================
; Merry Christmas Loop
;================================================
merry_begin:
cli ; start music interrupt
; jsr play_music ; 6+1032
lda #$00
sta scroll_hgr_smc+1
jsr scroll_hgr_left
lda #$28
sta scroll_hgr_smc+1
jsr scroll_hgr_left
lda #$50
sta scroll_hgr_smc+1
jsr scroll_hgr_left
bit KEYRESET ; clear keypress ; 4
; sei ; disable interrupts/music
rts ; 6
; 0 1 2 3 4 5 6 7
; 00 = $2000 $2400 $2800 $2c00 $3000 $3400 $3800 $3c00
; 08 = $2080 $2480 $2880 $2c80 $3080 $3480 $3880 $3c80
; 16 = $2100 $2500 $2900 $2d00 $3100 $3500 $3900 $3d00
; 24 = $2180 $2580 $2980 $2d80 $3180 $3580 $3980 $3d80
; 32 = $2200 $2600 $2a00 $2e00 $3200 $3600 $3a00 $3e00
; 40 = $2280 $2680 $2a80 $2e80 $3280 $3680 $3a80 $3e80
; 48 = $2300 $2700 $2b00 $2f00 $3300 $3700 $3b00 $3f00
; 56 = $2380 $2780 $2b80 $2f80 $3380 $3780 $3b80 $3f80
; -----
; 64 = $2028 $2428 $2828 $2c28 $3028 $3428 $3828 $3c28
; 72 = $20a8 $24a8 $28a8 $2ca8 $30a8 $34a8 $38a8 $3ca8
; 80 = $2128 $2528 $2928 $2d28 $3128 $3528 $3928 $3d28
; 88 = $21a8 $25a8 $29a8 $2da8 $31a8 $35a8 $39a8 $3da8
; 96 = $2228 $2628 $2a28 $2e28 $3228 $3628 $3a28 $3e28
; 104 = $22a8 $26a8 $2aa8 $2ea8 $32a8 $36a8 $3aa8 $3ea8
; 112 = $2328 $2728 $2b28 $2f28 $3328 $3728 $3b28 $3f28
; 120 = $23a8 $27a8 $2ba8 $2fa8 $33a8 $37a8 $3ba8 $3fa8
; -----
; 128 = $2050 $2450 $2850 $2c50 $3050 $3450 $3850 $3c50
; 136 = $20d0 $24d0 $28d0 $2cd0 $30d0 $34d0 $38d0 $3cd0
; 144 = $2150 $2550 $2950 $2d50 $3150 $3550 $3950 $3d50
; 152 = $21d0 $25d0 $29d0 $2dd0 $31d0 $35d0 $39d0 $3dd0
; 160 = $2250 $2650 $2a50 $2e50 $3250 $3650 $3a50 $3e50
; 168 = $22d0 $26d0 $2ad0 $2ed0 $32d0 $36d0 $3ad0 $3ed0
; 176 = $2350 $2750 $2b50 $2f50 $3350 $3750 $3b50 $3f50
; 184 = $23d0 $27d0 $2bd0 $2fd0 $33d0 $37d0 $3bd0 $3fd0
; -----
; int count=0;
HIGH = $00
CURRENT = $01
NEXT = $02
COUNTH = $03
COUNTL = $04
OFFSCREEN = $05
FLIPHIGH = $06
;===========================================
;===========================================
;===========================================
; Scroll HGR Left
;
; very slowly scroll screen left, 1/3 of screen at a time
; scrolls page pointed to by INL:INH into OUTL:OUTH
; Timing
; scroll_hgr_left: 8
; 140* scroll_hgr_loop: 10 setup
; 12 set fliphigh
; 6 check keypress
; 64*left_one_loop 6+2945
; 23 (increments)
; 22 increment counts
; 10 check and loop
; 6 return
; total time = 14 + 140*(10+12+6+22+10+64*(41+23+(2951)))
; 67,431,293 cycles = roughly 67s -- original
; 64,564,093 cycles = roughly 64s -- optimize inner loop a bit
; 33,347,034 cycles = roughly 33s -- don't shift hidden page
; 30,569,434 cycles = roughly 30s -- unroll 4 times
; 29,476,314 cycles = roughly 29s -- add back INH for +1 address
; 28,813,247 cycles = roughly 29s -- use X register for NEXT
; 28,813,247 cycles = roughly 29s -- use X register for NEXT
; 27,031,274 cycles = roughly 27s -- save LAST / skip OUTL
; 27,022,814 cycles = roughly 27s -- move some things around
scroll_hgr_left:
lda #$0 ; 2
sta COUNTH ; 3
sta COUNTL ; 3
;===========
; 8
; repeats 140 times
scroll_hgr_loop:
lda #$40 ; 2
sta OUTH ; 3
scroll_hgr_smc:
lda #$0 ; 2
sta OUTL ; 3
;============
; 10
; 0 000 00
; 1 001 01
;*2 010 10
; 3 011 11
; 4 100 00
; 5 101 01
;*6 110 10
; Sets FLIPHIGH if we are on 2nd or 6th iteration out of 7
lda COUNTL ; 3
and #$3 ; 2
sec ; 2
sbc #2 ; 2
sta FLIPHIGH ; 3
;===========
; 12
lda KEYPRESS ; 4
bmi scroll_done ; 3
; -1
;============
; 6
; repeats 64 times, once for east hline of 1/3 the screen
left_one_loop:
; scroll first page
jsr hgr_scroll_line ; 6+????
;============
; 6+????
clc ; 2
lda OUTL ; 3
adc #$80 ; 2
sta OUTL ; 3
lda OUTH ; 3
adc #$0 ; 2
sta OUTH ; 3
cmp #$60 ; 2
bne left_one_loop ; 3
;==========
; 23
; -1
inc COUNTL ; 5
lda COUNTL ; 3
cmp #7 ; 2
bne scroll_noinc_counth ; 3
lda #0 ; 2
sta COUNTL ; 3
inc COUNTH ; 5
;===========
; 22
scroll_noinc_counth:
lda COUNTH ; 3
cmp #20 ; 2
beq scroll_done ; 3
;-1
jmp scroll_hgr_loop ; 3
;==========
; 10
scroll_done:
rts ; 6
;===========================================
; hgr_scroll_line
;===========================================
;
; 93 init
; 10* (unrolled)
; 3* hgr_scroll_line_loop: 10
; high bit 20
; prepare bits: 18
; output new byte: 20
; 1* hgr_scroll_line_loop: 15
; high bit 20
; prepare bits: 18
; output new byte: 20
; 1*
; increment and loop: 7
; 5 return
;
; (93*40)+7=3727 -- original total
; (91*40)+7=3647 -- remove branch in highbit code
; (89*40)+7=3567 -- convert 5 asl to 4 ror
; (89*40)+91=3651 -- re-write with col40 pre-calculated
; (79*3 + 84*1 + 7)*10+91 = 3341 -- unroll 4 times
; (75*3 + 80*1 + 7)*10+105= 3225 -- move to INL=OUTL+1
; (73*3 + 78*1 + 7)*10+105= 3145 -- use X register for next
; (68*3 + 73*1 + 7)*10+112= 2952 -- use LAST instead of load
hgr_scroll_line:
setup_column_40:
lda COUNTH ; 3
asl ; 2
clc ; 2
adc OUTL ; 3
sta INL ; 3
clc ; necessary? ; 2
lda OUTH ; 3
adc #$20 ; 2
sta INH ; 3
ldy #0 ; 2
lda (INL),Y ; 5
sta HIGH ; 3
ldx COUNTL ; 3
;===========
; 36
count0:
beq done_count ; if 0, need to do nothing ; 3
cpx #1 ; -1/2
count1:
beq shiftright2 ; if 1, C>>2 ; 3
cpx #2 ; -1/2
count2:
beq shiftright4 ; if 2, C>>4 ; 3
count3:
sta TEMP ; save C ; -1/3
iny ; 2
lda (INL),Y ; 5
sta HIGH ; 3
cpx #3 ; 2
bne not3 ; 3
asl ; if 3, D<<1 | C>>6 ; -1/2
and #$2 ; 2
sta TEMPY ; 3
lda TEMP ; 3
lsr ; 2
lsr ; 2
lsr ; 2
lsr ; 2
lsr ; 2
lsr ; 2
and #$1 ; 2
ora TEMPY ; 3
jmp done_count ; 3
not3:
cpx #4 ; 2
count4:
beq shiftright1 ; if 4, D>>1 ; 3
count5:
cpx #5 ; 2/-1
beq shiftright3 ; if 5, D>>3 ; 3
count6: ; if 6, D>>5
; -1
shiftright5:
lsr ; 2
shiftright4:
lsr ; 2
shiftright3:
lsr ; 2
shiftright2:
lsr ; 2
shiftright1:
lsr ; 2
shiftright0:
done_count:
and #$7f ; 2
sta OFFSCREEN ; 3
lda HIGH ; 3
and #$80 ; 2
ora OFFSCREEN ; 3
sta OFFSCREEN ; 3
ldy #0 ; 2
;====================
; best case(0)= 19
; worse case(3)= 56
ldx OUTL ; 3
inx ; 2
stx INL ; 3
ldx OUTH ; 3
stx INH ; 3
;===========
; 14
lda (OUTL),Y ; get initial NEXT ; 5
tax ; 2
;===========
; 7
; repeated 10 times
hgr_scroll_line_loop:
;============= Unroll 0
stx CURRENT ; CURRENT=NEXT ; 3
lda (INL),Y ; get subsequent pixel block ; 5
tax ; NEXT ; 2
;===================
; 10
; if in bit 2 or 6 of horiz scroll, shift the color bit over
; makes some color flicker, is there a better way?
high_bit0:
lda FLIPHIGH ; 3 3 ; 3
bne keep_high_bit0 ; 3 2 ; 3
move_high_bit0:
; -1
txa ; NEXT ; 2 ; 2
jmp done_high_bit0 ; 3 ; 3
keep_high_bit0:
lda CURRENT ; 3 ; 3
done_high_bit0:
and #$80 ; 2 2 ; 2
sta HIGH ; 3 3 ; 3
;===================
; 2or6: 15
; else: 14
.if 0
high_bit0:
lda FLIPHIGH ; 3 3 ; 3
beq keep_high_bit0 ; 2 3 ; 3
; -1
lda CURRENT ; 3 ; 3
.byte $24 ; 3 ; 3
keep_high_bit0:
txa ; NEXT ; 2 ; 2
done_high_bit0:
and #$80 ; 2 2 ; 2
sta HIGH ; 3 3 ; 3
;===================
; 2or6: 13
; else: 16
.endif
prepare_bits0:
; get right byte, bottom 2 bits, shifted left to be in 6+5
txa ; NEXT ; 2
; this method 2 cycles faster than asl x 5
ror ; 2
ror ; 2
ror ; 2
ror ; 2
and #$60 ; 2
ora HIGH ; 3
sta HIGH ; 3
;==========
; 18
output_new0:
; get current, mask off bottom 2 bits (no longer needed)
; then OR in the saved high (color) bit as well as NEXT bits
lda CURRENT ; 3
lsr ; 2
lsr ; 2
and #$1f ; 2
ora HIGH ; 3
sta (OUTL),Y ; 6
iny ; 2
;===========
; 20
;============= Unroll 1
stx CURRENT ; CURRENT=NEXT ; 3
lda (INL),Y ; get subsequent pixel block ; 5
tax ; NEXT ; 2
;===================
; 10
; if in bit 2 or 6 of horiz scroll, shift the color bit over
; makes some color flicker, is there a better way?
high_bit1:
lda FLIPHIGH ; 3
bne keep_high_bit1 ; 3
move_high_bit1:
; -1
txa ; NEXT ; 2
jmp done_high_bit1 ; 3
keep_high_bit1:
lda CURRENT ; 3
done_high_bit1:
and #$80 ; 2
sta HIGH ; 3
;===================
; 2or6: 15
; else: 14
prepare_bits1:
; get right byte, bottom 2 bits, shifted left to be in 6+5
txa ; NEXT ; 2
; this method 2 cycles faster than asl x 5
ror ; 2
ror ; 2
ror ; 2
ror ; 2
and #$60 ; 2
ora HIGH ; 3
sta HIGH ; 3
;==========
; 18
output_new1:
; get current, mask off bottom 2 bits (no longer needed)
; then OR in the saved high (color) bit as well as NEXT bits
lda CURRENT ; 3
lsr ; 2
lsr ; 2
and #$1f ; 2
ora HIGH ; 3
sta (OUTL),Y ; 6
iny ; 2
;===========
; 20
;============= Unroll 2
stx CURRENT ; CURRENT=NEXT ; 3
lda (INL),Y ; get subsequent pixel block ; 5
tax ; NEXT ; 2
;===================
; 10
; if in bit 2 or 6 of horiz scroll, shift the color bit over
; makes some color flicker, is there a better way?
high_bit2:
lda FLIPHIGH ; 3
bne keep_high_bit2 ; 3
move_high_bit2:
; -1
txa ; NEXT ; 2
jmp done_high_bit2 ; 3
keep_high_bit2:
lda CURRENT ; 3
done_high_bit2:
and #$80 ; 2
sta HIGH ; 3
;===================
; 2or6: 15
; else: 14
prepare_bits2:
; get right byte, bottom 2 bits, shifted left to be in 6+5
txa ; NEXT ; 2
; this method 2 cycles faster than asl x 5
ror ; 2
ror ; 2
ror ; 2
ror ; 2
and #$60 ; 2
ora HIGH ; 3
sta HIGH ; 3
;==========
; 18
output_new2:
; get current, mask off bottom 2 bits (no longer needed)
; then OR in the saved high (color) bit as well as NEXT bits
lda CURRENT ; 3
lsr ; 2
lsr ; 2
and #$1f ; 2
ora HIGH ; 3
sta (OUTL),Y ; 6
iny ; 2
;===========
; 20
;============= Unroll 3
stx CURRENT ; CURRENT=NEXT ; 3
cpy #39 ; 2
bne not_thirtynine ; 3
thirtynine:
; -1
lda OFFSCREEN ; 3
jmp done_thirtynine ; 3
not_thirtynine:
lda (INL),Y ; get subsequent pixel block ; 5
done_thirtynine:
tax ; NEXT ; 2
;===================
; usually: 15
; rarely: 15
; if in bit 2 or 6 of horiz scroll, shift the color bit over
; makes some color flicker, is there a better way?
high_bit:
lda FLIPHIGH ; 3
bne keep_high_bit ; 3
move_high_bit:
; -1
txa ; NEXT ; 2
jmp done_high_bit ; 3
keep_high_bit:
lda CURRENT ; 3
done_high_bit:
and #$80 ; 2
sta HIGH ; 3
;===================
; 2or6: 15
; else: 14
prepare_bits:
; get right byte, bottom 2 bits, shifted left to be in 6+5
txa ; NEXT ; 2
; this method 2 cycles faster than asl x 5
ror ; 2
ror ; 2
ror ; 2
ror ; 2
and #$60 ; 2
ora HIGH ; 3
sta HIGH ; 3
;==========
; 18
output_new:
; get current, mask off bottom 2 bits (no longer needed)
; then OR in the saved high (color) bit as well as NEXT bits
lda CURRENT ; 3
lsr ; 2
lsr ; 2
and #$1f ; 2
ora HIGH ; 3
sta (OUTL),Y ; 6
iny ; 2
;===========
; 20
cpy #40 ; 2
beq hgr_scroll_return ; 3
; -1
jmp hgr_scroll_line_loop ; 3
;===========
; 7
hgr_scroll_return:
; -1
rts ; 6