Eliminate redundant register loads

This commit is contained in:
Lucas Scharenbroich 2022-07-28 11:57:14 -05:00
parent f5a27362a7
commit 0f920acd03
3 changed files with 128 additions and 148 deletions

View File

@ -33,8 +33,8 @@ _RestoreBG0Opcodes
lda LastPatchOffset ; If zero, there are no saved opcodes
sta :exit_offset
ldx :virt_line_x2
ldal BTableLow,x ; Get the address of the first code field line
@ -47,8 +47,7 @@ _RestoreBG0Opcodes
txa ; lda :virt_line_x2
and #$001E
eor #$FFFF
adc #32
min :lines_left_x2
sta :draw_count_x2 ; Do half of this many lines
@ -60,12 +59,13 @@ _RestoreBG0Opcodes
adc :exit_offset ; Add some offsets to get the base address in the code field line
jsr RestoreOpcode
jsr (RestoreOpcode,x)
lda :virt_line_x2 ; advance to the virtual line after the segment we just
clc ; filled in
adc :draw_count_x2
sta :virt_line_x2
lda :lines_left_x2 ; subtract the number of lines we just completed
@ -113,16 +113,16 @@ _ApplyBG0XPosPre
:virt_line equ tmp1
:lines_left equ tmp2
:draw_count equ tmp3
:stk_save equ tmp0
:virt_line_x2 equ tmp1
:lines_left_x2 equ tmp2
:draw_count_x2 equ tmp3
:exit_offset equ tmp4
:entry_offset equ tmp5
:exit_bra equ tmp6
:exit_address equ tmp7
:base_address equ tmp8
:draw_count_x2 equ tmp9
:opcode equ tmp0
:opcode equ tmp9
:odd_entry_offset equ tmp10
; If there are saved opcodes that have not been restored, do not run this routine
@ -133,10 +133,12 @@ _ApplyBG0XPos
; This code is fairly succinct. See the corresponding code in Vert.s for more detailed comments.
lda StartYMod208 ; This is the base line of the virtual screen
sta :virt_line ; Keep track of it
sta :virt_line_x2 ; Keep track of it
lda ScreenHeight
sta :lines_left
sta :lines_left_x2
; Calculate the exit and entry offsets into the code fields. This is a bit tricky, because odd-aligned
; rendering causes the left and right edges to move in a staggered fashion.
@ -280,32 +282,30 @@ _ApplyBG0XPos
; 3. Writes the JMP entry point to enter the code field
phb ; Save the existing bank
sta :stk_save
lda :virt_line
asl ; This will clear the carry bit
ldx :virt_line_x2
ldal BTableLow,x ; Get the address of the first code field line
tay ; Save it to use as the base address
adc :exit_offset ; Add some offsets to get the base address in the code field line
sta :exit_address
sty :base_address
sep #$20
ldal BTableHigh,x
plb ; This is the bank that will receive the updates
rep #$20
lda :virt_line
and #$000F
and #$001E
eor #$FFFF
adc #16
min :lines_left
adc #32
min :lines_left_x2
sta :draw_count ; Do this many lines
sta :draw_count_x2
; First step is to set the BRA instruction to exit the code field at the proper location. There
@ -317,64 +317,63 @@ _ApplyBG0XPos
; screen
; y is already set to :base_address
tax ; :draw_count_x2
lda :exit_address ; Save from this location
jsr SaveOpcode
clc ; advance to the virtual line after the segment we just
adc :virt_line_x2 ; filled in
sta :virt_line_x2
lda :exit_address ; Save from this location
jsr (SaveOpcode,x) ; X = :exit_address on return
txy ; ldy :exit_address -- starting at this address
ldx :draw_count_x2 ; Do this many lines
lda :exit_bra ; Copy this value into all of the lines
ldy :exit_address ; starting at this address
jsr SetConst
jsr (SetConst,x) ; All registers are preserved
; Next, patch in the CODE_ENTRY value, which is the low byte of a JMP instruction. This is an
; 8-bit operation and, since the PEA code is bank aligned, we use the entry_offset value directly
sep #$20
ldx :draw_count_x2
; ldx :draw_count_x2
lda :entry_offset
ldy :base_address
jsr SetCodeEntry
jsr (SetCodeEntry,x) ; All registers are preserved
; Now, patch in the opcode
ldx :draw_count_x2
; ldx :draw_count_x2
lda :opcode
ldy :base_address ; Y-register is preserved, this can be removed
jsr SetCodeEntryOpcode
jsr (SetCodeEntryOpcode,x) ; All registers are preserved
; If this is an odd entry, also set the odd_entry low byte and save the operand high byte
lda :odd_entry_offset
beq :not_odd
ldx :draw_count_x2
ldy :base_address ; Y-register is preserved, this can be removed
jsr SetOddCodeEntry
; NOTE: SetOddCodeEntry and SaveHighOperand can probably be combined to eliminate call/return overhead
ldx :draw_count_x2
ldy :base_address ; Y-register is preserved, this can be removed
pei :exit_address
jmp :SaveHighOperand ; Only used once, so "inline" it
; ldx :draw_count_x2
jsr (SetOddCodeEntry,x) ; All registers are preserved
; ldx :draw_count_x2
jmp (:SaveHighOperand,x) ; Only used once, so "inline" it
rep #$20
rep #$21 ; clear the carry
; Do the end of the loop -- update the virtual line counter and reduce the number
; of lines left to render
lda :virt_line ; advance to the virtual line after the segment we just
clc ; filled in
adc :draw_count
sta :virt_line
lda :lines_left ; subtract the number of lines we just completed
lda :lines_left_x2 ; subtract the number of lines we just completed
sbc :draw_count
sta :lines_left
sbc :draw_count_x2
sta :lines_left_x2
jne :loop
lda :stk_save
@ -387,45 +386,43 @@ _ApplyBG0XPos
; Y = starting line * $1000
; A = code field location * $1000
jmp (:tbl,x)
:tbl da :bottom
da :bottom
da :do01,:do02,:do03,:do04
da :do05,:do06,:do07,:do08
da :do09,:do10,:do11,:do12
da :do13,:do14,:do15,:do16
:do15 plx
:do15 ldx :exit_address ; accumulator is in 8-bit mode, so can't use TAX
bra :x15
:do14 plx
:do14 ldx :exit_address
bra :x14
:do13 plx
:do13 ldx :exit_address
bra :x13
:do12 plx
:do12 ldx :exit_address
bra :x12
:do11 plx
:do11 ldx :exit_address
bra :x11
:do10 plx
:do10 ldx :exit_address
bra :x10
:do09 plx
:do09 ldx :exit_address
bra :x09
:do08 plx
:do08 ldx :exit_address
bra :x08
:do07 plx
:do07 ldx :exit_address
bra :x07
:do06 plx
:do06 ldx :exit_address
bra :x06
:do05 plx
:do05 ldx :exit_address
bra :x05
:do04 plx
:do04 ldx :exit_address
bra :x04
:do03 plx
:do03 ldx :exit_address
bra :x03
:do02 plx
:do02 ldx :exit_address
bra :x02
:do01 plx
:do01 ldx :exit_address
bra :x01
:do16 plx
:do16 ldx :exit_address
:x16 lda $F002,x
:x15 lda $E002,x
@ -469,9 +466,7 @@ _ApplyBG0XPos
; Y = starting line * $1000
; A = code field location * $1000
jmp (:tbl,x)
:tbl da :bottom
da :bottom
da :do01,:do02,:do03,:do04
da :do05,:do06,:do07,:do08
da :do09,:do10,:do11,:do12
@ -550,9 +545,7 @@ SaveOpcode
; Y = starting line * $1000
; A = code field location * $1000
jmp (:tbl,x)
:tbl da :bottom
da :bottom
da :do01,:do02,:do03,:do04
da :do05,:do06,:do07,:do08
da :do09,:do10,:do11,:do12
@ -631,8 +624,7 @@ RestoreOpcode
; Y = starting line * $1000
; A = address low byte
jmp (:tbl,x)
:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-12,:bottom-15,:bottom-18,:bottom-21
da :bottom-24,:bottom-27,:bottom-30,:bottom-33
da :bottom-36,:bottom-39,:bottom-42,:bottom-45
@ -663,8 +655,7 @@ SetCodeEntry
; Y = starting line * $1000
; A = address low byte
jmp (:tbl,x)
:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-12,:bottom-15,:bottom-18,:bottom-21
da :bottom-24,:bottom-27,:bottom-30,:bottom-33
da :bottom-36,:bottom-39,:bottom-42,:bottom-45
@ -695,8 +686,7 @@ SetOddCodeEntry
; Y = starting line * $1000
; A = opcode value
jmp (:tbl,x)
:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-12,:bottom-15,:bottom-18,:bottom-21
da :bottom-24,:bottom-27,:bottom-30,:bottom-33
da :bottom-36,:bottom-39,:bottom-42,:bottom-45

View File

@ -87,9 +87,8 @@ Counter equ tmp3
; A = value
; Set M to 0 or 1
SetConst ; Need a blank line here, otherwise the :tbl local variable resolves backwards
jmp (:tbl,x)
:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-12,:bottom-15,:bottom-18,:bottom-21
da :bottom-24,:bottom-27,:bottom-30,:bottom-33
da :bottom-36,:bottom-39,:bottom-42,:bottom-45

View File

@ -7,29 +7,36 @@
; lines in the correct order
:rtbl_idx equ tmp0
:virt_line equ tmp1
:lines_left equ tmp2
:draw_count equ tmp3
:rtbl_idx_x2 equ tmp0
:virt_line_x2 equ tmp1
:lines_left_x2 equ tmp2
:draw_count_x2 equ tmp3
:stk_save equ tmp4
; First task is to fill in the STK_ADDR values by copying them from the RTable array. We
; copy from RTable[i] into BlitField[StartY+i]. As with all of this code, the difficult part
; is decomposing the update across banks
stz :rtbl_idx ; Start copying from the first entry in the table
stz :rtbl_idx_x2 ; Start copying from the first entry in the table
lda StartY ; This is the base line of the virtual screen
jsr Mod208
sta StartYMod208
sta :virt_line ; Keep track of it
sta :virt_line_x2 ; Keep track of it
phb ; Save the current bank
tsc ; we intentionally leak one byte of stack in each loop
sta :stk_save ; iteration, so save the stack to repair at the end
; copy a range of address from the table into the destination bank. If we restrict ourselves to
; rectangular playfields, this can be optimized to just subtracting a constant value. See the
; Templates::SetScreenAddrs subroutine.
lda ScreenHeight
sta :lines_left
sta :lines_left_x2
; This is the verbose part -- figure out how many lines to draw. We don't want to artificially limit
; the height of the visible screen (for example, doing an animated wipe while scrolling), so the screen
@ -38,62 +45,47 @@ _ApplyBG0YPos
; For larger values, we want to break things up on 16-line boundaries based on the virt_line value. So,
; draw_count = min(lines_left, 16 - (virt_line % 16))
; Note that almost everything in this loop can be done with 8-bit operations since the values are
; all under 200. The one exception is the virt_line value which could exceed 256. This will be
; a later optimization and might save around 10 cycles per iteration, or up to ~120 cycles per frame
; and ~2,500 per second. This is ~1% of our total CPU budget and is *just* enough cycles to be
; interesting.... Another 8 cycles could be removed by doing all calculations pre-multiplied by 2
; to avoid several 'asl' instructions
lda :virt_line
ldx :virt_line_x2
ldal BTableLow,x ; Get the address of the first code field line
sep #$20
ldal BTableHigh,x
ldal BTableHigh,x ; Target bank in low byte, current bank in high
plb ; This is the bank that will receive the updates
rep #$20
lda :virt_line
and #$000F
and #$001E
eor #$FFFF
adc #16
min :lines_left
adc #32
min :lines_left_x2
sta :draw_count ; Do this many lines
sta :draw_count_x2 ; Do this many lines
lda :rtbl_idx ; Read from this location in the RTable
clc ; pre-advance virt_line_x2 because we have the value
adc :virt_line_x2
sta :virt_line_x2
jsr CopyRTableToStkAddr
jsr (:CopyRTableToStkAddr,x) ; X = rtbl_idx_x2 on return
lda :virt_line ; advance to the virtual line after the segment we just
clc ; filled in
adc :draw_count
sta :virt_line
txa ; carry flag is unchanged
adc :draw_count_x2 ; advance the index into the RTable
sta :rtbl_idx_x2
lda :rtbl_idx ; advance the index into the RTable
adc :draw_count
sta :rtbl_idx
lda :lines_left ; subtract the number of lines we just completed
lda :lines_left_x2 ; subtract the number of lines we just completed
sbc :draw_count
sta :lines_left
sbc :draw_count_x2
sta :lines_left_x2
jne :loop
lda :stk_save
; Unrolled copy routine to move RTable entries into STK_ADDR position.
@ -101,44 +93,43 @@ _ApplyBG0YPos
; A = index into the RTable array (x2)
; Y = starting line * $1000
; X = number of lines (x2)
jmp (:tbl,x)
:tbl da :none
da :none
da :do01,:do02,:do03,:do04
da :do05,:do06,:do07,:do08
da :do09,:do10,:do11,:do12
da :do13,:do14,:do15,:do16
:do15 tax
:do15 ldx :rtbl_idx_x2
bra :x15
:do14 tax
:do14 ldx :rtbl_idx_x2
bra :x14
:do13 tax
:do13 ldx :rtbl_idx_x2
bra :x13
:do12 tax
:do12 ldx :rtbl_idx_x2
bra :x12
:do11 tax
:do11 ldx :rtbl_idx_x2
bra :x11
:do10 tax
:do10 ldx :rtbl_idx_x2
bra :x10
:do09 tax
:do09 ldx :rtbl_idx_x2
bra :x09
:do08 tax
:do08 ldx :rtbl_idx_x2
bra :x08
:do07 tax
:do07 ldx :rtbl_idx_x2
bra :x07
:do06 tax
:do06 ldx :rtbl_idx_x2
bra :x06
:do05 tax
:do05 ldx :rtbl_idx_x2
bra :x05
:do04 tax
:do04 ldx :rtbl_idx_x2
bra :x04
:do03 tax
:do03 ldx :rtbl_idx_x2
bra :x03
:do02 tax
:do02 ldx :rtbl_idx_x2
bra :x02
:do01 tax
:do01 ldx :rtbl_idx_x2
bra :x01
:do16 tax
:do16 ldx :rtbl_idx_x2
ldal RTable+30,x
sta STK_ADDR+$F000,y
:x15 ldal RTable+28,x