Eliminate redundant register loads

This commit is contained in:
Lucas Scharenbroich 2022-07-28 11:57:14 -05:00
parent f5a27362a7
commit 0f920acd03
3 changed files with 128 additions and 148 deletions

View File

@ -33,8 +33,8 @@ _RestoreBG0Opcodes
lda LastPatchOffset ; If zero, there are no saved opcodes lda LastPatchOffset ; If zero, there are no saved opcodes
sta :exit_offset sta :exit_offset
:loop
ldx :virt_line_x2 ldx :virt_line_x2
:loop
ldal BTableLow,x ; Get the address of the first code field line ldal BTableLow,x ; Get the address of the first code field line
tay tay
@ -47,8 +47,7 @@ _RestoreBG0Opcodes
txa ; lda :virt_line_x2 txa ; lda :virt_line_x2
and #$001E and #$001E
eor #$FFFF eor #$FFFF
inc sec
clc
adc #32 adc #32
min :lines_left_x2 min :lines_left_x2
sta :draw_count_x2 ; Do half of this many lines sta :draw_count_x2 ; Do half of this many lines
@ -60,12 +59,13 @@ _RestoreBG0Opcodes
clc clc
adc :exit_offset ; Add some offsets to get the base address in the code field line adc :exit_offset ; Add some offsets to get the base address in the code field line
jsr RestoreOpcode jsr (RestoreOpcode,x)
lda :virt_line_x2 ; advance to the virtual line after the segment we just lda :virt_line_x2 ; advance to the virtual line after the segment we just
clc ; filled in clc ; filled in
adc :draw_count_x2 adc :draw_count_x2
sta :virt_line_x2 sta :virt_line_x2
tax
lda :lines_left_x2 ; subtract the number of lines we just completed lda :lines_left_x2 ; subtract the number of lines we just completed
sec sec
@ -113,16 +113,16 @@ _ApplyBG0XPosPre
_ApplyBG0XPos _ApplyBG0XPos
:virt_line equ tmp1 :stk_save equ tmp0
:lines_left equ tmp2 :virt_line_x2 equ tmp1
:draw_count equ tmp3 :lines_left_x2 equ tmp2
:draw_count_x2 equ tmp3
:exit_offset equ tmp4 :exit_offset equ tmp4
:entry_offset equ tmp5 :entry_offset equ tmp5
:exit_bra equ tmp6 :exit_bra equ tmp6
:exit_address equ tmp7 :exit_address equ tmp7
:base_address equ tmp8 :base_address equ tmp8
:draw_count_x2 equ tmp9 :opcode equ tmp9
:opcode equ tmp0
:odd_entry_offset equ tmp10 :odd_entry_offset equ tmp10
; If there are saved opcodes that have not been restored, do not run this routine ; If there are saved opcodes that have not been restored, do not run this routine
@ -133,10 +133,12 @@ _ApplyBG0XPos
; This code is fairly succinct. See the corresponding code in Vert.s for more detailed comments. ; This code is fairly succinct. See the corresponding code in Vert.s for more detailed comments.
:ok :ok
lda StartYMod208 ; This is the base line of the virtual screen lda StartYMod208 ; This is the base line of the virtual screen
sta :virt_line ; Keep track of it asl
sta :virt_line_x2 ; Keep track of it
lda ScreenHeight lda ScreenHeight
sta :lines_left asl
sta :lines_left_x2
; Calculate the exit and entry offsets into the code fields. This is a bit tricky, because odd-aligned ; Calculate the exit and entry offsets into the code fields. This is a bit tricky, because odd-aligned
; rendering causes the left and right edges to move in a staggered fashion. ; rendering causes the left and right edges to move in a staggered fashion.
@ -280,32 +282,30 @@ _ApplyBG0XPos
; 3. Writes the JMP entry point to enter the code field ; 3. Writes the JMP entry point to enter the code field
phb ; Save the existing bank phb ; Save the existing bank
tsc
sta :stk_save
:loop :loop
lda :virt_line ldx :virt_line_x2
asl ; This will clear the carry bit
tax
ldal BTableLow,x ; Get the address of the first code field line ldal BTableLow,x ; Get the address of the first code field line
tay ; Save it to use as the base address tay ; Save it to use as the base address
clc
adc :exit_offset ; Add some offsets to get the base address in the code field line adc :exit_offset ; Add some offsets to get the base address in the code field line
sta :exit_address sta :exit_address
sty :base_address sty :base_address
sep #$20
ldal BTableHigh,x ldal BTableHigh,x
pha pha
plb ; This is the bank that will receive the updates plb
rep #$20
lda :virt_line txa
and #$000F and #$001E
eor #$FFFF eor #$FFFF
inc sec
clc adc #32
adc #16 min :lines_left_x2
min :lines_left
sta :draw_count ; Do this many lines
asl
sta :draw_count_x2 sta :draw_count_x2
; First step is to set the BRA instruction to exit the code field at the proper location. There ; First step is to set the BRA instruction to exit the code field at the proper location. There
@ -317,64 +317,63 @@ _ApplyBG0XPos
; screen ; screen
; y is already set to :base_address ; y is already set to :base_address
tax ; :draw_count_x2 tax ; :draw_count_x2
lda :exit_address ; Save from this location clc ; advance to the virtual line after the segment we just
jsr SaveOpcode adc :virt_line_x2 ; filled in
sta :virt_line_x2
lda :exit_address ; Save from this location
jsr (SaveOpcode,x) ; X = :exit_address on return
txy ; ldy :exit_address -- starting at this address
ldx :draw_count_x2 ; Do this many lines ldx :draw_count_x2 ; Do this many lines
lda :exit_bra ; Copy this value into all of the lines lda :exit_bra ; Copy this value into all of the lines
ldy :exit_address ; starting at this address jsr (SetConst,x) ; All registers are preserved
jsr SetConst
; Next, patch in the CODE_ENTRY value, which is the low byte of a JMP instruction. This is an ; Next, patch in the CODE_ENTRY value, which is the low byte of a JMP instruction. This is an
; 8-bit operation and, since the PEA code is bank aligned, we use the entry_offset value directly ; 8-bit operation and, since the PEA code is bank aligned, we use the entry_offset value directly
sep #$20 sep #$20
ldx :draw_count_x2 ; ldx :draw_count_x2
lda :entry_offset lda :entry_offset
ldy :base_address ldy :base_address
jsr SetCodeEntry jsr (SetCodeEntry,x) ; All registers are preserved
; Now, patch in the opcode ; Now, patch in the opcode
ldx :draw_count_x2 ; ldx :draw_count_x2
lda :opcode lda :opcode
ldy :base_address ; Y-register is preserved, this can be removed jsr (SetCodeEntryOpcode,x) ; All registers are preserved
jsr SetCodeEntryOpcode
; If this is an odd entry, also set the odd_entry low byte and save the operand high byte ; If this is an odd entry, also set the odd_entry low byte and save the operand high byte
lda :odd_entry_offset lda :odd_entry_offset
beq :not_odd beq :not_odd
ldx :draw_count_x2 ; NOTE: SetOddCodeEntry and SaveHighOperand can probably be combined to eliminate call/return overhead
ldy :base_address ; Y-register is preserved, this can be removed
jsr SetOddCodeEntry
ldx :draw_count_x2 ; ldx :draw_count_x2
ldy :base_address ; Y-register is preserved, this can be removed jsr (SetOddCodeEntry,x) ; All registers are preserved
pei :exit_address
jmp :SaveHighOperand ; Only used once, so "inline" it ; ldx :draw_count_x2
jmp (:SaveHighOperand,x) ; Only used once, so "inline" it
:save_high_op_rtn :save_high_op_rtn
:not_odd :not_odd
rep #$20 rep #$21 ; clear the carry
; Do the end of the loop -- update the virtual line counter and reduce the number ; Do the end of the loop -- update the virtual line counter and reduce the number
; of lines left to render ; of lines left to render
lda :virt_line ; advance to the virtual line after the segment we just lda :lines_left_x2 ; subtract the number of lines we just completed
clc ; filled in
adc :draw_count
sta :virt_line
lda :lines_left ; subtract the number of lines we just completed
sec sec
sbc :draw_count sbc :draw_count_x2
sta :lines_left sta :lines_left_x2
jne :loop jne :loop
lda :stk_save
tcs
plb plb
rts rts
@ -387,45 +386,43 @@ _ApplyBG0XPos
; Y = starting line * $1000 ; Y = starting line * $1000
; A = code field location * $1000 ; A = code field location * $1000
:SaveHighOperand :SaveHighOperand
jmp (:tbl,x) da :bottom
:tbl da :bottom
da :do01,:do02,:do03,:do04 da :do01,:do02,:do03,:do04
da :do05,:do06,:do07,:do08 da :do05,:do06,:do07,:do08
da :do09,:do10,:do11,:do12 da :do09,:do10,:do11,:do12
da :do13,:do14,:do15,:do16 da :do13,:do14,:do15,:do16
:do15 plx :do15 ldx :exit_address ; accumulator is in 8-bit mode, so can't use TAX
bra :x15 bra :x15
:do14 plx :do14 ldx :exit_address
bra :x14 bra :x14
:do13 plx :do13 ldx :exit_address
bra :x13 bra :x13
:do12 plx :do12 ldx :exit_address
bra :x12 bra :x12
:do11 plx :do11 ldx :exit_address
bra :x11 bra :x11
:do10 plx :do10 ldx :exit_address
bra :x10 bra :x10
:do09 plx :do09 ldx :exit_address
bra :x09 bra :x09
:do08 plx :do08 ldx :exit_address
bra :x08 bra :x08
:do07 plx :do07 ldx :exit_address
bra :x07 bra :x07
:do06 plx :do06 ldx :exit_address
bra :x06 bra :x06
:do05 plx :do05 ldx :exit_address
bra :x05 bra :x05
:do04 plx :do04 ldx :exit_address
bra :x04 bra :x04
:do03 plx :do03 ldx :exit_address
bra :x03 bra :x03
:do02 plx :do02 ldx :exit_address
bra :x02 bra :x02
:do01 plx :do01 ldx :exit_address
bra :x01 bra :x01
:do16 plx :do16 ldx :exit_address
:x16 lda $F002,x :x16 lda $F002,x
sta OPCODE_HIGH_SAVE+$F000,y sta OPCODE_HIGH_SAVE+$F000,y
:x15 lda $E002,x :x15 lda $E002,x
@ -469,9 +466,7 @@ _ApplyBG0XPos
; Y = starting line * $1000 ; Y = starting line * $1000
; A = code field location * $1000 ; A = code field location * $1000
SaveOpcode SaveOpcode
jmp (:tbl,x) da :bottom
:tbl da :bottom
da :do01,:do02,:do03,:do04 da :do01,:do02,:do03,:do04
da :do05,:do06,:do07,:do08 da :do05,:do06,:do07,:do08
da :do09,:do10,:do11,:do12 da :do09,:do10,:do11,:do12
@ -550,9 +545,7 @@ SaveOpcode
; Y = starting line * $1000 ; Y = starting line * $1000
; A = code field location * $1000 ; A = code field location * $1000
RestoreOpcode RestoreOpcode
jmp (:tbl,x) da :bottom
:tbl da :bottom
da :do01,:do02,:do03,:do04 da :do01,:do02,:do03,:do04
da :do05,:do06,:do07,:do08 da :do05,:do06,:do07,:do08
da :do09,:do10,:do11,:do12 da :do09,:do10,:do11,:do12
@ -631,8 +624,7 @@ RestoreOpcode
; Y = starting line * $1000 ; Y = starting line * $1000
; A = address low byte ; A = address low byte
SetCodeEntry SetCodeEntry
jmp (:tbl,x) da :bottom-00,:bottom-03,:bottom-06,:bottom-09
:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-12,:bottom-15,:bottom-18,:bottom-21 da :bottom-12,:bottom-15,:bottom-18,:bottom-21
da :bottom-24,:bottom-27,:bottom-30,:bottom-33 da :bottom-24,:bottom-27,:bottom-30,:bottom-33
da :bottom-36,:bottom-39,:bottom-42,:bottom-45 da :bottom-36,:bottom-39,:bottom-42,:bottom-45
@ -663,8 +655,7 @@ SetCodeEntry
; Y = starting line * $1000 ; Y = starting line * $1000
; A = address low byte ; A = address low byte
SetOddCodeEntry SetOddCodeEntry
jmp (:tbl,x) da :bottom-00,:bottom-03,:bottom-06,:bottom-09
:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-12,:bottom-15,:bottom-18,:bottom-21 da :bottom-12,:bottom-15,:bottom-18,:bottom-21
da :bottom-24,:bottom-27,:bottom-30,:bottom-33 da :bottom-24,:bottom-27,:bottom-30,:bottom-33
da :bottom-36,:bottom-39,:bottom-42,:bottom-45 da :bottom-36,:bottom-39,:bottom-42,:bottom-45
@ -695,8 +686,7 @@ SetOddCodeEntry
; Y = starting line * $1000 ; Y = starting line * $1000
; A = opcode value ; A = opcode value
SetCodeEntryOpcode SetCodeEntryOpcode
jmp (:tbl,x) da :bottom-00,:bottom-03,:bottom-06,:bottom-09
:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-12,:bottom-15,:bottom-18,:bottom-21 da :bottom-12,:bottom-15,:bottom-18,:bottom-21
da :bottom-24,:bottom-27,:bottom-30,:bottom-33 da :bottom-24,:bottom-27,:bottom-30,:bottom-33
da :bottom-36,:bottom-39,:bottom-42,:bottom-45 da :bottom-36,:bottom-39,:bottom-42,:bottom-45

View File

@ -87,9 +87,8 @@ Counter equ tmp3
; A = value ; A = value
; ;
; Set M to 0 or 1 ; Set M to 0 or 1
SetConst ; Need a blank line here, otherwise the :tbl local variable resolves backwards SetConst
jmp (:tbl,x) da :bottom-00,:bottom-03,:bottom-06,:bottom-09
:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-12,:bottom-15,:bottom-18,:bottom-21 da :bottom-12,:bottom-15,:bottom-18,:bottom-21
da :bottom-24,:bottom-27,:bottom-30,:bottom-33 da :bottom-24,:bottom-27,:bottom-30,:bottom-33
da :bottom-36,:bottom-39,:bottom-42,:bottom-45 da :bottom-36,:bottom-39,:bottom-42,:bottom-45

View File

@ -7,29 +7,36 @@
; lines in the correct order ; lines in the correct order
_ApplyBG0YPos _ApplyBG0YPos
:rtbl_idx equ tmp0 :rtbl_idx_x2 equ tmp0
:virt_line equ tmp1 :virt_line_x2 equ tmp1
:lines_left equ tmp2 :lines_left_x2 equ tmp2
:draw_count equ tmp3 :draw_count_x2 equ tmp3
:stk_save equ tmp4
; First task is to fill in the STK_ADDR values by copying them from the RTable array. We ; First task is to fill in the STK_ADDR values by copying them from the RTable array. We
; copy from RTable[i] into BlitField[StartY+i]. As with all of this code, the difficult part ; copy from RTable[i] into BlitField[StartY+i]. As with all of this code, the difficult part
; is decomposing the update across banks ; is decomposing the update across banks
stz :rtbl_idx ; Start copying from the first entry in the table stz :rtbl_idx_x2 ; Start copying from the first entry in the table
lda StartY ; This is the base line of the virtual screen lda StartY ; This is the base line of the virtual screen
jsr Mod208 jsr Mod208
sta StartYMod208 sta StartYMod208
sta :virt_line ; Keep track of it asl
sta :virt_line_x2 ; Keep track of it
phb ; Save the current bank
tsc ; we intentionally leak one byte of stack in each loop
sta :stk_save ; iteration, so save the stack to repair at the end
; copy a range of address from the table into the destination bank. If we restrict ourselves to ; copy a range of address from the table into the destination bank. If we restrict ourselves to
; rectangular playfields, this can be optimized to just subtracting a constant value. See the ; rectangular playfields, this can be optimized to just subtracting a constant value. See the
; Templates::SetScreenAddrs subroutine. ; Templates::SetScreenAddrs subroutine.
lda ScreenHeight lda ScreenHeight
sta :lines_left asl
sta :lines_left_x2
; This is the verbose part -- figure out how many lines to draw. We don't want to artificially limit ; This is the verbose part -- figure out how many lines to draw. We don't want to artificially limit
; the height of the visible screen (for example, doing an animated wipe while scrolling), so the screen ; the height of the visible screen (for example, doing an animated wipe while scrolling), so the screen
@ -38,62 +45,47 @@ _ApplyBG0YPos
; For larger values, we want to break things up on 16-line boundaries based on the virt_line value. So, ; For larger values, we want to break things up on 16-line boundaries based on the virt_line value. So,
; ;
; draw_count = min(lines_left, (16 - (virt_line % 16))) ; draw_count = min(lines_left, (16 - (virt_line % 16)))
;
; Note that almost everything in this loop can be done with 8-bit operations since the values are
; all under 200. The one exception is the virt_line value which could exceed 256. This will be
; a later optimization and might save around 10 cycles per iteration, or up to ~120 cycles per frame
; and ~2,500 per second. This is ~1% of our total CPU budget and is *just* enough cycles to be
; interesting.... Another 8 cycles could be removed by doing all calculations pre-multiplied by 2
; to avoid several 'asl' instructions
phb
:loop :loop
lda :virt_line ldx :virt_line_x2
asl
tax
ldal BTableLow,x ; Get the address of the first code field line ldal BTableLow,x ; Get the address of the first code field line
tay tay
sep #$20 ldal BTableHigh,x ; Target bank in low byte, current bank in high
ldal BTableHigh,x
pha pha
plb ; This is the bank that will receive the updates
rep #$20
lda :virt_line txa
and #$000F and #$001E
eor #$FFFF eor #$FFFF
inc sec
clc adc #32
adc #16 min :lines_left_x2
min :lines_left
sta :draw_count ; Do this many lines sta :draw_count_x2 ; Do this many lines
asl
tax tax
lda :rtbl_idx ; Read from this location in the RTable clc ; pre-advance virt_line_x2 because we have the value
asl adc :virt_line_x2
sta :virt_line_x2
jsr CopyRTableToStkAddr plb
jsr (:CopyRTableToStkAddr,x) ; X = rtbl_idx_x2 on return
lda :virt_line ; advance to the virtual line after the segment we just txa ; carry flag is unchanged
clc ; filled in adc :draw_count_x2 ; advance the index into the RTable
adc :draw_count sta :rtbl_idx_x2
sta :virt_line
lda :rtbl_idx ; advance the index into the RTable
adc :draw_count
sta :rtbl_idx
lda :lines_left ; subtract the number of lines we just completed lda :lines_left_x2 ; subtract the number of lines we just completed
sec sec
sbc :draw_count sbc :draw_count_x2
sta :lines_left sta :lines_left_x2
jne :loop jne :loop
plb
:out lda :stk_save
tcs
plb
rts rts
; Unrolled copy routine to move RTable entries into STK_ADDR position. ; Unrolled copy routine to move RTable entries into STK_ADDR position.
@ -101,44 +93,43 @@ _ApplyBG0YPos
; A = index into the RTable array (x2) ; A = index into the RTable array (x2)
; Y = starting line * $1000 ; Y = starting line * $1000
; X = number of lines (x2) ; X = number of lines (x2)
CopyRTableToStkAddr :CopyRTableToStkAddr
jmp (:tbl,x) da :none
:tbl da :none
da :do01,:do02,:do03,:do04 da :do01,:do02,:do03,:do04
da :do05,:do06,:do07,:do08 da :do05,:do06,:do07,:do08
da :do09,:do10,:do11,:do12 da :do09,:do10,:do11,:do12
da :do13,:do14,:do15,:do16 da :do13,:do14,:do15,:do16
:do15 tax :do15 ldx :rtbl_idx_x2
bra :x15 bra :x15
:do14 tax :do14 ldx :rtbl_idx_x2
bra :x14 bra :x14
:do13 tax :do13 ldx :rtbl_idx_x2
bra :x13 bra :x13
:do12 tax :do12 ldx :rtbl_idx_x2
bra :x12 bra :x12
:do11 tax :do11 ldx :rtbl_idx_x2
bra :x11 bra :x11
:do10 tax :do10 ldx :rtbl_idx_x2
bra :x10 bra :x10
:do09 tax :do09 ldx :rtbl_idx_x2
bra :x09 bra :x09
:do08 tax :do08 ldx :rtbl_idx_x2
bra :x08 bra :x08
:do07 tax :do07 ldx :rtbl_idx_x2
bra :x07 bra :x07
:do06 tax :do06 ldx :rtbl_idx_x2
bra :x06 bra :x06
:do05 tax :do05 ldx :rtbl_idx_x2
bra :x05 bra :x05
:do04 tax :do04 ldx :rtbl_idx_x2
bra :x04 bra :x04
:do03 tax :do03 ldx :rtbl_idx_x2
bra :x03 bra :x03
:do02 tax :do02 ldx :rtbl_idx_x2
bra :x02 bra :x02
:do01 tax :do01 ldx :rtbl_idx_x2
bra :x01 bra :x01
:do16 tax :do16 ldx :rtbl_idx_x2
ldal RTable+30,x ldal RTable+30,x
sta STK_ADDR+$F000,y sta STK_ADDR+$F000,y
:x15 ldal RTable+28,x :x15 ldal RTable+28,x