Reorg of exception handling code in the core blitter

* Moved V-flag handling outside of the 32-byte exception handler
* Switched relative branches to JMP to save a cycle per word
* Updated macros to create a full code snippet instead of assuming
  certain values exist in the exception handler buffer
This commit is contained in:
Lucas Scharenbroich 2021-11-19 10:24:09 -06:00
parent 0a9eb76ab5
commit 4c31a0d056
2 changed files with 69 additions and 44 deletions

View File

@ -219,7 +219,7 @@ mixed cmp #$FFFF ; All 1's in the mask is fully transparent
; This is the slowest path because there is a *lot* of work to do. So much that it's ; This is the slowest path because there is a *lot* of work to do. So much that it's
; worth it to change up the environment to optimize things a bit more. ; worth it to change up the environment to optimize things a bit more.
; ;
; Need to fill in the first 8 bytes of the JMP handler with the following code sequence ; Need to fill in the first 10 bytes of the JMP handler with the following code sequence
; ;
; lda (00),y ; lda (00),y
; and #MASK ; and #MASK
@ -252,6 +252,9 @@ mixed cmp #$FFFF ; All 1's in the mask is fully transparent
lda ]1 lda ]1
sta: $0006,y sta: $0006,y
lda #$0D80 ; branch to the prologue (BRA *+15)
sta: $0008,y
ldy _Y_REG ; restore original y-register value and move on ldy _Y_REG ; restore original y-register value and move on
bra next bra next
@ -321,6 +324,9 @@ mixed cmp #$FFFF ; All 1's in the mask is fully transparent
ldal ]1,x ldal ]1,x
sta: $0006,y sta: $0006,y
lda #$0D80 ; branch to the prologue (BRA *+15)
sta: $0008,y
ldy _Y_REG ; restore original y-register value and move on ldy _Y_REG ; restore original y-register value and move on
bra next bra next
@ -346,12 +352,11 @@ next
; ]3 : code field offset ; ]3 : code field offset
CopyMaskedDWord MAC CopyMaskedDWord MAC
; Need to fill in the first 8 bytes of the JMP handler with the following code sequence ; Need to fill in the first 6 bytes of the JMP handler with the following code sequence
; ;
; lda (00),y ; lda (00),y
; and $80,x ; and $80,x
; ora $00,x ; ora $00,x
; bcc *+4
ldx _X_REG ; Get the addressing offset ldx _X_REG ; Get the addressing offset
ldal JTableOffset,x ; Get the address offset and add to the base address ldal JTableOffset,x ; Get the address offset and add to the base address
@ -377,7 +382,7 @@ CopyMaskedDWord MAC
sta: $0004,x ; Set ORA 00,x opcode sta: $0004,x ; Set ORA 00,x opcode
rep #$30 rep #$30
lda #$0290 ; BCC *+4 lda #$0F80 ; branch to the prologue (BRA *+17)
sta: $0006,x sta: $0006,x
eom eom
@ -392,25 +397,17 @@ CopyMaskedDWord MAC
; ]3 : code field offset ; ]3 : code field offset
CopyMaskedDynSpriteWord MAC CopyMaskedDynSpriteWord MAC
; Need to fill in the first 12(!!) bytes of the JMP handler with the following code sequence ; Need to fill in the first 12 bytes of the JMP handler with the following code sequence
; ;
; lda (00),y ; lda (00),y
; and $80,x ; and $80,x
; ora $00,x ; ora $00,x
; and #MASK ; and #MASK
; ora #DATA ; ora #DATA
; bra *+11
; ;
; If MASK == 0, then we can do a PEA. If MASK == $FFFF, then fall back to the simple Dynamic Masked ; If MASK == 0, then we can do a PEA. If MASK == $FFFF, then fall back to the simple Dynamic Masked
; code. ; code.
;
; If the tile priority bit is set, then we use an alternate bit of code that changes the order
; of operations and can't make an assumption about the transparency
;
; lda (00),y
; and #MASK
; ora #DATA
; and $80,x
; ora $00,x
ldx _X_REG ; Get the addressing offset ldx _X_REG ; Get the addressing offset
@ -432,11 +429,21 @@ CopyMaskedDynSpriteWord MAC
ora #$80 ora #$80
sta: $0003,x ; Set AND 00,x operand sta: $0003,x ; Set AND 00,x operand
lda #$35 lda #$35
sta: $0002,x ; Set AND 00,x operand sta: $0002,x ; Set AND 00,x opcode
lda #$15 lda #$15
sta: $0004,x ; Set ORA 00,x operand sta: $0004,x ; Set ORA 00,x opcode
rep #$30
lda #$0290 ; BCC *+4 lda #$29
sta: $0006,x sta: $0006,y ; AND #$0000 opcode
lda #$09
sta: $0009,y ; ORA #$0000 opcode
rep #$20
lda ]1+32 ; insert the tile mask and data into the exception
sta: $0007,y ; handler.
lda ]1
sta: $000A,y
lda #$0990 ; BCC *+11
sta: $000C,x
eom eom

View File

@ -37,8 +37,18 @@ PagePatches da {long_0-base+2}
da {loop_back-base+2} da {loop_back-base+2}
da {loop_exit_3-base+2} da {loop_exit_3-base+2}
da {even_exit-base+2} da {even_exit-base+2}
da {jmp_rtn_1-base+2}
da {jmp_rtn_2-base+2}
]index equ 0
lup 82 ; All the snippet addresses. The two JMP
da {snippets-base+{]index*32}+31} ; instructions are at the end of each of
da {snippets-base+{]index*32}+28} ; the 32-byte buffers
]index equ ]index+1
--^
PagePatchNum equ *-PagePatches PagePatchNum equ *-PagePatches
; Location that need a bank byte set for long addressing modes
BankPatches da {long_0-base+3} BankPatches da {long_0-base+3}
da {long_1-base+3} da {long_1-base+3}
da {long_2-base+3} da {long_2-base+3}
@ -483,7 +493,9 @@ BuildBank
; 13 banks for a total of 208 lines, which is what is required to render 26 tiles ; 13 banks for a total of 208 lines, which is what is required to render 26 tiles
; to cover the full screen vertical scrolling. ; to cover the full screen vertical scrolling.
; ;
; The 'base' location is always assumed to be on a 4kb ($1000) boundary ; The 'base' location is always assumed to be on a 4kb ($1000) boundary. We make sure that
; the code is assembled on a page boundary to help with alignment
ds \,$00 ; pad to the next page boundary
base base
entry_1 ldx #0000 ; Used for LDA 00,x addressing entry_1 ldx #0000 ; Used for LDA 00,x addressing
entry_2 ldy #0000 ; Used for LDA (00),y addressing entry_2 ldy #0000 ; Used for LDA (00),y addressing
@ -538,7 +550,7 @@ long_3 stal *+5-base
full_return jml blt_return ; Full exit full_return jml blt_return ; Full exit
; Re-enable interrupts and continue -- the even_exit JMP from the previous line will jump here every ; Re-enable interrupts and continue -- the even_exit JMP from the previous line will jump here every
; 8 or 16 lines in order to give the system some extra time to handle interrupts. ; 8 or 16 lines in order to give the system time to handle interrupts.
enable_int ldal stk_save+1 ; restore the stack enable_int ldal stk_save+1 ; restore the stack
tcs tcs
sep #$20 ; 8-bit mode sep #$20 ; 8-bit mode
@ -554,13 +566,26 @@ enable_int ldal stk_save+1 ; restore the stack
rep #$20 rep #$20
bra entry_1 bra entry_1
; The even/odd branch of this line's exception handler will return here. This is mostly
; a space-saving measure to allow for more code in the exception handlers themselves, but
; also simplifies the relocation process since we only have to update a single address
; in each exception handler, rather than two.
;
; Once working, this code should be able to be interleaved with the r_jmp_rtn code
; above to eliminate a couple of branches
jmp_rtn
bvs jmp_rtn_v ; overflow set means this is the right edge (entry)
clc ; carry is set only for edge operations; force clear
jmp_rtn_1 jmp l_jmp_rtn-base
jmp_rtn_v rep #$41 ; clear V and C
jmp_rtn_2 jmp r_jmp_rtn-base
; This is the spot that needs to be page-aligned. In addition to simplifying the entry address ; This is the spot that needs to be page-aligned. In addition to simplifying the entry address
; and only needing to update a byte instead of a word, because the code breaks out of the ; and only needing to update a byte instead of a word, because the code breaks out of the
; code field with a BRA instruction, we keep everything within a page to avoid the 1-cycle ; code field with a BRA instruction, we keep everything within a page to avoid the 1-cycle
; page-crossing penalty of the branch. ; page-crossing penalty of the branch.
ds 166 ds \,$00 ; pad to the next page boundary
loop_exit_1 jmp odd_exit-base ; +0 Alternate exit point depending on whether the left edge is loop_exit_1 jmp odd_exit-base ; +0 Alternate exit point depending on whether the left edge is
loop_exit_2 jmp even_exit-base ; +3 odd-aligned loop_exit_2 jmp even_exit-base ; +3 odd-aligned
@ -685,32 +710,25 @@ epilogue_1 tsc
; r_edge rep #$41 ; r_edge rep #$41
; brl r_jmp_rtn ; 3 ; brl r_jmp_rtn ; 3
; Each snippet is provided 32 bytes of space. The constant code is filled in from the end and
; it is the responsibility of the code that fills in the handler to create a valid program in the
; first 23 bytes, which are available to be manipulated.
;
; Note that the code that's assembled in the first bytes of these snippets is just an example. Every
; routine that creates an exception handler *MUST* write a full set of instructions since there is
; no guarantee of what was written previously.
ds \,$00 ; pad to the next page boundary ds \,$00 ; pad to the next page boundary
]index equ 0 ]index equ 0
snippets lup 82 snippets lup 82
ds 2 ; space for a 2-byte sequence; LDA (00),y LDA 00,x LDA 0,s ds 2 ; space for all exception handlers
and #$0000 ; the mask operand will be set when the tile is drawn and #$0000 ; the mask operand will be set when the tile is drawn
ora #$0000 ; the data operand will be set when the tile is drawn ora #$0000 ; the data operand will be set when the tile is drawn
bcs *+6 ds 15 ; extra padding
pha
brl loop+3+{3*]index} ; use relative branch for convenience bcs :byte ; if C = 0, just push the data and return
bvs *+6 ; overflow set means this is the right edge (entry) pha ; 1 byte
clc ; carry is set only for edge operations; force clear jmp loop+3+{3*]index}-base ; 3 bytes : absolute JMP (saves a cycle over a relative branch)
brl l_jmp_rtn :byte jmp jmp_rtn-base ; 3 bytes
rep #$41 ; clear V and C
brl r_jmp_rtn ; 25 bytes
ds 7 ; padding
]index equ ]index+1 ]index equ ]index+1
--^ --^
top top