Rough outline of streamlined sprite subsystem

* Split the creation of the sprite stamps from adding the
  sprites themselves.  This allows for 48 stamps that can
  be pre-rendered and quickly reassigned to sprites for
  animations.

* Inlined all calls to PushDirtyTile.  This both removed
  significant overhead from calling the small function and,
  since almost all callers were checking multiple tiles, we
  were able to avoid incrementing the count each time and
  just add a single increment at the end.

* Switched from recording each tile that a sprite intersects
  with each frame to only recording the top-left tile and the
  overlap size.  This reduced overhead for larger sprites
  and removed the need for an end-of-list marker.

* Much more aggressive caching of Sprite and Tile Store
  values in order to streamline the inner tile dispatch
  routines.

* Moving TileStore and Sprites (and other supporting
  data structures) into a separate data bank.  Needed
  for size purposes, and it also provides micro-optimizations
  by opening up the use of abs,y addressing modes.

* Revamped multi-sprite rendering code to avoid the need to
  copy any masks and all stacked sprites can be drawn
  via a sequence of and [addrX],y; ora (addrX),y where
  addrX is set once per tile.

* General streamlining to reduce overhead. This work was
  focused on removing as much per-tile overhead as possible.
This commit is contained in:
Lucas Scharenbroich 2022-04-20 07:43:16 -05:00
parent df0d0ccada
commit 8bb17895a9
9 changed files with 1244 additions and 324 deletions

View File

@ -87,11 +87,11 @@ ActiveSpriteCount equ 102
BankLoad equ 104
TileStoreBankAndBank01 equ 106
TileStoreBankAndTileDataBank equ 108
Next equ 110
TileStoreBankDoubled equ 110
Next equ 112
activeSpriteList equ 128 ; 32 bytes for the active sprite list (can persist across frames)
AppSpace equ 160 ; 16 bytes of space reserved for application use
tiletmp equ 178 ; 16 bytes of temp storage for the tile renderers
; tiletmp equ 178 ; 16 bytes of temp storage for the tile renderers
blttmp equ 192 ; 32 bytes of local cache/scratch space for blitter
tmp8 equ 224 ; another 16 bytes of temporary space to be used as scratch
@ -112,6 +112,36 @@ tmp5 equ 250
tmp6 equ 252
tmp7 equ 254
; Defines for the second direct page (used in the tile blitters)
sprite_ptr0 equ 0 ; Each tile can render up to 4 sprite blocks. The sprite
sprite_ptr1 equ 4 ; data and mask values live in different banks, but have a
sprite_ptr2 equ 8 ; parallel structure. The high word of each pointer is set to
sprite_ptr3 equ 12 ; the mask bank. With the Bank register set, both data and mask
; ; can be accessed through the same pointer, e.g. lda (sprite_ptr0)
; ; and [sprite_ptr0]
tmp_sprite_data equ 16 ; 32 byte temporary buffer to build up sprite data values
tmp_sprite_mask equ 48 ; 32 byte temporary buffer to build up sprite mask values
tmp_tile_data equ 80 ; 32 byte temporary buffer to build up tile data values
tmp_tile_mask equ 112 ; 32 byte temporary buffer to build up tile mask values
; Temporary direct page locations used by some of the complex tile renderers
_X_REG equ 144
_Y_REG equ 146
_T_PTR equ 148 ; Copy of the tile address pointer
_BASE_ADDR equ 150 ; Copy of BTableLow for this tile
_SPR_X_REG equ 152 ; Cache address of sprite plane source for a tile
_JTBL_CACHE equ 154 ; Cache the offset to the exception handler for a column
_OP_CACHE equ 156 ; Cache of a relevant operand / operator
_TILE_ID equ 158 ; Copy of the tile descriptor
; Define free space for the application to use
FREE_SPACE_DP2 equ 160
; End direct page values
DIRTY_BIT_BG0_X equ $0001
DIRTY_BIT_BG0_Y equ $0002
DIRTY_BIT_BG1_X equ $0004
@ -153,17 +183,55 @@ SPRITE_8X8 equ $0000
SPRITE_VFLIP equ $0400
SPRITE_HFLIP equ $0200
MAX_TILES equ {26*41} ; Number of tiles in the code field (41 columns * 26 rows)
TILE_STORE_SIZE equ {MAX_TILES*2} ; The tile store contains a tile descriptor in each slot
; Stamp storage parameters
VBUFF_STRIDE_BYTES equ 12*4 ; Each line has 4 slots of 16 pixels + 8 buffer pixels
VBUFF_TILE_ROW_BYTES equ 8*VBUFF_STRIDE_BYTES ; Each row is comprised of 8 lines
VBUFF_SPRITE_STEP equ VBUFF_TILE_ROW_BYTES*3 ; Allocate space for 16 rows + 8 rows of buffer
VBUFF_SPRITE_START equ {8*VBUFF_TILE_ROW_BYTES}+4 ; Start at an offset so $0000 can be used as an empty value
VBUFF_SLOT_COUNT equ 48 ; Have space for this many stamps
; Tile storage parameters
TILE_STORE_WIDTH equ 41
TILE_STORE_HEIGHT equ 26
MAX_TILES equ {26*41} ; Number of tiles in the code field (41 columns * 26 rows)
TILE_STORE_SIZE equ {MAX_TILES*2} ; The tile store contains a tile descriptor in each slot
TS_TILE_ID equ TILE_STORE_SIZE*0 ; tile descriptor for this location
TS_DIRTY equ TILE_STORE_SIZE*1 ; Flag. Used to prevent a tile from being queued multiple times per frame
TS_SPRITE_FLAG equ TILE_STORE_SIZE*2 ; Bitfield of all sprites that intersect this tile. 0 if no sprites.
TS_TILE_ADDR equ TILE_STORE_SIZE*3 ; cached value, the address of the tiledata for this tile
TS_CODE_ADDR_LOW equ TILE_STORE_SIZE*4 ; const value, address of this tile in the code fields
TS_CODE_ADDR_HIGH equ TILE_STORE_SIZE*5 ; const value
TS_CODE_ADDR_HIGH equ TILE_STORE_SIZE*5
TS_WORD_OFFSET equ TILE_STORE_SIZE*6 ; const value, word offset value for this tile if LDA (dp),y instructions are used
TS_BASE_ADDR equ TILE_STORE_SIZE*7 ; const value, because there are two rows of tiles per bank, this is set to $0000 or $8000.
TS_SCREEN_ADDR equ TILE_STORE_SIZE*8 ; cached value of on-screen location of tile. Used for DirtyRender.
TS_VBUFF_ARRAY_ADDR equ TILE_STORE_SIZE*9 ; const value to an aligned 32-byte array starting at $8000 in TileStore bank
TS_TILE_DISP equ TILE_STORE_SIZE*10 ; derived from TS_TILE_ID to optimize tile dispatch in the Render function
TS_BASE_TILE_DISP equ TILE_STORE_SIZE*10 ; derived from TS_TILE_ID to optimize base (non-sprite) tile dispatch in the Render function
TS_DIRTY_TILE_DISP equ TILE_STORE_SIZE*11 ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function
; 16 consecutive entries to provide directly addressable space for holding the VBUFF address for the
; sprites that may be rendered at a given tile. Given a tile store offset, X, the way to address the
; address for the Y'th sprite is
;
; lda TileStore+TS_VBUFF_0+{Y*TILE_STORE_SIZE},x
;
; Moving to the next tile can be done with a constant.
;
; lda TileStore+TS_VBUFF_0+{Y*TILE_STORE_SIZE}+{41*row}+{2*col},x
;
; Each entry must occupy its own TILE_STORE_SIZE-byte array: TS_VBUFF_n is at TILE_STORE_SIZE*(12+n),
; so the 16 arrays span multipliers 12 through 27. Reusing a multiplier would make two sprite slots
; share the same per-tile VBUFF address storage.
;
; NOTE(review): the TileStore allocation must reserve at least TILE_STORE_SIZE*28 bytes for these
; offsets to be valid -- confirm against the TileStore storage definition.
TS_VBUFF_0 equ TILE_STORE_SIZE*12
TS_VBUFF_1 equ TILE_STORE_SIZE*13
TS_VBUFF_2 equ TILE_STORE_SIZE*14
TS_VBUFF_3 equ TILE_STORE_SIZE*15
TS_VBUFF_4 equ TILE_STORE_SIZE*16
TS_VBUFF_5 equ TILE_STORE_SIZE*17
TS_VBUFF_6 equ TILE_STORE_SIZE*18
TS_VBUFF_7 equ TILE_STORE_SIZE*19
TS_VBUFF_8 equ TILE_STORE_SIZE*20
TS_VBUFF_9 equ TILE_STORE_SIZE*21
TS_VBUFF_10 equ TILE_STORE_SIZE*22
TS_VBUFF_11 equ TILE_STORE_SIZE*23
TS_VBUFF_12 equ TILE_STORE_SIZE*24
TS_VBUFF_13 equ TILE_STORE_SIZE*25
TS_VBUFF_14 equ TILE_STORE_SIZE*26
TS_VBUFF_15 equ TILE_STORE_SIZE*27

View File

@ -195,15 +195,13 @@ _RenderDirtyTile
pei TileStoreBankAndBank01 ; Special value that has the TileStore bank in LSB and $01 in MSB
plb
txy
ldx TileStore+TS_TILE_DISP,y ; get the finalized tile descriptor
ldal DirtyTileProcs,x ; load and patch in the appropriate subroutine
lda TileStore+TS_DIRTY_TILE_DISP,x ; load and patch in the appropriate subroutine
stal :tiledisp+1
ldx TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated)
lda TileStore+TS_SCREEN_ADDR,y ; Get the on-screen address of this tile
tay
ldy TileStore+TS_SCREEN_ADDR,x ; Get the on-screen address of this tile
lda TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated)
tax
plb ; set the bank
@ -671,7 +669,7 @@ dirty_sprite
stx spriteIdx+6
jmp BlitFourSprites
DirtyTileProcs dw _TBDirtyTile_00,_TBDirtyTile_0H,_TBDirtyTile_V0,_TBDirtyTile_VH
DirtyTileProcs dw _TBDirtyTile_00,_TBDirtyTile_0H,_TBDirtyTile_V0,_TBDirtyTile_VH
;DirtyTileSpriteProcs dw _TBDirtySpriteTile_00,_TBDirtySpriteTile_0H,_TBDirtySpriteTile_V0,_TBDirtySpriteTile_VH
; Blit tiles directly to the screen.
@ -998,4 +996,4 @@ BlitOneSprite
_R0W0
cli
pld
rts
rts

View File

@ -21,35 +21,85 @@ InitSprites
; Clear values in the sprite array
ldx #{MAX_SPRITES-1}*2
:loop3 stz _Sprites+TILE_STORE_ADDR_1,x
dex
dex
bpl :loop3
; ldx #{MAX_SPRITES-1}*2
;:loop3 stz _Sprites+TILE_STORE_ADDR_1,x
; dex
; dex
; bpl :loop3
; Initialize the VBUFF address offsets in the data and mask banks for each sprite
;
; The internal grid 13 tiles wide where each sprite has a 2x2 interior square with a
; The internal grid 12 tiles wide where each sprite has a 2x2 interior square with a
; tile-size buffer all around. We pre-render each sprite with all four vert/horz flips
VBUFF_STRIDE_BYTES equ 13*4
VBUFF_TILE_ROW_BYTES equ 8*VBUFF_STRIDE_BYTES
VBUFF_SPRITE_STEP equ VBUFF_TILE_ROW_BYTES*3
VBUFF_SPRITE_START equ {8*VBUFF_TILE_ROW_BYTES}+4
;
; Eventually we should be able to have a separate rendering path for vertically flipped
; sprites and will be able to double the capacity of the stamp buffer
ldx #0
lda #VBUFF_SPRITE_START
clc
:loop4 sta _Sprites+VBUFF_ADDR,x
:loop4 sta VBuffAddrTable,x
adc #VBUFF_SPRITE_STEP
inx
inx
cpx #MAX_SPRITES*2
cpx #VBUFF_SLOT_COUNT*2
bcc :loop4
; Precalculate some bank values
jsr _CacheSpriteBanks
rts
; Utility function to calculate the difference in tile positions between a sprite's current
; position and its previous position. This gets interesting because the number of tiles
; that a sprite covers can change based on the relative alignment of the sprite with the
; background.
;
; Ideally, we would be able to quickly calculate exactly which new background tiles a sprite
; intersects with and which ones it has left to minimize the number of TileStore entries
; that need to be updated.
;
; In the short-term, we just do an equality test which lets us know if the sprite is
; covering the exact same tiles.
; Render a sprite stamp into the sprite buffer. Stamps exist independently of the sprites
; and each sprite references a specific stamp. This is necessary because it's common for a
; sprite to change its graphic as it's animating, but it is too costly to have to set up
; the stamp every time. So this allows users to create stamps in advance and then
; assign them to the sprites as needed.
;
; Currently, we support a maximum of 48 stamps.
;
; Input:
; A = sprite descriptor
; X = stamp slot
; Return:
; A = vbuff address to be assigned to Sprite[VBUFF_ADDR]
; Public (long-call) entry point. Switches the data bank to the program bank around the
; call to the local worker and returns with RTL.
CreateSpriteStamp ENT
phb ; Preserve the caller's data bank
phk
plb ; Data bank = program bank while the worker runs
jsr _CreateSpriteStamp
plb ; Restore the caller's data bank
rtl

; Local worker: A = sprite descriptor, X = stamp slot. Returns the slot's VBUFF address in A.
; NOTE(review): X is not range-checked against VBUFF_SLOT_COUNT before indexing VBuffAddrTable.
; NOTE(review): assumes _GetBaseTileAddr returns the tile address in A and leaves X intact -- confirm.
_CreateSpriteStamp
pha ; Save the descriptor
jsr _GetBaseTileAddr ; Get the address of the tile data
pha ; Stack now holds (top first): tile address, descriptor
txa
asl ; Stamp slot * 2 = word index into VBuffAddrTable
tax
ldy VBuffAddrTable,x ; Load the address of the stamp slot
plx ; Pop the tile address
pla ; Pop the sprite ID
phy ; VBUFF_ADDR value
jsr _DrawSpriteStamp ; Render the sprite data and create a stamp
pla ; Pop the VBUFF_ADDR and return
rts
; Add a new sprite to the rendering pipeline
;
@ -90,12 +140,12 @@ _AddSprite
pla
sta _Sprites+SPRITE_ID,x ; Keep a copy of the full descriptor
jsr _GetBaseTileAddr ; This applies the TILE_ID_MASK
sta _Sprites+TILE_DATA_OFFSET,x
lda #SPRITE_STATUS_OCCUPIED+SPRITE_STATUS_ADDED
sta _Sprites+SPRITE_STATUS,x
stz _Sprites+VBUFF_ADDR,x ; Clear the VBUFF address, just to initialize it
phy
tya
and #$00FF
@ -106,7 +156,7 @@ _AddSprite
sta _Sprites+SPRITE_X,x ; X coordinate
jsr _PrecalcAllSpriteInfo ; Cache sprite property values (simple stuff)
jsr _DrawSpriteSheet ; Render the sprite into internal space
; jsr _DrawSpriteSheet ; Render the sprite into internal space
; Mark the dirty bit to indicate that the active sprite list needs to be rebuilt in the next
; render call
@ -117,11 +167,161 @@ _AddSprite
lda _SpriteBits,x ; Get the bit flag for this sprite slot
tsb SpriteMap ; Mark it in the sprite map bit field
txa ; And return the sprite ID
clc ; Mark that the sprite was successfully added
; txa ; And return the sprite ID
; clc ; Mark that the sprite was successfully added
rts
; Alternate implementation that uses the TS_COVERAGE_SIZE and TS_LOOKUP_INDEX properties to
; load the old values directly from the TileStoreLookup table, rather than caching them.
; This is more efficient, because the work in MarkDirtySprite is independent of the
; sprite size and, by inlining the _PushDirtyTile logic, we can save a fair amount of overhead
; Remove a sprite's bit from the TS_SPRITE_FLAG of every tile it previously covered and queue
; those tiles on the dirty list.
;
; Input:
;   Y = sprite record index (used to index the _Sprites arrays)
;
; Dispatches through csfts_tbl on the sprite's cached TS_COVERAGE_SIZE value. Only the 1x1 and
; 2x2 cases have bodies here; every other table entry resolves to a label that returns immediately.
_ClearSpriteFromTileStore2
ldx _Sprites+TS_COVERAGE_SIZE,y
jmp (csfts_tbl,x)
csfts_tbl dw csfts_1x1,csfts_1x2,csfts_1x3,csfts_out
dw csfts_2x1,csfts_2x2,csfts_2x3,csfts_out
dw csfts_3x1,csfts_3x2,csfts_3x3,csfts_out
dw csfts_out,csfts_out,csfts_out,csfts_out
; Just a single value to clear and add to the dirty tile list
csfts_1x1 ldx _Sprites+TS_LOOKUP_INDEX,y
lda TileStoreLookup,x ; Translate the lookup index into a TileStore offset
tax
lda TileStore+TS_SPRITE_FLAG,x
and _SpriteBitsNot,y ; Mask off this sprite's bit
sta TileStore+TS_SPRITE_FLAG,x
lda TileStore+TS_DIRTY,x
bne csfts_1x1_out ; Already queued this frame
inc ; any non-zero value will work
sta TileStore+TS_DIRTY,x ; and is 1 cycle faster than loading a constant value
txa
ldx DirtyTileCount ; Append the TileStore offset to the dirty list
sta DirtyTiles,x
inx
inx
stx DirtyTileCount
; The remaining coverage sizes have no implementation in this routine; they share the RTS below.
csfts_1x2
csfts_1x3
csfts_2x1
csfts_2x3
csfts_3x1
csfts_3x2
csfts_3x3
csfts_1x1_out
rts
; This is a more interesting case where the ability to batch things up starts to produce some
; efficiency gains
;
; NOTE(review): this path looks unfinished. Items to confirm before relying on it:
;   * `tay` copies A, but A is not loaded by this routine before that point; the lookup index
;     was loaded into X.
;   * The mask is `and _SpriteBits` (unindexed); csfts_1x1 uses `and _SpriteBitsNot,y`.
;   * `beq *+3` skips the PHX when TS_DIRTY is zero, i.e. tiles are pushed when they are already
;     dirty; csfts_1x1 queues a tile only when TS_DIRTY is zero.
;   * No zero sentinel is pushed in this routine, yet the PLA/BEQ sequence below tests for one.
;   * After `ldy DirtyTileCount`, the `sta DirtyTiles+n,x` stores and the :doneN paths use X,
;     which still holds a TileStore offset, as the dirty-list index.
;   * The ADC instructions are not preceded by CLC, so the carry state is whatever was left over.
csfts_2x2 ldx _Sprites+TS_LOOKUP_INDEX,y ; Get the address of the old top-left corner
tay
ldx TileStoreLookup,y ; Top-left tile
lda TileStore+TS_SPRITE_FLAG,x
and _SpriteBits
sta TileStore+TS_SPRITE_FLAG,x
lda TileStore+TS_DIRTY,x
beq *+3
phx
ldx TileStoreLookup+2,y ; Top-right tile
lda TileStore+TS_SPRITE_FLAG,x
and _SpriteBits
sta TileStore+TS_SPRITE_FLAG,x
lda TileStore+TS_DIRTY,x
beq *+3
phx
ldx TileStoreLookup+TS_LOOKUP_SPAN,y ; Bottom-left tile
lda TileStore+TS_SPRITE_FLAG,x
and _SpriteBits
sta TileStore+TS_SPRITE_FLAG,x
lda TileStore+TS_DIRTY,x
beq *+3
phx
ldx TileStoreLookup+TS_LOOKUP_SPAN+2,y ; Bottom-right tile
lda TileStore+TS_SPRITE_FLAG,x
and _SpriteBits
sta TileStore+TS_SPRITE_FLAG,x
ldy DirtyTileCount
lda TileStore+TS_DIRTY,x
beq skip_2x2
txa
sta DirtyTiles,y
sta TileStore+TS_DIRTY,x
skip_2x2
; Drain the TileStore offsets pushed above; a zero value is treated as the end of the list
pla
beq :done1
sta DirtyTiles+2,x
tay
sta TileStore+TS_DIRTY,y
pla
beq :done2
sta DirtyTiles+4,x
tay
sta TileStore+TS_DIRTY,y
pla
beq :done3
sta DirtyTiles+6,x
tay
sta TileStore+TS_DIRTY,y
; Maximum number of dirty tiles reached. Just fall through.
pla
txa
adc #8
sta DirtyTileCount
rts
:done3
txa
adc #6
sta DirtyTileCount
rts
:done2
txa
adc #4
sta DirtyTileCount
rts
:done1
inx
inx
stx DirtyTileCount
rts
; NOTE(review): the four instructions below follow an RTS and carry no label, so nothing in this
; routine reaches them.
lda _SpriteBitsNot,y ; Cache the bit value for this sprite
ldy TileStoreLookup,x ; Get the tile store offset
and TileStore+TS_SPRITE_FLAG,y
sta TileStore+TS_SPRITE_FLAG,y
csfts_out rts
; Run through the list of tile store offsets that this sprite was last drawn into and mark
; those tiles as dirty. The largest number of tiles that a sprite could possibly cover is 20
; (an unaligned 4x3 sprite), covering a 5x4 area of play field tiles.
@ -129,68 +329,68 @@ _AddSprite
; Y register = sprite record index
_CSFTS_Out rts
_ClearSpriteFromTileStore
ldx _Sprites+TILE_STORE_ADDR_1,y
beq _CSFTS_Out
ldal TileStore+TS_SPRITE_FLAG,x ; Clear the bit in the bit field. This seems wasteful, but
and _SpriteBitsNot,y ; there is no indexed form of TSB/TRB and caching the value in
stal TileStore+TS_SPRITE_FLAG,x ; a direct page location, only saves 1 or 2 cycles per and costs 10.
jsr _PushDirtyTileX
; ldx _Sprites+TILE_STORE_ADDR_1,y
; beq _CSFTS_Out
; ldal TileStore+TS_SPRITE_FLAG,x ; Clear the bit in the bit field. This seems wasteful, but
; and _SpriteBitsNot,y ; there is no indexed form of TSB/TRB and caching the value in
; stal TileStore+TS_SPRITE_FLAG,x ; a direct page location, only saves 1 or 2 cycles per and costs 10.
; jsr _PushDirtyTileX
ldx _Sprites+TILE_STORE_ADDR_2,y
beq _CSFTS_Out
ldal TileStore+TS_SPRITE_FLAG,x
and _SpriteBitsNot,y
stal TileStore+TS_SPRITE_FLAG,x
jsr _PushDirtyTileX
; ldx _Sprites+TILE_STORE_ADDR_2,y
; beq _CSFTS_Out
; ldal TileStore+TS_SPRITE_FLAG,x
; and _SpriteBitsNot,y
; stal TileStore+TS_SPRITE_FLAG,x
; jsr _PushDirtyTileX
ldx _Sprites+TILE_STORE_ADDR_3,y
beq _CSFTS_Out
ldal TileStore+TS_SPRITE_FLAG,x
and _SpriteBitsNot,y
stal TileStore+TS_SPRITE_FLAG,x
jsr _PushDirtyTileX
; ldx _Sprites+TILE_STORE_ADDR_3,y
; beq _CSFTS_Out
; ldal TileStore+TS_SPRITE_FLAG,x
; and _SpriteBitsNot,y
; stal TileStore+TS_SPRITE_FLAG,x
; jsr _PushDirtyTileX
ldx _Sprites+TILE_STORE_ADDR_4,y
beq _CSFTS_Out
ldal TileStore+TS_SPRITE_FLAG,x
and _SpriteBitsNot,y
stal TileStore+TS_SPRITE_FLAG,x
jsr _PushDirtyTileX
; ldx _Sprites+TILE_STORE_ADDR_4,y
; beq _CSFTS_Out
; ldal TileStore+TS_SPRITE_FLAG,x
; and _SpriteBitsNot,y
; stal TileStore+TS_SPRITE_FLAG,x
; jsr _PushDirtyTileX
ldx _Sprites+TILE_STORE_ADDR_5,y
beq :out
ldal TileStore+TS_SPRITE_FLAG,x
and _SpriteBitsNot,y
stal TileStore+TS_SPRITE_FLAG,x
jsr _PushDirtyTileX
; ldx _Sprites+TILE_STORE_ADDR_5,y
; beq :out
; ldal TileStore+TS_SPRITE_FLAG,x
; and _SpriteBitsNot,y
; stal TileStore+TS_SPRITE_FLAG,x
; jsr _PushDirtyTileX
ldx _Sprites+TILE_STORE_ADDR_6,y
beq :out
ldal TileStore+TS_SPRITE_FLAG,x
and _SpriteBitsNot,y
stal TileStore+TS_SPRITE_FLAG,x
jsr _PushDirtyTileX
; ldx _Sprites+TILE_STORE_ADDR_6,y
; beq :out
; ldal TileStore+TS_SPRITE_FLAG,x
; and _SpriteBitsNot,y
; stal TileStore+TS_SPRITE_FLAG,x
; jsr _PushDirtyTileX
ldx _Sprites+TILE_STORE_ADDR_7,y
beq :out
ldal TileStore+TS_SPRITE_FLAG,x
and _SpriteBitsNot,y
stal TileStore+TS_SPRITE_FLAG,x
jsr _PushDirtyTileX
; ldx _Sprites+TILE_STORE_ADDR_7,y
; beq :out
; ldal TileStore+TS_SPRITE_FLAG,x
; and _SpriteBitsNot,y
; stal TileStore+TS_SPRITE_FLAG,x
; jsr _PushDirtyTileX
ldx _Sprites+TILE_STORE_ADDR_8,y
beq :out
ldal TileStore+TS_SPRITE_FLAG,x
and _SpriteBitsNot,y
stal TileStore+TS_SPRITE_FLAG,x
jsr _PushDirtyTileX
; ldx _Sprites+TILE_STORE_ADDR_8,y
; beq :out
; ldal TileStore+TS_SPRITE_FLAG,x
; and _SpriteBitsNot,y
; stal TileStore+TS_SPRITE_FLAG,x
; jsr _PushDirtyTileX
ldx _Sprites+TILE_STORE_ADDR_9,y
beq :out
ldal TileStore+TS_SPRITE_FLAG,x
and _SpriteBitsNot,y
stal TileStore+TS_SPRITE_FLAG,x
jmp _PushDirtyTileX
; ldx _Sprites+TILE_STORE_ADDR_9,y
; beq :out
; ldal TileStore+TS_SPRITE_FLAG,x
; and _SpriteBitsNot,y
; stal TileStore+TS_SPRITE_FLAG,x
; jmp _PushDirtyTileX
:out rts
@ -383,7 +583,7 @@ _DoPhase2
RebuildSpriteArray
lda SpriteMap ; Get the bit field
; Unrolled loop to get the sprite index values that coorespond to the set bit positions
; Unrolled loop to get the sprite index values that correspond to the set bit positions
pea $FFFF ; end-of-list marker
]step equ 0
@ -442,6 +642,20 @@ _RenderSprites
; OPTIMIZATION NOTE: Should check that the sprite actually changes position. If the screen scrolls
; by +X, but the sprite moves by -X (so its relative position is unchanged), then
; it does NOT need to be marked as dirty.
;
; OPTIMIZATION NOTE: At this point, a decent chunk of per-tile time is spent updating the sprite flags
; for a given TileStore entry. When a sprite needs to be redrawn (such as when the
; screen scrolls), the code marks every tile the sprite was on as no longer occupied
; and then marks the occupied tiles. While simple, this is very redundant when the
; screen is scrolling slowly since it is very likely that the same sprite covers the
; exact same tiles. Each pair of markings requires 35 cycles, so a basic 16x16 sprite
; could save >300 cycles per frame. With 4 or 5 sprites on screen, the saving passes
; our 1% threshold for useful optimizations.
;
; Since we cache the tile location and effective sprite coverage, we need a fast
; way to compare the old and new positions and get a list of the new tiles the sprite
; occupies and old locations that it no longer covers. It's possible that just testing
; for equality would be the easiest win to know when we can skip everything.
stz forceSpriteFlag
lda StartX
@ -531,10 +745,15 @@ _CacheSpriteBanks
ora #^TileStore
sta TileStoreBankAndTileDataBank
lda #>TileStore
and #$FF00
ora #^TileStore
sta TileStoreBankDoubled
rts
; This is 12 blocks wide
SPRITE_PLANE_SPAN equ VBUFF_STRIDE_BYTES ; 52
SPRITE_PLANE_SPAN equ VBUFF_STRIDE_BYTES
; A = x coordinate
; Y = y coordinate
@ -574,11 +793,19 @@ SPRITE_PLANE_SPAN equ VBUFF_STRIDE_BYTES ; 52
; its tile information, or changing its position.
;
; X = sprite index
_stamp_step dw 0,12,24,36
_PrecalcAllSpriteInfo
lda _Sprites+SPRITE_ID,x
and #$3E00
; and #$3E00
xba
sta _Sprites+SPRITE_DISP,x ; use bits 9 through 13 for full dispatch
and #$0006
tay
lda _Sprites+VBUFF_ADDR,x
clc
adc _stamp_step,y
sta _Sprites+SPRITE_DISP,x
; Set the
; Set the sprite's width and height
lda #4
@ -673,19 +900,26 @@ RemoveSprite ENT
rtl
; Flag a sprite as removed by setting SPRITE_STATUS_REMOVED in its status word.
;
; _RemoveSprite:  A = sprite ID. IDs that are not below MAX_SPRITES (unsigned compare) are ignored.
; _RemoveSpriteX: X = sprite record index (sprite ID * 2); no range check is performed.
;
; A and X are overwritten on the way through.
_RemoveSprite
cmp #MAX_SPRITES
bcc :ok ; Carry clear => A < MAX_SPRITES
rts ; Out of range: return without touching any sprite
:ok
asl ; Sprite ID * 2 = index into the word-sized _Sprites arrays
tax
_RemoveSpriteX
lda _Sprites+SPRITE_STATUS,x
ora #SPRITE_STATUS_REMOVED
sta _Sprites+SPRITE_STATUS,x
rts
; Update the sprite's flags. We do not allow the size of a sprite to be changed. That requires
; the sprite to be removed and re-added.
;
; A = Sprite ID
; X = Sprite Tile ID and Flags
; X = New Sprite Flags
; Y = New Sprite Stamp Address
UpdateSprite ENT
phb
phk
@ -695,36 +929,34 @@ UpdateSprite ENT
rtl
_UpdateSprite
phx ; swap X/A to be more efficient
tax
pla
_UpdateSpriteX
cpx #MAX_SPRITES*2 ; Make sure we're in bounds
cmp #MAX_SPRITES
bcc :ok
rts
:ok
_UpdateSpriteXnc
cmp _Sprites+SPRITE_ID,x ; Don't do anything if there is no change
beq :no_sprite_change
phx ; Save X to swap into A
asl
tax
pla
cmp _Sprites+SPRITE_ID,x ; If the flags changed, need to redraw the sprite
bne :sprite_flag_change ; on the next frame
tya
cmp _Sprites+VBUFF_ADDR,x ; Did the stamp change?
bne :sprite_stamp_change
rts ; Nothing changed, so just return
:sprite_flag_change
sta _Sprites+SPRITE_ID,x ; Keep a copy of the full descriptor
jsr _GetBaseTileAddr ; This applies the TILE_ID_MASK
cmp _Sprites+TILE_DATA_OFFSET,x
beq :no_tile_change
sta _Sprites+TILE_DATA_OFFSET,x
tya
:sprite_stamp_change
sta _Sprites+VBUFF_ADDR,x ; Just save this to stay in sync
jsr _PrecalcAllSpriteInfo ; Cache stuff
jsr _DrawSpriteSheet ; Render the sprite into internal space if the tile id has changed
:no_tile_change
lda _Sprites+SPRITE_STATUS,x
lda _Sprites+SPRITE_STATUS,x ; Mark this sprite as updated
ora #SPRITE_STATUS_UPDATED
sta _Sprites+SPRITE_STATUS,x
:no_sprite_change
rts
jmp _PrecalcAllSpriteInfo ; Cache stuff and return
; Move a sprite to a new location. If the tile ID of the sprite needs to be changed, then
; a full remove/add cycle needs to happen
@ -741,17 +973,16 @@ MoveSprite ENT
rtl
_MoveSprite
phx ; swap X/A to be more efficient
tax
pla
_MoveSpriteX
cpx #MAX_SPRITES*2 ; Make sure we're in bounds
cmp #MAX_SPRITES
bcc :ok
rts
:ok
_MoveSpriteXnc
phx ; Save X to swap into A
asl
tax
pla
cmp _Sprites+SPRITE_X,x
bne :changed1
sta _Sprites+SPRITE_X,x ; Update the X coordinate
@ -766,13 +997,11 @@ _MoveSpriteXnc
:changed2
sta _Sprites+SPRITE_Y,x ; Update the Y coordinate
jsr _PrecalcAllSpriteInfo ; Can be specialized to only update (x,y) values
lda _Sprites+SPRITE_STATUS,x
ora #SPRITE_STATUS_MOVED
sta _Sprites+SPRITE_STATUS,x
rts
jmp _PrecalcAllSpriteInfo ; Can be specialized to only update (x,y) values
; Sprite data structures. We cache quite a few pieces of information about the sprite
; to make calculations faster, so this is hidden from the caller.
@ -797,22 +1026,25 @@ SPRITE_STATUS_UPDATED equ $0004 ; Sprite's non-position attributes were
SPRITE_STATUS_REMOVED equ $0008 ; Sprite has been removed.
SPRITE_STATUS equ {MAX_SPRITES*0}
TILE_DATA_OFFSET equ {MAX_SPRITES*2}
VBUFF_ADDR equ {MAX_SPRITES*4} ; Fixed address in sprite/mask banks
; TILE_DATA_OFFSET equ {MAX_SPRITES*2}
VBUFF_ADDR equ {MAX_SPRITES*4} ; Base address of the sprite's stamp in the data/mask banks
SPRITE_ID equ {MAX_SPRITES*6}
SPRITE_X equ {MAX_SPRITES*8}
SPRITE_Y equ {MAX_SPRITES*10}
TILE_STORE_ADDR_1 equ {MAX_SPRITES*12}
TILE_STORE_ADDR_2 equ {MAX_SPRITES*14}
TILE_STORE_ADDR_3 equ {MAX_SPRITES*16}
TILE_STORE_ADDR_4 equ {MAX_SPRITES*18}
TILE_STORE_ADDR_5 equ {MAX_SPRITES*20}
TILE_STORE_ADDR_6 equ {MAX_SPRITES*22}
TILE_STORE_ADDR_7 equ {MAX_SPRITES*24}
TILE_STORE_ADDR_8 equ {MAX_SPRITES*26}
TILE_STORE_ADDR_9 equ {MAX_SPRITES*28}
TILE_STORE_ADDR_10 equ {MAX_SPRITES*30}
SPRITE_DISP equ {MAX_SPRITES*32} ; pre-calculated index for jmp (abs,x) based on sprite size
; TILE_STORE_ADDR_1 equ {MAX_SPRITES*12}
TS_LOOKUP_INDEX equ {MAX_SPRITES*12} ; The index into the TileStoreLookup table corresponding to the top-left corner of the sprite
; TILE_STORE_ADDR_2 equ {MAX_SPRITES*14}
TS_COVERAGE_SIZE equ {MAX_SPRITES*14} ; Index into the lookup table of how many TileStore tiles are covered by this sprite
;TILE_STORE_ADDR_3 equ {MAX_SPRITES*16}
TS_VBUFF_BASE_ADDR equ {MAX_SPRITES*16} ; Fixed address of the TS_VBUFF_X memory locations
;TILE_STORE_ADDR_4 equ {MAX_SPRITES*18}
;TILE_STORE_ADDR_5 equ {MAX_SPRITES*20}
;TILE_STORE_ADDR_6 equ {MAX_SPRITES*22}
;TILE_STORE_ADDR_7 equ {MAX_SPRITES*24}
;TILE_STORE_ADDR_8 equ {MAX_SPRITES*26}
;TILE_STORE_ADDR_9 equ {MAX_SPRITES*28}
;TILE_STORE_ADDR_10 equ {MAX_SPRITES*30}
SPRITE_DISP equ {MAX_SPRITES*32} ; cached address of the specific stamp based on flags
SPRITE_CLIP_LEFT equ {MAX_SPRITES*34}
SPRITE_CLIP_RIGHT equ {MAX_SPRITES*36}
SPRITE_CLIP_TOP equ {MAX_SPRITES*38}

View File

@ -81,105 +81,206 @@ _LocalToTileStore
; ...
;
; For the Y-coordinate, we just use "mod 8" instead of "mod 4"
mdsOut rts
mdsOut2
lda #6 ; Pick a value for a 0x0 tile sprite
sta _Sprites+TS_COVERAGE_SIZE,y ; zero the list of tile store addresses
rts
_MarkDirtySprite
lda #0
sta _Sprites+TILE_STORE_ADDR_1,y ; Clear this sprite's dirty tile list in case of an early exit
lda _SpriteBits,y ; Cache its bit flag to mark in the tile slots
sta SpriteBit
lda _Sprites+IS_OFF_SCREEN,y ; Check if the sprite is visible in the playfield
bne mdsOut
bne mdsOut2
; At this point we know that we have to update the tiles that overlap the sprite's rectangle defined
; by (Top, Left), (Bottom, Right). First, calculate the row and column in the TileStore that
; encloses the top-left on-screen corner of the sprite
; Add the first visible row of the sprite to the Y-scroll offset to find the first line in the
; code field that needs to be drawn. The range of values is 0 to 199+207 = [0, 406]
clc
lda _Sprites+SPRITE_CLIP_TOP,y
adc StartYMod208 ; Adjust for the scroll offset
tax ; cache
cmp #208 ; check if we went too far positive
bcc *+5
sbc #208
pha ; Cache
and #$FFF8 ; mask first to ensure LSR will clear the carry
lsr
lsr ; This is the row in the Tile Store for top-left corner of the sprite
and #$FFFE ; Store the value pre-multiplied by 2 for indexing in the :mark_R_C routines
lsr
tax
lda TileStoreLookupYTable,x ; Even numbers from [0, 100] (50 elements)
sta RowTop
pla
; Next, calculate how many tiles are covered by the sprite. This uses the table at the top of this function, but
; the idea is that for every increment of StartX or StartY, that can shift the sprite into the next tile, up to
; a maximum of mod 4 / mod 8. So the effective width of a sprite is (((StartX + Clip_Left) mod 4) + Clip_Width) / 4
; Get the position of the top edge within the tile and then add it to the sprite's height
; to calculate the number of tiles that are overlapped. We use the actual width and height
; values here so small sprites (like 4x4 bullets) only force an update to the actual tiles
; that are intersected, rather than assuming an 8x8 sprite always takes up that amount of
; space.
txa
and #$0007
sta tmp0 ; save to adjust sprite origin
tax ; cache again. This is a bit faster than recalculating
lda _Sprites+SPRITE_CLIP_HEIGHT,y ; Nominal value between 0 and 16+7 = 23 = 10111
adc _Sprites+SPRITE_CLIP_HEIGHT,y ; Nominal value between 0 and 16+7 = 23 = 10111
dec
clc
adc tmp0
and #$0018
sta AreaIndex
; Repeat to get the same information for the columns
txa
asl
tax
lda :vbuff_mul,x
sta tmp0
; Add the horizontal position to the horizontal offset to find the first column in the
; code field that needs to be drawn. The range of values is 0 to 159+163 = [0, 322]
clc
lda _Sprites+SPRITE_CLIP_LEFT,y
adc StartXMod164
tax
cmp #164
bcc *+5
sbc #164
and #$FFFC
lsr
and #$FFFE ; Same pre-multiply by 2 for later
sta ColLeft
; sta ColLeft ; Even numbers from [0, 160] (80 elements)
adc RowTop
sta _Sprites+TS_LOOKUP_INDEX,y ; This is the index into the TileStoreLookup table
; Calculate the final address of the sprite data in the stamp buffer. We have to move earlier
; in the buffer based on the horizontal offset and move up for each vertical offset.
txa
and #$0003
sta tmp1 ; save to adjust sprite origin
tax
lda _Sprites+SPRITE_CLIP_WIDTH,y ; max width = 8 = 0x08
adc tmp0 ; add to the vertical offset
; Subtract this value from the SPRITE_DISP address
eor #$FFFF ; A = -X - 1
sec ; C = 1
adc _Sprites+SPRITE_DISP,y ; A = SPRITE_DISP + (-X - 1) + 1 = SPRITE_DISP - X
sta VBuffOrigin ; this is the final (adjusted) origin for this sprite
; Load the base address of the appropriate TS_VBUFF_? offset for this sprite index and
; store it as an indirect address.
lda _Sprites+TS_VBUFF_BASE_ADDR,y
sta tmp0
; We know the starting corner of the TileStore. Now, we need to figure out how many tiles
; the sprite covers. This is a function of the sprite's width and height and the specific
; location of the upper-left corner of the sprite within the corner tile.
txa
adc _Sprites+SPRITE_CLIP_WIDTH,y ; max width = 8 = 0x08
dec
clc
adc tmp1
and #$000C
lsr ; max value = 4 = 0x04
and #$0006
ora AreaIndex
sta AreaIndex
ora AreaIndex ; merge into the area index
; No need to copy the TileStore addresses into the Sprite's TILE_STORE_ADDR values. Just
; hold a copy of the corner offset into the lookup table and the sprite's size in tiles.
; Then, when we need to erase we can just lookup the values in the TileStoreLookup table.
sta _Sprites+TS_COVERAGE_SIZE,y
tax
; lda TileStoreBaseIndex
; sta _Sprites+TS_LOOKUP_INDEX,y
; Jump to the appropriate marking routine
jmp (:mark,x)
mdsOut rts
;_MarkDirtySprite
;
; lda #0
; sta _Sprites+TILE_STORE_ADDR_1,y ; Clear this sprite's dirty tile list in case of an early exit
; lda _SpriteBits,y ; Cache its bit flag to mark in the tile slots
; sta SpriteBit
; lda _Sprites+IS_OFF_SCREEN,y ; Check if the sprite is visible in the playfield
; bne mdsOut
; At this point we know that we have to update the tiles that overlap the sprite's rectangle defined
; by (Top, Left), (Bottom, Right). First, calculate the row and column in the TileStore that
; encloses the top-left on-screen corner of the sprite
; clc
; lda _Sprites+SPRITE_CLIP_TOP,y
; adc StartYMod208 ; Adjust for the scroll offset
; tax ; cache
; cmp #208 ; check if we went too far positive
; bcc *+5
; sbc #208
; lsr
; lsr ; This is the row in the Tile Store for top-left corner of the sprite
; and #$FFFE ; Store the value pre-multiplied by 2 for indexing in the :mark_R_C routines
; sta RowTop
; Next, calculate how many tiles are covered by the sprite. This uses the table at the top of this function, but
; the idea is that for every increment of StartX or StartY, that can shift the sprite into the next tile, up to
; a maximum of mod 4 / mod 8. So the effective width of a sprite is (((StartX + Clip_Left) mod 4) + Clip_Width) / 4
; txa
; and #$0007
; sta tmp0 ; save to adjust sprite origin
; lda _Sprites+SPRITE_CLIP_HEIGHT,y ; Nominal value between 0 and 16+7 = 23 = 10111
; dec
; clc
; adc tmp0
; and #$0018
; sta AreaIndex
; Repeat to get the same information for the columns
; clc
; lda _Sprites+SPRITE_CLIP_LEFT,y
; adc StartXMod164
; tax
; cmp #164
; bcc *+5
; sbc #164
; lsr
; and #$FFFE ; Same pre-multiply by 2 for later
; sta ColLeft
; txa
; and #$0003
; sta tmp1 ; save to adjust sprite origin;
; lda _Sprites+SPRITE_CLIP_WIDTH,y ; max width = 8 = 0x08
; dec
; clc
; adc tmp1
; lsr ; max value = 4 = 0x04
; and #$0006
; ora AreaIndex
; sta AreaIndex
; Calculate the modified origin address for the sprite. We need to look at the sprite flip bits
; to determine which of the four sprite stamps is the correct one to use. Then, offset that origin
; based on the (x, y) and (startx, starty) positions.
lda _Sprites+SPRITE_DISP,y ; Each stamp is 12 bytes
and #$0006
tax
lda :stamp_step,x
clc
adc _Sprites+VBUFF_ADDR,y
sec
sbc tmp1 ; Subtract the horizontal within-tile displacement
asl tmp0
ldx tmp0
sec
sbc :vbuff_mul,x
sta VBuffOrigin
lda #^TileStore
sta tmp1
; lda _Sprites+SPRITE_DISP,y ; Get the sprite's base display address
; sec
; sbc tmp1 ; Subtract the horizontal within-tile displacement
; asl tmp0
; ldx tmp0
; sec
; sbc :vbuff_mul,x
; sta VBuffOrigin
; lda #^TileStore
; sta tmp1
; Dispatch to cover the tiles
ldx AreaIndex
jmp (:mark,x)
; ldx AreaIndex
; jmp (:mark,x)
:mark dw :mark1x1,:mark1x2,:mark1x3,mdsOut
dw :mark2x1,:mark2x2,:mark2x3,mdsOut
dw :mark3x1,:mark3x2,:mark3x3,mdsOut
dw mdsOut,mdsOut,mdsOut,mdsOut
:stamp_step dw 0,12,24,36
; Byte offset of N lines (N = 0..7) within the stamp buffer. Each entry must equal
; N * VBUFF_STRIDE_BYTES; with the 12-tile-wide stamp buffer the stride is 12*4 = 48 bytes.
:vbuff_mul dw 0,48,96,144,192,240,288,336
; Dispatch to the calculated sizing
; Begin a list of subroutines to cover all of the valid sprite size combinations. This is all unrolled code,
@ -191,11 +292,170 @@ _MarkDirtySprite
;
; There *might* be some speed gained by pushing a list of :mark_R_C addresses onto the stack in the clipping routine
; and dispatching that way, but probably not...
; Single-tile variant of the sprite marking code. The :mark dispatch table visible in this file does
; not reference it.
;
; In:  A = TileStoreBaseIndex (index into TileStoreLookup)
;      SpriteBit, VBuffOrigin and the pointer at tmp0 are read as left by the caller
; Out: SpriteBit is OR-ed into the tile's TS_SPRITE_FLAG, VBuffOrigin is written through (tmp0),y and,
;      if the tile's TS_DIRTY word was zero, the tile is appended to DirtyTiles and DirtyTileCount
;      advances by one word.
; Clobbers A, X, Y.
;
; NOTE(review): TS_DIRTY is set to the tile's own TileStore offset, which would be zero (i.e. "clean")
; for a tile stored at offset 0 -- confirm that offset is never used.
:mark1x1_v2
tax ; Get the TileStoreBaseIndex
ldy TileStoreLookup,x ; Get the offset into the TileStore for this tile
lda SpriteBit ; Mark this tile as having this sprite
ora TileStore+TS_SPRITE_FLAG,y
sta TileStore+TS_SPRITE_FLAG,y
lda VBuffOrigin
sta (tmp0),y ; Fill in the slot for this sprite on this tile
lda TileStore+TS_DIRTY,y ; If this tile is not yet marked dirty, mark it
bne exit1x1 ; Already dirty: it is on the list, nothing more to do
ldx DirtyTileCount ; X = byte offset of the next free DirtyTiles slot
tya
sta DirtyTiles,x ; Append the tile's TileStore offset to the list
sta TileStore+TS_DIRTY,y ; and use the same value as the non-zero dirty marker
inx
inx
stx DirtyTileCount ; The count is kept pre-multiplied by two (one word per entry)
exit1x1
rts
; 2x2 variant of the sprite marking code. The :mark dispatch table visible in this file does not
; reference it.
;
; In:  A = TileStoreBaseIndex (index into TileStoreLookup for the top-left tile)
;      SpriteBit, VBuffOrigin and the pointer at tmp0 are read as left by the caller
; Out: For each of the four tiles covered by the sprite
;        1. SpriteBit is OR-ed into TS_SPRITE_FLAG so the renderer knows which vbuff addresses to load
;        2. The address of the sprite stamp graphics for that tile is written through (tmp0),y
;        3. If TS_DIRTY was zero, the tile is appended to DirtyTiles and marked dirty
;      DirtyTileCount advances by two for every tile that was actually appended.
; Clobbers A, X, Y.
;
; Changes from the previous revision of this routine:
;   * The fourth tile added #4+SPRITE_PLANE_SPAN to an accumulator that already held
;     VBuffOrigin+SPRITE_PLANE_SPAN, applying the row offset twice.  It now adds only #4.
;   * Every adc is preceded by clc; the carry was previously whatever the caller left behind.
;   * The fourth tile is queued on the stack like the other three.  Previously DirtyTileCount was
;     advanced past the fourth tile's slot even when that tile was already dirty and nothing had
;     been stored there, leaving a stale entry in the list.
;
; NOTE(review): a tile whose TileStore offset is zero is indistinguishable from the sentinel (and from
; a clean TS_DIRTY value) -- confirm offset 0 is never queued.
; NOTE(review): the second row uses TileStoreLookup+TS_LOOKUP_SPAN and VBuffOrigin+SPRITE_PLANE_SPAN.
; Elsewhere SPRITE_PLANE_SPAN is used as a per-line stride and TileStoreLookup holds words -- confirm
; these are the intended per-tile-row offsets.
:mark2x2_v2
            tax                                     ; X = TileStoreBaseIndex

; Push a sentinel value on the stack.  Every tile that must be added to the dirty list is pushed on
; top of it, and the list is drained in one unrolled pass at the end of the routine.
            pea   #$0000

; Tile (0,0)
            ldy   TileStoreLookup,x                 ; Get the offset into the TileStore for this tile
            lda   SpriteBit                         ; Mark this tile as having this sprite
            ora   TileStore+TS_SPRITE_FLAG,y
            sta   TileStore+TS_SPRITE_FLAG,y
            lda   TileStore+TS_DIRTY,y              ; If this tile is not yet marked dirty, queue it up
            bne   *+3                               ; (skips the one-byte phy)
            phy
            lda   VBuffOrigin
            sta   (tmp0),y                          ; Fill in the slot for this sprite on this tile

; Tile (0,1)
            ldy   TileStoreLookup+2,x
            clc
            adc   #4                                ; A still holds VBuffOrigin, so this saves a load
            sta   (tmp0),y
            lda   SpriteBit
            ora   TileStore+TS_SPRITE_FLAG,y
            sta   TileStore+TS_SPRITE_FLAG,y
            lda   TileStore+TS_DIRTY,y
            bne   *+3
            phy

; Tile (1,0)
            ldy   TileStoreLookup+TS_LOOKUP_SPAN,x
            lda   SpriteBit
            ora   TileStore+TS_SPRITE_FLAG,y
            sta   TileStore+TS_SPRITE_FLAG,y
            lda   TileStore+TS_DIRTY,y
            bne   *+3
            phy
            lda   VBuffOrigin
            clc
            adc   #SPRITE_PLANE_SPAN
            sta   (tmp0),y

; Tile (1,1)
            ldy   TileStoreLookup+TS_LOOKUP_SPAN+2,x
            clc
            adc   #4                                ; A already holds VBuffOrigin+SPRITE_PLANE_SPAN
            sta   (tmp0),y
            lda   SpriteBit
            ora   TileStore+TS_SPRITE_FLAG,y
            sta   TileStore+TS_SPRITE_FLAG,y
            lda   TileStore+TS_DIRTY,y
            bne   *+3
            phy

; Drain the queued tiles into the DirtyTiles array.  A zero word is the sentinel pushed above.
            ldx   DirtyTileCount

            pla
            beq   :queued0
            sta   DirtyTiles,x
            tay
            sta   TileStore+TS_DIRTY,y              ; Any non-zero value marks the tile dirty

            pla
            beq   :queued1
            sta   DirtyTiles+2,x
            tay
            sta   TileStore+TS_DIRTY,y

            pla
            beq   :queued2
            sta   DirtyTiles+4,x
            tay
            sta   TileStore+TS_DIRTY,y

            pla
            beq   :queued3
            sta   DirtyTiles+6,x
            tay
            sta   TileStore+TS_DIRTY,y

; All four tiles were queued; the sentinel is still on the stack and must be discarded.
            pla
            txa
            clc
            adc   #8
            sta   DirtyTileCount
            rts

:queued3    txa
            clc
            adc   #6
            sta   DirtyTileCount
            rts

:queued2    txa
            clc
            adc   #4
            sta   DirtyTileCount
            rts

:queued1    inx
            inx
            stx   DirtyTileCount
:queued0    rts
; Dispatch target for a sprite covering a single tile: calls :mark_0_0, stores the value left in A
; into TILE_STORE_ADDR_1 of the Y-indexed _Sprites record and writes a zero word into the next slot.
; NOTE(review): the :mark_R_C helpers are outside this listing; this presumably relies on them
; preserving Y -- confirm.
:mark1x1
jsr :mark_0_0
sta _Sprites+TILE_STORE_ADDR_1,y
lda #0
sta _Sprites+TILE_STORE_ADDR_2,y
; sta _Sprites+TILE_STORE_ADDR_1,y
; lda #0
; sta _Sprites+TILE_STORE_ADDR_2,y
rts
; NOTE: If we rework the _PushDirtyTile to use the Y register instead of the X register, we can
@ -209,112 +469,112 @@ _MarkDirtySprite
; Dispatch target that calls :mark_0_0 and :mark_0_1, stores the value each call leaves in A into
; TILE_STORE_ADDR_1..2 of the Y-indexed _Sprites record and writes a zero word into the next slot.
:mark1x2
jsr :mark_0_0
sta _Sprites+TILE_STORE_ADDR_1,y
; sta _Sprites+TILE_STORE_ADDR_1,y
jsr :mark_0_1
sta _Sprites+TILE_STORE_ADDR_2,y
lda #0
sta _Sprites+TILE_STORE_ADDR_3,y
; sta _Sprites+TILE_STORE_ADDR_2,y
; lda #0
; sta _Sprites+TILE_STORE_ADDR_3,y
rts
; Dispatch target that calls :mark_0_0, :mark_0_1 and :mark_0_2, stores the value each call leaves in
; A into TILE_STORE_ADDR_1..3 of the Y-indexed _Sprites record and writes a zero word into slot 4.
:mark1x3
jsr :mark_0_0
sta _Sprites+TILE_STORE_ADDR_1,y
; sta _Sprites+TILE_STORE_ADDR_1,y
jsr :mark_0_1
sta _Sprites+TILE_STORE_ADDR_2,y
; sta _Sprites+TILE_STORE_ADDR_2,y
jsr :mark_0_2
sta _Sprites+TILE_STORE_ADDR_3,y
lda #0
sta _Sprites+TILE_STORE_ADDR_4,y
; sta _Sprites+TILE_STORE_ADDR_3,y
; lda #0
; sta _Sprites+TILE_STORE_ADDR_4,y
rts
; Dispatch target that calls :mark_0_0 and :mark_1_0, stores the value each call leaves in A into
; TILE_STORE_ADDR_1..2 of the Y-indexed _Sprites record and writes a zero word into slot 3.
:mark2x1
jsr :mark_0_0
sta _Sprites+TILE_STORE_ADDR_1,y
; sta _Sprites+TILE_STORE_ADDR_1,y
jsr :mark_1_0
sta _Sprites+TILE_STORE_ADDR_2,y
lda #0
sta _Sprites+TILE_STORE_ADDR_3,y
; sta _Sprites+TILE_STORE_ADDR_2,y
; lda #0
; sta _Sprites+TILE_STORE_ADDR_3,y
rts
; Dispatch target that calls :mark_0_0, :mark_0_1, :mark_1_0 and :mark_1_1 in that order, stores the
; value each call leaves in A into TILE_STORE_ADDR_1..4 of the Y-indexed _Sprites record and writes a
; zero word into slot 5.
:mark2x2
jsr :mark_0_0
sta _Sprites+TILE_STORE_ADDR_1,y
; sta _Sprites+TILE_STORE_ADDR_1,y
jsr :mark_0_1
sta _Sprites+TILE_STORE_ADDR_2,y
; sta _Sprites+TILE_STORE_ADDR_2,y
jsr :mark_1_0
sta _Sprites+TILE_STORE_ADDR_3,y
; sta _Sprites+TILE_STORE_ADDR_3,y
jsr :mark_1_1
sta _Sprites+TILE_STORE_ADDR_4,y
lda #0
sta _Sprites+TILE_STORE_ADDR_5,y
; sta _Sprites+TILE_STORE_ADDR_4,y
; lda #0
; sta _Sprites+TILE_STORE_ADDR_5,y
rts
; Dispatch target that calls :mark_0_0..:mark_0_2 followed by :mark_1_0..:mark_1_2, stores the value
; each call leaves in A into TILE_STORE_ADDR_1..6 of the Y-indexed _Sprites record and writes a zero
; word into slot 7.
:mark2x3
jsr :mark_0_0
sta _Sprites+TILE_STORE_ADDR_1,y
; sta _Sprites+TILE_STORE_ADDR_1,y
jsr :mark_0_1
sta _Sprites+TILE_STORE_ADDR_2,y
; sta _Sprites+TILE_STORE_ADDR_2,y
jsr :mark_0_2
sta _Sprites+TILE_STORE_ADDR_3,y
; sta _Sprites+TILE_STORE_ADDR_3,y
jsr :mark_1_0
sta _Sprites+TILE_STORE_ADDR_4,y
; sta _Sprites+TILE_STORE_ADDR_4,y
jsr :mark_1_1
sta _Sprites+TILE_STORE_ADDR_5,y
; sta _Sprites+TILE_STORE_ADDR_5,y
jsr :mark_1_2
sta _Sprites+TILE_STORE_ADDR_6,y
lda #0
sta _Sprites+TILE_STORE_ADDR_7,y
; sta _Sprites+TILE_STORE_ADDR_6,y
; lda #0
; sta _Sprites+TILE_STORE_ADDR_7,y
rts
; Dispatch target that calls :mark_0_0, :mark_1_0 and :mark_2_0, stores the value each call leaves in
; A into TILE_STORE_ADDR_1..3 of the Y-indexed _Sprites record and writes a zero word into slot 4.
:mark3x1
jsr :mark_0_0
sta _Sprites+TILE_STORE_ADDR_1,y
; sta _Sprites+TILE_STORE_ADDR_1,y
jsr :mark_1_0
sta _Sprites+TILE_STORE_ADDR_2,y
; sta _Sprites+TILE_STORE_ADDR_2,y
jsr :mark_2_0
sta _Sprites+TILE_STORE_ADDR_3,y
lda #0
sta _Sprites+TILE_STORE_ADDR_4,y
; sta _Sprites+TILE_STORE_ADDR_3,y
; lda #0
; sta _Sprites+TILE_STORE_ADDR_4,y
rts
; Dispatch target that calls :mark_0_0, :mark_1_0, :mark_2_0 and then :mark_0_1, :mark_1_1, :mark_2_1
; (note the order differs from :mark2x3), stores the value each call leaves in A into
; TILE_STORE_ADDR_1..6 of the Y-indexed _Sprites record and writes a zero word into slot 7.
:mark3x2
jsr :mark_0_0
sta _Sprites+TILE_STORE_ADDR_1,y
; sta _Sprites+TILE_STORE_ADDR_1,y
jsr :mark_1_0
sta _Sprites+TILE_STORE_ADDR_2,y
; sta _Sprites+TILE_STORE_ADDR_2,y
jsr :mark_2_0
sta _Sprites+TILE_STORE_ADDR_3,y
; sta _Sprites+TILE_STORE_ADDR_3,y
jsr :mark_0_1
sta _Sprites+TILE_STORE_ADDR_4,y
; sta _Sprites+TILE_STORE_ADDR_4,y
jsr :mark_1_1
sta _Sprites+TILE_STORE_ADDR_5,y
; sta _Sprites+TILE_STORE_ADDR_5,y
jsr :mark_2_1
sta _Sprites+TILE_STORE_ADDR_6,y
lda #0
sta _Sprites+TILE_STORE_ADDR_7,y
; sta _Sprites+TILE_STORE_ADDR_6,y
; lda #0
; sta _Sprites+TILE_STORE_ADDR_7,y
rts
; Dispatch target for the largest footprint: calls all nine :mark_R_C helpers (R and C each 0..2),
; stores the value each call leaves in A into TILE_STORE_ADDR_1..9 of the Y-indexed _Sprites record
; and writes a zero word into slot 10.
:mark3x3
jsr :mark_0_0
sta _Sprites+TILE_STORE_ADDR_1,y
; sta _Sprites+TILE_STORE_ADDR_1,y
jsr :mark_1_0
sta _Sprites+TILE_STORE_ADDR_2,y
; sta _Sprites+TILE_STORE_ADDR_2,y
jsr :mark_2_0
sta _Sprites+TILE_STORE_ADDR_3,y
; sta _Sprites+TILE_STORE_ADDR_3,y
jsr :mark_0_1
sta _Sprites+TILE_STORE_ADDR_4,y
; sta _Sprites+TILE_STORE_ADDR_4,y
jsr :mark_1_1
sta _Sprites+TILE_STORE_ADDR_5,y
; sta _Sprites+TILE_STORE_ADDR_5,y
jsr :mark_2_1
sta _Sprites+TILE_STORE_ADDR_6,y
; sta _Sprites+TILE_STORE_ADDR_6,y
jsr :mark_0_2
sta _Sprites+TILE_STORE_ADDR_7,y
; sta _Sprites+TILE_STORE_ADDR_7,y
jsr :mark_1_2
sta _Sprites+TILE_STORE_ADDR_8,y
; sta _Sprites+TILE_STORE_ADDR_8,y
jsr :mark_2_2
sta _Sprites+TILE_STORE_ADDR_9,y
lda #0
sta _Sprites+TILE_STORE_ADDR_10,y
; sta _Sprites+TILE_STORE_ADDR_9,y
; lda #0
; sta _Sprites+TILE_STORE_ADDR_10,y
rts
; Begin List of subroutines to mark each tile offset

View File

@ -1,23 +1,42 @@
; Alternate entry point that takes arguments in registers instead of using a _Sprite
; record
;
; Y = VBUFF address
; X = Tile Data address
; A = Sprite Flags
;
; The arguments are parked in tmp1 (VBUFF address), tmp2 (tile data address) and tmp3 (flags masked
; with DISP_MASK) and control transfers to _DSSCommon with a jmp, so _DSSCommon's return goes
; straight back to this routine's caller.
_DrawSpriteStamp
sty tmp1
stx tmp2
and #DISP_MASK ; dispatch to all of the different orientations
sta tmp3
jmp _DSSCommon
; Function to render a sprite from a sprite definition into the internal data buffers
;
; X = sprite index
;
; As listed here, the routine saves X on the stack, copies the sprite record's VBUFF_ADDR,
; TILE_DATA_OFFSET and (SPRITE_DISP masked with DISP_MASK) into tmp1, tmp2 and tmp3, and then runs
; into the code that follows.  The commented-out copy below is the same sequence written as an
; explicit jsr _DSSCommon / plx / rts.
_DrawSpriteSheet
; _DrawSpriteSheet
DISP_VFLIP equ $0004 ; hard code these because they are internal values
DISP_HFLIP equ $0002
DISP_MASK equ $0018 ; Isolate the size bits
phx ; Preserve the caller's sprite index
lda _Sprites+VBUFF_ADDR,x
sta tmp1
lda _Sprites+TILE_DATA_OFFSET,x
sta tmp2
lda _Sprites+SPRITE_DISP,x
and #DISP_MASK ; dispatch to all of the different orientations
sta tmp3
; phx
;
; lda _Sprites+VBUFF_ADDR,x
; sta tmp1
;
; lda _Sprites+TILE_DATA_OFFSET,x
; sta tmp2
;
; lda _Sprites+SPRITE_DISP,x
; and #DISP_MASK ; dispatch to all of the different orientations
; sta tmp3
;
; jsr _DSSCommon
;
; plx
; rts
_DSSCommon
; Set bank
phb
pea #^tiledata ; Set the bank to the tile data
@ -58,8 +77,6 @@ DISP_MASK equ $0018 ; Isolate the size bits
; Restore bank
plb ; pop extra byte
plb
plx
rts
;
; X = _Sprites array offset

View File

@ -254,8 +254,16 @@ NextCol
; A double-sized table of lookup values. This is basically the cross-product of TileStoreYTable and
; NextCol. It is double-width and double-height so that, if we know a tile's address position
; of (X + 41*Y), then any relative tile store address can be looked up by adding a constan value.
;TileStore2DLookup ds {26*41*2}*4
; of (X + 41*Y), then any relative tile store address can be looked up by adding a constant value.
;
; 50 rows by 80 columns + 2 extra rows and columns
TS_LOOKUP_WIDTH equ 80
TS_LOOKUP_HEIGHT equ 50
TS_LOOKUP_SPAN equ {TS_LOOKUP_WIDTH+2}
TS_LOOKUP_ROWS equ {TS_LOOKUP_HEIGHT+2}
TileStoreLookupYTable ds {TS_LOOKUP_HEIGHT*2}
TileStoreLookup ds {TS_LOOKUP_SPAN*TS_LOOKUP_ROWS*2}
; This is a double-length table that holds the right-edge addresses of the playfield on the physical
; screen. At most, it needs to hold 200 addresses for a full height playfield. It is double-length
@ -296,7 +304,5 @@ BG1YOffsetTable lup 26
dw 1,1,1,2,2,2,2,2,1,1,1,0,0,0,0,0
--^
; Table of base VBUFF addresses for each sprite stamp slot
VBuffAddrTable ds 2*VBUFF_SLOT_COUNT

View File

@ -160,7 +160,7 @@ SetScreenRect sty ScreenHeight ; Save the screen height and
; Generalized routine that calculates the on-screen address of the tiles and takes the
; StartX and StartY values into consideration. This routine really exists to support
; the dirty tile rendering mode and the tiles *must* be aligned with the playfield.
; the dirty tile rendering mode and the tiles *must* be aligned with the playfield.
; That is, StartX % 4 == 0 and StartY % 8 == 0. If these conditions are not met, then
; screen will not render correctly.
_RecalcTileScreenAddrs

View File

@ -41,17 +41,6 @@
TILE_CTRL_MASK equ $FE00
TILE_PROC_MASK equ $F800 ; Select tile proc for rendering
; Temporary direct page locations used by some of the complex tile renderers
_X_REG equ tiletmp
_Y_REG equ tiletmp+2
_T_PTR equ tiletmp+4 ; Copy of the tile address pointer
_BASE_ADDR equ tiletmp+6 ; Copy of BTableLow for this tile
_SPR_X_REG equ tiletmp+8 ; Cache address of sprite plane source for a tile
_JTBL_CACHE equ tiletmp+10 ; Cache the offset to the exception handler for a column
_OP_CACHE equ tiletmp+12 ; Cache of a relevant operand / oeprator
_TILE_ID equ tiletmp+14 ; Copy of the tile descriptor
; Low-level function to take a tile descriptor and return the address in the tiledata
; bank. This is not too useful in the fast-path because the fast-path does more
; incremental calculations, but it is handy for other utility functions
@ -113,56 +102,36 @@ _RenderTileBG1
; Given an address to a Tile Store record, dispatch to the appropriate tile renderer. The Tile
; Store record contains all of the low-level information that's needed to call the renderer.
;
; This routine sets the direct page register to the second page since we use that space to
; build and cache tile and sprite data, when necessary
; Y = address of tile
_RenderTile2
pea >TileStore ; Need that addressing flexibility here. Caller is responsible for restoring bank reg
plb
plb
txy ; We can be better than this....
lda TileStore+TS_SPRITE_FLAG,x ; This is a bitfield of all the sprites that intersect this tile, only care if non-zero or not
bne do_dirty_sprite
lda TileStore+TS_TILE_ID,y ; build the finalized tile descriptor
ldx TileStore+TS_SPRITE_FLAG,y ; This is a bitfield of all the sprites that intersect this tile, only care if non-zero or not
beq :nosprite
; Handle the non-sprite tile blit
; txa
; jsr BuildActiveSpriteArray ; Build the max 4 array of active sprites for this tile
; sta ActiveSpriteCount
sep #$20
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later
lda TileStore+TS_VBUFF_ARRAY_ADDR,y ; Scratch space
sta _SPR_X_REG
phy
ldy spriteIdx
lda (_SPR_X_REG),y
sta _SPR_X_REG
ply
lda TileStore+TS_BASE_ADDR+1,x ; load the base address of the code field ($0000 or $8000)
sta _BASE_ADDR+1 ; so we can get by just copying the high byte
rep #$20
lda TileStore+TS_TILE_ID,y
ora #TILE_SPRITE_BIT
; ldx TileStore+TS_VBUFF_ARRAY_ADDR,y
; stx _SPR_X_REG
:nosprite
sta _TILE_ID ; Some tile blitters need to get the tile descriptor
and #TILE_CTRL_MASK
xba
tax
ldal TileProcs,x ; load and patch in the appropriate subroutine
lda TileStore+TS_BASE_TILE_DISP,x ; Get the address of the renderer for this tile
stal :tiledisp+1
ldx TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated)
lda TileStore+TS_TILE_ID,x
sta _TILE_ID ; Some tile blitters need to get the tile descriptor
sep #$20 ; load the bank of the target code field line
lda TileStore+TS_CODE_ADDR_HIGH,y
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated)
pha
rep #$20
lda TileStore+TS_CODE_ADDR_LOW,y ; load the address of the code field
pha
lda TileStore+TS_BASE_ADDR,y ; load the base address of the code field
sta _BASE_ADDR
lda TileStore+TS_WORD_OFFSET,y
ply
plb ; set the bank
lda TileStore+TS_WORD_OFFSET,x
plx
plb ; set the bank to the code field that will be updated
; B is set to the correct code field bank
; A is set to the tile word offset (0 through 80 in steps of 4)
@ -171,6 +140,194 @@ _RenderTile2
:tiledisp jmp $0000 ; render the tile
; Let's make a macro helper for the bit test tree
; dobit src_offset,dest,next_target,end_target
;
; Loads the word at src_offset,y into X and stores it to dest.  The macro branches on the Z flag as
; it stands on entry: when Z is set (the callers invoke it right after an lsr, so Z means no bits
; remain in A) it jumps to end_target, otherwise to next_target.  Clobbers X.
dobit MAC
beq last_bit
ldx: ]1,y
stx ]2
jmp ]3
last_bit ldx: ]1,y
stx ]2
jmp ]4
EOM
; The sprite code is just responsible for quickly copying all of the sprite data
; into the direct page temp area.
;
; In:  X = TileStore offset of the tile being rendered
;      A = bit field of the sprites that intersect the tile (it is shifted right one bit at a time)
;
; The lowest set bit N selects the word at offset 2*N in the tile's TS_VBUFF_ARRAY_ADDR array.  That
; word is loaded into X and stored to the direct page; control then goes to CopyOneSprite if no
; other bits remain, or to the matching :loop_1_bit_N label otherwise.
;
; NOTE(review): the :loop_1_bit_N targets are not defined anywhere in this listing.
; NOTE(review): the dobit invocations store to sprite_ptr0 while the two hand-written cases (bits 9
; and 15) store to spriteIdx -- confirm which destination is intended.
do_dirty_sprite
pei TileStoreBankAndTileDataBank ; Special value that has the TileStore bank in LSB and TileData bank in MSB
plb ; Only one of the two pushed bytes is pulled here; the other stays on the stack
; Cache a couple of values into the direct page, but preserve the Accumulator
ldy TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated)
sty tileAddr
; This is very similar to the code in the dirty tile renderer, but we can't reuse
; because that code draws directly to the graphics screen, and this code draws
; to a temporary buffer that has a different stride.
ldy TileStore+TS_VBUFF_ARRAY_ADDR,x ; base address of the VBUFF sprite address array for this tile
lsr
bcc :loop_0_bit_1
dobit $0000;sprite_ptr0;:loop_1_bit_1;CopyOneSprite
:loop_0_bit_1 lsr
bcc :loop_0_bit_2
dobit $0002;sprite_ptr0;:loop_1_bit_2;CopyOneSprite
:loop_0_bit_2 lsr
bcc :loop_0_bit_3
dobit $0004;sprite_ptr0;:loop_1_bit_3;CopyOneSprite
:loop_0_bit_3 lsr
bcc :loop_0_bit_4
dobit $0006;sprite_ptr0;:loop_1_bit_4;CopyOneSprite
:loop_0_bit_4 lsr
bcc :loop_0_bit_5
dobit $0008;sprite_ptr0;:loop_1_bit_5;CopyOneSprite
:loop_0_bit_5 lsr
bcc :loop_0_bit_6
dobit $000A;sprite_ptr0;:loop_1_bit_6;CopyOneSprite
:loop_0_bit_6 lsr
bcc :loop_0_bit_7
dobit $000C;sprite_ptr0;:loop_1_bit_7;CopyOneSprite
:loop_0_bit_7 lsr
bcc :loop_0_bit_8
dobit $000E;sprite_ptr0;:loop_1_bit_8;CopyOneSprite
:loop_0_bit_8 lsr
bcc :loop_0_bit_9
dobit $0010;sprite_ptr0;:loop_1_bit_9;CopyOneSprite
:loop_0_bit_9 lsr
bcc :loop_0_bit_10
; Hand-expanded form of dobit: the ldx changes the flags, so A is re-tested with cmp #0
ldx: $0012,y
stx spriteIdx
cmp #0
jne :loop_1_bit_10
jmp CopyOneSprite
:loop_0_bit_10 lsr
bcc :loop_0_bit_11
dobit $0014;sprite_ptr0;:loop_1_bit_11;CopyOneSprite
:loop_0_bit_11 lsr
bcc :loop_0_bit_12
dobit $0016;sprite_ptr0;:loop_1_bit_12;CopyOneSprite
:loop_0_bit_12 lsr
bcc :loop_0_bit_13
dobit $0018;sprite_ptr0;:loop_1_bit_13;CopyOneSprite
:loop_0_bit_13 lsr
bcc :loop_0_bit_14
dobit $001A;sprite_ptr0;:loop_1_bit_14;CopyOneSprite
:loop_0_bit_14 lsr
bcc :loop_0_bit_15
dobit $001C;sprite_ptr0;:loop_1_bit_15;CopyOneSprite
; Bits 0..14 were all clear, so bit 15 is the only one that can be set; no further test is needed
:loop_0_bit_15 ldx: $001E,y
stx spriteIdx
jmp CopyOneSprite
; We can optimize later, for now just copy the sprite data and mask into its own
; direct page buffer and combine with the tile data later
; We set up direct page pointers to the mask bank and use the bank register for the
; data.
; CopyFourSpritesAbove and CopyThreeSprites label the same address.
;
; Composites three stacked sprites into the tmp_sprite_data / tmp_sprite_mask buffers, two words
; per line for 8 lines.  The sprite pointers live at spriteIdx+0, +4 and +8: the 16-bit indirect form
; (ptr),y reads the sprite data and the long indirect form [ptr],y reads the mask.  For the data
; word, the sprite at +8 is loaded, then masked and overlaid by the sprite at +4 and again by the
; sprite at +0; the combined mask is the AND of the three masks.
; Clobbers A and Y.
;
; NOTE(review): with the trailing "jmp FinishTile" commented out, execution continues into whatever
; code follows this routine.
CopyFourSpritesAbove
; Copy three sprites into a temporary direct page buffer
; Opcode bytes for the long-indirect instructions that are emitted with db inside the LUP blocks
LDA_IL equ $A7 ; lda [dp]
LDA_ILY equ $B7 ; lda [dp],y
AND_IL equ $27 ; and [dp]
AND_ILY equ $37 ; and [dp],y
CopyThreeSprites
]line equ 0
lup 8
ldy #]line*SPRITE_PLANE_SPAN
lda (spriteIdx+8),y
db AND_ILY,spriteIdx+4 ; Can't use long indirect inside LUP because of ']'
ora (spriteIdx+4),y
db AND_ILY,spriteIdx+0
ora (spriteIdx+0),y
sta tmp_sprite_data+{]line*4}
db LDA_ILY,spriteIdx+8
db AND_ILY,spriteIdx+4
db AND_ILY,spriteIdx+0
sta tmp_sprite_mask+{]line*4}
ldy #]line*SPRITE_PLANE_SPAN+2
lda (spriteIdx+8),y
db AND_ILY,spriteIdx+4
ora (spriteIdx+4),y
db AND_ILY,spriteIdx+0
ora (spriteIdx+0),y
sta tmp_sprite_data+{]line*4}+2
db LDA_ILY,spriteIdx+8
db AND_ILY,spriteIdx+4
db AND_ILY,spriteIdx+0
sta tmp_sprite_mask+{]line*4}+2
]line equ ]line+1
--^
; jmp FinishTile
; Copy two sprites into a temporary direct page buffer
;
; Composites the sprites whose pointers live at spriteIdx+0 and spriteIdx+4 into tmp_sprite_data /
; tmp_sprite_mask, two words per line for 8 lines.  The data word of the sprite at +4 is masked with
; [spriteIdx+0],y and overlaid with (spriteIdx+0),y; the combined mask is the AND of the two masks.
; The db lines emit the long-indirect "and [dp],y" / "lda [dp],y" opcodes by hand.
; Clobbers A and Y.
;
; NOTE(review): with the trailing "jmp FinishTile" commented out, execution continues into whatever
; code follows this routine.
CopyTwoSprites
]line equ 0
lup 8
ldy #]line*SPRITE_PLANE_SPAN
lda (spriteIdx+4),y
db AND_ILY,spriteIdx+0
ora (spriteIdx+0),y
sta tmp_sprite_data+{]line*4}
db LDA_ILY,spriteIdx+4
db AND_ILY,spriteIdx+0
sta tmp_sprite_mask+{]line*4}
ldy #]line*SPRITE_PLANE_SPAN+2
lda (spriteIdx+4),y
db AND_ILY,spriteIdx+0
ora (spriteIdx+0),y
sta tmp_sprite_data+{]line*4}+2
db LDA_ILY,spriteIdx+4
db AND_ILY,spriteIdx+0
sta tmp_sprite_mask+{]line*4}+2
]line equ ]line+1
--^
; jmp FinishTile
; Copy a single piece of sprite data into a temporary direct page buffer. X = spriteIdx
;
; For each of 8 lines, the two data words at spritedata+line*SPRITE_PLANE_SPAN,x and the two mask
; words at the same offset in spritemask are copied (long addressing) to tmp_sprite_data and
; tmp_sprite_mask, which are packed at 4 bytes per line.  Clobbers A.
;
; NOTE(review): with the trailing "jmp FinishTile" commented out, execution continues into whatever
; code follows this routine.
CopyOneSprite
]line equ 0
lup 8
ldal spritedata+{]line*SPRITE_PLANE_SPAN},x
sta tmp_sprite_data+{]line*4}
ldal spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
sta tmp_sprite_data+{]line*4}+2
ldal spritemask+{]line*SPRITE_PLANE_SPAN},x
sta tmp_sprite_mask+{]line*4}
ldal spritemask+{]line*SPRITE_PLANE_SPAN}+2,x
sta tmp_sprite_mask+{]line*4}+2
]line equ ]line+1
--^
; jmp FinishTile
; Reference all of the tile rendering subroutines defined in the TileXXXXX files. Each file defines
; 8 entry points:
;
@ -518,7 +675,7 @@ _CopyBG1Tile
; a tile.
;
; TileStore+TS_TILE_ID : Tile descriptor
; TileStore+TS_DIRTY : $FFFF is clean, otherwise stores a back-reference to the DirtyTiles array
; TileStore+TS_DIRTY : $0000 is clean, any other value indicates a dirty tile
; TileStore+TS_TILE_ADDR : Address of the tile in the tile data buffer
; TileStore+TS_CODE_ADDR_LOW : Low word of the address in the code field that receives the tile
; TileStore+TS_CODE_ADDR_HIGH : High word of the address in the code field that receives the tile
@ -590,11 +747,14 @@ InitTiles
lda #0
stal TileStore+TS_TILE_ID,x ; clear the tile store with the special zero tile
stal TileStore+TS_TILE_ADDR,x
stal TileStore+TS_TILE_DISP,x
stal TileStore+TS_SPRITE_FLAG,x ; no sprites are set at the beginning
lda #$FFFF ; none of the tiles are dirty
stal TileStore+TS_DIRTY,x
stal TileStore+TS_DIRTY,x ; none of the tiles are dirty
lda DirtyTileProcs ; Fill in with the first dispatch address
stal TileStore+TS_DIRTY_TILE_DISP,x
lda TileProcs ; Same for non-dirty, non-sprite base case
stal TileStore+TS_BASE_TILE_DISP,x
lda :vbuff ; array of sprite vbuff addresses per tile
stal TileStore+TS_VBUFF_ARRAY_ADDR,x
@ -700,7 +860,16 @@ _SetTile
ldal TileStore+TS_TILE_ID,x
and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value
xba
stal TileStore+TS_TILE_DISP,x
tay
lda DirtyTileProcs,y
stal TileStore+TS_DIRTY_TILE_DISP,x
ldal TileStore+TS_TILE_ID,x ; Get the non-sprite dispatch address
and #TILE_CTRL_MASK
xba
tay
lda TileProcs,y
stal TileStore+TS_BASE_TILE_DISP,x
; txa ; Add this tile to the list of dirty tiles to refresh
jmp _PushDirtyTileX ; on the next call to _ApplyTiles
@ -731,11 +900,12 @@ _PushDirtyTile
; alternate entry point if the x-register is already set
_PushDirtyTileX
ldal TileStore+TS_DIRTY,x
bpl :occupied2
bne :occupied2
txa ; any non-negative value will work, this saves work below
inc ; any non-zero value will work
stal TileStore+TS_DIRTY,x ; and is 1 cycle faster than loading a constant value
txa
ldx DirtyTileCount ; 4
sta DirtyTiles,x ; 6
inx ; 2
@ -783,7 +953,15 @@ ApplyTiles ENT
plb
rtl
; The _ApplyTiles function is responsible for rendering all of the dirty tiles into the code
; field. In this function we switch to the second direct page which holds the temporary
; working buffers for tile rendering.
_ApplyTiles
tdc
clc
adc #$100 ; move to the next page
tcd
bra :begin
:loop
@ -801,4 +979,124 @@ _ApplyTiles
:begin ldy DirtyTileCount
bne :loop
rts
tdc ; Move back to the original direct page
sec
sbc #$100
tcd
rts
; To make processing the tiles faster, we do them in chunks of eight.  This allows the loop to be
; unrolled, which means we don't have to keep track of the register value and makes it faster to
; clear the dirty tile flag after being processed.
;
; Changes from the previous revision of this code:
;   * Four words (eight bytes) of the TileStore bank are pushed per pass instead of three.  Up to
;     eight plb instructions run per pass, so with only six bytes available the seventh and eighth
;     plb pulled the saved data bank and a return-address byte instead.
;   * The at_three stanza now clears TS_DIRTY like every other stanza; without it the tile handled
;     there stayed flagged dirty after being rendered.
;
; NOTE(review): no label precedes this entry sequence in the visible listing -- confirm how it is
; reached.
; NOTE(review): nothing in the visible listing stores to dt_base before it is read with ldy --
; confirm where the base index is written.
; NOTE(review): tmp0, DirtyTileCount and TileStoreBankDoubled are accessed after the direct page
; register has been moved up by $100 -- confirm these resolve to the intended locations.

            tdc                                     ; Move to the dedicated direct page for tile rendering
            clc
            adc   #$100
            tcd

            phb                                     ; Save the current bank
            tsc
            sta   tmp0                              ; Save the stack pointer on the direct page
            bra   at_loop

at_exit
            tdc                                     ; Move back to the original direct page
            sec
            sbc   #$100
            tcd
            plb                                     ; Restore the original data bank and return
            rts

dt_base     equ   $FE                               ; top of second direct page space

; The DirtyTiles array and the TileStore information is in the Tile Store bank.  Because we
; process up to 8 tiles at a time and the tile code sets the bank register to the target
; code field bank, we need to restore the bank register each time.  So, we pre-push
; 8 copies of the TileStore bank onto the stack.

at_loop
            lda   tmp0                              ; Rewind the stack to where it was right after the phb
            tcs

            lda   DirtyTileCount                    ; This is pre-multiplied by 2
            beq   at_exit                           ; If there are no items, exit

            ldx   TileStoreBankDoubled              ; Presumably the TileStore bank in both bytes
            phx                                     ; 4 words = 8 bytes, one for each plb below
            phx
            phx
            phx

            cmp   #16                               ; If there are >= 8 elements, then
            bcs   at_chunk                          ; do a full chunk

            stz   DirtyTileCount                    ; Otherwise, this pass will handle them all

            tax
            jmp   (at_table,x)
at_table    da    at_exit,at_one,at_two,at_three
            da    at_four,at_five,at_six,at_seven

at_chunk    sec
            sbc   #16
            sta   DirtyTileCount                    ; Fall through

; Because all of the registers get used in the _RenderTile2 subroutine, the base index is reloaded
; from the direct page before every tile and the data bank is pulled back off the stack afterwards.

            ldy   dt_base                           ; Reload the base index
            ldx   DirtyTiles+14,y                   ; Load the TileStore offset
            stz   TileStore+TS_DIRTY,x              ; Clear this tile's dirty flag
            jsr   _RenderTile2                      ; Draw the tile
            plb                                     ; Reset the data bank to the TileStore bank
at_seven
            ldy   dt_base
            ldx   DirtyTiles+12,y
            stz   TileStore+TS_DIRTY,x
            jsr   _RenderTile2
            plb
at_six
            ldy   dt_base
            ldx   DirtyTiles+10,y
            stz   TileStore+TS_DIRTY,x
            jsr   _RenderTile2
            plb
at_five
            ldy   dt_base
            ldx   DirtyTiles+8,y
            stz   TileStore+TS_DIRTY,x
            jsr   _RenderTile2
            plb
at_four
            ldy   dt_base
            ldx   DirtyTiles+6,y
            stz   TileStore+TS_DIRTY,x
            jsr   _RenderTile2
            plb
at_three
            ldy   dt_base
            ldx   DirtyTiles+4,y
            stz   TileStore+TS_DIRTY,x              ; Was missing: every stanza must clear the flag
            jsr   _RenderTile2
            plb
at_two
            ldy   dt_base
            ldx   DirtyTiles+2,y
            stz   TileStore+TS_DIRTY,x
            jsr   _RenderTile2
            plb
at_one
            ldy   dt_base
            ldx   DirtyTiles+0,y
            stz   TileStore+TS_DIRTY,x
            jsr   _RenderTile2
            plb

            jmp   at_loop

View File

@ -46,6 +46,47 @@ _TBApplySpriteData
--^
rts
; Overlay a single sprite onto the tile held in blttmp and write the result out through Y.
;
; In:  Y = base address for the stores (the code field address, per the comment block that follows
;          these routines)
;      spriteIdx = offset of the sprite within spritedata / spritemask
; For each of 8 lines, each of the two blttmp words is ANDed with the sprite mask, ORed with the
; sprite data and stored at Y+$0004 (first word) and Y+$0001 (second word), advancing $1000 per line.
; Clobbers A and X.
_TBApplySpriteDataOne
ldx spriteIdx
]line equ 0
lup 8
lda blttmp+{]line*4}
andl spritemask+{]line*SPRITE_PLANE_SPAN},x
oral spritedata+{]line*SPRITE_PLANE_SPAN},x
sta: $0004+{]line*$1000},y
lda blttmp+{]line*4}+2
andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x
oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
rts
; Overlay two sprites onto the tile held in blttmp and write the result out through Y.
;
; In:  Y = base address for the stores (the code field address, per the comment block that follows
;          these routines)
;      spriteIdx, spriteIdx+2 = offsets of the two sprites within spritedata / spritemask
; For each of 8 lines, each blttmp word is masked and overlaid first with the sprite at spriteIdx+2
; and then with the sprite at spriteIdx, so the sprite at spriteIdx ends up on top.  Results are
; stored at Y+$0004 (first word) and Y+$0001 (second word), advancing $1000 per line.
; X is reloaded for every mask/data pair because both sprites share the one index register.
; Clobbers A and X.
_TBApplySpriteDataTwo
]line equ 0
lup 8
lda blttmp+{]line*4}
ldx spriteIdx+2
andl spritemask+{]line*SPRITE_PLANE_SPAN},x
oral spritedata+{]line*SPRITE_PLANE_SPAN},x
ldx spriteIdx
andl spritemask+{]line*SPRITE_PLANE_SPAN},x
oral spritedata+{]line*SPRITE_PLANE_SPAN},x
sta: $0004+{]line*$1000},y
lda blttmp+{]line*4}+2
ldx spriteIdx+2
andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x
oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
ldx spriteIdx
andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x
oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
rts
; Copy tile data into the direct page compositing buffer. The main reason to do this in full passes is
; because we can avoid needing to use both the X and Y registers during the compositing process and
; reserve Y to hold the code field address.