From 8bb17895a9db16750c99698d3dc4cf2e03657c6c Mon Sep 17 00:00:00 2001 From: Lucas Scharenbroich Date: Wed, 20 Apr 2022 07:43:16 -0500 Subject: [PATCH] Rough outline of streamlined sprite subsystem * Split the creation of the sprite stamps from adding the sprites themselves. This allows for 48 stamps that can be pre-rendered and quickly reassigned to sprites for animations. * Inlined all calls to PushDirtyTile. This both removed significant overhead from calling the small function and, since almost all callers were checking multiple tiles, we were able to avoid incrementing the count each time and just add a single increment at the end. * Switched from recording each tile that a sprite intersects with each frame to only recording the top-left tile and the overlap size. This reduced overhead for larger sprites and removed the need for an end-of-list marker. * Much more aggressive caching of Sprite and Tile Store values in order to streamline the inner tile dispatch routines. * Moving TileStore and Sprites (and other supporting data structures) into a separate data bank. Needed just for size purposes and provides micro-optimizations by opening up the use of abs,y addressing modes. * Revamped multi-sprite rendering code to avoid the need to copy any masks and all stacked sprites can be drawn via a sequence of and [addrX],y; ora (addrX),y where addrX is set once per tile. * General streamlining to reduce overhead. This work was focused on removing as much per-tile overhead as possible. 
--- src/Defs.s | 82 ++++++- src/Render.s | 14 +- src/Sprite.s | 472 ++++++++++++++++++++++++++++---------- src/Sprite2.s | 478 ++++++++++++++++++++++++++++++--------- src/SpriteRender.s | 45 ++-- src/blitter/Tables.s | 18 +- src/blitter/Template.s | 2 +- src/blitter/Tiles.s | 416 +++++++++++++++++++++++++++++----- src/blitter/Tiles10000.s | 41 ++++ 9 files changed, 1244 insertions(+), 324 deletions(-) diff --git a/src/Defs.s b/src/Defs.s index 568d504..d2d8017 100644 --- a/src/Defs.s +++ b/src/Defs.s @@ -87,11 +87,11 @@ ActiveSpriteCount equ 102 BankLoad equ 104 TileStoreBankAndBank01 equ 106 TileStoreBankAndTileDataBank equ 108 -Next equ 110 +TileStoreBankDoubled equ 110 +Next equ 112 activeSpriteList equ 128 ; 32 bytes for the active sprite list (can persist across frames) -AppSpace equ 160 ; 16 bytes of space reserved for application use -tiletmp equ 178 ; 16 bytes of temp storage for the tile renderers +; tiletmp equ 178 ; 16 bytes of temp storage for the tile renderers blttmp equ 192 ; 32 bytes of local cache/scratch space for blitter tmp8 equ 224 ; another 16 bytes of temporary space to be used as scratch @@ -112,6 +112,36 @@ tmp5 equ 250 tmp6 equ 252 tmp7 equ 254 +; Defines for the second direct page (used in the tile blitters) + +sprite_ptr0 equ 0 ; Each tile can render up to 4 sprite blocks. The sprite +sprite_ptr1 equ 4 ; data and mask values live in different banks, but have a +sprite_ptr2 equ 8 ; parallel structure. The high word of each point is set to +sprite_ptr3 equ 12 ; the mask bank. With the Bank register set, both data and mask +; ; can be accessed through the same pointer, e.g. 
lda (sprite_ptr0) +; ; and [sprite_ptr0] + +tmp_sprite_data equ 16 ; 32 byte temporary buffer to build up sprite data values +tmp_sprite_mask equ 48 ; 32 byte temporary buffer to build up sprite mask values +tmp_tile_data equ 80 ; 32 byte temporary buffer to build up tile data values +tmp_tile_mask equ 112 ; 32 byte temporary buffer to build up tile mask values + +; Temporary direct page locations used by some of the complex tile renderers +_X_REG equ 144 +_Y_REG equ 146 +_T_PTR equ 148 ; Copy of the tile address pointer +_BASE_ADDR equ 150 ; Copy of BTableLow for this tile +_SPR_X_REG equ 152 ; Cache address of sprite plane source for a tile +_JTBL_CACHE equ 154 ; Cache the offset to the exception handler for a column +_OP_CACHE equ 156 ; Cache of a relevant operand / oeprator +_TILE_ID equ 158 ; Copy of the tile descriptor + +; Define free space the the application to use +FREE_SPACE_DP2 equ 160 + +; End direct page values + + DIRTY_BIT_BG0_X equ $0001 DIRTY_BIT_BG0_Y equ $0002 DIRTY_BIT_BG1_X equ $0004 @@ -153,17 +183,55 @@ SPRITE_8X8 equ $0000 SPRITE_VFLIP equ $0400 SPRITE_HFLIP equ $0200 -MAX_TILES equ {26*41} ; Number of tiles in the code field (41 columns * 26 rows) -TILE_STORE_SIZE equ {MAX_TILES*2} ; The tile store contains a tile descriptor in each slot +; Stamp storage parameters +VBUFF_STRIDE_BYTES equ 12*4 ; Each line has 4 slots of 16 pixels + 8 buffer pixels +VBUFF_TILE_ROW_BYTES equ 8*VBUFF_STRIDE_BYTES ; Each row is comprised of 8 lines +VBUFF_SPRITE_STEP equ VBUFF_TILE_ROW_BYTES*3 ; Allocate space fo 16 rows + 8 rows of buffer +VBUFF_SPRITE_START equ {8*VBUFF_TILE_ROW_BYTES}+4 ; Start at an offset so $0000 can be used as an empty value +VBUFF_SLOT_COUNT equ 48 ; Have space for this many stamps + +; Tile storage parameters +TILE_STORE_WIDTH equ 41 +TILE_STORE_HEIGHT equ 26 +MAX_TILES equ {26*41} ; Number of tiles in the code field (41 columns * 26 rows) +TILE_STORE_SIZE equ {MAX_TILES*2} ; The tile store contains a tile descriptor in each slot 
TS_TILE_ID equ TILE_STORE_SIZE*0 ; tile descriptor for this location TS_DIRTY equ TILE_STORE_SIZE*1 ; Flag. Used to prevent a tile from being queued multiple times per frame TS_SPRITE_FLAG equ TILE_STORE_SIZE*2 ; Bitfield of all sprites that intersect this tile. 0 if no sprites. TS_TILE_ADDR equ TILE_STORE_SIZE*3 ; cached value, the address of the tiledata for this tile TS_CODE_ADDR_LOW equ TILE_STORE_SIZE*4 ; const value, address of this tile in the code fields -TS_CODE_ADDR_HIGH equ TILE_STORE_SIZE*5 ; const value +TS_CODE_ADDR_HIGH equ TILE_STORE_SIZE*5 TS_WORD_OFFSET equ TILE_STORE_SIZE*6 ; const value, word offset value for this tile if LDA (dp),y instructions re used TS_BASE_ADDR equ TILE_STORE_SIZE*7 ; const value, because there are two rows of tiles per bank, this is set to $0000 ot $8000. TS_SCREEN_ADDR equ TILE_STORE_SIZE*8 ; cached value of on-screen location of tile. Used for DirtyRender. TS_VBUFF_ARRAY_ADDR equ TILE_STORE_SIZE*9 ; const value to an aligned 32-byte array starting at $8000 in TileStore bank -TS_TILE_DISP equ TILE_STORE_SIZE*10 ; derived from TS_TILE_ID to optimize tile dispatch in the Render function +TS_BASE_TILE_DISP equ TILE_STORE_SIZE*10 ; derived from TS_TILE_ID to optimize base (non-sprite) tile dispatch in the Render function +TS_DIRTY_TILE_DISP equ TILE_STORE_SIZE*11 ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function + +; 16 consecutive entries to provide directly addressable space for holding the VBUFF address for the +; sprites that may be rendered at a given tile. Given a tile store offset, X, the way to address the +; address for the Y'th sprite is +; +; lda TileStore+TS_VBUFF_0+{Y*TILE_STORE_SIZE},x +; +; Moving to the next tile can be done with a constant. 
+; +; lda TileStore+TS_VBUFF_0+{Y*TILE_STORE_SIZE}+{41*row}+{2*col},x + +TS_VBUFF_0 equ TILE_STORE_SIZE*12 +TS_VBUFF_1 equ TILE_STORE_SIZE*13 +TS_VBUFF_2 equ TILE_STORE_SIZE*14 +TS_VBUFF_3 equ TILE_STORE_SIZE*15 +TS_VBUFF_4 equ TILE_STORE_SIZE*16 +TS_VBUFF_5 equ TILE_STORE_SIZE*17 +TS_VBUFF_6 equ TILE_STORE_SIZE*18 +TS_VBUFF_7 equ TILE_STORE_SIZE*19 +TS_VBUFF_8 equ TILE_STORE_SIZE*20 +TS_VBUFF_9 equ TILE_STORE_SIZE*21 +TS_VBUFF_10 equ TILE_STORE_SIZE*22 +TS_VBUFF_11 equ TILE_STORE_SIZE*23 +TS_VBUFF_12 equ TILE_STORE_SIZE*24 ; slot N must equal TS_VBUFF_0 + N*TILE_STORE_SIZE, so slots 12-15 may not alias slots 10-11 +TS_VBUFF_13 equ TILE_STORE_SIZE*25 +TS_VBUFF_14 equ TILE_STORE_SIZE*26 +TS_VBUFF_15 equ TILE_STORE_SIZE*27 diff --git a/src/Render.s b/src/Render.s index b010f38..9e5e63f 100644 --- a/src/Render.s +++ b/src/Render.s @@ -195,15 +195,13 @@ _RenderDirtyTile pei TileStoreBankAndBank01 ; Special value that has the TileStore bank in LSB and $01 in MSB plb - txy - ldx TileStore+TS_TILE_DISP,y ; get the finalized tile descriptor - ldal DirtyTileProcs,x ; load and patch in the appropriate subroutine + lda TileStore+TS_DIRTY_TILE_DISP,x ; load and patch in the appropriate subroutine stal :tiledisp+1 - ldx TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated) - lda TileStore+TS_SCREEN_ADDR,y ; Get the on-screen address of this tile - tay + ldy TileStore+TS_SCREEN_ADDR,x ; Get the on-screen address of this tile + lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated); index with X because Y now holds the screen address + tax plb ; set the bank @@ -671,7 +669,7 @@ dirty_sprite stx spriteIdx+6 jmp BlitFourSprites -DirtyTileProcs dw _TBDirtyTile_00,_TBDirtyTile_0H,_TBDirtyTile_V0,_TBDirtyTile_VH +DirtyTileProcs dw _TBDirtyTile_00,_TBDirtyTile_0H,_TBDirtyTile_V0,_TBDirtyTile_VH ;DirtyTileSpriteProcs dw _TBDirtySpriteTile_00,_TBDirtySpriteTile_0H,_TBDirtySpriteTile_V0,_TBDirtySpriteTile_VH ; Blit tiles directly to the screen. 
@@ -998,4 +996,4 @@ BlitOneSprite _R0W0 cli pld - rts + rts diff --git a/src/Sprite.s b/src/Sprite.s index b50cd47..a3bcb89 100644 --- a/src/Sprite.s +++ b/src/Sprite.s @@ -21,35 +21,85 @@ InitSprites ; Clear values in the sprite array - ldx #{MAX_SPRITES-1}*2 -:loop3 stz _Sprites+TILE_STORE_ADDR_1,x - dex - dex - bpl :loop3 +; ldx #{MAX_SPRITES-1}*2 +;:loop3 stz _Sprites+TILE_STORE_ADDR_1,x +; dex +; dex +; bpl :loop3 ; Initialize the VBUFF address offsets in the data and mask banks for each sprite ; -; The internal grid 13 tiles wide where each sprite has a 2x2 interior square with a +; The internal grid is 12 tiles wide where each sprite has a 2x2 interior square with a ; tile-size buffer all around. We pre-render each sprite with all four vert/horz flips -VBUFF_STRIDE_BYTES equ 13*4 -VBUFF_TILE_ROW_BYTES equ 8*VBUFF_STRIDE_BYTES -VBUFF_SPRITE_STEP equ VBUFF_TILE_ROW_BYTES*3 -VBUFF_SPRITE_START equ {8*VBUFF_TILE_ROW_BYTES}+4 +; +; Eventually we should be able to have a separate rendering path for vertically flipped +; sprites and will be able to double the capacity of the stamp buffer ldx #0 lda #VBUFF_SPRITE_START clc -:loop4 sta _Sprites+VBUFF_ADDR,x +:loop4 sta VBuffAddrTable,x adc #VBUFF_SPRITE_STEP inx inx - cpx #MAX_SPRITES*2 + cpx #VBUFF_SLOT_COUNT*2 ; one word-sized table entry per stamp slot bcc :loop4 ; Precalculate some bank values jsr _CacheSpriteBanks rts +; Utility function to calculate the difference in tile positions between a sprite's current +; position and its previous position. This gets interesting because the number of tiles +; that a sprite covers can change based on the relative alignment of the sprite with the +; background. +; +; Ideally, we would be able to quickly calculate exactly which new background tiles a sprite +; intersects with and which ones it has left to minimize the number of TileStore entries +; that need to be updated. +; +; In the short-term, we just do an equality test which lets us know if the sprite is +; covering the exact same tiles. 
+ + +; Render a sprite stamp into the sprite buffer. Stamps exist independently of the sprites +; and each sprite references a specific stamp. This is necessary because it's common for a +; sprite to change its graphic as it's animating, but it is too costly to have to set up +; the stamp every time. So this allows users to create stamps in advance and then +; assign them to the sprites as needed. +; +; Currently, we support a maximum of 48 stamps. +; +; Input: +; A = sprite descriptor +; X = stamp slot +; Return: +; A = vbuff address to be assigned to Sprite[VBUFF_ADDR] +CreateSpriteStamp ENT + phb ; Preserve the caller's data bank + phk + plb ; Data bank = program bank while the local routine runs + jsr _CreateSpriteStamp + plb ; Restore the caller's data bank + rtl + +_CreateSpriteStamp + pha ; Save the descriptor + jsr _GetBaseTileAddr ; Get the address of the tile data + pha + + txa ; NOTE(review): relies on _GetBaseTileAddr preserving X (the stamp slot) -- confirm + asl ; Slot number * 2 = index into the word-sized VBuffAddrTable + tax + ldy VBuffAddrTable,x ; Load the address of the stamp slot + + plx ; Pop the tile address + pla ; Pop the sprite descriptor saved on entry + phy ; VBUFF_ADDR value + jsr _DrawSpriteStamp ; Render the sprite data and create a stamp + + pla ; Pop the VBUFF_ADDR and return + rts ; Add a new sprite to the rendering pipeline ; @@ -90,12 +140,12 @@ _AddSprite pla sta _Sprites+SPRITE_ID,x ; Keep a copy of the full descriptor - jsr _GetBaseTileAddr ; This applies the TILE_ID_MASK - sta _Sprites+TILE_DATA_OFFSET,x lda #SPRITE_STATUS_OCCUPIED+SPRITE_STATUS_ADDED sta _Sprites+SPRITE_STATUS,x + stz _Sprites+VBUFF_ADDR,x ; Clear the VBUFF address, just to initialize it + phy tya and #$00FF @@ -106,7 +156,7 @@ _AddSprite sta _Sprites+SPRITE_X,x ; X coordinate jsr _PrecalcAllSpriteInfo ; Cache sprite property values (simple stuff) - jsr _DrawSpriteSheet ; Render the sprite into internal space +; jsr _DrawSpriteSheet ; Render the sprite into internal space ; Mark the dirty bit to indicate that the active sprite list needs to be rebuilt in the next ; render call @@ -117,11 +167,161 @@ _AddSprite lda _SpriteBits,x ; Get the bit flag for this sprite slot tsb SpriteMap ; Mark it in the sprite map bit field - txa ; And return the sprite ID - 
clc ; Mark that the sprite was successfully added +; txa ; And return the sprite ID +; clc ; Mark that the sprite was successfully added rts +; Alternate implementation that uses the TS_COVERAGE_SIZE and TS_LOOKUP_INDEX properties to +; load the old values directly from the TileStoreLookup table, rather than caching them. +; This is more efficient, because the work in MarkDirtySprite is independent of the +; sprite size and, by inlining the _PushDirtyTile logic, we can save a fair amount of overhead +_ClearSpriteFromTileStore2 + ldx _Sprites+TS_COVERAGE_SIZE,y + jmp (csfts_tbl,x) +csfts_tbl dw csfts_1x1,csfts_1x2,csfts_1x3,csfts_out + dw csfts_2x1,csfts_2x2,csfts_2x3,csfts_out + dw csfts_3x1,csfts_3x2,csfts_3x3,csfts_out + dw csfts_out,csfts_out,csfts_out,csfts_out + +; Just a single value to clear and add to the dirty tile list +csfts_1x1 ldx _Sprites+TS_LOOKUP_INDEX,y + lda TileStoreLookup,x + tax + + lda TileStore+TS_SPRITE_FLAG,x + and _SpriteBitsNot,y + sta TileStore+TS_SPRITE_FLAG,x + + lda TileStore+TS_DIRTY,x + bne csfts_1x1_out + + inc ; any non-zero value will work + sta TileStore+TS_DIRTY,x ; and is 1 cycle faster than loading a constant value + + txa + ldx DirtyTileCount + sta DirtyTiles,x + inx + inx + stx DirtyTileCount +csfts_1x2 +csfts_1x3 +csfts_2x1 +csfts_2x3 +csfts_3x1 +csfts_3x2 +csfts_3x3 +csfts_1x1_out + rts + +; This is a more interesting case where the ability to batch things up starts to produce some +; efficiency gains +csfts_2x2 ldx _Sprites+TS_LOOKUP_INDEX,y ; Get the address of the old top-left corner + tay + ldx TileStoreLookup,y + + lda TileStore+TS_SPRITE_FLAG,x + and _SpriteBits + sta TileStore+TS_SPRITE_FLAG,x + + lda TileStore+TS_DIRTY,x + beq *+3 + phx + + + ldx TileStoreLookup+2,y + + lda TileStore+TS_SPRITE_FLAG,x + and _SpriteBits + sta TileStore+TS_SPRITE_FLAG,x + + lda TileStore+TS_DIRTY,x + beq *+3 + phx + + + ldx TileStoreLookup+TS_LOOKUP_SPAN,y + + lda TileStore+TS_SPRITE_FLAG,x + and _SpriteBits + sta 
TileStore+TS_SPRITE_FLAG,x + + lda TileStore+TS_DIRTY,x + beq *+3 + phx + + + ldx TileStoreLookup+TS_LOOKUP_SPAN+2,y + + lda TileStore+TS_SPRITE_FLAG,x + and _SpriteBits + sta TileStore+TS_SPRITE_FLAG,x + + ldy DirtyTileCount + + lda TileStore+TS_DIRTY,x + beq skip_2x2 + + txa + sta DirtyTiles,y + sta TileStore+TS_DIRTY,x + +skip_2x2 + pla + beq :done1 + sta DirtyTiles+2,x + tay + sta TileStore+TS_DIRTY,y + + pla + beq :done2 + sta DirtyTiles+4,x + tay + sta TileStore+TS_DIRTY,y + + pla + beq :done3 + sta DirtyTiles+6,x + tay + sta TileStore+TS_DIRTY,y + +; Maximum number of dirty tiles reached. Just fall through. + + pla + txa + adc #8 + sta DirtyTileCount + rts +:done3 + txa + adc #6 + sta DirtyTileCount + rts +:done2 + txa + adc #4 + sta DirtyTileCount + rts +:done1 + inx + inx + stx DirtyTileCount + + rts + + + + lda _SpriteBitsNot,y ; Cache the bit value for this sprite + + ldy TileStoreLookup,x ; Get the tile store offset + + + and TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + +csfts_out rts + ; Run through the list of tile store offsets that this sprite was last drawn into and mark ; those tiles as dirty. The largest number of tiles that a sprite could possibly cover is 20 ; (an unaligned 4x3 sprite), covering a 5x4 area of play field tiles. @@ -129,68 +329,68 @@ _AddSprite ; Y register = sprite record index _CSFTS_Out rts _ClearSpriteFromTileStore - ldx _Sprites+TILE_STORE_ADDR_1,y - beq _CSFTS_Out - ldal TileStore+TS_SPRITE_FLAG,x ; Clear the bit in the bit field. This seems wasteful, but - and _SpriteBitsNot,y ; there is no indexed form of TSB/TRB and caching the value in - stal TileStore+TS_SPRITE_FLAG,x ; a direct page location, only saves 1 or 2 cycles per and costs 10. - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_1,y +; beq _CSFTS_Out +; ldal TileStore+TS_SPRITE_FLAG,x ; Clear the bit in the bit field. 
This seems wasteful, but +; and _SpriteBitsNot,y ; there is no indexed form of TSB/TRB and caching the value in +; stal TileStore+TS_SPRITE_FLAG,x ; a direct page location, only saves 1 or 2 cycles per and costs 10. +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_2,y - beq _CSFTS_Out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_2,y +; beq _CSFTS_Out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_3,y - beq _CSFTS_Out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_3,y +; beq _CSFTS_Out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_4,y - beq _CSFTS_Out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_4,y +; beq _CSFTS_Out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_5,y - beq :out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_5,y +; beq :out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_6,y - beq :out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_6,y +; beq :out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_7,y - beq :out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - 
stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_7,y +; beq :out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_8,y - beq :out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_8,y +; beq :out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_9,y - beq :out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jmp _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_9,y +; beq :out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jmp _PushDirtyTileX :out rts @@ -383,7 +583,7 @@ _DoPhase2 RebuildSpriteArray lda SpriteMap ; Get the bit field -; Unrolled loop to get the sprite index values that coorespond to the set bit positions +; Unrolled loop to get the sprite index values that correspond to the set bit positions pea $FFFF ; end-of-list marker ]step equ 0 @@ -442,6 +642,20 @@ _RenderSprites ; OPTIMIZATION NOTE: Should check that the sprite actually changes position. If the screen scrolls ; by +X, but the sprite moves by -X (so it's relative position is unchanged), then ; it does NOT need to be marked as dirty. +; +; OPTIMIZATION NOTE: At this point, a decent chunk of per-tile time is spent updating the sprite flags +; for a given TileStore entry. When a sprite needs to be redrawn (such as when the +; screen scrolls), the code marks every tile the sprite was on as no longer occupied +; and then marks the occupied tiles. While simple, this is very redundant when the +; screen is scrolling slowly since it is very likely that the same sprite covers the +; exact same tiles. 
Each pair of markings requires 35 cycles, so a basic 16x16 sprite +; could save >300 cycles per frame. With 4 or 5 sprites on screen, the saving passes +; our 1% threshold for useful optimizations. +; +; Since we cache the tile location and effective sprite coverage, we need a fast +; way to compare the old and new positions and get a list of the new tiles the sprite +; occupies and old locations that it no longer covers. It's possible that just testing +; for equality would be the easiest win to know when we can skip everything. stz forceSpriteFlag lda StartX @@ -531,10 +745,15 @@ _CacheSpriteBanks ora #^TileStore sta TileStoreBankAndTileDataBank + lda #>TileStore + and #$FF00 + ora #^TileStore + sta TileStoreBankDoubled + rts ; This is 13 blocks wide -SPRITE_PLANE_SPAN equ VBUFF_STRIDE_BYTES ; 52 +SPRITE_PLANE_SPAN equ VBUFF_STRIDE_BYTES ; A = x coordinate ; Y = y coordinate @@ -574,11 +793,19 @@ SPRITE_PLANE_SPAN equ VBUFF_STRIDE_BYTES ; 52 ; it's tile information, or changing its position. ; ; X = sprite index +_stamp_step dw 0,12,24,36 _PrecalcAllSpriteInfo lda _Sprites+SPRITE_ID,x - and #$3E00 +; and #$3E00 xba - sta _Sprites+SPRITE_DISP,x ; use bits 9 through 13 for full dispatch + and #$0006 + tay + lda _Sprites+VBUFF_ADDR,x + clc + adc _stamp_step,y + sta _Sprites+SPRITE_DISP,x + +; Set the ; Set the sprite's width and height lda #4 @@ -673,19 +900,26 @@ RemoveSprite ENT rtl _RemoveSprite + cmp #MAX_SPRITES + bcc :ok + rts + +:ok + asl tax -_RemoveSpriteX lda _Sprites+SPRITE_STATUS,x ora #SPRITE_STATUS_REMOVED sta _Sprites+SPRITE_STATUS,x + rts ; Update the sprite's flags. We do not allow the size of a sprite to be changed. That requires ; the sprite to be removed and re-added. 
; ; A = Sprite ID -; X = Sprite Tile ID and Flags +; X = New Sprite Flags +; Y = New Sprite Stamp Address UpdateSprite ENT phb phk @@ -695,36 +929,34 @@ UpdateSprite ENT rtl _UpdateSprite - phx ; swap X/A to be more efficient - tax - pla - -_UpdateSpriteX - cpx #MAX_SPRITES*2 ; Make sure we're in bounds + cmp #MAX_SPRITES bcc :ok rts :ok -_UpdateSpriteXnc - cmp _Sprites+SPRITE_ID,x ; Don't do anything if there is no change - beq :no_sprite_change + phx ; Save X to swap into A + asl + tax + pla + cmp _Sprites+SPRITE_ID,x ; If the flags changed, need to redraw the sprite + bne :sprite_flag_change ; on the next frame + tya + cmp _Sprites+VBUFF_ADDR,x ; Did the stamp change? + bne :sprite_stamp_change + rts ; Nothing changed, so just return + +:sprite_flag_change sta _Sprites+SPRITE_ID,x ; Keep a copy of the full descriptor - jsr _GetBaseTileAddr ; This applies the TILE_ID_MASK - cmp _Sprites+TILE_DATA_OFFSET,x - beq :no_tile_change - sta _Sprites+TILE_DATA_OFFSET,x + tya +:sprite_stamp_change + sta _Sprites+VBUFF_ADDR,x ; Just save this to stay in sync - jsr _PrecalcAllSpriteInfo ; Cache stuff - jsr _DrawSpriteSheet ; Render the sprite into internal space if the tile id has changed - -:no_tile_change - lda _Sprites+SPRITE_STATUS,x + lda _Sprites+SPRITE_STATUS,x ; Mark this sprite as updated ora #SPRITE_STATUS_UPDATED sta _Sprites+SPRITE_STATUS,x -:no_sprite_change - rts + jmp _PrecalcAllSpriteInfo ; Cache stuff and return ; Move a sprite to a new location. 
If the tile ID of the sprite needs to be changed, then ; a full remove/add cycle needs to happen @@ -741,17 +973,16 @@ MoveSprite ENT rtl _MoveSprite - phx ; swap X/A to be more efficient - tax - pla - -_MoveSpriteX - cpx #MAX_SPRITES*2 ; Make sure we're in bounds + cmp #MAX_SPRITES bcc :ok rts :ok -_MoveSpriteXnc + phx ; Save X to swap into A + asl + tax + pla + cmp _Sprites+SPRITE_X,x bne :changed1 sta _Sprites+SPRITE_X,x ; Update the X coordinate @@ -766,13 +997,11 @@ _MoveSpriteXnc :changed2 sta _Sprites+SPRITE_Y,x ; Update the Y coordinate - jsr _PrecalcAllSpriteInfo ; Can be specialized to only update (x,y) values - lda _Sprites+SPRITE_STATUS,x ora #SPRITE_STATUS_MOVED sta _Sprites+SPRITE_STATUS,x - rts + jmp _PrecalcAllSpriteInfo ; Can be specialized to only update (x,y) values ; Sprite data structures. We cache quite a few pieces of information about the sprite ; to make calculations faster, so this is hidden from the caller. @@ -797,22 +1026,25 @@ SPRITE_STATUS_UPDATED equ $0004 ; Sprite's non-position attributes were SPRITE_STATUS_REMOVED equ $0008 ; Sprite has been removed. 
SPRITE_STATUS equ {MAX_SPRITES*0} -TILE_DATA_OFFSET equ {MAX_SPRITES*2} -VBUFF_ADDR equ {MAX_SPRITES*4} ; Fixed address in sprite/mask banks +; TILE_DATA_OFFSET equ {MAX_SPRITES*2} +VBUFF_ADDR equ {MAX_SPRITES*4} ; Base address of the sprite's stamp in the data/mask banks SPRITE_ID equ {MAX_SPRITES*6} SPRITE_X equ {MAX_SPRITES*8} SPRITE_Y equ {MAX_SPRITES*10} -TILE_STORE_ADDR_1 equ {MAX_SPRITES*12} -TILE_STORE_ADDR_2 equ {MAX_SPRITES*14} -TILE_STORE_ADDR_3 equ {MAX_SPRITES*16} -TILE_STORE_ADDR_4 equ {MAX_SPRITES*18} -TILE_STORE_ADDR_5 equ {MAX_SPRITES*20} -TILE_STORE_ADDR_6 equ {MAX_SPRITES*22} -TILE_STORE_ADDR_7 equ {MAX_SPRITES*24} -TILE_STORE_ADDR_8 equ {MAX_SPRITES*26} -TILE_STORE_ADDR_9 equ {MAX_SPRITES*28} -TILE_STORE_ADDR_10 equ {MAX_SPRITES*30} -SPRITE_DISP equ {MAX_SPRITES*32} ; pre-calculated index for jmp (abs,x) based on sprite size +; TILE_STORE_ADDR_1 equ {MAX_SPRITES*12} +TS_LOOKUP_INDEX equ {MAX_SPRITES*12} ; The index into the TileStoreLookup table corresponding to the top-left corner of the sprite +; TILE_STORE_ADDR_2 equ {MAX_SPRITES*14} +TS_COVERAGE_SIZE equ {MAX_SPRITES*14} ; Index into the lookup table of how many TileStore tiles are covered by this sprite +;TILE_STORE_ADDR_3 equ {MAX_SPRITES*16} +TS_VBUFF_BASE_ADDR equ {MAX_SPRITES*16} ; Fixed address of the TS_VBUFF_X memory locations +;TILE_STORE_ADDR_4 equ {MAX_SPRITES*18} +;TILE_STORE_ADDR_5 equ {MAX_SPRITES*20} +;TILE_STORE_ADDR_6 equ {MAX_SPRITES*22} +;TILE_STORE_ADDR_7 equ {MAX_SPRITES*24} +;TILE_STORE_ADDR_8 equ {MAX_SPRITES*26} +;TILE_STORE_ADDR_9 equ {MAX_SPRITES*28} +;TILE_STORE_ADDR_10 equ {MAX_SPRITES*30} +SPRITE_DISP equ {MAX_SPRITES*32} ; cached address of the specific stamp based on flags SPRITE_CLIP_LEFT equ {MAX_SPRITES*34} SPRITE_CLIP_RIGHT equ {MAX_SPRITES*36} SPRITE_CLIP_TOP equ {MAX_SPRITES*38} diff --git a/src/Sprite2.s b/src/Sprite2.s index 82a6dd6..9d83bea 100644 --- a/src/Sprite2.s +++ b/src/Sprite2.s @@ -81,105 +81,206 @@ _LocalToTileStore ; ... 
; ; For the Y-coordinate, we just use "mod 8" instead of "mod 4" -mdsOut rts +mdsOut2 + lda #6 ; Pick a value for a 0x0 tile sprite + sta _Sprites+TS_COVERAGE_SIZE,y ; zero the list of tile store addresses + rts + _MarkDirtySprite - - lda #0 - sta _Sprites+TILE_STORE_ADDR_1,y ; Clear this sprite's dirty tile list in case of an early exit - lda _SpriteBits,y ; Cache its bit flag to mark in the tile slots - sta SpriteBit - lda _Sprites+IS_OFF_SCREEN,y ; Check if the sprite is visible in the playfield - bne mdsOut + bne mdsOut2 -; At this point we know that we have to update the tiles that overlap the sprite's rectangle defined -; by (Top, Left), (Bottom, Right). First, calculate the row and column in the TileStore that -; encloses the top-left on-screen corner of the sprite +; Add the first visible row of the sprite to the Y-scroll offset to find the first line in the +; code field that needs to be drawn. The range of values is 0 to 199+207 = [0, 406] clc lda _Sprites+SPRITE_CLIP_TOP,y adc StartYMod208 ; Adjust for the scroll offset - tax ; cache - cmp #208 ; check if we went too far positive - bcc *+5 - sbc #208 + pha ; Cache + and #$FFF8 ; mask first to ensure LSR will clear the carry lsr - lsr ; This is the row in the Tile Store for top-left corner of the sprite - and #$FFFE ; Store the value pre-multiplied by 2 for indexing in the :mark_R_C routines + lsr + tax + lda TileStoreLookupYTable,x ; Even numbers from [0, 100] (50 elements) sta RowTop + pla -; Next, calculate how many tiles are covered by the sprite. This uses the table at the top of this function, but -; the idea is that for every increment of StartX or StartY, that can shift the sprite into the next tile, up to -; a maximum of mod 4 / mod 8. So the effective width of a sprite is (((StartX + Clip_Left) mod 4) + Clip_Width) / 4 +; Get the position of the top edge within the tile and then add it to the sprite's height +; to calculate the number of tiles that are overlapped. 
We use the actual width and height +; values here so small sprites (like 4x4 bullets) only force an update to the actual tiles +; that are intersected, rather than assuming an 8x8 sprite always takes up that amount of +; space. txa and #$0007 - sta tmp0 ; save to adjust sprite origin + tax ; cache again. This is a bit faster than recalculating - lda _Sprites+SPRITE_CLIP_HEIGHT,y ; Nominal value between 0 and 16+7 = 23 = 10111 + adc _Sprites+SPRITE_CLIP_HEIGHT,y ; Nominal value between 0 and 16+7 = 23 = 10111 dec - clc - adc tmp0 and #$0018 sta AreaIndex -; Repeat to get the same information for the columns + txa + asl + tax + lda :vbuff_mul,x + sta tmp0 + +; Add the horizontal position to the horizontal offset to find the first column in the +; code field that needs to be drawn. The range of values is 0 to 159+163 = [0, 322] clc lda _Sprites+SPRITE_CLIP_LEFT,y adc StartXMod164 tax - cmp #164 - bcc *+5 - sbc #164 + and #$FFFC lsr - and #$FFFE ; Same pre-multiply by 2 for later - sta ColLeft +; sta ColLeft ; Even numbers from [0, 160] (80 elements) + adc RowTop + sta _Sprites+TS_LOOKUP_INDEX,y ; This is the index into the TileStoreLookup table + + +; Calculate the final address of the sprite data in the stamp buffer. We have to move earlier +; in the buffer based on the horizontal offset and move up for each vertical offset. txa and #$0003 - sta tmp1 ; save to adjust sprite origin + tax - lda _Sprites+SPRITE_CLIP_WIDTH,y ; max width = 8 = 0x08 + adc tmp0 ; add to the vertical offset + +; Subtract this value from the SPRITE_DISP address + + eor #$FFFF ; A = -X - 1 + sec ; C = 1 + adc _Sprites+SPRITE_DISP,y ; A = SPRITE_DISP + (-X - 1) + 1 = SPRITE_DISP - X + + sta VBuffOrigin ; this is the final (adjusted) origin for this sprite + +; Load the base address of the appropriate TS_VBUFF_? offset for this sprite index and +; store it as an indirect address. + + lda _Sprites+TS_VBUFF_BASE_ADDR,y + sta tmp0 + +; We know the starting corner of the TileStore. 
Now, we need to figure out now many tiles +; the sprite covers. This is a function of the sprite's width and height and the specific +; location of the upper-left corner of the sprite within the corner tile. + + txa + adc _Sprites+SPRITE_CLIP_WIDTH,y ; max width = 8 = 0x08 dec - clc - adc tmp1 + and #$000C lsr ; max value = 4 = 0x04 - and #$0006 - ora AreaIndex - sta AreaIndex + ora AreaIndex ; merge into the area index + +; No need to copy the TileStore addresses into the Sprite's TILE_STORE_ADDR values. Just +; hold a copy of the corner offset into the lookup table and the sprite's size in tiles. +; Then, when we need to erase we can just lookup the values in the TileStoreLookup table. + + sta _Sprites+TS_COVERAGE_SIZE,y + tax +; lda TileStoreBaseIndex +; sta _Sprites+TS_LOOKUP_INDEX,y + +; Jump to the appropriate marking routine + + jmp (:mark,x) + +mdsOut rts +;_MarkDirtySprite +; +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_1,y ; Clear this sprite's dirty tile list in case of an early exit +; lda _SpriteBits,y ; Cache its bit flag to mark in the tile slots +; sta SpriteBit + +; lda _Sprites+IS_OFF_SCREEN,y ; Check if the sprite is visible in the playfield +; bne mdsOut + +; At this point we know that we have to update the tiles that overlap the sprite's rectangle defined +; by (Top, Left), (Bottom, Right). First, calculate the row and column in the TileStore that +; encloses the top-left on-screen corner of the sprite + +; clc +; lda _Sprites+SPRITE_CLIP_TOP,y +; adc StartYMod208 ; Adjust for the scroll offset +; tax ; cache +; cmp #208 ; check if we went too far positive +; bcc *+5 +; sbc #208 +; lsr +; lsr ; This is the row in the Tile Store for top-left corner of the sprite +; and #$FFFE ; Store the value pre-multiplied by 2 for indexing in the :mark_R_C routines +; sta RowTop + +; Next, calculate how many tiles are covered by the sprite. 
This uses the table at the top of this function, but +; the idea is that for every increment of StartX or StartY, that can shift the sprite into the next tile, up to +; a maximum of mod 4 / mod 8. So the effective width of a sprite is (((StartX + Clip_Left) mod 4) + Clip_Width) / 4 + +; txa +; and #$0007 +; sta tmp0 ; save to adjust sprite origin + +; lda _Sprites+SPRITE_CLIP_HEIGHT,y ; Nominal value between 0 and 16+7 = 23 = 10111 +; dec +; clc +; adc tmp0 +; and #$0018 +; sta AreaIndex + +; Repeat to get the same information for the columns + +; clc +; lda _Sprites+SPRITE_CLIP_LEFT,y +; adc StartXMod164 +; tax +; cmp #164 +; bcc *+5 +; sbc #164 +; lsr +; and #$FFFE ; Same pre-multiply by 2 for later +; sta ColLeft + +; txa +; and #$0003 +; sta tmp1 ; save to adjust sprite origin; + +; lda _Sprites+SPRITE_CLIP_WIDTH,y ; max width = 8 = 0x08 +; dec +; clc +; adc tmp1 +; lsr ; max value = 4 = 0x04 +; and #$0006 +; ora AreaIndex +; sta AreaIndex ; Calculate the modified origin address for the sprite. We need to look at the sprite flip bits ; to determine which of the four sprite stamps is the correct one to use. Then, offset that origin ; based on the (x, y) and (startx, starty) positions. 
- lda _Sprites+SPRITE_DISP,y ; Each stamp is 12 bytes - and #$0006 - tax - lda :stamp_step,x - clc - adc _Sprites+VBUFF_ADDR,y - sec - sbc tmp1 ; Subtract the horizontal within-tile displacement - asl tmp0 - ldx tmp0 - sec - sbc :vbuff_mul,x - sta VBuffOrigin - lda #^TileStore - sta tmp1 +; lda _Sprites+SPRITE_DISP,y ; Get the sprite's base display address +; sec +; sbc tmp1 ; Subtract the horizontal within-tile displacement +; asl tmp0 +; ldx tmp0 +; sec +; sbc :vbuff_mul,x +; sta VBuffOrigin +; lda #^TileStore +; sta tmp1 ; Dispatch to cover the tiles - ldx AreaIndex - jmp (:mark,x) +; ldx AreaIndex +; jmp (:mark,x) :mark dw :mark1x1,:mark1x2,:mark1x3,mdsOut dw :mark2x1,:mark2x2,:mark2x3,mdsOut dw :mark3x1,:mark3x2,:mark3x3,mdsOut dw mdsOut,mdsOut,mdsOut,mdsOut -:stamp_step dw 0,12,24,36 :vbuff_mul dw 0,52,104,156,208,260,312,364 + ; Dispatch to the calculated sizing ; Begin a list of subroutines to cover all of the valid sprite size combinations. This is all unrolled code, @@ -191,11 +292,170 @@ _MarkDirtySprite ; ; There *might* be some speed gained by pushing a list of :mark_R_C addressed onto the stack in the clipping routing ; and dispatching that way, but probably not... + +:mark1x1_v2 + + tax ; Get the TileStoreBaseIndex + + ldy TileStoreLookup,x ; Get the offset into the TileStore for this tile + + lda SpriteBit ; Mark this tile as having this sprite + ora TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + + lda VBuffOrigin + sta (tmp0),y ; Fill in the slot for this sprite on this tile + + lda TileStore+TS_DIRTY,y ; If this tile is not yet marked dirty, mark it + bne exit1x1 + + ldx DirtyTileCount + tya + sta DirtyTiles,x + sta TileStore+TS_DIRTY,y + inx + inx + stx DirtyTileCount + +exit1x1 + rts + +:mark2x2_v2 + +; Put the TileStoreBaseIndex into the X-register + + tax + +; Push a sentinel value onto the stack that we use to inline all of the dirty tile array updates faster +; at the end of this routine.
+ + pea #$0000 + +; Now, move through each of the TileStore locations and set the necessary fields. We have to do the +; following +; +; 1. Set the marker bit in the TS_SPRITE_FLAG so the renderer knows which vbuff addresses to load +; 2. Set the address of the sprite stamp graphics that are used. This can change every frame. +; 3. Mark the tile as dirty and put it on the list if it was marked dirty for the first time. + + ldy TileStoreLookup,x ; Get the offset into the TileStore for this tile + + lda SpriteBit ; Mark this tile as having this sprite + ora TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + + lda TileStore+TS_DIRTY,y ; If this tile is not yet marked dirty, queue it up + bne *+3 + phy + + lda VBuffOrigin + sta (tmp0),y ; Fill in the slot for this sprite on this tile + +; Move to the next tile + + ldy TileStoreLookup+2,x + + adc #4 ; Weave in the VBuffOrigin values to save a load every + sta (tmp0),y ; other iteration + + lda SpriteBit + ora TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + + lda TileStore+TS_DIRTY,y + bne *+3 + phy + +; Third tile + + ldy TileStoreLookup+TS_LOOKUP_SPAN,x + + lda SpriteBit + ora TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + + lda TileStore+TS_DIRTY,y + bne *+3 + phy + + lda VBuffOrigin + adc #SPRITE_PLANE_SPAN + sta (tmp0),y + +; Fourth tile + + ldy TileStoreLookup+TS_LOOKUP_SPAN+2,x + + adc #4+SPRITE_PLANE_SPAN + sta (tmp0),y + + lda SpriteBit + ora TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + +; Lift this above the last TS_DIRTY check + + ldx DirtyTileCount + +; Check the TS_DIRTY flag for this tile. 
We handle it immediately, if needed + + lda TileStore+TS_DIRTY,y + bne skip + +; Now, update the Dirty Tile array + + tya + sta DirtyTiles,x + sta TileStore+TS_DIRTY,y + +skip + pla + beq :done1 + sta DirtyTiles+2,x + tay + sta TileStore+TS_DIRTY,y + + pla + beq :done2 + sta DirtyTiles+4,x + tay + sta TileStore+TS_DIRTY,y + + pla + beq :done3 + sta DirtyTiles+6,x + tay + sta TileStore+TS_DIRTY,y + +; Maximum number of dirty tiles reached. Just fall through. + + pla + txa + adc #8 + sta DirtyTileCount + rts +:done3 + txa + adc #6 + sta DirtyTileCount + rts +:done2 + txa + adc #4 + sta DirtyTileCount + rts +:done1 + inx + inx + stx DirtyTileCount + + rts + :mark1x1 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_1,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_2,y rts ; NOTE: If we rework the _PushDirtyTile to use the Y register instead of the X register, we can @@ -209,112 +469,112 @@ _MarkDirtySprite :mark1x2 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_0_1 - sta _Sprites+TILE_STORE_ADDR_2,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_2,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_3,y rts :mark1x3 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_0_1 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_0_2 - sta _Sprites+TILE_STORE_ADDR_3,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_4,y +; sta _Sprites+TILE_STORE_ADDR_3,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_4,y rts :mark2x1 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_2,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_2,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_3,y rts :mark2x2 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_0_1 
- sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_3,y jsr :mark_1_1 - sta _Sprites+TILE_STORE_ADDR_4,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_5,y +; sta _Sprites+TILE_STORE_ADDR_4,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_5,y rts :mark2x3 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_0_1 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_0_2 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_3,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_4,y +; sta _Sprites+TILE_STORE_ADDR_4,y jsr :mark_1_1 - sta _Sprites+TILE_STORE_ADDR_5,y +; sta _Sprites+TILE_STORE_ADDR_5,y jsr :mark_1_2 - sta _Sprites+TILE_STORE_ADDR_6,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_7,y +; sta _Sprites+TILE_STORE_ADDR_6,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_7,y rts :mark3x1 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_2_0 - sta _Sprites+TILE_STORE_ADDR_3,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_4,y +; sta _Sprites+TILE_STORE_ADDR_3,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_4,y rts :mark3x2 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_2_0 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_3,y jsr :mark_0_1 - sta _Sprites+TILE_STORE_ADDR_4,y +; sta _Sprites+TILE_STORE_ADDR_4,y jsr :mark_1_1 - sta _Sprites+TILE_STORE_ADDR_5,y +; sta _Sprites+TILE_STORE_ADDR_5,y jsr :mark_2_1 - sta _Sprites+TILE_STORE_ADDR_6,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_7,y +; sta _Sprites+TILE_STORE_ADDR_6,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_7,y rts :mark3x3 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta 
_Sprites+TILE_STORE_ADDR_1,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_2_0 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_3,y jsr :mark_0_1 - sta _Sprites+TILE_STORE_ADDR_4,y +; sta _Sprites+TILE_STORE_ADDR_4,y jsr :mark_1_1 - sta _Sprites+TILE_STORE_ADDR_5,y +; sta _Sprites+TILE_STORE_ADDR_5,y jsr :mark_2_1 - sta _Sprites+TILE_STORE_ADDR_6,y +; sta _Sprites+TILE_STORE_ADDR_6,y jsr :mark_0_2 - sta _Sprites+TILE_STORE_ADDR_7,y +; sta _Sprites+TILE_STORE_ADDR_7,y jsr :mark_1_2 - sta _Sprites+TILE_STORE_ADDR_8,y +; sta _Sprites+TILE_STORE_ADDR_8,y jsr :mark_2_2 - sta _Sprites+TILE_STORE_ADDR_9,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_10,y +; sta _Sprites+TILE_STORE_ADDR_9,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_10,y rts ; Begin List of subroutines to mark each tile offset diff --git a/src/SpriteRender.s b/src/SpriteRender.s index 39041fd..4eed2a3 100644 --- a/src/SpriteRender.s +++ b/src/SpriteRender.s @@ -1,23 +1,42 @@ +; Alternate entry point that takes arguments in registers instead of using a _Sprite +; record +; +; Y = VBUFF address +; X = Tile Data address +; A = Sprite Flags +_DrawSpriteStamp + sty tmp1 + stx tmp2 + and #DISP_MASK ; dispatch to all of the different orientations + sta tmp3 + jmp _DSSCommon + ; Function to render a sprite from a sprite definition into the internal data buffers ; ; X = sprite index -_DrawSpriteSheet +; _DrawSpriteSheet DISP_VFLIP equ $0004 ; hard code these because they are internal values DISP_HFLIP equ $0002 DISP_MASK equ $0018 ; Isolate the size bits - phx - - lda _Sprites+VBUFF_ADDR,x - sta tmp1 - - lda _Sprites+TILE_DATA_OFFSET,x - sta tmp2 - - lda _Sprites+SPRITE_DISP,x - and #DISP_MASK ; dispatch to all of the different orientations - sta tmp3 +; phx +; +; lda _Sprites+VBUFF_ADDR,x +; sta tmp1 +; +; lda _Sprites+TILE_DATA_OFFSET,x +; sta tmp2 +; +; lda _Sprites+SPRITE_DISP,x +; and #DISP_MASK ; dispatch to all of the different orientations +; sta 
tmp3 +; +; jsr _DSSCommon +; +; plx +; rts +_DSSCommon ; Set bank phb pea #^tiledata ; Set the bank to the tile data @@ -58,8 +77,6 @@ DISP_MASK equ $0018 ; Isolate the size bits ; Restore bank plb ; pop extra byte plb - - plx rts ; ; X = _Sprites array offset diff --git a/src/blitter/Tables.s b/src/blitter/Tables.s index 8493199..44bf70c 100644 --- a/src/blitter/Tables.s +++ b/src/blitter/Tables.s @@ -254,8 +254,16 @@ NextCol ; A double-sized table of lookup values. This is basically the cross-product of TileStoreYTable and ; NextCol. If is double-width and double-height so that, if we know a tile's address position -; of (X + 41*Y), then any relative tile store address can be looked up by adding a constan value. -;TileStore2DLookup ds {26*41*2}*4 +; of (X + 41*Y), then any relative tile store address can be looked up by adding a constant value. +; +; 50 rows by 80 columns + 2 extra rows and columns +TS_LOOKUP_WIDTH equ 80 +TS_LOOKUP_HEIGHT equ 50 +TS_LOOKUP_SPAN equ {TS_LOOKUP_WIDTH+2} +TS_LOOKUP_ROWS equ {TS_LOOKUP_HEIGHT+2} + +TileStoreLookupYTable ds {TS_LOOKUP_HEIGHT*2} +TileStoreLookup ds {TS_LOOKUP_SPAN*TS_LOOKUP_ROWS*2} ; This is a double-length table that holds the right-edge adresses of the playfield on the physical ; screen. At most, it needs to hold 200 addresses for a full height playfield. It is double-length @@ -296,7 +304,5 @@ BG1YOffsetTable lup 26 dw 1,1,1,2,2,2,2,2,1,1,1,0,0,0,0,0 --^ - - - - +; Table of base VBUFF addresses for each sprite stamp slot +VBuffAddrTable ds 2*VBUFF_SLOT_COUNT \ No newline at end of file diff --git a/src/blitter/Template.s b/src/blitter/Template.s index ac7662b..82446a8 100644 --- a/src/blitter/Template.s +++ b/src/blitter/Template.s @@ -160,7 +160,7 @@ SetScreenRect sty ScreenHeight ; Save the screen height and ; Generalized routine that calculates the on-screen address of the tiles and takes the ; StartX and StartY values into consideration. 
This routine really exists to support -; the dirty tile rendering mode and the tiles *must* be aligned with the playfield. +; the dirty tile rendering mode and the tiles *must* be aligned with the playfield. ; That is, StartX % 4 == 0 and StartY % 8 == 0. If these conditions are not met, then ; screen will not render correctly. _RecalcTileScreenAddrs diff --git a/src/blitter/Tiles.s b/src/blitter/Tiles.s index ba76c3f..45c45ac 100644 --- a/src/blitter/Tiles.s +++ b/src/blitter/Tiles.s @@ -41,17 +41,6 @@ TILE_CTRL_MASK equ $FE00 TILE_PROC_MASK equ $F800 ; Select tile proc for rendering -; Temporary direct page locatinos used by some of the complex tile renderers - -_X_REG equ tiletmp -_Y_REG equ tiletmp+2 -_T_PTR equ tiletmp+4 ; Copy of the tile address pointer -_BASE_ADDR equ tiletmp+6 ; Copy of BTableLow for this tile -_SPR_X_REG equ tiletmp+8 ; Cache address of sprite plane source for a tile -_JTBL_CACHE equ tiletmp+10 ; Cache the offset to the exception handler for a column -_OP_CACHE equ tiletmp+12 ; Cache of a relevant operand / oeprator -_TILE_ID equ tiletmp+14 ; Copy of the tile descriptor - ; Low-level function to take a tile descriptor and return the address in the tiledata ; bank. This is not too useful in the fast-path because the fast-path does more ; incremental calculations, but it is handy for other utility functions @@ -113,56 +102,36 @@ _RenderTileBG1 ; Given an address to a Tile Store record, dispatch to the appropriate tile renderer. The Tile ; Store record contains all of the low-level information that's needed to call the renderer. ; +; This routine sets the direct page register to the second page since we use that space to +; build and cache tile and sprite data, when necessary ; Y = address of tile _RenderTile2 - pea >TileStore ; Need that addressing flexibility here. Caller is responsible for restoring bank reg - plb - plb - txy ; We can be better than this.... 
+ lda TileStore+TS_SPRITE_FLAG,x ; This is a bitfield of all the sprites that intersect this tile, only care if non-zero or not + bne do_dirty_sprite - lda TileStore+TS_TILE_ID,y ; build the finalized tile descriptor - ldx TileStore+TS_SPRITE_FLAG,y ; This is a bitfield of all the sprites that intersect this tile, only care if non-zero or not - beq :nosprite +; Handle the non-sprite tile blit -; txa -; jsr BuildActiveSpriteArray ; Build the max 4 array of active sprites for this tile -; sta ActiveSpriteCount + sep #$20 + lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line + pha ; and put on the stack for later - lda TileStore+TS_VBUFF_ARRAY_ADDR,y ; Scratch space - sta _SPR_X_REG - phy - ldy spriteIdx - lda (_SPR_X_REG),y - sta _SPR_X_REG - ply + lda TileStore+TS_BASE_ADDR+1,x ; load the base address of the code field ($0000 or $8000) + sta _BASE_ADDR+1 ; so we can get by just copying the high byte + rep #$20 - lda TileStore+TS_TILE_ID,y - ora #TILE_SPRITE_BIT -; ldx TileStore+TS_VBUFF_ARRAY_ADDR,y -; stx _SPR_X_REG - -:nosprite - sta _TILE_ID ; Some tile blitters need to get the tile descriptor - and #TILE_CTRL_MASK - xba - tax - ldal TileProcs,x ; load and patch in the appropriate subroutine + lda TileStore+TS_BASE_TILE_DISP,x ; Get the address of the renderer for this tile stal :tiledisp+1 - ldx TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated) + lda TileStore+TS_TILE_ID,x + sta _TILE_ID ; Some tile blitters need to get the tile descriptor - sep #$20 ; load the bank of the target code field line - lda TileStore+TS_CODE_ADDR_HIGH,y + ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field + lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated) pha - rep #$20 - lda TileStore+TS_CODE_ADDR_LOW,y ; load the address of the code field - pha - lda TileStore+TS_BASE_ADDR,y ; load the base address of the code field - sta _BASE_ADDR - lda TileStore+TS_WORD_OFFSET,y - ply 
- plb ; set the bank + lda TileStore+TS_WORD_OFFSET,x + plx + plb ; set the bank to the code field that will be updated ; B is set to the correct code field bank ; A is set to the tile word offset (0 through 80 in steps of 4) @@ -171,6 +140,194 @@ _RenderTile2 :tiledisp jmp $0000 ; render the tile +; Let's make a macro helper for the bit test tree +; dobit src_offset,dest,next_target,end_target +dobit MAC + beq last_bit + ldx: ]1,y + stx ]2 + jmp ]3 +last_bit ldx: ]1,y + stx ]2 + jmp ]4 + EOM + +; The sprite code is just responsible for quickly copying all of the sprite data +; into the direct page temp area. + +do_dirty_sprite + pei TileStoreBankAndTileDataBank ; Special value that has the TileStore bank in LSB and TileData bank in MSB + plb + +; Cache a couple of values into the direct page, but preserve the Accumulator + + ldy TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated) + sty tileAddr + +; This is very similar to the code in the dirty tile renderer, but we can't reuse it +; because that code draws directly to the graphics screen, and this code draws +; to a temporary buffer that has a different stride.
+ + ldy TileStore+TS_VBUFF_ARRAY_ADDR,x ; base address of the VBUFF sprite address array for this tile + + lsr + bcc :loop_0_bit_1 + dobit $0000;sprite_ptr0;:loop_1_bit_1;CopyOneSprite + +:loop_0_bit_1 lsr + bcc :loop_0_bit_2 + dobit $0002;sprite_ptr0;:loop_1_bit_2;CopyOneSprite + +:loop_0_bit_2 lsr + bcc :loop_0_bit_3 + dobit $0004;sprite_ptr0;:loop_1_bit_3;CopyOneSprite + +:loop_0_bit_3 lsr + bcc :loop_0_bit_4 + dobit $0006;sprite_ptr0;:loop_1_bit_4;CopyOneSprite + +:loop_0_bit_4 lsr + bcc :loop_0_bit_5 + dobit $0008;sprite_ptr0;:loop_1_bit_5;CopyOneSprite + +:loop_0_bit_5 lsr + bcc :loop_0_bit_6 + dobit $000A;sprite_ptr0;:loop_1_bit_6;CopyOneSprite + +:loop_0_bit_6 lsr + bcc :loop_0_bit_7 + dobit $000C;sprite_ptr0;:loop_1_bit_7;CopyOneSprite + +:loop_0_bit_7 lsr + bcc :loop_0_bit_8 + dobit $000E;sprite_ptr0;:loop_1_bit_8;CopyOneSprite + +:loop_0_bit_8 lsr + bcc :loop_0_bit_9 + dobit $0010;sprite_ptr0;:loop_1_bit_9;CopyOneSprite + +:loop_0_bit_9 lsr + bcc :loop_0_bit_10 + ldx: $0012,y + stx spriteIdx + cmp #0 + jne :loop_1_bit_10 + jmp CopyOneSprite + +:loop_0_bit_10 lsr + bcc :loop_0_bit_11 + dobit $0014;sprite_ptr0;:loop_1_bit_11;CopyOneSprite + +:loop_0_bit_11 lsr + bcc :loop_0_bit_12 + dobit $0016;sprite_ptr0;:loop_1_bit_12;CopyOneSprite + +:loop_0_bit_12 lsr + bcc :loop_0_bit_13 + dobit $0018;sprite_ptr0;:loop_1_bit_13;CopyOneSprite + +:loop_0_bit_13 lsr + bcc :loop_0_bit_14 + dobit $001A;sprite_ptr0;:loop_1_bit_14;CopyOneSprite + +:loop_0_bit_14 lsr + bcc :loop_0_bit_15 + dobit $001C;sprite_ptr0;:loop_1_bit_15;CopyOneSprite + +:loop_0_bit_15 ldx: $001E,y + stx spriteIdx + jmp CopyOneSprite + +; We can optimize later, for now just copy the sprite data and mask into its own +; direct page buffer and combine with the tile data later + +; We set up direct page pointers to the mask bank and use the bank register for the +; data. 
+CopyFourSpritesAbove + +; Copy three sprites into a temporary direct page buffer +LDA_IL equ $A7 ; lda [dp] +LDA_ILY equ $B7 ; lda [dp],y +AND_IL equ $27 ; and [dp] +AND_ILY equ $37 ; and [dp],y + +CopyThreeSprites +]line equ 0 + lup 8 + ldy #]line*SPRITE_PLANE_SPAN + lda (spriteIdx+8),y + db AND_ILY,spriteIdx+4 ; Can't use long indirect inside LUP because of ']' + ora (spriteIdx+4),y + db AND_ILY,spriteIdx+0 + ora (spriteIdx+0),y + sta tmp_sprite_data+{]line*4} + + db LDA_ILY,spriteIdx+8 + db AND_ILY,spriteIdx+4 + db AND_ILY,spriteIdx+0 + sta tmp_sprite_mask+{]line*4} + + ldy #]line*SPRITE_PLANE_SPAN+2 + lda (spriteIdx+8),y + db AND_ILY,spriteIdx+4 + ora (spriteIdx+4),y + db AND_ILY,spriteIdx+0 + ora (spriteIdx+0),y + sta tmp_sprite_data+{]line*4}+2 + + db LDA_ILY,spriteIdx+8 + db AND_ILY,spriteIdx+4 + db AND_ILY,spriteIdx+0 + sta tmp_sprite_mask+{]line*4}+2 +]line equ ]line+1 + --^ +; jmp FinishTile + +; Copy two sprites into a temporary direct page buffer +CopyTwoSprites +]line equ 0 + lup 8 + ldy #]line*SPRITE_PLANE_SPAN + lda (spriteIdx+4),y + db AND_ILY,spriteIdx+0 + ora (spriteIdx+0),y + sta tmp_sprite_data+{]line*4} + + db LDA_ILY,spriteIdx+4 + db AND_ILY,spriteIdx+0 + sta tmp_sprite_mask+{]line*4} + + ldy #]line*SPRITE_PLANE_SPAN+2 + lda (spriteIdx+4),y + db AND_ILY,spriteIdx+0 + ora (spriteIdx+0),y + sta tmp_sprite_data+{]line*4}+2 + + db LDA_ILY,spriteIdx+4 + db AND_ILY,spriteIdx+0 + sta tmp_sprite_mask+{]line*4}+2 +]line equ ]line+1 + --^ +; jmp FinishTile + +; Copy a single piece of sprite data into a temporary direct page . 
X = spriteIdx +CopyOneSprite +]line equ 0 + lup 8 + ldal spritedata+{]line*SPRITE_PLANE_SPAN},x + sta tmp_sprite_data+{]line*4} + ldal spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + sta tmp_sprite_data+{]line*4}+2 + + ldal spritemask+{]line*SPRITE_PLANE_SPAN},x + sta tmp_sprite_mask+{]line*4} + ldal spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + sta tmp_sprite_mask+{]line*4}+2 +]line equ ]line+1 + --^ + +; jmp FinishTile + ; Reference all of the tile rendering subroutines defined in the TileXXXXX files. Each file defines ; 8 entry points: ; @@ -518,7 +675,7 @@ _CopyBG1Tile ; a tile. ; ; TileStore+TS_TILE_ID : Tile descriptor -; TileStore+TS_DIRTY : $FFFF is clean, otherwise stores a back-reference to the DirtyTiles array +; TileStore+TS_DIRTY : $0000 is clean, any other value indicates a dirty tile ; TileStore+TS_TILE_ADDR : Address of the tile in the tile data buffer ; TileStore+TS_CODE_ADDR_LOW : Low word of the address in the code field that receives the tile ; TileStore+TS_CODE_ADDR_HIGH : High word of the address in the code field that receives the tile @@ -590,11 +747,14 @@ InitTiles lda #0 stal TileStore+TS_TILE_ID,x ; clear the tile store with the special zero tile stal TileStore+TS_TILE_ADDR,x - stal TileStore+TS_TILE_DISP,x - stal TileStore+TS_SPRITE_FLAG,x ; no sprites are set at the beginning - lda #$FFFF ; none of the tiles are dirty - stal TileStore+TS_DIRTY,x + stal TileStore+TS_DIRTY,x ; none of the tiles are dirty + + lda DirtyTileProcs ; Fill in with the first dispatch address + stal TileStore+TS_DIRTY_TILE_DISP,x + + lda TileProcs ; Same for non-dirty, non-sprite base case + stal TileStore+TS_BASE_TILE_DISP,x lda :vbuff ; array of sprite vbuff addresses per tile stal TileStore+TS_VBUFF_ARRAY_ADDR,x @@ -700,7 +860,16 @@ _SetTile ldal TileStore+TS_TILE_ID,x and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value xba - stal TileStore+TS_TILE_DISP,x + tay + lda DirtyTileProcs,y + stal TileStore+TS_DIRTY_TILE_DISP,x + + ldal TileStore+TS_TILE_ID,x ; Get the
non-sprite dispatch address + and #TILE_CTRL_MASK + xba + tay + lda TileProcs,y + stal TileStore+TS_BASE_TILE_DISP,x ; txa ; Add this tile to the list of dirty tiles to refresh jmp _PushDirtyTileX ; on the next call to _ApplyTiles @@ -731,11 +900,12 @@ _PushDirtyTile ; alternate entry point if the x-register is already set _PushDirtyTileX ldal TileStore+TS_DIRTY,x - bpl :occupied2 + bne :occupied2 - txa ; any non-negative value will work, this saves work below + inc ; any non-zero value will work stal TileStore+TS_DIRTY,x ; and is 1 cycle faster than loading a constant value + txa ldx DirtyTileCount ; 4 sta DirtyTiles,x ; 6 inx ; 2 @@ -783,7 +953,15 @@ ApplyTiles ENT plb rtl +; The _ApplyTiles function is responsible for rendering all of the dirty tiles into the code +; field. In this function we switch to the second direct page which holds the temporary +; working buffers for tile rendering. _ApplyTiles + tdc + clc + adc #$100 ; move to the next page + tcd + bra :begin :loop @@ -801,4 +979,124 @@ _ApplyTiles :begin ldy DirtyTileCount bne :loop - rts \ No newline at end of file + + tdc ; Move back to the original direct page + sec + sbc #$100 + tcd + rts + +; To make processing the tiles faster, we do them in chunks of eight. This allows the loop to be +; unrolled, which means we don't have to keep track of the register value and makes it faster to +; clear the dirty tile flag after being processed. + + tdc ; Move to the dedicated direct page for tile rendering + clc + adc #$100 + tcd + + phb ; Save the current bank + tsc + sta tmp0 ; Save it on the direct page + bra at_loop + +; The DirtyTiles array and the TileStore information is in the Tile Store bank. Because we +; process up to 8 tiles at a time and the tile code sets the bank register to the target +; code field bank, we need to restore the bank register each time. So, we pre-push +; 8 copies of the TileStore bank onto the stack.
+ + +at_exit + tdc ; Move back to the original direct page + sec + sbc #$100 + tcd + + plb ; Restore the original data bank and return + rts +dt_base equ $FE ; top of second direct page space + +at_loop + lda tmp0 + tcs + + lda DirtyTileCount ; This is pre-multiplied by 2 + beq at_exit ; If there are no items, exit + + ldx TileStoreBankDoubled + phx + phx + phx + + cmp #16 ; If there are >= 8 elements, then + bcs at_chunk ; do a full chunk + + stz DirtyTileCount ; Otherwise, this pass will handle them all + tax + jmp (at_table,x) +at_table da at_exit,at_one,at_two,at_three + da at_four,at_five,at_six,at_seven + +at_chunk sec + sbc #16 + sta DirtyTileCount ; Fall through + +; Because all of the registers get used in the _RenderTile2 subroutine, we +; push the values from the DirtyTiles array onto the stack and then pop off +; the values as we go + + ldy dt_base ; Reload the base index + ldx DirtyTiles+14,y ; Load the TileStore offset + stz TileStore+TS_DIRTY,x ; Clear this tile's dirty flag + jsr _RenderTile2 ; Draw the tile + plb ; Reset the data bank to the TileStore bank + +at_seven + ldy dt_base + ldx DirtyTiles+12,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + +at_six + ldy dt_base + ldx DirtyTiles+10,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + +at_five + ldy dt_base + ldx DirtyTiles+8,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + +at_four + ldy dt_base + ldx DirtyTiles+6,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + +at_three + ldy dt_base + ldx DirtyTiles+4,y + jsr _RenderTile2 + plb + +at_two + ldy dt_base + ldx DirtyTiles+2,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + +at_one + ldy dt_base + ldx DirtyTiles+0,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + + jmp at_loop diff --git a/src/blitter/Tiles10000.s b/src/blitter/Tiles10000.s index df1b0ab..8e761d3 100644 --- a/src/blitter/Tiles10000.s +++ b/src/blitter/Tiles10000.s @@ -46,6 +46,47 @@ _TBApplySpriteData --^ rts +_TBApplySpriteDataOne + ldx 
spriteIdx +]line equ 0 + lup 8 + lda blttmp+{]line*4} + andl spritemask+{]line*SPRITE_PLANE_SPAN},x + oral spritedata+{]line*SPRITE_PLANE_SPAN},x + sta: $0004+{]line*$1000},y + + lda blttmp+{]line*4}+2 + andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + rts + +_TBApplySpriteDataTwo +]line equ 0 + lup 8 + lda blttmp+{]line*4} + ldx spriteIdx+2 + andl spritemask+{]line*SPRITE_PLANE_SPAN},x + oral spritedata+{]line*SPRITE_PLANE_SPAN},x + ldx spriteIdx + andl spritemask+{]line*SPRITE_PLANE_SPAN},x + oral spritedata+{]line*SPRITE_PLANE_SPAN},x + sta: $0004+{]line*$1000},y + + lda blttmp+{]line*4}+2 + ldx spriteIdx+2 + andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + ldx spriteIdx + andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + rts + ; Copy tile data into the direct page compositing buffer. The main reason to do this in full passes is ; because we can avoid needing to use both the X and Y registers during the compositing process and ; reserve Y to hold the code field address.