diff --git a/src/Defs.s b/src/Defs.s index 568d504..d2d8017 100644 --- a/src/Defs.s +++ b/src/Defs.s @@ -87,11 +87,11 @@ ActiveSpriteCount equ 102 BankLoad equ 104 TileStoreBankAndBank01 equ 106 TileStoreBankAndTileDataBank equ 108 -Next equ 110 +TileStoreBankDoubled equ 110 +Next equ 112 activeSpriteList equ 128 ; 32 bytes for the active sprite list (can persist across frames) -AppSpace equ 160 ; 16 bytes of space reserved for application use -tiletmp equ 178 ; 16 bytes of temp storage for the tile renderers +; tiletmp equ 178 ; 16 bytes of temp storage for the tile renderers blttmp equ 192 ; 32 bytes of local cache/scratch space for blitter tmp8 equ 224 ; another 16 bytes of temporary space to be used as scratch @@ -112,6 +112,36 @@ tmp5 equ 250 tmp6 equ 252 tmp7 equ 254 +; Defines for the second direct page (used in the tile blitters) + +sprite_ptr0 equ 0 ; Each tile can render up to 4 sprite blocks. The sprite +sprite_ptr1 equ 4 ; data and mask values live in different banks, but have a +sprite_ptr2 equ 8 ; parallel structure. The high word of each point is set to +sprite_ptr3 equ 12 ; the mask bank. With the Bank register set, both data and mask +; ; can be accessed through the same pointer, e.g. 
lda (sprite_ptr0) +; ; and [sprite_ptr0] + +tmp_sprite_data equ 16 ; 32 byte temporary buffer to build up sprite data values +tmp_sprite_mask equ 48 ; 32 byte temporary buffer to build up sprite mask values +tmp_tile_data equ 80 ; 32 byte temporary buffer to build up tile data values +tmp_tile_mask equ 112 ; 32 byte temporary buffer to build up tile mask values + +; Temporary direct page locations used by some of the complex tile renderers +_X_REG equ 144 +_Y_REG equ 146 +_T_PTR equ 148 ; Copy of the tile address pointer +_BASE_ADDR equ 150 ; Copy of BTableLow for this tile +_SPR_X_REG equ 152 ; Cache address of sprite plane source for a tile +_JTBL_CACHE equ 154 ; Cache the offset to the exception handler for a column +_OP_CACHE equ 156 ; Cache of a relevant operand / oeprator +_TILE_ID equ 158 ; Copy of the tile descriptor + +; Define free space the the application to use +FREE_SPACE_DP2 equ 160 + +; End direct page values + + DIRTY_BIT_BG0_X equ $0001 DIRTY_BIT_BG0_Y equ $0002 DIRTY_BIT_BG1_X equ $0004 @@ -153,17 +183,55 @@ SPRITE_8X8 equ $0000 SPRITE_VFLIP equ $0400 SPRITE_HFLIP equ $0200 -MAX_TILES equ {26*41} ; Number of tiles in the code field (41 columns * 26 rows) -TILE_STORE_SIZE equ {MAX_TILES*2} ; The tile store contains a tile descriptor in each slot +; Stamp storage parameters +VBUFF_STRIDE_BYTES equ 12*4 ; Each line has 4 slots of 16 pixels + 8 buffer pixels +VBUFF_TILE_ROW_BYTES equ 8*VBUFF_STRIDE_BYTES ; Each row is comprised of 8 lines +VBUFF_SPRITE_STEP equ VBUFF_TILE_ROW_BYTES*3 ; Allocate space fo 16 rows + 8 rows of buffer +VBUFF_SPRITE_START equ {8*VBUFF_TILE_ROW_BYTES}+4 ; Start at an offset so $0000 can be used as an empty value +VBUFF_SLOT_COUNT equ 48 ; Have space for this many stamps + +; Tile storage parameters +TILE_STORE_WIDTH equ 41 +TILE_STORE_HEIGHT equ 26 +MAX_TILES equ {26*41} ; Number of tiles in the code field (41 columns * 26 rows) +TILE_STORE_SIZE equ {MAX_TILES*2} ; The tile store contains a tile descriptor in each slot 
TS_TILE_ID equ TILE_STORE_SIZE*0 ; tile descriptor for this location TS_DIRTY equ TILE_STORE_SIZE*1 ; Flag. Used to prevent a tile from being queued multiple times per frame TS_SPRITE_FLAG equ TILE_STORE_SIZE*2 ; Bitfield of all sprites that intersect this tile. 0 if no sprites. TS_TILE_ADDR equ TILE_STORE_SIZE*3 ; cached value, the address of the tiledata for this tile TS_CODE_ADDR_LOW equ TILE_STORE_SIZE*4 ; const value, address of this tile in the code fields -TS_CODE_ADDR_HIGH equ TILE_STORE_SIZE*5 ; const value +TS_CODE_ADDR_HIGH equ TILE_STORE_SIZE*5 TS_WORD_OFFSET equ TILE_STORE_SIZE*6 ; const value, word offset value for this tile if LDA (dp),y instructions re used TS_BASE_ADDR equ TILE_STORE_SIZE*7 ; const value, because there are two rows of tiles per bank, this is set to $0000 ot $8000. TS_SCREEN_ADDR equ TILE_STORE_SIZE*8 ; cached value of on-screen location of tile. Used for DirtyRender. TS_VBUFF_ARRAY_ADDR equ TILE_STORE_SIZE*9 ; const value to an aligned 32-byte array starting at $8000 in TileStore bank -TS_TILE_DISP equ TILE_STORE_SIZE*10 ; derived from TS_TILE_ID to optimize tile dispatch in the Render function +TS_BASE_TILE_DISP equ TILE_STORE_SIZE*10 ; derived from TS_TILE_ID to optimize base (non-sprite) tile dispatch in the Render function +TS_DIRTY_TILE_DISP equ TILE_STORE_SIZE*11 ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function + +; 16 consecutive entries to provide directly addressable space for holding the VBUFF address for the +; sprites that may be rendered at a given tile. Given a tile store offset, X, the way to address the +; address for the Y'th sprite is +; +; lda TileStore+TS_VBUFF_0+{Y*TILE_STORE_SIZE},x +; +; Moving to the next tile can be done with a constant. 
+; +; lda TileStore+TS_VBUFF_0+{Y*TILE_STORE_SIZE}+{41*row}+{2*col},x + +TS_VBUFF_0 equ TILE_STORE_SIZE*12 +TS_VBUFF_1 equ TILE_STORE_SIZE*13 +TS_VBUFF_2 equ TILE_STORE_SIZE*14 +TS_VBUFF_3 equ TILE_STORE_SIZE*15 +TS_VBUFF_4 equ TILE_STORE_SIZE*16 +TS_VBUFF_5 equ TILE_STORE_SIZE*17 +TS_VBUFF_6 equ TILE_STORE_SIZE*18 +TS_VBUFF_7 equ TILE_STORE_SIZE*19 +TS_VBUFF_8 equ TILE_STORE_SIZE*20 +TS_VBUFF_9 equ TILE_STORE_SIZE*21 +TS_VBUFF_10 equ TILE_STORE_SIZE*22 +TS_VBUFF_11 equ TILE_STORE_SIZE*23 +TS_VBUFF_12 equ TILE_STORE_SIZE*24 ; slots 12-15 continue after slot 11; reusing *22/*23 would alias TS_VBUFF_10/TS_VBUFF_11 +TS_VBUFF_13 equ TILE_STORE_SIZE*25 +TS_VBUFF_14 equ TILE_STORE_SIZE*26 +TS_VBUFF_15 equ TILE_STORE_SIZE*27 ; NOTE(review): the TileStore allocation must now span 28 arrays of TILE_STORE_SIZE bytes - confirm diff --git a/src/Render.s b/src/Render.s index b010f38..9e5e63f 100644 --- a/src/Render.s +++ b/src/Render.s @@ -195,15 +195,13 @@ _RenderDirtyTile pei TileStoreBankAndBank01 ; Special value that has the TileStore bank in LSB and $01 in MSB plb - txy - ldx TileStore+TS_TILE_DISP,y ; get the finalized tile descriptor - ldal DirtyTileProcs,x ; load and patch in the appropriate subroutine + lda TileStore+TS_DIRTY_TILE_DISP,x ; load and patch in the appropriate subroutine stal :tiledisp+1 - ldx TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated) - lda TileStore+TS_SCREEN_ADDR,y ; Get the on-screen address of this tile - tay + ldy TileStore+TS_SCREEN_ADDR,x ; Get the on-screen address of this tile + lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated); index with X because Y now holds the screen address + tax plb ; set the bank @@ -671,7 +669,7 @@ dirty_sprite stx spriteIdx+6 jmp BlitFourSprites -DirtyTileProcs dw _TBDirtyTile_00,_TBDirtyTile_0H,_TBDirtyTile_V0,_TBDirtyTile_VH +DirtyTileProcs dw _TBDirtyTile_00,_TBDirtyTile_0H,_TBDirtyTile_V0,_TBDirtyTile_VH ;DirtyTileSpriteProcs dw _TBDirtySpriteTile_00,_TBDirtySpriteTile_0H,_TBDirtySpriteTile_V0,_TBDirtySpriteTile_VH ; Blit tiles directly to the screen.
@@ -998,4 +996,4 @@ BlitOneSprite _R0W0 cli pld - rts + rts diff --git a/src/Sprite.s b/src/Sprite.s index b50cd47..a3bcb89 100644 --- a/src/Sprite.s +++ b/src/Sprite.s @@ -21,35 +21,85 @@ InitSprites ; Clear values in the sprite array - ldx #{MAX_SPRITES-1}*2 -:loop3 stz _Sprites+TILE_STORE_ADDR_1,x - dex - dex - bpl :loop3 +; ldx #{MAX_SPRITES-1}*2 +;:loop3 stz _Sprites+TILE_STORE_ADDR_1,x +; dex +; dex +; bpl :loop3 ; Initialize the VBUFF address offsets in the data and mask banks for each sprite ; -; The internal grid 13 tiles wide where each sprite has a 2x2 interior square with a +; The internal grid 12 tiles wide where each sprite has a 2x2 interior square with a ; tile-size buffer all around. We pre-render each sprite with all four vert/horz flips -VBUFF_STRIDE_BYTES equ 13*4 -VBUFF_TILE_ROW_BYTES equ 8*VBUFF_STRIDE_BYTES -VBUFF_SPRITE_STEP equ VBUFF_TILE_ROW_BYTES*3 -VBUFF_SPRITE_START equ {8*VBUFF_TILE_ROW_BYTES}+4 +; +; Eventually we should be able to have a separate rendering path for vertically flipped +; sprites and will be able to double the capacity of the stamp buffer ldx #0 lda #VBUFF_SPRITE_START clc -:loop4 sta _Sprites+VBUFF_ADDR,x +:loop4 sta VBuffAddrTable,x adc #VBUFF_SPRITE_STEP inx inx - cpx #MAX_SPRITES*2 + cpx #VBUFF_SLOT_COUNT*2 bcc :loop4 ; Precalculate some bank values jsr _CacheSpriteBanks rts +; Utility function to calculate the difference in tile positions between a sprite's current +; position and it's previous position. This gets interesting because the number of tiles +; that a sprite covers can change based on the relative alignemen of the sprite with the +; background. +; +; Ideally, we would be able to quickly calculate exactly which new background tiles a sprite +; intersects with and which ones it has left to minimize the number of TileStore entries +; that need to be updated. +; +; In the short-term, we just do an equality test which lets us know if the sprite is +; covering the exact same tiles. 
+ + +; Render a sprite stamp into the sprite buffer. Stamps exist independently of the sprites +; and each sprite references a specific stamp. This is necessary because it's common for a +; sprite to change its graphic as it's animating, but it is too costly to have to set up +; the stamp every time. So this allows users to create stamps in advance and then +; assign them to the sprites as needed. +; +; Currently, we support a maximum of 48 stamps. +; +; Input: +; A = sprite descriptor +; X = stamp slot +; Return: +; A = vbuff address to be assigned to Sprite[VBUFF_ADDR] +CreateSpriteStamp ENT + phb + phk + plb + jsr _CreateSpriteStamp + plb + rtl + +_CreateSpriteStamp + pha ; Save the descriptor + jsr _GetBaseTileAddr ; Get the address of the tile data + pha + + txa ; Stamp slot -> A. NOTE(review): assumes _GetBaseTileAddr preserved X - confirm + asl ; slot * 2 = word index into VBuffAddrTable + tax + ldy VBuffAddrTable,x ; Load the address of the stamp slot + + plx ; Pop the tile address + pla ; Pop the sprite descriptor + phy ; VBUFF_ADDR value + jsr _DrawSpriteStamp ; Render the sprite data and create a stamp + + pla ; Pop the VBUFF_ADDR and return + rts ; Add a new sprite to the rendering pipeline ; @@ -90,12 +140,12 @@ _AddSprite pla sta _Sprites+SPRITE_ID,x ; Keep a copy of the full descriptor - jsr _GetBaseTileAddr ; This applies the TILE_ID_MASK - sta _Sprites+TILE_DATA_OFFSET,x lda #SPRITE_STATUS_OCCUPIED+SPRITE_STATUS_ADDED sta _Sprites+SPRITE_STATUS,x + stz _Sprites+VBUFF_ADDR,x ; Clear the VBUFF address, just to initialize it + phy tya and #$00FF @@ -106,7 +156,7 @@ _AddSprite sta _Sprites+SPRITE_X,x ; X coordinate jsr _PrecalcAllSpriteInfo ; Cache sprite property values (simple stuff) - jsr _DrawSpriteSheet ; Render the sprite into internal space +; jsr _DrawSpriteSheet ; Render the sprite into internal space ; Mark the dirty bit to indicate that the active sprite list needs to be rebuilt in the next ; render call @@ -117,11 +167,161 @@ _AddSprite lda _SpriteBits,x ; Get the bit flag for this sprite slot tsb SpriteMap ; Mark it in the sprite map bit field - txa ; And return the sprite ID -
clc ; Mark that the sprite was successfully added +; txa ; And return the sprite ID +; clc ; Mark that the sprite was successfully added rts +; Alternate implementation that uses the TS_COVERAGE_SIZE and TS_LOOKUP_INDEX properties to +; load the old values directly from the TileStoreLookup table, rather than caching them. +; This is more efficient, because the work in MarkDirtySprite is independent of the +; sprite size and, by inlining the _PushDirtyTile logic, we can save a fair amount of overhead +_ClearSpriteFromTileStore2 + ldx _Sprites+TS_COVERAGE_SIZE,y + jmp (csfts_tbl,x) +csfts_tbl dw csfts_1x1,csfts_1x2,csfts_1x3,csfts_out + dw csfts_2x1,csfts_2x2,csfts_2x3,csfts_out + dw csfts_3x1,csfts_3x2,csfts_3x3,csfts_out + dw csfts_out,csfts_out,csfts_out,csfts_out + +; Just a single value to clear and add to the dirty tile list +csfts_1x1 ldx _Sprites+TS_LOOKUP_INDEX,y + lda TileStoreLookup,x + tax + + lda TileStore+TS_SPRITE_FLAG,x + and _SpriteBitsNot,y + sta TileStore+TS_SPRITE_FLAG,x + + lda TileStore+TS_DIRTY,x + bne csfts_1x1_out + + inc ; any non-zero value will work + sta TileStore+TS_DIRTY,x ; and is 1 cycle faster than loading a constant value + + txa + ldx DirtyTileCount + sta DirtyTiles,x + inx + inx + stx DirtyTileCount +csfts_1x2 +csfts_1x3 +csfts_2x1 +csfts_2x3 +csfts_3x1 +csfts_3x2 +csfts_3x3 +csfts_1x1_out + rts + +; This is a more interesting case where the ability to batch things up starts to produce some +; efficiency gains +csfts_2x2 ldx _Sprites+TS_LOOKUP_INDEX,y ; Get the address of the old top-left corner + tay + ldx TileStoreLookup,y + + lda TileStore+TS_SPRITE_FLAG,x + and _SpriteBits + sta TileStore+TS_SPRITE_FLAG,x + + lda TileStore+TS_DIRTY,x + beq *+3 + phx + + + ldx TileStoreLookup+2,y + + lda TileStore+TS_SPRITE_FLAG,x + and _SpriteBits + sta TileStore+TS_SPRITE_FLAG,x + + lda TileStore+TS_DIRTY,x + beq *+3 + phx + + + ldx TileStoreLookup+TS_LOOKUP_SPAN,y + + lda TileStore+TS_SPRITE_FLAG,x + and _SpriteBits + sta 
TileStore+TS_SPRITE_FLAG,x + + lda TileStore+TS_DIRTY,x + beq *+3 + phx + + + ldx TileStoreLookup+TS_LOOKUP_SPAN+2,y + + lda TileStore+TS_SPRITE_FLAG,x + and _SpriteBits + sta TileStore+TS_SPRITE_FLAG,x + + ldy DirtyTileCount + + lda TileStore+TS_DIRTY,x + beq skip_2x2 + + txa + sta DirtyTiles,y + sta TileStore+TS_DIRTY,x + +skip_2x2 + pla + beq :done1 + sta DirtyTiles+2,x + tay + sta TileStore+TS_DIRTY,y + + pla + beq :done2 + sta DirtyTiles+4,x + tay + sta TileStore+TS_DIRTY,y + + pla + beq :done3 + sta DirtyTiles+6,x + tay + sta TileStore+TS_DIRTY,y + +; Maximum number of dirty tiles reached. Just fall through. + + pla + txa + adc #8 + sta DirtyTileCount + rts +:done3 + txa + adc #6 + sta DirtyTileCount + rts +:done2 + txa + adc #4 + sta DirtyTileCount + rts +:done1 + inx + inx + stx DirtyTileCount + + rts + + + + lda _SpriteBitsNot,y ; Cache the bit value for this sprite + + ldy TileStoreLookup,x ; Get the tile store offset + + + and TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + +csfts_out rts + ; Run through the list of tile store offsets that this sprite was last drawn into and mark ; those tiles as dirty. The largest number of tiles that a sprite could possibly cover is 20 ; (an unaligned 4x3 sprite), covering a 5x4 area of play field tiles. @@ -129,68 +329,68 @@ _AddSprite ; Y register = sprite record index _CSFTS_Out rts _ClearSpriteFromTileStore - ldx _Sprites+TILE_STORE_ADDR_1,y - beq _CSFTS_Out - ldal TileStore+TS_SPRITE_FLAG,x ; Clear the bit in the bit field. This seems wasteful, but - and _SpriteBitsNot,y ; there is no indexed form of TSB/TRB and caching the value in - stal TileStore+TS_SPRITE_FLAG,x ; a direct page location, only saves 1 or 2 cycles per and costs 10. - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_1,y +; beq _CSFTS_Out +; ldal TileStore+TS_SPRITE_FLAG,x ; Clear the bit in the bit field. 
This seems wasteful, but +; and _SpriteBitsNot,y ; there is no indexed form of TSB/TRB and caching the value in +; stal TileStore+TS_SPRITE_FLAG,x ; a direct page location, only saves 1 or 2 cycles per and costs 10. +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_2,y - beq _CSFTS_Out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_2,y +; beq _CSFTS_Out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_3,y - beq _CSFTS_Out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_3,y +; beq _CSFTS_Out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_4,y - beq _CSFTS_Out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_4,y +; beq _CSFTS_Out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_5,y - beq :out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_5,y +; beq :out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_6,y - beq :out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_6,y +; beq :out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_7,y - beq :out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - 
stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_7,y +; beq :out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_8,y - beq :out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jsr _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_8,y +; beq :out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jsr _PushDirtyTileX - ldx _Sprites+TILE_STORE_ADDR_9,y - beq :out - ldal TileStore+TS_SPRITE_FLAG,x - and _SpriteBitsNot,y - stal TileStore+TS_SPRITE_FLAG,x - jmp _PushDirtyTileX +; ldx _Sprites+TILE_STORE_ADDR_9,y +; beq :out +; ldal TileStore+TS_SPRITE_FLAG,x +; and _SpriteBitsNot,y +; stal TileStore+TS_SPRITE_FLAG,x +; jmp _PushDirtyTileX :out rts @@ -383,7 +583,7 @@ _DoPhase2 RebuildSpriteArray lda SpriteMap ; Get the bit field -; Unrolled loop to get the sprite index values that coorespond to the set bit positions +; Unrolled loop to get the sprite index values that correspond to the set bit positions pea $FFFF ; end-of-list marker ]step equ 0 @@ -442,6 +642,20 @@ _RenderSprites ; OPTIMIZATION NOTE: Should check that the sprite actually changes position. If the screen scrolls ; by +X, but the sprite moves by -X (so it's relative position is unchanged), then ; it does NOT need to be marked as dirty. +; +; OPTIMIZATION NOTE: At this point, a decent chunk of per-tile time is spent cupdating the sprite flgas +; for a given TileStore entry. When a sprite needs to be redrawn (such as when the +; screen scrolls), the code marks every tile the sprite was on as no longer occupied +; and then marks the occupied tiles. While simple, this is very redundent when the +; screen in scrolling slowly since it is very likely that the same sprite covers the +; exact same tiles. 
Each pair of markings requires 35 cycles, so a basic 16x16 sprite +; could save >300 cycles per frame. With 4 or 5 sprites on screen, the saving passes +; our 1% threshold for useful optimizations. +; +; Since we cache the tile location and effective sprite coverage, we need a fast +; way to compare the old and new positions and get a list of the new tiles the sprite +; occupies and old locations that it no longer covers. It's possible that just testing +; for equality would be the easiest win to know when we can skip everything. stz forceSpriteFlag lda StartX @@ -531,10 +745,15 @@ _CacheSpriteBanks ora #^TileStore sta TileStoreBankAndTileDataBank + lda #>TileStore + and #$FF00 + ora #^TileStore + sta TileStoreBankDoubled + rts ; This is 13 blocks wide -SPRITE_PLANE_SPAN equ VBUFF_STRIDE_BYTES ; 52 +SPRITE_PLANE_SPAN equ VBUFF_STRIDE_BYTES ; A = x coordinate ; Y = y coordinate @@ -574,11 +793,19 @@ SPRITE_PLANE_SPAN equ VBUFF_STRIDE_BYTES ; 52 ; it's tile information, or changing its position. ; ; X = sprite index +_stamp_step dw 0,12,24,36 _PrecalcAllSpriteInfo lda _Sprites+SPRITE_ID,x - and #$3E00 +; and #$3E00 xba - sta _Sprites+SPRITE_DISP,x ; use bits 9 through 13 for full dispatch + and #$0006 + tay + lda _Sprites+VBUFF_ADDR,x + clc + adc _stamp_step,y + sta _Sprites+SPRITE_DISP,x + +; Set the ; Set the sprite's width and height lda #4 @@ -673,19 +900,26 @@ RemoveSprite ENT rtl _RemoveSprite + cmp #MAX_SPRITES + bcc :ok + rts + +:ok + asl tax -_RemoveSpriteX lda _Sprites+SPRITE_STATUS,x ora #SPRITE_STATUS_REMOVED sta _Sprites+SPRITE_STATUS,x + rts ; Update the sprite's flags. We do not allow the size of a sprite to be changed. That requires ; the sprite to be removed and re-added. 
; ; A = Sprite ID -; X = Sprite Tile ID and Flags +; X = New Sprite Flags +; Y = New Sprite Stamp Address UpdateSprite ENT phb phk @@ -695,36 +929,34 @@ UpdateSprite ENT rtl _UpdateSprite - phx ; swap X/A to be more efficient - tax - pla - -_UpdateSpriteX - cpx #MAX_SPRITES*2 ; Make sure we're in bounds + cmp #MAX_SPRITES bcc :ok rts :ok -_UpdateSpriteXnc - cmp _Sprites+SPRITE_ID,x ; Don't do anything if there is no change - beq :no_sprite_change + phx ; Save X to swap into A + asl + tax + pla + cmp _Sprites+SPRITE_ID,x ; If the flags changed, need to redraw the sprite + bne :sprite_flag_change ; on the next frame + tya + cmp _Sprites+VBUFF_ADDR,x ; Did the stamp change? + bne :sprite_stamp_change + rts ; Nothing changed, so just return + +:sprite_flag_change sta _Sprites+SPRITE_ID,x ; Keep a copy of the full descriptor - jsr _GetBaseTileAddr ; This applies the TILE_ID_MASK - cmp _Sprites+TILE_DATA_OFFSET,x - beq :no_tile_change - sta _Sprites+TILE_DATA_OFFSET,x + tya +:sprite_stamp_change + sta _Sprites+VBUFF_ADDR,x ; Just save this to stay in sync - jsr _PrecalcAllSpriteInfo ; Cache stuff - jsr _DrawSpriteSheet ; Render the sprite into internal space if the tile id has changed - -:no_tile_change - lda _Sprites+SPRITE_STATUS,x + lda _Sprites+SPRITE_STATUS,x ; Mark this sprite as updated ora #SPRITE_STATUS_UPDATED sta _Sprites+SPRITE_STATUS,x -:no_sprite_change - rts + jmp _PrecalcAllSpriteInfo ; Cache stuff and return ; Move a sprite to a new location. 
If the tile ID of the sprite needs to be changed, then ; a full remove/add cycle needs to happen @@ -741,17 +973,16 @@ MoveSprite ENT rtl _MoveSprite - phx ; swap X/A to be more efficient - tax - pla - -_MoveSpriteX - cpx #MAX_SPRITES*2 ; Make sure we're in bounds + cmp #MAX_SPRITES bcc :ok rts :ok -_MoveSpriteXnc + phx ; Save X to swap into A + asl + tax + pla + cmp _Sprites+SPRITE_X,x bne :changed1 sta _Sprites+SPRITE_X,x ; Update the X coordinate @@ -766,13 +997,11 @@ _MoveSpriteXnc :changed2 sta _Sprites+SPRITE_Y,x ; Update the Y coordinate - jsr _PrecalcAllSpriteInfo ; Can be specialized to only update (x,y) values - lda _Sprites+SPRITE_STATUS,x ora #SPRITE_STATUS_MOVED sta _Sprites+SPRITE_STATUS,x - rts + jmp _PrecalcAllSpriteInfo ; Can be specialized to only update (x,y) values ; Sprite data structures. We cache quite a few pieces of information about the sprite ; to make calculations faster, so this is hidden from the caller. @@ -797,22 +1026,25 @@ SPRITE_STATUS_UPDATED equ $0004 ; Sprite's non-position attributes were SPRITE_STATUS_REMOVED equ $0008 ; Sprite has been removed. 
SPRITE_STATUS equ {MAX_SPRITES*0} -TILE_DATA_OFFSET equ {MAX_SPRITES*2} -VBUFF_ADDR equ {MAX_SPRITES*4} ; Fixed address in sprite/mask banks +; TILE_DATA_OFFSET equ {MAX_SPRITES*2} +VBUFF_ADDR equ {MAX_SPRITES*4} ; Base address of the sprite's stamp in the data/mask banks SPRITE_ID equ {MAX_SPRITES*6} SPRITE_X equ {MAX_SPRITES*8} SPRITE_Y equ {MAX_SPRITES*10} -TILE_STORE_ADDR_1 equ {MAX_SPRITES*12} -TILE_STORE_ADDR_2 equ {MAX_SPRITES*14} -TILE_STORE_ADDR_3 equ {MAX_SPRITES*16} -TILE_STORE_ADDR_4 equ {MAX_SPRITES*18} -TILE_STORE_ADDR_5 equ {MAX_SPRITES*20} -TILE_STORE_ADDR_6 equ {MAX_SPRITES*22} -TILE_STORE_ADDR_7 equ {MAX_SPRITES*24} -TILE_STORE_ADDR_8 equ {MAX_SPRITES*26} -TILE_STORE_ADDR_9 equ {MAX_SPRITES*28} -TILE_STORE_ADDR_10 equ {MAX_SPRITES*30} -SPRITE_DISP equ {MAX_SPRITES*32} ; pre-calculated index for jmp (abs,x) based on sprite size +; TILE_STORE_ADDR_1 equ {MAX_SPRITES*12} +TS_LOOKUP_INDEX equ {MAX_SPRITES*12} ; The index into the TileStoreLookup table corresponding to the top-left corner of the sprite +; TILE_STORE_ADDR_2 equ {MAX_SPRITES*14} +TS_COVERAGE_SIZE equ {MAX_SPRITES*14} ; Index into the lookup table of how many TileStore tiles are covered by this sprite +;TILE_STORE_ADDR_3 equ {MAX_SPRITES*16} +TS_VBUFF_BASE_ADDR equ {MAX_SPRITES*16} ; Fixed address of the TS_VBUFF_X memory locations +;TILE_STORE_ADDR_4 equ {MAX_SPRITES*18} +;TILE_STORE_ADDR_5 equ {MAX_SPRITES*20} +;TILE_STORE_ADDR_6 equ {MAX_SPRITES*22} +;TILE_STORE_ADDR_7 equ {MAX_SPRITES*24} +;TILE_STORE_ADDR_8 equ {MAX_SPRITES*26} +;TILE_STORE_ADDR_9 equ {MAX_SPRITES*28} +;TILE_STORE_ADDR_10 equ {MAX_SPRITES*30} +SPRITE_DISP equ {MAX_SPRITES*32} ; cached address of the specific stamp based on flags SPRITE_CLIP_LEFT equ {MAX_SPRITES*34} SPRITE_CLIP_RIGHT equ {MAX_SPRITES*36} SPRITE_CLIP_TOP equ {MAX_SPRITES*38} diff --git a/src/Sprite2.s b/src/Sprite2.s index 82a6dd6..9d83bea 100644 --- a/src/Sprite2.s +++ b/src/Sprite2.s @@ -81,105 +81,206 @@ _LocalToTileStore ; ... 
; ; For the Y-coordinate, we just use "mod 8" instead of "mod 4" -mdsOut rts +mdsOut2 + lda #6 ; Pick a value for a 0x0 tile sprite + sta _Sprites+TS_COVERAGE_SIZE,y ; zero the list of tile store addresses + rts + _MarkDirtySprite - - lda #0 - sta _Sprites+TILE_STORE_ADDR_1,y ; Clear this sprite's dirty tile list in case of an early exit - lda _SpriteBits,y ; Cache its bit flag to mark in the tile slots - sta SpriteBit - lda _Sprites+IS_OFF_SCREEN,y ; Check if the sprite is visible in the playfield - bne mdsOut + bne mdsOut2 -; At this point we know that we have to update the tiles that overlap the sprite's rectangle defined -; by (Top, Left), (Bottom, Right). First, calculate the row and column in the TileStore that -; encloses the top-left on-screen corner of the sprite +; Add the first visible row of the sprite to the Y-scroll offset to find the first line in the +; code field that needs to be drawn. The range of values is 0 to 199+207 = [0, 406] clc lda _Sprites+SPRITE_CLIP_TOP,y adc StartYMod208 ; Adjust for the scroll offset - tax ; cache - cmp #208 ; check if we went too far positive - bcc *+5 - sbc #208 + pha ; Cache + and #$FFF8 ; mask first to ensure LSR will clear the carry lsr - lsr ; This is the row in the Tile Store for top-left corner of the sprite - and #$FFFE ; Store the value pre-multiplied by 2 for indexing in the :mark_R_C routines + lsr + tax + lda TileStoreLookupYTable,x ; Even numbers from [0, 100] (50 elements) sta RowTop + plx ; Restore the cached line number into X (the table index is no longer needed); the TXA that follows moves it into A. A PLA here would be overwritten by that TXA. Carry is left clear for the later ADC. -; Next, calculate how many tiles are covered by the sprite. This uses the table at the top of this function, but -; the idea is that for every increment of StartX or StartY, that can shift the sprite into the next tile, up to -; a maximum of mod 4 / mod 8. So the effective width of a sprite is (((StartX + Clip_Left) mod 4) + Clip_Width) / 4 +; Get the position of the top edge within the tile and then add it to the sprite's height +; to calculate the number of tiles that are overlapped.
We use the actual width and height +; values here so small sprites (like 4x4 bullets) only force an update to the actual tiles +; that are intersected, rather than assuming an 8x8 sprite always takes up that amount of +; space. txa and #$0007 - sta tmp0 ; save to adjust sprite origin + tax ; cache again. This is a bit faster than recalculating - lda _Sprites+SPRITE_CLIP_HEIGHT,y ; Nominal value between 0 and 16+7 = 23 = 10111 + adc _Sprites+SPRITE_CLIP_HEIGHT,y ; Nominal value between 0 and 16+7 = 23 = 10111 dec - clc - adc tmp0 and #$0018 sta AreaIndex -; Repeat to get the same information for the columns + txa + asl + tax + lda :vbuff_mul,x + sta tmp0 + +; Add the horizontal position to the horizontal offset to find the first column in the +; code field that needs to be drawn. The range of values is 0 to 159+163 = [0, 322] clc lda _Sprites+SPRITE_CLIP_LEFT,y adc StartXMod164 tax - cmp #164 - bcc *+5 - sbc #164 + and #$FFFC lsr - and #$FFFE ; Same pre-multiply by 2 for later - sta ColLeft +; sta ColLeft ; Even numbers from [0, 160] (80 elements) + adc RowTop + sta _Sprites+TS_LOOKUP_INDEX,y ; This is the index into the TileStoreLookup table + + +; Calculate the final address of the sprite data in the stamp buffer. We have to move earlier +; in the buffer based on the horizontal offset and move up for each vertical offset. txa and #$0003 - sta tmp1 ; save to adjust sprite origin + tax - lda _Sprites+SPRITE_CLIP_WIDTH,y ; max width = 8 = 0x08 + adc tmp0 ; add to the vertical offset + +; Subtract this value from the SPRITE_DISP address + + eor #$FFFF ; A = -X - 1 + sec ; C = 1 + adc _Sprites+SPRITE_DISP,y ; A = SPRITE_DISP + (-X - 1) + 1 = SPRITE_DISP - X + + sta VBuffOrigin ; this is the final (adjusted) origin for this sprite + +; Load the base address of the appropriate TS_VBUFF_? offset for this sprite index and +; store it as an indirect address. + + lda _Sprites+TS_VBUFF_BASE_ADDR,y + sta tmp0 + +; We know the starting corner of the TileStore. 
Now, we need to figure out now many tiles +; the sprite covers. This is a function of the sprite's width and height and the specific +; location of the upper-left corner of the sprite within the corner tile. + + txa + adc _Sprites+SPRITE_CLIP_WIDTH,y ; max width = 8 = 0x08 dec - clc - adc tmp1 + and #$000C lsr ; max value = 4 = 0x04 - and #$0006 - ora AreaIndex - sta AreaIndex + ora AreaIndex ; merge into the area index + +; No need to copy the TileStore addresses into the Sprite's TILE_STORE_ADDR values. Just +; hold a copy of the corner offset into the lookup table and the sprite's size in tiles. +; Then, when we need to erase we can just lookup the values in the TileStoreLookup table. + + sta _Sprites+TS_COVERAGE_SIZE,y + tax +; lda TileStoreBaseIndex +; sta _Sprites+TS_LOOKUP_INDEX,y + +; Jump to the appropriate marking routine + + jmp (:mark,x) + +mdsOut rts +;_MarkDirtySprite +; +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_1,y ; Clear this sprite's dirty tile list in case of an early exit +; lda _SpriteBits,y ; Cache its bit flag to mark in the tile slots +; sta SpriteBit + +; lda _Sprites+IS_OFF_SCREEN,y ; Check if the sprite is visible in the playfield +; bne mdsOut + +; At this point we know that we have to update the tiles that overlap the sprite's rectangle defined +; by (Top, Left), (Bottom, Right). First, calculate the row and column in the TileStore that +; encloses the top-left on-screen corner of the sprite + +; clc +; lda _Sprites+SPRITE_CLIP_TOP,y +; adc StartYMod208 ; Adjust for the scroll offset +; tax ; cache +; cmp #208 ; check if we went too far positive +; bcc *+5 +; sbc #208 +; lsr +; lsr ; This is the row in the Tile Store for top-left corner of the sprite +; and #$FFFE ; Store the value pre-multiplied by 2 for indexing in the :mark_R_C routines +; sta RowTop + +; Next, calculate how many tiles are covered by the sprite. 
This uses the table at the top of this function, but +; the idea is that for every increment of StartX or StartY, that can shift the sprite into the next tile, up to +; a maximum of mod 4 / mod 8. So the effective width of a sprite is (((StartX + Clip_Left) mod 4) + Clip_Width) / 4 + +; txa +; and #$0007 +; sta tmp0 ; save to adjust sprite origin + +; lda _Sprites+SPRITE_CLIP_HEIGHT,y ; Nominal value between 0 and 16+7 = 23 = 10111 +; dec +; clc +; adc tmp0 +; and #$0018 +; sta AreaIndex + +; Repeat to get the same information for the columns + +; clc +; lda _Sprites+SPRITE_CLIP_LEFT,y +; adc StartXMod164 +; tax +; cmp #164 +; bcc *+5 +; sbc #164 +; lsr +; and #$FFFE ; Same pre-multiply by 2 for later +; sta ColLeft + +; txa +; and #$0003 +; sta tmp1 ; save to adjust sprite origin; + +; lda _Sprites+SPRITE_CLIP_WIDTH,y ; max width = 8 = 0x08 +; dec +; clc +; adc tmp1 +; lsr ; max value = 4 = 0x04 +; and #$0006 +; ora AreaIndex +; sta AreaIndex ; Calculate the modified origin address for the sprite. We need to look at the sprite flip bits ; to determine which of the four sprite stamps is the correct one to use. Then, offset that origin ; based on the (x, y) and (startx, starty) positions. 
- lda _Sprites+SPRITE_DISP,y ; Each stamp is 12 bytes - and #$0006 - tax - lda :stamp_step,x - clc - adc _Sprites+VBUFF_ADDR,y - sec - sbc tmp1 ; Subtract the horizontal within-tile displacement - asl tmp0 - ldx tmp0 - sec - sbc :vbuff_mul,x - sta VBuffOrigin - lda #^TileStore - sta tmp1 +; lda _Sprites+SPRITE_DISP,y ; Get the sprite's base display address +; sec +; sbc tmp1 ; Subtract the horizontal within-tile displacement +; asl tmp0 +; ldx tmp0 +; sec +; sbc :vbuff_mul,x +; sta VBuffOrigin +; lda #^TileStore +; sta tmp1 ; Dispatch to cover the tiles - ldx AreaIndex - jmp (:mark,x) +; ldx AreaIndex +; jmp (:mark,x) :mark dw :mark1x1,:mark1x2,:mark1x3,mdsOut dw :mark2x1,:mark2x2,:mark2x3,mdsOut dw :mark3x1,:mark3x2,:mark3x3,mdsOut dw mdsOut,mdsOut,mdsOut,mdsOut -:stamp_step dw 0,12,24,36 -:vbuff_mul dw 0,52,104,156,208,260,312,364 +:vbuff_mul dw 0,VBUFF_STRIDE_BYTES,VBUFF_STRIDE_BYTES*2,VBUFF_STRIDE_BYTES*3,VBUFF_STRIDE_BYTES*4,VBUFF_STRIDE_BYTES*5,VBUFF_STRIDE_BYTES*6,VBUFF_STRIDE_BYTES*7 ; byte offset of line n (0-7) in the stamp buffer; tied to the stride so it tracks the 13*4 -> 12*4 change instead of staying at multiples of 52 + ; Dispatch to the calculated sizing ; Begin a list of subroutines to cover all of the valid sprite size combinations. This is all unrolled code, @@ -191,11 +292,170 @@ _MarkDirtySprite ; ; There *might* be some speed gained by pushing a list of :mark_R_C addressed onto the stack in the clipping routing ; and dispatching that way, but probably not... + +:mark1x1_v2 + + tax ; Get the TileStoreBaseIndex + + ldy TileStoreLookup,x ; Get the offset into the TileStore for this tile + + lda SpriteBit ; Mark this tile as having this sprite + ora TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + + lda VBuffOrigin + sta (tmp0),y ; Fill in the slot for this sprite on this tile + + lda TileStore+TS_DIRTY,y ; If this tile is not yet marked dirty, mark it + bne exit1x1 + + ldx DirtyTileCount + tya + sta DirtyTiles,x + sta TileStore+TS_DIRTY,y + inx + inx + stx DirtyTileCount + +exit1x1 + rts + +:mark2x2_v2 + +; Put the TileStoreBaseIndex into the X-register + + tax + +; Push a sentinel value of the stack that we use to inline all of the dirty tile array updates faster +; and the end of this routine.
+ + pea #$0000 + +; Now, move through each of the TileStore locations and set the necessary fields. We have to do the +; following +; +; 1. Set the marker bit in the TS_SPRITE_FLAG so the renderer knows which vbuff addresses to load +; 2. Set the address of the sprite stamp graphics that are used. This can change every frame. +; 3. Mark the tile as dirty and put it on the list if it was marked dirty for the first time. + + ldy TileStoreLookup,x ; Get the offset into the TileStore for this tile + + lda SpriteBit ; Mark this tile as having this sprite + ora TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + + lda TileStore+TS_DIRTY,y ; If this tile is not yet marked dirty, queue it up + bne *+3 + phy + + lda VBuffOrigin + sta (tmp0),y ; Fill in the slot for this sprite on this tile + +; Move to the next tile + + ldy TileStoreLookup+2,x + + adc #4 ; Weave in the VBuffOrigin values to save a load every + sta (tmp0),y ; other iteration + + lda SpriteBit + ora TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + + lda TileStore+TS_DIRTY,y + bne *+3 + phy + +; Third tile + + ldy TileStoreLookup+TS_LOOKUP_SPAN,x + + lda SpriteBit + ora TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + + lda TileStore+TS_DIRTY,y + bne *+3 + phy + + lda VBuffOrigin + adc #SPRITE_PLANE_SPAN + sta (tmp0),y + +; Fourth tile + + ldy TileStoreLookup+TS_LOOKUP_SPAN+2,x + + adc #4+SPRITE_PLANE_SPAN + sta (tmp0),y + + lda SpriteBit + ora TileStore+TS_SPRITE_FLAG,y + sta TileStore+TS_SPRITE_FLAG,y + +; Lift this above the last TS_DIRTY check + + ldx DirtyTileCount + +; Check the TS_DIRTY flag for this tile. 
We handle it immediately, if needed + + lda TileStore+TS_DIRTY,y + bne skip + +; Now, update the Dirty Tile array + + tya + sta DirtyTiles,x + sta TileStore+TS_DIRTY,y + +skip + pla + beq :done1 + sta DirtyTiles+2,x + tay + sta TileStore+TS_DIRTY,y + + pla + beq :done2 + sta DirtyTiles+4,x + tay + sta TileStore+TS_DIRTY,y + + pla + beq :done3 + sta DirtyTiles+6,x + tay + sta TileStore+TS_DIRTY,y + +; Maximum number of dirty tiles reached. Just fall through. + + pla + txa + adc #8 + sta DirtyTileCount + rts +:done3 + txa + adc #6 + sta DirtyTileCount + rts +:done2 + txa + adc #4 + sta DirtyTileCount + rts +:done1 + inx + inx + stx DirtyTileCount + + rts + :mark1x1 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_1,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_2,y rts ; NOTE: If we rework the _PushDirtyTile to use the Y register instead of the X register, we can @@ -209,112 +469,112 @@ _MarkDirtySprite :mark1x2 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_0_1 - sta _Sprites+TILE_STORE_ADDR_2,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_2,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_3,y rts :mark1x3 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_0_1 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_0_2 - sta _Sprites+TILE_STORE_ADDR_3,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_4,y +; sta _Sprites+TILE_STORE_ADDR_3,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_4,y rts :mark2x1 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_2,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_2,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_3,y rts :mark2x2 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_0_1 
- sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_3,y jsr :mark_1_1 - sta _Sprites+TILE_STORE_ADDR_4,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_5,y +; sta _Sprites+TILE_STORE_ADDR_4,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_5,y rts :mark2x3 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_0_1 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_0_2 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_3,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_4,y +; sta _Sprites+TILE_STORE_ADDR_4,y jsr :mark_1_1 - sta _Sprites+TILE_STORE_ADDR_5,y +; sta _Sprites+TILE_STORE_ADDR_5,y jsr :mark_1_2 - sta _Sprites+TILE_STORE_ADDR_6,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_7,y +; sta _Sprites+TILE_STORE_ADDR_6,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_7,y rts :mark3x1 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_2_0 - sta _Sprites+TILE_STORE_ADDR_3,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_4,y +; sta _Sprites+TILE_STORE_ADDR_3,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_4,y rts :mark3x2 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta _Sprites+TILE_STORE_ADDR_1,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_2_0 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_3,y jsr :mark_0_1 - sta _Sprites+TILE_STORE_ADDR_4,y +; sta _Sprites+TILE_STORE_ADDR_4,y jsr :mark_1_1 - sta _Sprites+TILE_STORE_ADDR_5,y +; sta _Sprites+TILE_STORE_ADDR_5,y jsr :mark_2_1 - sta _Sprites+TILE_STORE_ADDR_6,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_7,y +; sta _Sprites+TILE_STORE_ADDR_6,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_7,y rts :mark3x3 jsr :mark_0_0 - sta _Sprites+TILE_STORE_ADDR_1,y +; sta 
_Sprites+TILE_STORE_ADDR_1,y jsr :mark_1_0 - sta _Sprites+TILE_STORE_ADDR_2,y +; sta _Sprites+TILE_STORE_ADDR_2,y jsr :mark_2_0 - sta _Sprites+TILE_STORE_ADDR_3,y +; sta _Sprites+TILE_STORE_ADDR_3,y jsr :mark_0_1 - sta _Sprites+TILE_STORE_ADDR_4,y +; sta _Sprites+TILE_STORE_ADDR_4,y jsr :mark_1_1 - sta _Sprites+TILE_STORE_ADDR_5,y +; sta _Sprites+TILE_STORE_ADDR_5,y jsr :mark_2_1 - sta _Sprites+TILE_STORE_ADDR_6,y +; sta _Sprites+TILE_STORE_ADDR_6,y jsr :mark_0_2 - sta _Sprites+TILE_STORE_ADDR_7,y +; sta _Sprites+TILE_STORE_ADDR_7,y jsr :mark_1_2 - sta _Sprites+TILE_STORE_ADDR_8,y +; sta _Sprites+TILE_STORE_ADDR_8,y jsr :mark_2_2 - sta _Sprites+TILE_STORE_ADDR_9,y - lda #0 - sta _Sprites+TILE_STORE_ADDR_10,y +; sta _Sprites+TILE_STORE_ADDR_9,y +; lda #0 +; sta _Sprites+TILE_STORE_ADDR_10,y rts ; Begin List of subroutines to mark each tile offset diff --git a/src/SpriteRender.s b/src/SpriteRender.s index 39041fd..4eed2a3 100644 --- a/src/SpriteRender.s +++ b/src/SpriteRender.s @@ -1,23 +1,42 @@ +; Alternate entry point that takes arguments in registers instead of using a _Sprite +; record +; +; Y = VBUFF address +; X = Tile Data address +; A = Sprite Flags +_DrawSpriteStamp + sty tmp1 + stx tmp2 + and #DISP_MASK ; dispatch to all of the different orientations + sta tmp3 + jmp _DSSCommon + ; Function to render a sprite from a sprite definition into the internal data buffers ; ; X = sprite index -_DrawSpriteSheet +; _DrawSpriteSheet DISP_VFLIP equ $0004 ; hard code these because they are internal values DISP_HFLIP equ $0002 DISP_MASK equ $0018 ; Isolate the size bits - phx - - lda _Sprites+VBUFF_ADDR,x - sta tmp1 - - lda _Sprites+TILE_DATA_OFFSET,x - sta tmp2 - - lda _Sprites+SPRITE_DISP,x - and #DISP_MASK ; dispatch to all of the different orientations - sta tmp3 +; phx +; +; lda _Sprites+VBUFF_ADDR,x +; sta tmp1 +; +; lda _Sprites+TILE_DATA_OFFSET,x +; sta tmp2 +; +; lda _Sprites+SPRITE_DISP,x +; and #DISP_MASK ; dispatch to all of the different orientations +; sta 
tmp3 +; +; jsr _DSSCommon +; +; plx +; rts +_DSSCommon ; Set bank phb pea #^tiledata ; Set the bank to the tile data @@ -58,8 +77,6 @@ DISP_MASK equ $0018 ; Isolate the size bits ; Restore bank plb ; pop extra byte plb - - plx rts ; ; X = _Sprites array offset diff --git a/src/blitter/Tables.s b/src/blitter/Tables.s index 8493199..44bf70c 100644 --- a/src/blitter/Tables.s +++ b/src/blitter/Tables.s @@ -254,8 +254,16 @@ NextCol ; A double-sized table of lookup values. This is basically the cross-product of TileStoreYTable and ; NextCol. If is double-width and double-height so that, if we know a tile's address position -; of (X + 41*Y), then any relative tile store address can be looked up by adding a constan value. -;TileStore2DLookup ds {26*41*2}*4 +; of (X + 41*Y), then any relative tile store address can be looked up by adding a constant value. +; +; 50 rows by 80 columns + 2 extra rows and columns +TS_LOOKUP_WIDTH equ 80 +TS_LOOKUP_HEIGHT equ 50 +TS_LOOKUP_SPAN equ {TS_LOOKUP_WIDTH+2} +TS_LOOKUP_ROWS equ {TS_LOOKUP_HEIGHT+2} + +TileStoreLookupYTable ds {TS_LOOKUP_HEIGHT*2} +TileStoreLookup ds {TS_LOOKUP_SPAN*TS_LOOKUP_ROWS*2} ; This is a double-length table that holds the right-edge adresses of the playfield on the physical ; screen. At most, it needs to hold 200 addresses for a full height playfield. It is double-length @@ -296,7 +304,5 @@ BG1YOffsetTable lup 26 dw 1,1,1,2,2,2,2,2,1,1,1,0,0,0,0,0 --^ - - - - +; Table of base VBUFF addresses for each sprite stamp slot +VBuffAddrTable ds 2*VBUFF_SLOT_COUNT \ No newline at end of file diff --git a/src/blitter/Template.s b/src/blitter/Template.s index ac7662b..82446a8 100644 --- a/src/blitter/Template.s +++ b/src/blitter/Template.s @@ -160,7 +160,7 @@ SetScreenRect sty ScreenHeight ; Save the screen height and ; Generalized routine that calculates the on-screen address of the tiles and takes the ; StartX and StartY values into consideration. 
This routine really exists to support -; the dirty tile rendering mode and the tiles *must* be aligned with the playfield. +; the dirty tile rendering mode and the tiles *must* be aligned with the playfield. ; That is, StartX % 4 == 0 and StartY % 8 == 0. If these conditions are not met, then ; screen will not render correctly. _RecalcTileScreenAddrs diff --git a/src/blitter/Tiles.s b/src/blitter/Tiles.s index ba76c3f..45c45ac 100644 --- a/src/blitter/Tiles.s +++ b/src/blitter/Tiles.s @@ -41,17 +41,6 @@ TILE_CTRL_MASK equ $FE00 TILE_PROC_MASK equ $F800 ; Select tile proc for rendering -; Temporary direct page locatinos used by some of the complex tile renderers - -_X_REG equ tiletmp -_Y_REG equ tiletmp+2 -_T_PTR equ tiletmp+4 ; Copy of the tile address pointer -_BASE_ADDR equ tiletmp+6 ; Copy of BTableLow for this tile -_SPR_X_REG equ tiletmp+8 ; Cache address of sprite plane source for a tile -_JTBL_CACHE equ tiletmp+10 ; Cache the offset to the exception handler for a column -_OP_CACHE equ tiletmp+12 ; Cache of a relevant operand / oeprator -_TILE_ID equ tiletmp+14 ; Copy of the tile descriptor - ; Low-level function to take a tile descriptor and return the address in the tiledata ; bank. This is not too useful in the fast-path because the fast-path does more ; incremental calculations, but it is handy for other utility functions @@ -113,56 +102,36 @@ _RenderTileBG1 ; Given an address to a Tile Store record, dispatch to the appropriate tile renderer. The Tile ; Store record contains all of the low-level information that's needed to call the renderer. ; +; This routine sets the direct page register to the second page since we use that space to +; build and cache tile and sprite data, when necessary ; Y = address of tile _RenderTile2 - pea >TileStore ; Need that addressing flexibility here. Caller is responsible for restoring bank reg - plb - plb - txy ; We can be better than this.... 
+ lda TileStore+TS_SPRITE_FLAG,x ; This is a bitfield of all the sprites that intersect this tile, only care if non-zero or not + bne do_dirty_sprite - lda TileStore+TS_TILE_ID,y ; build the finalized tile descriptor - ldx TileStore+TS_SPRITE_FLAG,y ; This is a bitfield of all the sprites that intersect this tile, only care if non-zero or not - beq :nosprite +; Handle the non-sprite tile blit -; txa -; jsr BuildActiveSpriteArray ; Build the max 4 array of active sprites for this tile -; sta ActiveSpriteCount + sep #$20 + lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line + pha ; and put on the stack for later - lda TileStore+TS_VBUFF_ARRAY_ADDR,y ; Scratch space - sta _SPR_X_REG - phy - ldy spriteIdx - lda (_SPR_X_REG),y - sta _SPR_X_REG - ply + lda TileStore+TS_BASE_ADDR+1,x ; load the base address of the code field ($0000 or $8000) + sta _BASE_ADDR+1 ; so we can get by just copying the high byte + rep #$20 - lda TileStore+TS_TILE_ID,y - ora #TILE_SPRITE_BIT -; ldx TileStore+TS_VBUFF_ARRAY_ADDR,y -; stx _SPR_X_REG - -:nosprite - sta _TILE_ID ; Some tile blitters need to get the tile descriptor - and #TILE_CTRL_MASK - xba - tax - ldal TileProcs,x ; load and patch in the appropriate subroutine + lda TileStore+TS_BASE_TILE_DISP,x ; Get the address of the renderer for this tile stal :tiledisp+1 - ldx TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated) + lda TileStore+TS_TILE_ID,x + sta _TILE_ID ; Some tile blitters need to get the tile descriptor - sep #$20 ; load the bank of the target code field line - lda TileStore+TS_CODE_ADDR_HIGH,y + ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field + lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated) pha - rep #$20 - lda TileStore+TS_CODE_ADDR_LOW,y ; load the address of the code field - pha - lda TileStore+TS_BASE_ADDR,y ; load the base address of the code field - sta _BASE_ADDR - lda TileStore+TS_WORD_OFFSET,y - ply 
- plb ; set the bank + lda TileStore+TS_WORD_OFFSET,x + plx + plb ; set the bank to the code field that will be updated ; B is set to the correct code field bank ; A is set to the tile word offset (0 through 80 in steps of 4) @@ -171,6 +140,194 @@ _RenderTile2 :tiledisp jmp $0000 ; render the tile +; Let's make a macro helper for the bit test tree +; dobit src_offset,dest,next_target,end_target +dobit MAC + beq last_bit + ldx: ]1,y + stx ]2 + jmp ]3 +last_bit ldx: ]1,y + stx ]2 + jmp ]4 + EOM + +; The sprite code is just responsible for quickly copying all of the sprite data +; into the direct page temp area. + +do_dirty_sprite + pei TileStoreBankAndTileDataBank ; Special value that has the TileStore bank in LSB and TileData bank in MSB + plb + +; Cache a couple of values into the direct page, but preserve the Accumulator + + ldy TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated) + sty tileAddr + +; This is very similar to the code in the dirty tile renderer, but we can't reuse +; because that code draws directly to the graphics screen, and this code draws +; to a temporary buffer that has a different stride. 
+ + ldy TileStore+TS_VBUFF_ARRAY_ADDR,x ; base address of the VBUFF sprite address array for this tile + + lsr + bcc :loop_0_bit_1 + dobit $0000;sprite_ptr0;:loop_1_bit_1;CopyOneSprite + +:loop_0_bit_1 lsr + bcc :loop_0_bit_2 + dobit $0002;sprite_ptr0;:loop_1_bit_2;CopyOneSprite + +:loop_0_bit_2 lsr + bcc :loop_0_bit_3 + dobit $0004;sprite_ptr0;:loop_1_bit_3;CopyOneSprite + +:loop_0_bit_3 lsr + bcc :loop_0_bit_4 + dobit $0006;sprite_ptr0;:loop_1_bit_4;CopyOneSprite + +:loop_0_bit_4 lsr + bcc :loop_0_bit_5 + dobit $0008;sprite_ptr0;:loop_1_bit_5;CopyOneSprite + +:loop_0_bit_5 lsr + bcc :loop_0_bit_6 + dobit $000A;sprite_ptr0;:loop_1_bit_6;CopyOneSprite + +:loop_0_bit_6 lsr + bcc :loop_0_bit_7 + dobit $000C;sprite_ptr0;:loop_1_bit_7;CopyOneSprite + +:loop_0_bit_7 lsr + bcc :loop_0_bit_8 + dobit $000E;sprite_ptr0;:loop_1_bit_8;CopyOneSprite + +:loop_0_bit_8 lsr + bcc :loop_0_bit_9 + dobit $0010;sprite_ptr0;:loop_1_bit_9;CopyOneSprite + +:loop_0_bit_9 lsr + bcc :loop_0_bit_10 + ldx: $0012,y + stx spriteIdx + cmp #0 + jne :loop_1_bit_10 + jmp CopyOneSprite + +:loop_0_bit_10 lsr + bcc :loop_0_bit_11 + dobit $0014;sprite_ptr0;:loop_1_bit_11;CopyOneSprite + +:loop_0_bit_11 lsr + bcc :loop_0_bit_12 + dobit $0016;sprite_ptr0;:loop_1_bit_12;CopyOneSprite + +:loop_0_bit_12 lsr + bcc :loop_0_bit_13 + dobit $0018;sprite_ptr0;:loop_1_bit_13;CopyOneSprite + +:loop_0_bit_13 lsr + bcc :loop_0_bit_14 + dobit $001A;sprite_ptr0;:loop_1_bit_14;CopyOneSprite + +:loop_0_bit_14 lsr + bcc :loop_0_bit_15 + dobit $001C;sprite_ptr0;:loop_1_bit_15;CopyOneSprite + +:loop_0_bit_15 ldx: $001E,y + stx spriteIdx + jmp CopyOneSprite + +; We can optimize later, for now just copy the sprite data and mask into its own +; direct page buffer and combine with the tile data later + +; We set up direct page pointers to the mask bank and use the bank register for the +; data. 
+CopyFourSpritesAbove + +; Copy three sprites into a temporary direct page buffer +LDA_IL equ $A7 ; lda [dp] +LDA_ILY equ $B7 ; lda [dp],y +AND_IL equ $27 ; and [dp] +AND_ILY equ $37 ; and [dp],y + +CopyThreeSprites +]line equ 0 + lup 8 + ldy #]line*SPRITE_PLANE_SPAN + lda (spriteIdx+8),y + db AND_ILY,spriteIdx+4 ; Can't use long indirect inside LUP because of ']' + ora (spriteIdx+4),y + db AND_ILY,spriteIdx+0 + ora (spriteIdx+0),y + sta tmp_sprite_data+{]line*4} + + db LDA_ILY,spriteIdx+8 + db AND_ILY,spriteIdx+4 + db AND_ILY,spriteIdx+0 + sta tmp_sprite_mask+{]line*4} + + ldy #]line*SPRITE_PLANE_SPAN+2 + lda (spriteIdx+8),y + db AND_ILY,spriteIdx+4 + ora (spriteIdx+4),y + db AND_ILY,spriteIdx+0 + ora (spriteIdx+0),y + sta tmp_sprite_data+{]line*4}+2 + + db LDA_ILY,spriteIdx+8 + db AND_ILY,spriteIdx+4 + db AND_ILY,spriteIdx+0 + sta tmp_sprite_mask+{]line*4}+2 +]line equ ]line+1 + --^ +; jmp FinishTile + +; Copy two sprites into a temporary direct page buffer +CopyTwoSprites +]line equ 0 + lup 8 + ldy #]line*SPRITE_PLANE_SPAN + lda (spriteIdx+4),y + db AND_ILY,spriteIdx+0 + ora (spriteIdx+0),y + sta tmp_sprite_data+{]line*4} + + db LDA_ILY,spriteIdx+4 + db AND_ILY,spriteIdx+0 + sta tmp_sprite_mask+{]line*4} + + ldy #]line*SPRITE_PLANE_SPAN+2 + lda (spriteIdx+4),y + db AND_ILY,spriteIdx+0 + ora (spriteIdx+0),y + sta tmp_sprite_data+{]line*4}+2 + + db LDA_ILY,spriteIdx+4 + db AND_ILY,spriteIdx+0 + sta tmp_sprite_mask+{]line*4}+2 +]line equ ]line+1 + --^ +; jmp FinishTile + +; Copy a single piece of sprite data into a temporary direct page . 
X = spriteIdx +CopyOneSprite +]line equ 0 + lup 8 + ldal spritedata+{]line*SPRITE_PLANE_SPAN},x + sta tmp_sprite_data+{]line*4} + ldal spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + sta tmp_sprite_data+{]line*4}+2 + + ldal spritemask+{]line*SPRITE_PLANE_SPAN},x + sta tmp_sprite_mask+{]line*4} + ldal spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + sta tmp_sprite_mask+{]line*4}+2 +]line equ ]line+1 + --^ + +; jmp FinishTile + ; Reference all of the tile rendering subroutines defined in the TileXXXXX files. Each file defines ; 8 entry points: ; @@ -518,7 +675,7 @@ _CopyBG1Tile ; a tile. ; ; TileStore+TS_TILE_ID : Tile descriptor -; TileStore+TS_DIRTY : $FFFF is clean, otherwise stores a back-reference to the DirtyTiles array +; TileStore+TS_DIRTY : $0000 is clean, any other value indicates a dirty tile ; TileStore+TS_TILE_ADDR : Address of the tile in the tile data buffer ; TileStore+TS_CODE_ADDR_LOW : Low word of the address in the code field that receives the tile ; TileStore+TS_CODE_ADDR_HIGH : High word of the address in the code field that receives the tile @@ -590,11 +747,14 @@ InitTiles lda #0 stal TileStore+TS_TILE_ID,x ; clear the tile store with the special zero tile stal TileStore+TS_TILE_ADDR,x - stal TileStore+TS_TILE_DISP,x - stal TileStore+TS_SPRITE_FLAG,x ; no sprites are set at the beginning - lda #$FFFF ; none of the tiles are dirty - stal TileStore+TS_DIRTY,x + stal TileStore+TS_DIRTY,x ; none of the tiles are dirty + + lda DirtyTileProcs ; Fill in with the first dispatch address + stal TileStore+TS_DIRTY_TILE_DISP,x + + lda TileProcs ; Same for non-dirty, non-sprite base case + stal TileStore+TS_BASE_TILE_DISP,x lda :vbuff ; array of sprite vbuff addresses per tile stal TileStore+TS_VBUFF_ARRAY_ADDR,x @@ -700,7 +860,16 @@ _SetTile ldal TileStore+TS_TILE_ID,x and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value xba - stal TileStore+TS_TILE_DISP,x + tay + lda DirtyTileProcs,y + stal TileStore+TS_DIRTY_TILE_DISP,x + + ldal TileStore+TS_TILE_ID,x ; Get the 
non-sprite dispatch address + and #TILE_CTRL_MASK + xba + tay + lda TileProcs,y + stal TileStore+TS_BASE_TILE_DISP,x ; txa ; Add this tile to the list of dirty tiles to refresh jmp _PushDirtyTileX ; on the next call to _ApplyTiles @@ -731,11 +900,12 @@ _PushDirtyTile ; alternate entry point if the x-register is already set _PushDirtyTileX ldal TileStore+TS_DIRTY,x - bpl :occupied2 + bne :occupied2 - txa ; any non-negative value will work, this saves work below + inc ; any non-zero value will work stal TileStore+TS_DIRTY,x ; and is 1 cycle faster than loading a constant value + txa ldx DirtyTileCount ; 4 sta DirtyTiles,x ; 6 inx ; 2 @@ -783,7 +953,15 @@ ApplyTiles ENT plb rtl +; The _ApplyTiles function is responsible for rendering all of the dirty tiles into the code +; field. In this function we switch to the second direct page which holds the temporary +; working buffers for tile rendering. _ApplyTiles + tdc + clc + adc #$100 ; move to the next page + tcd + bra :begin :loop @@ -801,4 +979,126 @@ _ApplyTiles :begin ldy DirtyTileCount bne :loop - rts \ No newline at end of file + + tdc ; Move back to the original direct page + sec + sbc #$100 + tcd + rts + +; To make processing the tile faster, we do them in chunks of eight. This allows the loop to be +; unrolled, which means we don't have to keep track of the register value and makes it faster to +; clear the dirty tile flag after being processed. + + tdc ; Move to the dedicated direct page for tile rendering + clc + adc #$100 + tcd + + phb ; Save the current bank + tsc + sta tmp0 ; Save it on the direct page + bra at_loop + +; The DirtyTiles array and the TileStore information is in the Tile Store bank. Because we +; process up to 8 tiles at a time and the tile code sets the bank register to the target +; code field bank, we need to restore the bank register each time. So, we pre-push +; 8 copies of the TileStore bank onto the stack. 
+ + +at_exit + tdc ; Move back to the original direct page + sec + sbc #$100 + tcd + + plb ; Restore the original data bank and return + rts +dt_base equ $FE ; top of second direct page space + +at_loop + lda tmp0 + tcs + + lda DirtyTileCount ; This is pre-multiplied by 2 + beq at_exit ; If there are no items, exit + + ldx TileStoreBankDoubled + phx ; Four words = 8 bytes, one for each plb in a full chunk + phx + phx + phx + + cmp #16 ; If there are >= 8 elements, then + bcs at_chunk ; do a full chunk + + stz DirtyTileCount ; Otherwise, this pass will handle them all + tax + jmp (at_table,x) +at_table da at_exit,at_one,at_two,at_three + da at_four,at_five,at_six,at_seven + +at_chunk sec + sbc #16 + sta DirtyTileCount ; Fall through + +; Because all of the registers get used in the _RenderTile2 subroutine, we +; push the values from the DirtyTiles array onto the stack and then pop off +; the values as we go + + ldy dt_base ; Reload the base index + ldx DirtyTiles+14,y ; Load the TileStore offset + stz TileStore+TS_DIRTY,x ; Clear this tile's dirty flag + jsr _RenderTile2 ; Draw the tile + plb ; Reset the data bank to the TileStore bank + +at_seven + ldy dt_base + ldx DirtyTiles+12,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + +at_six + ldy dt_base + ldx DirtyTiles+10,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + +at_five + ldy dt_base + ldx DirtyTiles+8,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + +at_four + ldy dt_base + ldx DirtyTiles+6,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + +at_three + ldy dt_base + ldx DirtyTiles+4,y + stz TileStore+TS_DIRTY,x ; Clear the dirty flag here too, same as every other stanza + jsr _RenderTile2 + plb + +at_two + ldy dt_base + ldx DirtyTiles+2,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + +at_one + ldy dt_base + ldx DirtyTiles+0,y + stz TileStore+TS_DIRTY,x + jsr _RenderTile2 + plb + + jmp at_loop diff --git a/src/blitter/Tiles10000.s b/src/blitter/Tiles10000.s index df1b0ab..8e761d3 100644 --- 
a/src/blitter/Tiles10000.s +++ b/src/blitter/Tiles10000.s @@ -46,6 +46,47 @@ _TBApplySpriteData --^ rts +_TBApplySpriteDataOne + ldx 
spriteIdx +]line equ 0 + lup 8 + lda blttmp+{]line*4} + andl spritemask+{]line*SPRITE_PLANE_SPAN},x + oral spritedata+{]line*SPRITE_PLANE_SPAN},x + sta: $0004+{]line*$1000},y + + lda blttmp+{]line*4}+2 + andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + rts + +_TBApplySpriteDataTwo +]line equ 0 + lup 8 + lda blttmp+{]line*4} + ldx spriteIdx+2 + andl spritemask+{]line*SPRITE_PLANE_SPAN},x + oral spritedata+{]line*SPRITE_PLANE_SPAN},x + ldx spriteIdx + andl spritemask+{]line*SPRITE_PLANE_SPAN},x + oral spritedata+{]line*SPRITE_PLANE_SPAN},x + sta: $0004+{]line*$1000},y + + lda blttmp+{]line*4}+2 + ldx spriteIdx+2 + andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + ldx spriteIdx + andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + rts + ; Copy tile data into the direct page compositing buffer. The main reason to do this in full passes is ; because we can avoid needing to use both the X and Y registers during the compositing process and ; reserve Y to hold the code field address.