diff --git a/demos/tool/App.Main.s b/demos/tool/App.Main.s index 068c9ab..8c3e922 100644 --- a/demos/tool/App.Main.s +++ b/demos/tool/App.Main.s @@ -39,6 +39,26 @@ ScreenY equ 2 pea #TSZelda _GTELoadTileSet +; Create stamps for the sprites we are going to use +HERO_SPRITE_1 equ SPRITE_16X16+1 +HERO_SLOT equ 0 + + pea HERO_SPRITE_1 ; sprinte id + pea VBUFF_SPRITE_START ; vbuff address + _GTECreateSpriteStamp + +; Create sprites + pea HERO_SPRITE_1 ; sprite id + pea #10 ; screen x-position (<256) + pea #8 ; screen y-position (<256) + pea HERO_SLOT ; sprite slot (0 - 15) + _GTEAddSprite + + pea HERO_SLOT ; update the sprite in this slot + pea $0000 ; with these flags (h/v flip) + pea VBUFF_SPRITE_START ; and use this stamp + _GTEUpdateSprite + ; Manually fill in the 41x26 tiles of the TileStore with a test pattern. ldx #0 @@ -71,15 +91,10 @@ ScreenY equ 2 ; Set the origin of the screen - lda #3 - sta ScreenX - lda #10 + stz ScreenX + lda #63 sta ScreenY - pea #3 - pea #10 - _GTESetBG0Origin - ; Very simple actions :evt_loop pha ; space for result, with pattern diff --git a/src/Defs.s b/src/Defs.s index 77479c7..bd81382 100644 --- a/src/Defs.s +++ b/src/Defs.s @@ -82,7 +82,7 @@ BG1TileMapPtr equ 86 SCBArrayPtr equ 90 ; Used for palette binding SpriteBanks equ 94 ; Bank bytes for the sprite data and sprite mask -LastRender equ 96 ; Record which reder function was last executed +LastRender equ 96 ; Record which render function was last executed ; gap SpriteMap equ 100 ; Bitmap of open sprite slots. 
ActiveSpriteCount equ 102 @@ -96,7 +96,8 @@ LastKey equ 116 LastTick equ 118 ForceSpriteFlag equ 120 -Next equ 122 +VBuffArrayPtr equ 122 +SpriteRemovedFlag equ 126 ; Indicate if any sprites were removed this frame activeSpriteList equ 128 ; 32 bytes for the active sprite list (can persist across frames) ; tiletmp equ 178 ; 16 bytes of temp storage for the tile renderers @@ -241,3 +242,4 @@ ScreenModeWidth EXT ScreenModeHeight EXT _SpriteBits EXT _SpriteBitsNot EXT +VBuffArrayAddr EXT diff --git a/src/Render.s b/src/Render.s index fe62c45..46d2437 100644 --- a/src/Render.s +++ b/src/Render.s @@ -20,6 +20,8 @@ ; It's important to do _ApplyBG0YPos first because it calculates the value of StartY % 208 which is ; used in all of the other loops _Render + stz SpriteRemovedFlag ; If we remove a sprite, then we need to flag a rebuild for the next frame + jsr _ApplyBG0YPos ; Set stack addresses for the virtual lines to the physical screen ; jsr _ApplyBG1YPos @@ -90,6 +92,12 @@ _Render stz DirtyBits stz LastRender ; Mark that a full render was just performed + + lda SpriteRemovedFlag ; If any sprite was removed, set the rebuild flag + beq :no_removal + lda #DIRTY_BIT_SPRITE_ARRAY + sta DirtyBits +:no_removal rts ; The _ApplyTilesFast is the same as _ApplyTiles, but we use the _RenderTileFast subroutine diff --git a/src/Sprite.s b/src/Sprite.s index d031a9b..aea622b 100644 --- a/src/Sprite.s +++ b/src/Sprite.s @@ -31,6 +31,178 @@ InitSprites jsr _CacheSpriteBanks rts + +; _RenderSprites +; +; The function is responsible for updating all of the rendering information based on any changes +; that occured to the sprites on this frame. Sprite handling is one of the most expensive and +; complicated pieces of the rendering pipeline, so these functions are aggressively simplified and +; optimized. +; +; The sprite rendering pipeline is: +; +; 0. Check if any new sprites have been added by testing the DIRTY_BIT_SPRITE_ARRAY. 
If so, then +; the activeSpriteList (a 32-byte array on the direct page) is rebuilt from the SpriteBits bitmap +; word. +; +; Next, the activeSpriteList is scanned for changes to specific sprites. If the screen has been +; scrolled, then every sprite is considered to have the SPRITE_STATUS_MOVED flag set. +; +; 1. If a sprite is marked as (SPRITE_STATUS_MOVED or SPRITE_STATUS_UPDATED or SPRITE_STATUS_ADDED) and not SPRITE_STATUS_REMOVED +; A. Calculate the TS_COVERAGE_SIZE, TS_LOOKUP_INDEX, and TS_VBUFF_BASE for the sprite +; B. For each tile the sprite overlaps with: +; i. Set its bit in the TileStore's TS_SPRITE_FLAG +; ii. Add the tile to the DirtyTile list +; iii. Set the VBUFF address for the sprite block +; C. If the sprite is not marked as SPRITE_STATUS_ADDED +; i. For each old tile the sprite overlaps with +; a. If it is not marked in the DirtyTile list +; * Clear its bit from the TileStore's TS_SPRITE_FLAG +; * Add the tile to the DirtyTile list +; +; 2. If a sprite is marked as SPRITE_STATUS_REMOVED, then +; A. Clear its bit from the SpriteBits bitmap +; B. For each tile the sprite overlaps with: +; i. Clear its bit from the TileStore's TS_SPRITE_FLAG +; ii. Add the tile to the DirtyTile list +; C. Clear the SPRITE_STATUS flags (work complete) +; +; 3. For each tile on the Dirty Tile list +; A. Place the sprite VBUFF addresses in TS_VBUFF_ADDR_0 through TS_VBUFF_ADDR_3 and set TS_VBUFF_ADDR_COUNT +; +; It is important that this work is done *prior* to any tile map updates so that we can iterate over the +; DirtyTile list and *know* that it only contains tiles that are impacted by sprite changes. +_RenderSprites + +; Check to see if any sprites have been added or removed. If so, then we regenerate the active +; sprite list. 
Since adding and removing sprites is rare, this is a worthwhile tradeoff, because +; there are several places where we want to iterate over all of the sprites, and having a list +; and not having to constantly load and test the SPRITE_STATUS just to skip unused slots can help +; streamline the code. + + lda #DIRTY_BIT_SPRITE_ARRAY + trb DirtyBits ; clears the flag, if it was set + beq :no_rebuild + jsr RebuildSpriteArray + +:no_rebuild + +; First step is to look at the StartX and StartY values. If the screen has scrolled, then it has +; the same effect as moving all of the sprites. +; +; OPTIMIZATION NOTE: Should check that the sprite actually changes position. If the screen scrolls +; by +X, but the sprite moves by -X (so its relative position is unchanged), then +; it does NOT need to be marked as dirty. + + stz ForceSpriteFlag + lda StartX + cmp OldStartX + bne :force_update + + lda StartY + cmp OldStartY + beq :no_change + +:force_update + lda #SPRITE_STATUS_MOVED + sta ForceSpriteFlag +:no_change + +; Dispatch to the update process for sprites. By pre-building the list, we know exactly +; how many sprites to process and they are in a contiguous array. So we don't have to keep +; track of an iteration variable + + ldx ActiveSpriteCount ; used directly as a byte offset into the word table, so presumably stored as 2 * count -- TODO confirm in RebuildSpriteArray + jmp (phase1,x) + +; Implement the logic for updating sprite and tile rendering information. Each iteration of the +; ActiveSpriteCount will call this routine with the Y-register set to the sprite index +tmpY equ tmp15 +tmpA equ tmp14 +_DoPhase1 + lda _Sprites+SPRITE_STATUS,y + ora ForceSpriteFlag + sta tmpA ; effective status (own flags plus forced MOVED), re-read after _CalcDirtySprite + sty tmpY ; sprite index, reloaded after _ClearSpriteFromTileStore + +; First step, if a sprite is being removed, then we just have to clear its old tile information +; and mark the tiles it overlapped as dirty. 
+ + bit #SPRITE_STATUS_REMOVED ; A still holds SPRITE_STATUS | ForceSpriteFlag (also saved in tmpA) + beq :no_clear + + lda _SpriteBits,y ; Clear from the sprite bitmap + sta SpriteRemovedFlag ; Stick a non-zero value here + trb SpriteMap + + jmp _ClearSpriteFromTileStore ; Clear the tile flags, add to the dirty tile list and done + +; Need to calculate new VBUFF information. This could be required for UPDATED, ADDED or MOVED +; sprites, so we do it unconditionally. +:no_clear + jsr _CalcDirtySprite + +; If the sprite is marked as ADDED, then it does not need to have its old tile locations cleared + lda tmpA + bit #SPRITE_STATUS_ADDED + bne :no_move + jsr _ClearSpriteFromTileStore + ldy tmpY ; restore the sprite index saved on entry to _DoPhase1 + +; Anything else (MOVED, UPDATED, ADDED) will need to have the VBUFF information updated and the +; current tiles marked for update +:no_move + jmp _MarkDirtySpriteTiles + +; Once all of the sprite values have been calculated, we need to scan the dirty tile list and +; collapse the sprite information down to no more than 4 vbuff references per tile. We used to +; do this on the fly in the renderer, but that required differentiating between tiles with and +; without sprites in the core rendering function. By lifting this up, we simplify the core code +; and possibly open up some optimization opportunities. +_SetTileStoreVBuffAddrs + rts ; Not implemented yet: return to the caller of _RenderSprites instead of falling through and executing the phase1 table as code + + +; Dispatch table. 
It's unintersting, so it's tucked out of the way +phase1 dw :phase1_0 + dw :phase1_1,:phase1_2,:phase1_3,:phase1_4 + dw :phase1_5,:phase1_6,:phase1_7,:phase1_8 + dw :phase1_9,:phase1_10,:phase1_11,:phase1_12 + dw :phase1_13,:phase1_14,:phase1_15,:phase1_16 +:phase1_16 ldy activeSpriteList+30 + jsr _DoPhase1 +:phase1_15 ldy activeSpriteList+28 + jsr _DoPhase1 +:phase1_14 ldy activeSpriteList+26 + jsr _DoPhase1 +:phase1_13 ldy activeSpriteList+24 + jsr _DoPhase1 +:phase1_12 ldy activeSpriteList+22 + jsr _DoPhase1 +:phase1_11 ldy activeSpriteList+20 + jsr _DoPhase1 +:phase1_10 ldy activeSpriteList+18 + jsr _DoPhase1 +:phase1_9 ldy activeSpriteList+16 + jsr _DoPhase1 +:phase1_8 ldy activeSpriteList+14 + jsr _DoPhase1 +:phase1_7 ldy activeSpriteList+12 + jsr _DoPhase1 +:phase1_6 ldy activeSpriteList+10 + jsr _DoPhase1 +:phase1_5 ldy activeSpriteList+8 + jsr _DoPhase1 +:phase1_4 ldy activeSpriteList+6 + jsr _DoPhase1 +:phase1_3 ldy activeSpriteList+4 + jsr _DoPhase1 +:phase1_2 ldy activeSpriteList+2 + jsr _DoPhase1 +:phase1_1 ldy activeSpriteList + jsr _DoPhase1 +:phase1_0 jmp _SetTileStoreVBuffAddrs + ; Utility function to calculate the difference in tile positions between a sprite's current ; position and it's previous position. This gets interesting because the number of tiles ; that a sprite covers can change based on the relative alignemen of the sprite with the @@ -134,8 +306,6 @@ _AddSprite ; Macro to make the unrolled loop more concise ; -; The macro -; ; 1. Load the tile store address from a fixed offset ; 2. Clears the sprite bit from the TS_SPRITE_FLAG location ; 3. 
Checks if the tile is dirty and marks it @@ -149,10 +319,15 @@ TSClearSprite mac lda TileStore+TS_DIRTY,y bne next - inc sta TileStore+TS_DIRTY,y - phy + + tya + ldy DirtyTileCount + sta DirtyTiles,y + iny + iny + sty DirtyTileCount next <<< @@ -161,8 +336,6 @@ next ; This is more efficient, because the work in MarkDirtySprite is independent of the ; sprite size and, by inlining the _PushDirtyTile logic, we can save a fair amount of overhead _ClearSpriteFromTileStore - tsc - sta tmp1 ; We use the stack as a counter lda _SpriteBitsNot,y ; Cache this value in a direct page location sta tmp0 ldx _Sprites+TS_COVERAGE_SIZE,y @@ -172,6 +345,8 @@ csfts_tbl dw csfts_1x1,csfts_1x2,csfts_1x3,csfts_out dw csfts_3x1,csfts_3x2,csfts_3x3,csfts_out dw csfts_out,csfts_out,csfts_out,csfts_out +csfts_out rts + csfts_3x3 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 0 TSClearSprite 2 @@ -182,7 +357,7 @@ csfts_3x3 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 2*{TS_LOOKUP_SPAN*2} TSClearSprite 2*{TS_LOOKUP_SPAN*2}+2 TSClearSprite 2*{TS_LOOKUP_SPAN*2}+4 - jmp csfts_finish + rts csfts_3x2 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 0 @@ -191,13 +366,13 @@ csfts_3x2 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 1*{TS_LOOKUP_SPAN*2}+2 TSClearSprite 2*{TS_LOOKUP_SPAN*2} TSClearSprite 2*{TS_LOOKUP_SPAN*2}+2 - jmp csfts_finish + rts csfts_3x1 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 0 TSClearSprite 1*{TS_LOOKUP_SPAN*2} TSClearSprite 2*{TS_LOOKUP_SPAN*2} - jmp csfts_finish + rts csfts_2x3 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 0 @@ -206,260 +381,33 @@ csfts_2x3 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 1*{TS_LOOKUP_SPAN*2} TSClearSprite 1*{TS_LOOKUP_SPAN*2}+2 TSClearSprite 1*{TS_LOOKUP_SPAN*2}+4 - jmp csfts_finish + rts csfts_2x2 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 0 TSClearSprite 2 TSClearSprite 1*{TS_LOOKUP_SPAN*2} TSClearSprite 1*{TS_LOOKUP_SPAN*2}+2 - jmp csfts_finish + rts csfts_2x1 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 0 TSClearSprite 1*{TS_LOOKUP_SPAN*2} - 
jmp csfts_finish + rts csfts_1x3 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 0 TSClearSprite 2 TSClearSprite 4 - jmp csfts_finish + rts csfts_1x2 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 0 TSClearSprite 2 - jmp csfts_finish + rts csfts_1x1 ldx _Sprites+TS_LOOKUP_INDEX,y TSClearSprite 0 - -; Second phase; put all the dirty tiles on the DirtyTiles list -csfts_finish - tsc - eor #$FFFF - sec - adc tmp1 ; Looks weird, but calculates (tmp1 - acc) - - tax ; This is 2 * N where N is the number of dirty tiles - ldy DirtyTileCount ; Grab a copy of the old index (for addressing) - - clc - adc DirtyTileCount ; Add the new items to the list - sta DirtyTileCount - - jmp (dtloop,x) -dtloop dw csfts_out, dtloop1, dtloop2, dtloop3 - dw dtloop4, dtloop5, dtloop6, dtloop7 - dw dtloop8, dtloop9, dtloop10, dtloop11 - -dtloop11 pla - sta DirtyTiles+20,y -dtloop10 pla - sta DirtyTiles+18,y -dtloop9 pla - sta DirtyTiles+16,y -dtloop8 pla - sta DirtyTiles+14,y -dtloop7 pla - sta DirtyTiles+12,y -dtloop6 pla - sta DirtyTiles+10,y -dtloop5 pla - sta DirtyTiles+8,y -dtloop4 pla - sta DirtyTiles+6,y -dtloop3 pla - sta DirtyTiles+4,y -dtloop2 pla - sta DirtyTiles+2,y -dtloop1 pla - sta DirtyTiles+0,y - -csfts_out rts - -; This function looks at the sprite list and renders the sprite plane data into the appropriate -; tiles in the code field. There are a few phases to this routine. The assumption is that -; any sprite that needs to be re-drawn has been marked as DIRTY or DAMAGED. -; -; A DIRTY sprite is one that has moved, so it needs to be erased/redrawn in the sprite -; buffer AND the tiles it covers marked for refresh. A DAMAGED sprite shared one or more -; tiles with a DIRTY sprite, so it needs to be redraw in the sprite buffer (but not erased!) -; and its tile do NOT need to be marked for refresh. -; -; In the first phase, we run through the list of dirty sprites and erase them from their -; OLD_VBUFF_ADDR. This clears the sprite plane buffers. 
We also iterate through the -; TILE_STORE_ADDR_X array and mark all of the tile store location that this sprite had occupied -; as dirty, as well as removing this sprite from the TS_SPRITE_FLAG bitfield. -; -; A final aspect is that any of the sprites indicated in the TS_SPRITE_FLAG are marked to be -; drawn in the next phase (since a portion of their content may have been erased if they overlap) -; -; In the second phase, the sprite is re-drawn into the sprite plane buffers and the appropriate -; Tile Store locations are marked as dirty. It is important to recognize that the sprites themselves -; can be marked dirty, and the underlying tiles in the tile store are independently marked dirty. - -phase1 dw :phase1_0 - dw :phase1_1,:phase1_2,:phase1_3,:phase1_4 - dw :phase1_5,:phase1_6,:phase1_7,:phase1_8 - dw :phase1_9,:phase1_10,:phase1_11,:phase1_12 - dw :phase1_13,:phase1_14,:phase1_15,:phase1_16 - -:phase1_16 - ldy activeSpriteList+30 - jsr _DoPhase1 -:phase1_15 - ldy activeSpriteList+28 - jsr _DoPhase1 -:phase1_14 - ldy activeSpriteList+26 - jsr _DoPhase1 -:phase1_13 - ldy activeSpriteList+24 - jsr _DoPhase1 -:phase1_12 - ldy activeSpriteList+22 - jsr _DoPhase1 -:phase1_11 - ldy activeSpriteList+20 - jsr _DoPhase1 -:phase1_10 - ldy activeSpriteList+18 - jsr _DoPhase1 -:phase1_9 - ldy activeSpriteList+16 - jsr _DoPhase1 -:phase1_8 - ldy activeSpriteList+14 - jsr _DoPhase1 -:phase1_7 - ldy activeSpriteList+12 - jsr _DoPhase1 -:phase1_6 - ldy activeSpriteList+10 - jsr _DoPhase1 -:phase1_5 - ldy activeSpriteList+8 - jsr _DoPhase1 -:phase1_4 - ldy activeSpriteList+6 - jsr _DoPhase1 -:phase1_3 - ldy activeSpriteList+4 - jsr _DoPhase1 -:phase1_2 - ldy activeSpriteList+2 - jsr _DoPhase1 -:phase1_1 - ldy activeSpriteList - jsr _DoPhase1 -:phase1_0 - jmp phase1_rtn - -; If this sprite has been MOVED or REMOVED, then clear its bit from the TS_SPRITE_FLAG in -; all of the tile store locations that it occupied on the previous frame and add those -; tile store locations to 
the dirty tile list. -_DoPhase1 - lda _Sprites+SPRITE_STATUS,y - ora ForceSpriteFlag - bit #SPRITE_STATUS_MOVED+SPRITE_STATUS_REMOVED - beq :no_clear - jsr _ClearSpriteFromTileStore -:no_clear - -; Check to see if sprite was REMOVED If so, clear the sprite slot status - - lda _Sprites+SPRITE_STATUS,y - bit #SPRITE_STATUS_REMOVED - beq :out - - lda #SPRITE_STATUS_EMPTY ; Mark as empty (zero value) - sta _Sprites+SPRITE_STATUS,y - - lda _SpriteBits,y ; Clear from the sprite bitmap - trb SpriteMap - -:out - rts - -; Second phase takes care of drawing the sprites and marking the tiles that will need to be merged -; with pixel data from the sprite plane -phase2 dw :phase2_0 - dw :phase2_1,:phase2_2,:phase2_3,:phase2_4 - dw :phase2_5,:phase2_6,:phase2_7,:phase2_8 - dw :phase2_9,:phase2_10,:phase2_11,:phase2_12 - dw :phase2_13,:phase2_14,:phase2_15,:phase2_16 - -:phase2_16 - ldy activeSpriteList+30 - jsr _DoPhase2 -:phase2_15 - ldy activeSpriteList+28 - jsr _DoPhase2 -:phase2_14 - ldy activeSpriteList+26 - jsr _DoPhase2 -:phase2_13 - ldy activeSpriteList+24 - jsr _DoPhase2 -:phase2_12 - ldy activeSpriteList+22 - jsr _DoPhase2 -:phase2_11 - ldy activeSpriteList+20 - jsr _DoPhase2 -:phase2_10 - ldy activeSpriteList+18 - jsr _DoPhase2 -:phase2_9 - ldy activeSpriteList+16 - jsr _DoPhase2 -:phase2_8 - ldy activeSpriteList+14 - jsr _DoPhase2 -:phase2_7 - ldy activeSpriteList+12 - jsr _DoPhase2 -:phase2_6 - ldy activeSpriteList+10 - jsr _DoPhase2 -:phase2_5 - ldy activeSpriteList+8 - jsr _DoPhase2 -:phase2_4 - ldy activeSpriteList+6 - jsr _DoPhase2 -:phase2_3 - ldy activeSpriteList+4 - jsr _DoPhase2 -:phase2_2 - ldy activeSpriteList+2 - jsr _DoPhase2 -:phase2_1 - ldy activeSpriteList - jsr _DoPhase2 -:phase2_0 - jmp phase2_rtn - -_DoPhase2 - lda _Sprites+SPRITE_STATUS,y - beq :out ; If phase 1 marked us as empty, do nothing - ora ForceSpriteFlag - and #SPRITE_STATUS_ADDED+SPRITE_STATUS_MOVED+SPRITE_STATUS_UPDATED - beq :out - -; Last thing to do, so go ahead and clear the flags 
- - lda #SPRITE_STATUS_OCCUPIED - sta _Sprites+SPRITE_STATUS,y - -; Mark the appropriate tiles as dirty and as occupied by a sprite so that the ApplyTiles -; subroutine will combine the sprite data with the tile data into the code field where it -; can be drawn to the screen. This routine is also responsible for setting the specific -; VBUFF address for each sprite's tile sheet position - -; jmp _MarkDirtySprite -:out rts ; Use the blttmp space to build the active sprite list. Since the sprite tiles are not drawn until later, @@ -504,71 +452,6 @@ RebuildSpriteArray stx ActiveSpriteCount rts -_RenderSprites - -; Check to see if any sprites have been added or removed. If so, then we regenerate the active -; sprite list. Since adding and removing sprites is rare, this is a worthwhile tradeoff, because -; there are several places where we want to iterate over the all of the sprites, and having a list -; and not have to constantly load and test the SPRITE_STATUS just to skip unused slots can help -; streamline the code. - - lda #DIRTY_BIT_SPRITE_ARRAY - trb DirtyBits ; clears the flag, if it was set - beq :no_rebuild - jsr RebuildSpriteArray - -:no_rebuild - -; First step is to look at the StartX and StartY values. If the screen has scrolled, then it has -; the same effect as moving all of the sprites. -; -; OPTIMIZATION NOTE: Should check that the sprite actually changes position. If the screen scrolls -; by +X, but the sprite moves by -X (so it's relative position is unchanged), then -; it does NOT need to be marked as dirty. -; -; OPTIMIZATION NOTE: At this point, a decent chunk of per-tile time is spent cupdating the sprite flgas -; for a given TileStore entry. When a sprite needs to be redrawn (such as when the -; screen scrolls), the code marks every tile the sprite was on as no longer occupied -; and then marks the occupied tiles. 
While simple, this is very redundent when the -; screen in scrolling slowly since it is very likely that the same sprite covers the -; exact same tiles. Each pair of markings requires 35 cycles, so a basic 16x16 sprite -; could save >300 cycles per frame. With 4 or 5 sprites on screen, the saving passes -; our 1% threshold for useful optimizations. -; -; Since we cache the tile location and effective sprite coverage, we need a fast -; way to compare the old and new positions and get a list of the new tiles the sprite -; occupies and old locations that it no longer covers. It's possible that just testing -; for equality would be the easiest win to know when we can skip everything. - - stz ForceSpriteFlag - lda StartX - cmp OldStartX - bne :force_update - - lda StartY - cmp OldStartY - beq :no_change - -:force_update - lda #SPRITE_STATUS_MOVED - sta ForceSpriteFlag -:no_change - -; Dispatch to the first phase of rendering the sprites. By pre-building the list, we know exactly -; how many sprite to process and they are in a contiguous array. So we on't have to keep track -; of an iterating variable - - ldx ActiveSpriteCount -; jmp (phase1,x) -phase1_rtn - -; Dispatch to the second phase of rendering the sprites. - ldx ActiveSpriteCount -; jmp (phase2,x) -phase2_rtn - - rts - ; _GetTileAt ; ; Given a relative playfield coordinate [0, ScreenWidth), [0, ScreenHeight) return the @@ -679,13 +562,14 @@ _PrecalcAllSpriteInfo ; and #$3E00 xba and #$0006 - tay - lda _Sprites+VBUFF_ADDR,x - clc - adc _stamp_step,y - sta _Sprites+SPRITE_DISP,x -; Set the + txy ; swap X/Y for this... + tax + lda _Sprites+VBUFF_ADDR,y + clc + adcl _stamp_step,x + sta _Sprites+SPRITE_DISP,y + tyx ; Set the sprite's width and height lda #4 diff --git a/src/Sprite2.s b/src/Sprite2.s index d150879..13996e9 100644 --- a/src/Sprite2.s +++ b/src/Sprite2.s @@ -38,15 +38,28 @@ VBuffOrigin equ tmp11 ; ... 
; ; For the Y-coordinate, we just use "mod 8" instead of "mod 4" +; +; When this subroutine is completed, the following values will be calculated +; +; _Sprites+TS_COVERAGE_SIZE : The number of horizontal and vertical playfield tiles covered by the sprite +; _Sprites+TS_LOOKUP_INDEX : TileStore index of the upper-left corner of the sprite +; _Sprites+TS_VBUFF_BASE : Address of the top-left corner of the sprite in the VBUFF sprite stamp memory +; mdsOut2 lda #6 ; Pick a value for a 0x0 tile sprite sta _Sprites+TS_COVERAGE_SIZE,y ; zero the list of tile store addresses rts -_MarkDirtySprite +_CalcDirtySprite lda _Sprites+IS_OFF_SCREEN,y ; Check if the sprite is visible in the playfield bne mdsOut2 +; Copy the current values into the old value slots + lda _Sprites+TS_COVERAGE_SIZE,y + sta _Sprites+OLD_TS_COVERAGE_SIZE,y + lda _Sprites+TS_LOOKUP_INDEX,y + sta _Sprites+OLD_TS_LOOKUP_INDEX,y + ; Add the first visible row of the sprite to the Y-scroll offset to find the first line in the ; code field that needs to be drawn. The range of values is 0 to 199+207 = [0, 406] @@ -77,10 +90,10 @@ _MarkDirtySprite and #$0018 sta AreaIndex - txa + txa ; Get the vertical offset in the VBUFF memory asl tax - lda :vbuff_mul,x + ldal :vbuff_mul,x ; word table lookup (index doubled by the asl above), read with long addressing sta tmp0 ; Add the horizontal position to the horizontal offset to find the first column in the @@ -91,12 +104,10 @@ _MarkDirtySprite adc StartXMod164 tax and #$FFFC - lsr -; sta ColLeft ; Even numbers from [0, 160] (80 elements) + lsr ; Even numbers from [0, 160] (80 elements) adc RowTop sta _Sprites+TS_LOOKUP_INDEX,y ; This is the index into the TileStoreLookup table - ; Calculate the final address of the sprite data in the stamp buffer. We have to move earlier ; in the buffer based on the horizontal offset and move up for each vertical offset. 
@@ -111,14 +122,7 @@ _MarkDirtySprite eor #$FFFF ; A = -X - 1 sec ; C = 1 adc _Sprites+SPRITE_DISP,y ; A = SPRITE_DISP + (-X - 1) + 1 = SPRITE_DISP - X - - sta VBuffOrigin ; this is the final (adjusted) origin for this sprite - -; Load the base address of the appropriate TS_VBUFF_? offset for this sprite index and -; store it as an indirect address. - - lda _Sprites+TS_VBUFF_BASE_ADDR,y - sta tmp0 + sta _Sprites+TS_VBUFF_BASE,y ; We know the starting corner of the TileStore. Now, we need to figure out now many tiles ; the sprite covers. This is a function of the sprite's width and height and the specific @@ -136,101 +140,26 @@ _MarkDirtySprite ; Then, when we need to erase we can just lookup the values in the TileStoreLookup table. sta _Sprites+TS_COVERAGE_SIZE,y - tax -; lda TileStoreBaseIndex -; sta _Sprites+TS_LOOKUP_INDEX,y - -; Jump to the appropriate marking routine - - jmp (:mark,x) - mdsOut rts -;_MarkDirtySprite -; -; lda #0 -; sta _Sprites+TILE_STORE_ADDR_1,y ; Clear this sprite's dirty tile list in case of an early exit -; lda _SpriteBits,y ; Cache its bit flag to mark in the tile slots -; sta SpriteBit -; lda _Sprites+IS_OFF_SCREEN,y ; Check if the sprite is visible in the playfield -; bne mdsOut -; At this point we know that we have to update the tiles that overlap the sprite's rectangle defined -; by (Top, Left), (Bottom, Right). First, calculate the row and column in the TileStore that -; encloses the top-left on-screen corner of the sprite +; NOTE: The VBuffArrayAddr lookup table is set up so that each sprite's vbuff address is stored in a +; parallel structure to the Tile Store. 
This allows us to use the same TileStoreLookup offset +; to index into the array of 16 sprite VBUFF addresses that are bound to a given tile +_MarkDirtySpriteTiles + lda VBuffArrayAddr,y ; Get the base address for the TileStore VBuff array for this sprite + sta VBuffArrayPtr ; NOTE(review): only one word is written here; the bank part of the pointer is presumably set up elsewhere -- confirm -; clc -; lda _Sprites+SPRITE_CLIP_TOP,y -; adc StartYMod208 ; Adjust for the scroll offset -; tax ; cache -; cmp #208 ; check if we went too far positive -; bcc *+5 -; sbc #208 -; lsr -; lsr ; This is the row in the Tile Store for top-left corner of the sprite -; and #$FFFE ; Store the value pre-multiplied by 2 for indexing in the :mark_R_C routines -; sta RowTop + lda _Sprites+TS_VBUFF_BASE,y ; This is the final upper-left corner for this frame + sta VBuffOrigin -; Next, calculate how many tiles are covered by the sprite. This uses the table at the top of this function, but -; the idea is that for every increment of StartX or StartY, that can shift the sprite into the next tile, up to -; a maximum of mod 4 / mod 8. So the effective width of a sprite is (((StartX + Clip_Left) mod 4) + Clip_Width) / 4 + lda _SpriteBits,y + sta SpriteBit ; cached so the unrolled marking code can OR it into each tile's TS_SPRITE_FLAG -; txa -; and #$0007 -; sta tmp0 ; save to adjust sprite origin + clc ; the marking macros add to VBuffOrigin with adc and never clear carry themselves + ldx _Sprites+TS_COVERAGE_SIZE,y + jmp (:mark,x) ; dispatch on the coverage size computed by _CalcDirtySprite -; lda _Sprites+SPRITE_CLIP_HEIGHT,y ; Nominal value between 0 and 16+7 = 23 = 10111 -; dec -; clc -; adc tmp0 -; and #$0018 -; sta AreaIndex - -; Repeat to get the same information for the columns - -; clc -; lda _Sprites+SPRITE_CLIP_LEFT,y -; adc StartXMod164 -; tax -; cmp #164 -; bcc *+5 -; sbc #164 -; lsr -; and #$FFFE ; Same pre-multiply by 2 for later -; sta ColLeft - -; txa -; and #$0003 -; sta tmp1 ; save to adjust sprite origin; - -; lda _Sprites+SPRITE_CLIP_WIDTH,y ; max width = 8 = 0x08 -; dec -; clc -; adc tmp1 -; lsr ; max value = 4 = 0x04 -; and #$0006 -; ora AreaIndex -; sta AreaIndex - -; Calculate the modified origin address for the sprite. 
We need to look at the sprite flip bits -; to determine which of the four sprite stamps is the correct one to use. Then, offset that origin -; based on the (x, y) and (startx, starty) positions. - -; lda _Sprites+SPRITE_DISP,y ; Get the sprite's base display address -; sec -; sbc tmp1 ; Subtract the horizontal within-tile displacement -; asl tmp0 -; ldx tmp0 -; sec -; sbc :vbuff_mul,x -; sta VBuffOrigin -; lda #^TileStore -; sta tmp1 - -; Dispatch to cover the tiles - -; ldx AreaIndex -; jmp (:mark,x) :mark dw :mark1x1,:mark1x2,:mark1x3,mdsOut dw :mark2x1,:mark2x2,:mark2x3,mdsOut dw :mark3x1,:mark3x2,:mark3x3,mdsOut @@ -238,509 +167,110 @@ mdsOut rts :vbuff_mul dw 0,52,104,156,208,260,312,364 -; Dispatch to the calculated sizing - -; Begin a list of subroutines to cover all of the valid sprite size combinations. This is all unrolled code, -; mainly to be able to do an unrolled fill of the TILE_STORE_ADDR_X values. Thus, it's important that the clipping -; function does its job properly since it allows us to save a lot of time here. +; Pair of macros to make the unrolled loop more concise ; -; These functions are a trade off of being composable versus fast. Having to pay for multiple JSR/RTS invocations -; in the hot sprite path isn't great, but we're at a point of diminishing returns. +; 1. Load the tile store address from a fixed offset +; 2. Set the sprite bit from the TS_SPRITE_FLAG location +; 3. Checks if the tile is dirty and marks it +; 4. If the tile was dirty, save the tile store address to be added to the DirtyTiles list later +; 5. Sets the VBUFF address for the current sprite slot ; -; There *might* be some speed gained by pushing a list of :mark_R_C addressed onto the stack in the clipping routing -; and dispatching that way, but probably not... +; The second macro is the same as the first, but the VBUFF calculation is moved up so that the value +; from the previous step can be reused and save a load every other step. 
+TSSetSprite mac + ldy TileStoreLookup+]1,x -:mark1x1_v2 - - tax ; Get the TileStoreBaseIndex - - ldy TileStoreLookup,x ; Get the offset into the TileStore for this tile - - lda SpriteBit ; Mark this tile as having this sprite + lda SpriteBit ora TileStore+TS_SPRITE_FLAG,y sta TileStore+TS_SPRITE_FLAG,y lda VBuffOrigin - sta (tmp0),y ; Fill in the slot for this sprite on this tile + adc ]2 ; No clc here: relies on the carry cleared in _MarkDirtySpriteTiles staying clear + sta [VBuffArrayPtr],y ; Store through the per-sprite array pointer loaded in _MarkDirtySpriteTiles (tmp0 is no longer set up for this). NOTE(review): assumes VBuffArrayPtr+2 holds the bank -- confirm - lda TileStore+TS_DIRTY,y ; If this tile is not yet marked dirty, mark it - bne exit1x1 + lda TileStore+TS_DIRTY,y + bne next - ldx DirtyTileCount - tya - sta DirtyTiles,x + inc sta TileStore+TS_DIRTY,y - inx - inx - stx DirtyTileCount - -exit1x1 - rts - -:mark2x2_v2 - -; Put the TileStoreBaseIndex into the X-register - - tax - -; Push a sentinel value of the stack that we use to inline all of the dirty tile array updates faster -; and the end of this routine. - - pea #$0000 - -; Now, move through each of the TileStore locations and set the necessary fields. We have to do the -; following -; -; 1. Set the marker bit in the TS_SPRITE_FLAG so the renderer knows which vbuff addresses to load -; 2. Set the address of the sprite stamp graphics that are used. This can change every frame. -; 3. Mark the tile as dirty and put it on the list if it was marked dirty for the first time. 
- - ldy TileStoreLookup,x ; Get the offset into the TileStore for this tile - - lda SpriteBit ; Mark this tile as having this sprite - ora TileStore+TS_SPRITE_FLAG,y - sta TileStore+TS_SPRITE_FLAG,y - - lda TileStore+TS_DIRTY,y ; If this tile is not yet marked dirty, queue it up - bne *+3 - phy - - lda VBuffOrigin - sta (tmp0),y ; Fill in the slot for this sprite on this tile - -; Move to the next tile - - ldy TileStoreLookup+2,x - - adc #4 ; Weave in the VBuffOrigin values to save a load every - sta (tmp0),y ; other iteration - - lda SpriteBit - ora TileStore+TS_SPRITE_FLAG,y - sta TileStore+TS_SPRITE_FLAG,y - - lda TileStore+TS_DIRTY,y - bne *+3 - phy - -; Third tile - - ldy TileStoreLookup+TS_LOOKUP_SPAN,x - - lda SpriteBit - ora TileStore+TS_SPRITE_FLAG,y - sta TileStore+TS_SPRITE_FLAG,y - - lda TileStore+TS_DIRTY,y - bne *+3 - phy - - lda VBuffOrigin - adc #SPRITE_PLANE_SPAN - sta (tmp0),y - -; Fourth tile - - ldy TileStoreLookup+TS_LOOKUP_SPAN+2,x - - adc #4+SPRITE_PLANE_SPAN - sta (tmp0),y - - lda SpriteBit - ora TileStore+TS_SPRITE_FLAG,y - sta TileStore+TS_SPRITE_FLAG,y - -; Lift this above the last TS_DIRTY check - - ldx DirtyTileCount - -; Check the TS_DIRTY flag for this tile. We handle it immediately, if needed - - lda TileStore+TS_DIRTY,y - bne skip - -; Now, update the Dirty Tile array tya - sta DirtyTiles,x - sta TileStore+TS_DIRTY,y - -skip - pla - beq :done1 - sta DirtyTiles+2,x - tay - sta TileStore+TS_DIRTY,y - - pla - beq :done2 - sta DirtyTiles+4,x - tay - sta TileStore+TS_DIRTY,y - - pla - beq :done3 - sta DirtyTiles+6,x - tay - sta TileStore+TS_DIRTY,y - -; Maximum number of dirty tiles reached. Just fall through. 
- - pla - txa - adc #8 - sta DirtyTileCount - rts -:done3 - txa - adc #6 - sta DirtyTileCount - rts -:done2 - txa - adc #4 - sta DirtyTileCount - rts -:done1 - inx - inx - stx DirtyTileCount - - rts + ldy DirtyTileCount + sta DirtyTiles,y + iny + iny + sty DirtyTileCount +next + <<< :mark1x1 - jsr :mark_0_0 -; sta _Sprites+TILE_STORE_ADDR_1,y -; lda #0 -; sta _Sprites+TILE_STORE_ADDR_2,y + ldx _Sprites+TS_LOOKUP_INDEX,y + TSSetSprite 0*{TS_LOOKUP_SPAN*2};#0 rts -; NOTE: If we rework the _PushDirtyTile to use the Y register instead of the X register, we can -; optimize all of these :mark routines as -; -; :mark1x1 -; jsr :mark_0_0 -; sty _Sprites+TILE_STORE_ADDR_1,x -; stz _Sprites+TILE_STORE_ADDR_2,y -; rts - :mark1x2 - jsr :mark_0_0 -; sta _Sprites+TILE_STORE_ADDR_1,y - jsr :mark_0_1 -; sta _Sprites+TILE_STORE_ADDR_2,y -; lda #0 -; sta _Sprites+TILE_STORE_ADDR_3,y + ldx _Sprites+TS_LOOKUP_INDEX,y + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} rts :mark1x3 - jsr :mark_0_0 -; sta _Sprites+TILE_STORE_ADDR_1,y - jsr :mark_0_1 -; sta _Sprites+TILE_STORE_ADDR_2,y - jsr :mark_0_2 -; sta _Sprites+TILE_STORE_ADDR_3,y -; lda #0 -; sta _Sprites+TILE_STORE_ADDR_4,y + ldx _Sprites+TS_LOOKUP_INDEX,y + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4;#{0*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} rts :mark2x1 - jsr :mark_0_0 -; sta _Sprites+TILE_STORE_ADDR_1,y - jsr :mark_1_0 -; sta _Sprites+TILE_STORE_ADDR_2,y -; lda #0 -; sta _Sprites+TILE_STORE_ADDR_3,y + ldx _Sprites+TS_LOOKUP_INDEX,y + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} rts :mark2x2 - jsr :mark_0_0 
-; sta _Sprites+TILE_STORE_ADDR_1,y - jsr :mark_0_1 -; sta _Sprites+TILE_STORE_ADDR_2,y - jsr :mark_1_0 -; sta _Sprites+TILE_STORE_ADDR_3,y - jsr :mark_1_1 -; sta _Sprites+TILE_STORE_ADDR_4,y -; lda #0 -; sta _Sprites+TILE_STORE_ADDR_5,y + ldx _Sprites+TS_LOOKUP_INDEX,y + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2;#{1*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} rts :mark2x3 - jsr :mark_0_0 -; sta _Sprites+TILE_STORE_ADDR_1,y - jsr :mark_0_1 -; sta _Sprites+TILE_STORE_ADDR_2,y - jsr :mark_0_2 -; sta _Sprites+TILE_STORE_ADDR_3,y - jsr :mark_1_0 -; sta _Sprites+TILE_STORE_ADDR_4,y - jsr :mark_1_1 -; sta _Sprites+TILE_STORE_ADDR_5,y - jsr :mark_1_2 -; sta _Sprites+TILE_STORE_ADDR_6,y -; lda #0 -; sta _Sprites+TILE_STORE_ADDR_7,y + ldx _Sprites+TS_LOOKUP_INDEX,y + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4;#{0*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2;#{1*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+4;#{1*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} rts :mark3x1 - jsr :mark_0_0 -; sta _Sprites+TILE_STORE_ADDR_1,y - jsr :mark_1_0 -; sta _Sprites+TILE_STORE_ADDR_2,y - jsr :mark_2_0 -; sta _Sprites+TILE_STORE_ADDR_3,y -; lda #0 -; sta _Sprites+TILE_STORE_ADDR_4,y + ldx _Sprites+TS_LOOKUP_INDEX,y + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 
2*{TS_LOOKUP_SPAN*2}+0;#{2*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} rts :mark3x2 - jsr :mark_0_0 -; sta _Sprites+TILE_STORE_ADDR_1,y - jsr :mark_1_0 -; sta _Sprites+TILE_STORE_ADDR_2,y - jsr :mark_2_0 -; sta _Sprites+TILE_STORE_ADDR_3,y - jsr :mark_0_1 -; sta _Sprites+TILE_STORE_ADDR_4,y - jsr :mark_1_1 -; sta _Sprites+TILE_STORE_ADDR_5,y - jsr :mark_2_1 -; sta _Sprites+TILE_STORE_ADDR_6,y -; lda #0 -; sta _Sprites+TILE_STORE_ADDR_7,y + ldx _Sprites+TS_LOOKUP_INDEX,y + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2;#{1*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+0;#{2*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+2;#{2*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} rts :mark3x3 - jsr :mark_0_0 -; sta _Sprites+TILE_STORE_ADDR_1,y - jsr :mark_1_0 -; sta _Sprites+TILE_STORE_ADDR_2,y - jsr :mark_2_0 -; sta _Sprites+TILE_STORE_ADDR_3,y - jsr :mark_0_1 -; sta _Sprites+TILE_STORE_ADDR_4,y - jsr :mark_1_1 -; sta _Sprites+TILE_STORE_ADDR_5,y - jsr :mark_2_1 -; sta _Sprites+TILE_STORE_ADDR_6,y - jsr :mark_0_2 -; sta _Sprites+TILE_STORE_ADDR_7,y - jsr :mark_1_2 -; sta _Sprites+TILE_STORE_ADDR_8,y - jsr :mark_2_2 -; sta _Sprites+TILE_STORE_ADDR_9,y -; lda #0 -; sta _Sprites+TILE_STORE_ADDR_10,y + ldx _Sprites+TS_LOOKUP_INDEX,y + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4;#{0*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 
1*{TS_LOOKUP_SPAN*2}+2;#{1*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+4;#{1*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+0;#{2*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+2;#{2*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+4;#{2*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} rts - -; Begin List of subroutines to mark each tile offset -:mark_0_0 - ldx RowTop - lda ColLeft - clc - adc TileStoreYTable,x ; Fixed offset to the next row - tax - - ldal TileStore+TS_VBUFF_ARRAY_ADDR,x - sta tmp0 - - lda VBuffOrigin - sta [tmp0],y - -; lda VBuffOrigin ; This is an interesting case. The mapping between the tile store -; adc #{0*4}+{0*256} ; and the sprite buffers changes as the StartX, StartY values change -; stal TileStore+TS_SPRITE_ADDR,x ; but don't depend on any sprite information. However, by setting the - ; value only for the tiles that get added to the dirty tile list, we - ; can avoid recalculating over 1,000 values whenever the screen scrolls - ; (which is common) and just limit it to the number of tiles covered by - ; the sprites. If the screen is not scrolling and the sprites are not - ; moving and they are being dirtied, then we may do more work, but the - ; odds are in our favor to just take care of it here. - - ; lda TileStore+TS_SPRITE_FLAG,x - lda SpriteBit - oral TileStore+TS_SPRITE_FLAG,x - stal TileStore+TS_SPRITE_FLAG,x - - jmp _PushDirtyTileX ; Needs X = tile store offset; destroys A,X. 
Returns X in A - -:mark_1_0 - lda ColLeft - ldx RowTop - clc - adc TileStoreYTable+2,x - tax - - ldal TileStore+TS_VBUFF_ARRAY_ADDR,x - sta tmp0 - - lda VBuffOrigin - adc #{0*4}+{1*8*SPRITE_PLANE_SPAN} - sta [tmp0],y - - lda SpriteBit - oral TileStore+TS_SPRITE_FLAG,x - stal TileStore+TS_SPRITE_FLAG,x - - jmp _PushDirtyTileX - -:mark_2_0 - lda ColLeft - ldx RowTop - clc - adc TileStoreYTable+4,x - tax - - ldal TileStore+TS_VBUFF_ARRAY_ADDR,x - sta tmp0 - - lda VBuffOrigin - adc #{0*4}+{2*8*SPRITE_PLANE_SPAN} - sta [tmp0],y - - lda SpriteBit - oral TileStore+TS_SPRITE_FLAG,x - stal TileStore+TS_SPRITE_FLAG,x - - jmp _PushDirtyTileX - -:mark_0_1 - ldx ColLeft - lda NextCol+2,x - ldx RowTop - clc - adc TileStoreYTable,x - tax - - ldal TileStore+TS_VBUFF_ARRAY_ADDR,x - sta tmp0 - - lda VBuffOrigin - adc #{1*4}+{0*8*SPRITE_PLANE_SPAN} - sta [tmp0],y - - lda SpriteBit - oral TileStore+TS_SPRITE_FLAG,x - stal TileStore+TS_SPRITE_FLAG,x - - jmp _PushDirtyTileX - -:mark_1_1 - ldx ColLeft - lda NextCol+2,x - ldx RowTop - clc - adc TileStoreYTable+2,x - tax - - ldal TileStore+TS_VBUFF_ARRAY_ADDR,x - sta tmp0 - - lda VBuffOrigin - adc #{1*4}+{1*8*SPRITE_PLANE_SPAN} - sta [tmp0],y - - lda SpriteBit - oral TileStore+TS_SPRITE_FLAG,x - stal TileStore+TS_SPRITE_FLAG,x - - jmp _PushDirtyTileX - -:mark_2_1 - ldx ColLeft - lda NextCol+2,x - ldx RowTop - clc - adc TileStoreYTable+4,x - tax - - ldal TileStore+TS_VBUFF_ARRAY_ADDR,x - sta tmp0 - - lda VBuffOrigin - adc #{1*4}+{2*8*SPRITE_PLANE_SPAN} - sta [tmp0],y - - lda SpriteBit - oral TileStore+TS_SPRITE_FLAG,x - stal TileStore+TS_SPRITE_FLAG,x - - jmp _PushDirtyTileX - -:mark_0_2 - ldx ColLeft - lda NextCol+4,x - ldx RowTop - clc - adc TileStoreYTable,x - tax - - ldal TileStore+TS_VBUFF_ARRAY_ADDR,x - sta tmp0 - - lda VBuffOrigin - adc #{2*4}+{0*8*SPRITE_PLANE_SPAN} - sta [tmp0],y - - lda SpriteBit - oral TileStore+TS_SPRITE_FLAG,x - stal TileStore+TS_SPRITE_FLAG,x - - jmp _PushDirtyTileX - -:mark_1_2 - ldx ColLeft - lda NextCol+4,x 
- ldx RowTop - clc - adc TileStoreYTable+2,x - tax - - ldal TileStore+TS_VBUFF_ARRAY_ADDR,x - sta tmp0 - - lda VBuffOrigin - adc #{2*4}+{1*8*SPRITE_PLANE_SPAN} - sta [tmp0],y - - lda SpriteBit - oral TileStore+TS_SPRITE_FLAG,x - stal TileStore+TS_SPRITE_FLAG,x - - jmp _PushDirtyTileX - -:mark_2_2 - ldx ColLeft - lda NextCol+4,x - ldx RowTop - clc - adc TileStoreYTable+4,x - tax - - ldal TileStore+TS_VBUFF_ARRAY_ADDR,x - sta tmp0 - - lda VBuffOrigin - adc #{2*4}+{2*8*SPRITE_PLANE_SPAN} - sta [tmp0],y - - lda SpriteBit - oral TileStore+TS_SPRITE_FLAG,x - stal TileStore+TS_SPRITE_FLAG,x - - jmp _PushDirtyTileX - -; End list of subroutines to mark dirty tiles - -; Range-check and clamp the vertical part of the sprite. When this routine returns we will have valid -; values for the tile-top and row-top. Also, the accumulator will return the number of rows to render, -; a value of zero means that all of the sprite's rows are off-screen. -; -; This subroutine takes are of calculating the extra tile for unaligned accesses, too. -;_SpriteHeight dw 8,8,16,16 -;_SpriteHeightMinus1 dw 7,7,15,15 -;_SpriteRows dw 1,1,2,2 -;_SpriteWidth dw 4,8,4,8 -;_SpriteWidthMinus1 dw 3,7,3,7 -;_SpriteCols dw 1,2,1,2 diff --git a/src/Tool.s b/src/Tool.s index 711b6a8..b3be07a 100644 --- a/src/Tool.s +++ b/src/Tool.s @@ -309,6 +309,7 @@ _TSUpdateSprite put Graphics.s put Tiles.s put Sprite.s + put Sprite2.s put SpriteRender.s put Render.s put tiles/DirtyTileQueue.s diff --git a/src/static/TileStore.s b/src/static/TileStore.s index ec7ff57..b586ed6 100644 --- a/src/static/TileStore.s +++ b/src/static/TileStore.s @@ -368,11 +368,15 @@ DefaultPalette ENT ; 8. Game Boy Color : 20 x 18 160 x 144 (11,520 bytes ( 36.0%)) ; 9. Agony (Amiga) : 36 x 24 288 x 192 (27,648 bytes ( 86.4%)) ; 10. 
Atari Lynx : 20 x 13 160 x 102 (8,160 bytes ( 25.5%)) -ScreenModeWidth ENT +ScreenModeWidth ENT dw 320,272,256,256,280,256,240,288,160,288,160,320 -ScreenModeHeight ENT +ScreenModeHeight ENT dw 200,192,200,176,160,160,160,128,144,192,102,1 +; List of addresses of the VBuff arrays for each Tile Store entry, indexed by sprite index +VBuffArrayAddr ENT + ds MAX_SPRITES*2 + ; Convert sprite index to a bit position _SpriteBits ENT dw $0001,$0002,$0004,$0008,$0010,$0020,$0040,$0080,$0100,$0200,$0400,$0800,$1000,$2000,$4000,$8000 diff --git a/src/static/TileStoreDefs.s b/src/static/TileStoreDefs.s index debdd63..b855619 100644 --- a/src/static/TileStoreDefs.s +++ b/src/static/TileStoreDefs.s @@ -45,18 +45,34 @@ SPRITE_STATUS_MOVED equ $0002 ; Sprite's position was changed SPRITE_STATUS_UPDATED equ $0004 ; Sprite's non-position attributes were changed SPRITE_STATUS_REMOVED equ $0008 ; Sprite has been removed. -SPRITE_STATUS equ {MAX_SPRITES*0} -; TILE_DATA_OFFSET equ {MAX_SPRITES*2} -VBUFF_ADDR equ {MAX_SPRITES*4} ; Base address of the sprite's stamp in the data/mask banks -SPRITE_ID equ {MAX_SPRITES*6} -SPRITE_X equ {MAX_SPRITES*8} -SPRITE_Y equ {MAX_SPRITES*10} -; TILE_STORE_ADDR_1 equ {MAX_SPRITES*12} -TS_LOOKUP_INDEX equ {MAX_SPRITES*12} ; The index into the TileStoreLookup table corresponding to the top-left corner of the sprite -; TILE_STORE_ADDR_2 equ {MAX_SPRITES*14} -TS_COVERAGE_SIZE equ {MAX_SPRITES*14} ; Index into the lookup table of how many TileStore tiles are covered by this sprite +; These values are set by the user +SPRITE_STATUS equ {MAX_SPRITES*0} +SPRITE_ID equ {MAX_SPRITES*2} +SPRITE_X equ {MAX_SPRITES*4} +SPRITE_Y equ {MAX_SPRITES*6} + +; These values are cached / calculated during the rendering process +VBUFF_ADDR equ {MAX_SPRITES*8} ; Base address of the sprite's stamp in the data/mask banks +TS_LOOKUP_INDEX equ {MAX_SPRITES*10} ; The index from the TileStoreLookup table that corresponds to the top-left corner of the sprite +TS_COVERAGE_SIZE equ 
{MAX_SPRITES*12} ; Representation of how many TileStore tiles (NxM) are covered by this sprite +OLD_TS_LOOKUP_INDEX equ {MAX_SPRITES*14} ; Copy of the values to support diffing +OLD_TS_COVERAGE_SIZE equ {MAX_SPRITES*16} +SPRITE_DISP equ {MAX_SPRITES*18} ; Cached address of the specific stamp based on sprite flags +SPRITE_CLIP_LEFT equ {MAX_SPRITES*20} +SPRITE_CLIP_RIGHT equ {MAX_SPRITES*22} +SPRITE_CLIP_TOP equ {MAX_SPRITES*24} +SPRITE_CLIP_BOTTOM equ {MAX_SPRITES*26} +IS_OFF_SCREEN equ {MAX_SPRITES*28} +SPRITE_WIDTH equ {MAX_SPRITES*30} +SPRITE_HEIGHT equ {MAX_SPRITES*32} +SPRITE_CLIP_WIDTH equ {MAX_SPRITES*34} +SPRITE_CLIP_HEIGHT equ {MAX_SPRITES*36} +TS_VBUFF_BASE equ {MAX_SPRITES*38} ; Finalized VBUFF address based on the sprite position and tile offsets +;TILE_DATA_OFFSET equ {MAX_SPRITES*2} +;TILE_STORE_ADDR_1 equ {MAX_SPRITES*12} +;TILE_STORE_ADDR_2 equ {MAX_SPRITES*14} ;TILE_STORE_ADDR_3 equ {MAX_SPRITES*16} -TS_VBUFF_BASE_ADDR equ {MAX_SPRITES*16} ; Fixed address of the TS_VBUFF_X memory locations +;TS_VBUFF_BASE_ADDR equ {MAX_SPRITES*16} ; Fixed address of the TS_VBUFF_X memory locations ;TILE_STORE_ADDR_4 equ {MAX_SPRITES*18} ;TILE_STORE_ADDR_5 equ {MAX_SPRITES*20} ;TILE_STORE_ADDR_6 equ {MAX_SPRITES*22} @@ -64,16 +80,6 @@ TS_VBUFF_BASE_ADDR equ {MAX_SPRITES*16} ; Fixed address of the TS_VBUFF_X ;TILE_STORE_ADDR_8 equ {MAX_SPRITES*26} ;TILE_STORE_ADDR_9 equ {MAX_SPRITES*28} ;TILE_STORE_ADDR_10 equ {MAX_SPRITES*30} -SPRITE_DISP equ {MAX_SPRITES*32} ; cached address of the specific stamp based on flags -SPRITE_CLIP_LEFT equ {MAX_SPRITES*34} -SPRITE_CLIP_RIGHT equ {MAX_SPRITES*36} -SPRITE_CLIP_TOP equ {MAX_SPRITES*38} -SPRITE_CLIP_BOTTOM equ {MAX_SPRITES*40} -IS_OFF_SCREEN equ {MAX_SPRITES*42} -SPRITE_WIDTH equ {MAX_SPRITES*44} -SPRITE_HEIGHT equ {MAX_SPRITES*46} -SPRITE_CLIP_WIDTH equ {MAX_SPRITES*48} -SPRITE_CLIP_HEIGHT equ {MAX_SPRITES*50} ; 50 rows by 80 columns + 2 extra rows and columns TS_LOOKUP_WIDTH equ 80