From 78d7dafe140196195ba1fe8ca41c54f7b64388d4 Mon Sep 17 00:00:00 2001 From: Lucas Scharenbroich Date: Tue, 31 May 2022 08:43:26 -0500 Subject: [PATCH] Another checkpoint; converging on the working implementation --- package.json | 1 + src/CoreImpl.s | 2 +- src/Defs.s | 8 +- src/Render.s | 33 +++--- src/Sprite.s | 49 ++++---- src/Sprite2.s | 149 ++++++++++++++++++++----- src/Tiles.s | 216 ++++++++++++++++++++++++++++++++++-- src/Tool.s | 41 ++++++- src/blitter/TemplateUtils.s | 2 +- src/blitter/TileProcs.s | 12 ++ src/blitter/Tiles.s | 88 +-------------- src/blitter/Tiles10000.s | 1 - src/static/TileStore.s | 21 ++-- src/static/TileStoreDefs.s | 12 +- src/tiles/FastRenderer.s | 159 +++++++++++++++----------- 15 files changed, 535 insertions(+), 259 deletions(-) diff --git a/package.json b/package.json index c79ebe3..d5c1d91 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "crossrunner": "C:\\Programs\\Crossrunner\\Crossrunner.exe" }, "scripts": { + "archive": "%npm_package_config_cadius% EXTRACTFILE ", "test": "npm run build && build-image.bat %npm_package_config_cadius% && %npm_package_config_gsport%", "build": "%npm_package_config_merlin32% -V %npm_package_config_macros% ./src/Master.s", "build:debug": "%npm_package_config_merlin32% -V %npm_package_config_macros% ./src/Debug.s", diff --git a/src/CoreImpl.s b/src/CoreImpl.s index a8696a7..ee31bea 100644 --- a/src/CoreImpl.s +++ b/src/CoreImpl.s @@ -93,7 +93,7 @@ _CoreStartUp jsr EngineReset ; All of the resources are allocated, put the engine in a known state jsr InitGraphics ; Initialize all of the graphics-related data -; jsr InitSprites ; Initialize the sprite subsystem + jsr InitSprites ; Initialize the sprite subsystem jsr InitTiles ; Initialize the tile subsystem jsr InitTimers ; Initialize the timer subsystem diff --git a/src/Defs.s b/src/Defs.s index bd81382..a74298d 100644 --- a/src/Defs.s +++ b/src/Defs.s @@ -96,7 +96,7 @@ LastKey equ 116 LastTick equ 118 ForceSpriteFlag equ 120 
-VBuffArrayPtr equ 122 +;VBuffArrayPtr equ 122 SpriteRemovedFlag equ 126 ; Indicate if any sprites were removed this frame activeSpriteList equ 128 ; 32 bytes for the active sprite list (can persist across frames) @@ -147,8 +147,10 @@ _TILE_ID equ 158 ; Copy of the tile descriptor ; Define free space the the application to use ; FREE_SPACE_DP2 equ 160 -DP2_DIRTY_TILE_COUNT equ 160 ; Local copy of dirty tile count to avoid banking +DP2_DIRTY_TILE_COUNT equ 160 ; Local copy of dirty tile count to avoid banking DP2_DIRTY_TILE_CALLBACK equ 162 + +SPRITE_VBUFF_PTR equ 224 ; 32 bytes of adjusted pointers to VBuffArray addresses ; End direct page values ; EngineMode definitions @@ -242,4 +244,4 @@ ScreenModeWidth EXT ScreenModeHeight EXT _SpriteBits EXT _SpriteBitsNot EXT -VBuffArrayAddr EXT +VBuffArray EXT diff --git a/src/Render.s b/src/Render.s index 46d2437..7a7bba6 100644 --- a/src/Render.s +++ b/src/Render.s @@ -102,22 +102,22 @@ _Render ; The _ApplyTilesFast is the same as _ApplyTiles, but we use the _RenderTileFast subroutine _ApplyTilesFast + ldx DirtyTileCount + tdc clc adc #$100 ; move to the next page tcd - lda DirtyTileCount ; Cache the dirty tile count - sta DP2_DIRTY_TILE_COUNT - + stx DP2_DIRTY_TILE_COUNT ; Cache the dirty tile count jsr _PopDirtyTilesFast - stz DirtyTileCount - tdc ; Move back to the original direct page sec sbc #$100 tcd + + stz DirtyTileCount ; Reset the dirty tile count rts ; The _ApplyTiles function is responsible for rendering all of the dirty tiles into the code @@ -198,17 +198,10 @@ _ApplyDirtyTiles ; Only render solid tiles and sprites _RenderDirtyTile - ldx TileStore+TS_VBUFF_ADDR_COUNT,y ; How many sprites are on this tile? 
+ lda TileStore+TS_SPRITE_FLAG,y beq NoSpritesDirty ; This is faster if there are no sprites - lda TileStore+TS_TILE_ID,y ; Check if the tile has - jmp (dirty_dispatch,x) -dirty_dispatch - da NoSpritesDirty - da OneSpriteDirty - da TwoSpritesDirty - da ThreeSpritesDirty - da FourSpritesDirty +; TODO: handle sprite drawing ; The rest of this function handles that non-sprite blit, which is super fast since it blits directly from the ; tile data store to the graphics screen with no masking. The only extra work is selecting a blit function @@ -218,12 +211,12 @@ dirty_dispatch ; Y is set to the top-left address of the tile in SHR screen ; A is set to the address of the tile data NoSpritesDirty - tyx - ldy TileStore+TS_SCREEN_ADDR,x ; Get the on-screen address of this tile - lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated) + lda TileStore+TS_DIRTY_TILE_DISP,y + stal :nsd+1 + ldx TileStore+TS_SCREEN_ADDR,y ; Get the on-screen address of this tile + lda TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated) plb ; set the code field bank - jmp (TileStore+TS_DIRTY_TILE_DISP,x) ; go to the tile copy routine (just basics) - +:nsd jmp $0000 ; Use some temporary space for the spriteIdx array (maximum of 4 entries) stkSave equ tmp9 @@ -240,7 +233,7 @@ ThreeSpritesDirty TwoSpritesDirty sta tileAddr - sty screenAddr + stx screenAddr plb tsc diff --git a/src/Sprite.s b/src/Sprite.s index aea622b..e8d685d 100644 --- a/src/Sprite.s +++ b/src/Sprite.s @@ -19,19 +19,32 @@ InitSprites cpx #$FFFE bne :loop2 -; Clear values in the sprite array +; Set the VBuff array addresses for each sprite, since they're static -; ldx #{MAX_SPRITES-1}*2 -;:loop3 stz _Sprites+TILE_STORE_ADDR_1,x -; dex -; dex -; bpl :loop3 + ldx #0 + lda #VBuffArray +:loop3 sta _Sprites+VBUFF_ARRAY_ADDR,x + clc + adc #4*2 ; skip ahead 4 tiles + inx + inx + cpx #8*2 + bcc :loop3 + +; Now do the second set of sprites + lda #VBuffArray+{3*{TILE_STORE_WIDTH*2}} 
+:loop4 sta _Sprites+VBUFF_ARRAY_ADDR,x + clc + adc #4*2 ; skip ahead 4 tiles + inx + inx + cpx #16*2 ; X continues from 8*2 after :loop3, so run until all 16 sprite slots are filled + bcc :loop4 ; Precalculate some bank values jsr _CacheSpriteBanks rts - ; _RenderSprites ; ; The function is responsible for updating all of the rendering information based on any changes @@ -59,7 +72,7 @@ InitSprites ; a. If it is not marked in the DirtyTile list ; * Clear its bit from the TileStore's TS_SPRITE_FLAG ; * Add the tile to the DirtyTile list -; +; ; 2. If a sprite is marked as SPRITE_STATUS_REMOVED, then ; A. Clear its bit from the SpriteBits bitmap ; B. For each tile the sprite overlaps with: @@ -134,6 +147,8 @@ _DoPhase1 lda _SpriteBits,y ; Clear from the sprite bitmap sta SpriteRemovedFlag ; Stick a non-zero value here trb SpriteMap + lda #SPRITE_STATUS_EMPTY ; Mark as empty so no error if we try to Add a sprite here again + sta _Sprites+SPRITE_STATUS,y jmp _ClearSpriteFromTileStore ; Clear the tile flags, add to the dirty tile list and done @@ -154,15 +169,6 @@ _DoPhase1 :no_move jmp _MarkDirtySpriteTiles -; Once all of the sprite values have been calculated, we need to scan the dirty tile list and -; collapse the sprite information down to no more than 4 vbuff references per tile. We used to -; do this on the fly in the renderer, but that required differentiating between tile with and -; without sprites in the core rendering function. My lifting this up, we simplify the core code -; and possible open up some optimization opportunities. -_SetTileStoreVBuffAddrs - - - ; Dispatch table. It's unintersting, so it's tucked out of the way phase1 dw :phase1_0 dw :phase1_1,:phase1_2,:phase1_3,:phase1_4 @@ -200,8 +206,8 @@ phase1 dw :phase1_0 :phase1_2 ldy activeSpriteList+2 jsr _DoPhase1 :phase1_1 ldy activeSpriteList - jsr _DoPhase1 -:phase1_0 jmp _SetTileStoreVBuffAddrs + jmp _DoPhase1 +:phase1_0 rts ; Utility function to calculate the difference in tile positions between a sprite's current ; position and it's previous position. 
This gets interesting because the number of tiles @@ -299,9 +305,6 @@ _AddSprite lda _SpriteBits,x ; Get the bit flag for this sprite slot tsb SpriteMap ; Mark it in the sprite map bit field -; txa ; And return the sprite ID -; clc ; Mark that the sprite was successfully added - rts ; Macro to make the unrolled loop more concise @@ -314,7 +317,7 @@ TSClearSprite mac ldy TileStoreLookup+]1,x lda TileStore+TS_SPRITE_FLAG,y - and tmp0 + and tmp0 sta TileStore+TS_SPRITE_FLAG,y lda TileStore+TS_DIRTY,y diff --git a/src/Sprite2.s b/src/Sprite2.s index 13996e9..4776a6f 100644 --- a/src/Sprite2.s +++ b/src/Sprite2.s @@ -66,14 +66,11 @@ _CalcDirtySprite clc lda _Sprites+SPRITE_CLIP_TOP,y adc StartYMod208 ; Adjust for the scroll offset - pha ; Cache + tax ; Cache and #$FFF8 ; mask first to ensure LSR will clear the carry lsr lsr - tax - lda TileStoreLookupYTable,x ; Even numbers from [0, 100] (50 elements) - sta RowTop - pla + sta RowTop ; Even numbers from [0, 100] (50 elements) ; Get the position of the top edge within the tile and then add it to the sprite's height ; to calculate the number of tiles that are overlapped. We use the actual width and height @@ -90,7 +87,7 @@ _CalcDirtySprite and #$0018 sta AreaIndex - txa ; Get the verical offset in the VBUFF memory + txa ; Get the vertical offset in the VBUFF memory asl tax ldal :vbuff_mul,x @@ -108,13 +105,19 @@ _CalcDirtySprite adc RowTop sta _Sprites+TS_LOOKUP_INDEX,y ; This is the index into the TileStoreLookup table +; Create an offset value for loading the calculated VBUFF addresses within the core renderer + + eor #$FFFF + sec + adc _Sprites+VBUFF_ARRAY_ADDR,y + sta tmp1 ; Spill this value to direct page temp space + ; Calculate the final address of the sprite data in the stamp buffer. We have to move earlier ; in the buffer based on the horizontal offset and move up for each vertical offset. 
txa and #$0003 tax - adc tmp0 ; add to the vertical offset ; Subtract this value from the SPRITE_DISP address @@ -134,12 +137,17 @@ _CalcDirtySprite and #$000C lsr ; max value = 4 = 0x04 ora AreaIndex ; merge into the area index + sta _Sprites+TS_COVERAGE_SIZE,y ; Save this value as a key to the coverage size of the sprite -; No need to copy the TileStore addresses into the Sprite's TILE_STORE_ADDR values. Just -; hold a copy of the corner offset into the lookup table and the sprite's size in tiles. -; Then, when we need to erase we can just lookup the values in the TileStoreLookup table. +; Last task. Since we don't need to use the X-register to cache values; load the direct page 2 +; offset for the SPRITE_VBUFF_PTR and save it + + tya + ora #$100 + tax + lda tmp1 + sta SPRITE_VBUFF_PTR,x - sta _Sprites+TS_COVERAGE_SIZE,y mdsOut rts @@ -147,12 +155,6 @@ mdsOut rts ; parallel structure to the Tile Store. This allows up to use the same TileStoreLookup offset ; to index into the array of 16 sprite VBUFF addresses that are bound to a given tile _MarkDirtySpriteTiles - lda VBuffArrayAddr,y ; Get the base address for the TileStore VBuff array for this sprite - sta VBuffArrayPtr - - lda _Sprites+TS_VBUFF_BASE,y ; This is the final upper-left cornder for this frame - sta VBuffOrigin - lda _SpriteBits,y sta SpriteBit @@ -184,10 +186,6 @@ TSSetSprite mac ora TileStore+TS_SPRITE_FLAG,y sta TileStore+TS_SPRITE_FLAG,y - lda VBuffOrigin - adc ]2 - sta [tmp0],y ; This is *very* carefully constructed.... 
- lda TileStore+TS_DIRTY,y bne next @@ -203,31 +201,68 @@ TSSetSprite mac next <<< +ROW equ TILE_STORE_WIDTH*2 +COL equ TILE_DATA_SPAN + :mark1x1 + ldx _Sprites+VBUFF_ARRAY_ADDR,y ; get the address of this sprite's vbuff values + lda _Sprites+TS_VBUFF_BASE,y ; get the starting vbuff address + sta: {0*ROW}+{0*COL},x ; Put in the vbuff address + ldx _Sprites+TS_LOOKUP_INDEX,y - TSSetSprite 0*{TS_LOOKUP_SPAN*2};#0 + TSSetSprite 0*{TS_LOOKUP_SPAN*2} rts :mark1x2 + ldx _Sprites+VBUFF_ARRAY_ADDR,y + lda _Sprites+TS_VBUFF_BASE,y + sta: {0*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {0*ROW}+{1*COL},x + ldx _Sprites+TS_LOOKUP_INDEX,y - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2 rts :mark1x3 + ldx _Sprites+VBUFF_ARRAY_ADDR,y + lda _Sprites+TS_VBUFF_BASE,y + sta: {0*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {0*ROW}+{1*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {0*ROW}+{2*COL},x + ldx _Sprites+TS_LOOKUP_INDEX,y - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4;#{0*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2 + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4 rts :mark2x1 + ldx _Sprites+VBUFF_ARRAY_ADDR,y + lda _Sprites+TS_VBUFF_BASE,y + sta: {0*ROW}+{0*COL},x + adc #VBUFF_TILE_ROW_BYTES + sta: {1*ROW}+{0*COL},x + ldx _Sprites+TS_LOOKUP_INDEX,y - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0 rts :mark2x2 + ldx _Sprites+VBUFF_ARRAY_ADDR,y + lda 
_Sprites+TS_VBUFF_BASE,y + sta: {0*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {0*ROW}+{1*COL},x + adc #VBUFF_TILE_ROW_BYTES-VBUFF_TILE_COL_BYTES + sta: {1*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {1*ROW}+{1*COL},x + ldx _Sprites+TS_LOOKUP_INDEX,y TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} @@ -236,6 +271,20 @@ next rts :mark2x3 + ldx _Sprites+VBUFF_ARRAY_ADDR,y + lda _Sprites+TS_VBUFF_BASE,y + sta: {0*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {0*ROW}+{1*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {0*ROW}+{2*COL},x + adc #VBUFF_TILE_ROW_BYTES-{2*VBUFF_TILE_COL_BYTES} + sta: {1*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {1*ROW}+{1*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {1*ROW}+{2*COL},x + ldx _Sprites+TS_LOOKUP_INDEX,y TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} @@ -246,6 +295,14 @@ next rts :mark3x1 + ldx _Sprites+VBUFF_ARRAY_ADDR,y + lda _Sprites+TS_VBUFF_BASE,y + sta: {0*ROW}+{0*COL},x + adc #VBUFF_TILE_ROW_BYTES + sta: {1*ROW}+{0*COL},x + adc #VBUFF_TILE_ROW_BYTES + sta: {2*ROW}+{0*COL},x + ldx _Sprites+TS_LOOKUP_INDEX,y TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} @@ -253,6 +310,20 @@ next rts :mark3x2 + ldx _Sprites+VBUFF_ARRAY_ADDR,y + lda _Sprites+TS_VBUFF_BASE,y + sta: {0*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {0*ROW}+{1*COL},x + adc #VBUFF_TILE_ROW_BYTES-VBUFF_TILE_COL_BYTES + sta: {1*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {1*ROW}+{1*COL},x + adc #VBUFF_TILE_ROW_BYTES-VBUFF_TILE_COL_BYTES + sta: {2*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {2*ROW}+{1*COL},x + ldx _Sprites+TS_LOOKUP_INDEX,y TSSetSprite 
0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} @@ -263,6 +334,26 @@ next rts :mark3x3 + ldx _Sprites+VBUFF_ARRAY_ADDR,y + lda _Sprites+TS_VBUFF_BASE,y + sta: {0*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {0*ROW}+{1*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {0*ROW}+{2*COL},x + adc #VBUFF_TILE_ROW_BYTES-{2*VBUFF_TILE_COL_BYTES} + sta: {1*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {1*ROW}+{1*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {1*ROW}+{2*COL},x + adc #VBUFF_TILE_ROW_BYTES-{2*VBUFF_TILE_COL_BYTES} + sta: {2*ROW}+{0*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {2*ROW}+{1*COL},x + adc #VBUFF_TILE_COL_BYTES + sta: {2*ROW}+{2*COL},x + ldx _Sprites+TS_LOOKUP_INDEX,y TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} diff --git a/src/Tiles.s b/src/Tiles.s index 85b1e82..616d767 100644 --- a/src/Tiles.s +++ b/src/Tiles.s @@ -120,14 +120,6 @@ InitTiles ; lda TileProcs ; Same for non-dirty, non-sprite base case ; stal TileStore+TS_BASE_TILE_DISP,x -; *** DEPRECATED *** -; lda :vbuff ; array of sprite vbuff addresses per tile -; stal TileStore+TS_VBUFF_ARRAY_ADDR,x -; clc -; adc #32 -; sta :vbuff -; *** ********** *** - ; The next set of values are constants that are simply used as cached parameters to avoid needing to ; calculate any of these values during tile rendering @@ -267,3 +259,211 @@ _SetBG0YPos stx OldStartY ; First change, so preserve the value :out rts + +; Macro helper for the bit test tree +; dobit bit_position;dest;next;exit +dobit mac + lsr + bcc next_bit + beq last_bit + tax + lda (SPRITE_VBUFF_PTR+{]1*2}),y ; ]1 (sprite bit) selects that sprite's pointer; ]2 is only the destination slot + sta sprite_ptr0+{]2*4} + txa + jmp ]3 +last_bit lda (SPRITE_VBUFF_PTR+{]1*2}),y ; same source slot on the final-bit path + sta sprite_ptr0+{]2*4} + jmp ]4 +next_bit + <<< + +; Specialization for the first sprite which can just return the vbuff 
address +; in a register if there is only one sprite intersecting the tile +dobit1 mac + lsr + bcc next_bit + beq last_bit + tax + lda (SPRITE_VBUFF_PTR+{]1*2}),y ; ]1 (sprite bit) selects that sprite's pointer; ]2 is only the destination slot + sta sprite_ptr0+{]2*4} + txa + jmp ]3 +last_bit lda (SPRITE_VBUFF_PTR+{]1*2}),y ; single sprite: leave the vbuff address in A for the exit target + jmp ]4 +next_bit + <<< + +; Optimization discussion. In the Sprite2.s file, we calculate the VBUFF address for each tile overlapped +; by a sprite: +; +; 4 lda VBuffOrigin +; 3 adc ]2 +; 7 sta [tmp0],y +; +; and then in this macro it is loaded again and copied to the direct page. If a sprite is never drawn, this is +; wasted work (which is not too often since >4 sprites would need to be overlapping), but still. +; +; 6 ldy: {]1*TILE_STORE_SIZE},x +; 4 sty sprite_ptr0+{]2*4} +; +; Since we know *exactly* which sprite is being accessed, the _Sprites+TS_VBUFF_BASE,y value can be loaded without +; an index +; +; 5 lda _Sprites+TS_VBUFF_BASE+{]1*2} +; 6 adc {]1*TILE_STORE_SIZE},x +; 4 sta sprite_ptr0+{]2*4} +; 2 tya +; +; = a savings of at least (24 - 17) = 7 cycles per tile and more if the sprite is skipped. +; +; The problem is that this still required storing a value for the sprite in the tile store. What is ideal is +; if there is a way to know implicitly which relative tile offset we are on for a given sprite and use +; that to calculate the offset... +; +; What do we know +; X = current tile +; Sprite+TS_LOOKUP_INDEX +; +; txa +; sbc _Sprites+TS_LOOKUP_INDEX+{]1*2} +; tay +; lda _Sprites+TS_VBUFF_BASE+{]1*2} +; adc DisplacementTable,y +; sta sprite_ptr0+{]2*4} +; +; Have the sprite select a table base which holds the offset values, pre-adjusted for the TS_LOOKUP_INDEX. The table +; values are fixed. Yes!! This is the solution!! 
It will only need 288 bytes of total space +; +; Best implementation will pass the Tile Store index in Y instead of X +; +; 5 lda _Sprites+VBUFF_TABLE+{]1*2} +; 6 sta self_mod +; 6 lda $0000,x +; 4 sta sprite_ptr0+{]2*4} +; 2 tya +; +; or +; +; 5 lda _Sprites+VBUFF_TABLE+{]1*2} +; 4 sta tmp0 +; 7 lda (tmp0),y +; 4 sta sprite_ptr0+{]2*4} +; 2 txa +; +; Even better, if the VBUFF_TABLE (only 32 bytes) was already stored in the second direct page +; +; 7 lda (VBUFF_TABLE+{]1*2}),y +; 5 adc _Sprites+VBUFF_TABLE+{]1*2} +; 4 sta sprite_ptr0+{]2*4} +; 2 txa +; +; Final saving compared to current implementation is (24 - 18) = 6 cycles per tile and we eliminate +; the need to pre-calculate +; + +; If we find a last bit (4th in this case) and will exit +stpbit mac + lsr + bcc next_bit + lda (SPRITE_VBUFF_PTR+{]1*2}),y ; ]1 (sprite bit) selects that sprite's pointer; ]2 is only the destination slot + sta sprite_ptr0+{]2*4} + jmp ]3 +next_bit + <<< + +; Last bit test which *must* be set +endbit mac + lda (SPRITE_VBUFF_PTR+{]1*2}),y ; ]1 (sprite bit) selects that sprite's pointer; ]2 is only the destination slot + sta sprite_ptr0+{]2*4} + jmp ]3 + <<< + +; OPTIMIZATION: +; +; bit #$00FF ; Optimization to skip the first 8 bits if they are all zeros +; bne norm_entry +; xba +; jmp skip_entry +; +; Placed at the entry point + +; This is a complex, but fast subroutine that is called from the core tile rendering code. It +; Takes a bitmap of sprites in the Accumulator and then extracts the VBuff addresses for the +; target TileStore entry and places them in specific direct page locations. 
+; +; Inputs: +; A = sprite bitmap (assumed to be non-zero) +; Y = tile store index +; D = second work page +; B = vbuff array bank +; Output: +; X = +; +; ]1 address of single sprite process +; ]2 address of two sprite process +; ]3 address of three sprite process +; ]4 address of four sprite process + +SpriteBitsToVBuffAddrs mac + dobit1 0;0;b_1_1;]1 + dobit1 1;0;b_2_1;]1 + dobit1 2;0;b_3_1;]1 + dobit1 3;0;b_4_1;]1 + dobit1 4;0;b_5_1;]1 + dobit1 5;0;b_6_1;]1 + dobit1 6;0;b_7_1;]1 + dobit1 7;0;b_8_1;]1 + dobit1 8;0;b_9_1;]1 + dobit1 9;0;b_10_1;]1 + dobit1 10;0;b_11_1;]1 + dobit1 11;0;b_12_1;]1 + dobit1 12;0;b_13_1;]1 + dobit1 13;0;b_14_1;]1 + dobit1 14;0;b_15_1;]1 + endbit 15;0;]1 + +b_1_1 dobit 1;1;b_2_2;]2 +b_2_1 dobit 2;1;b_3_2;]2 +b_3_1 dobit 3;1;b_4_2;]2 +b_4_1 dobit 4;1;b_5_2;]2 +b_5_1 dobit 5;1;b_6_2;]2 +b_6_1 dobit 6;1;b_7_2;]2 +b_7_1 dobit 7;1;b_8_2;]2 +b_8_1 dobit 8;1;b_9_2;]2 +b_9_1 dobit 9;1;b_10_2;]2 +b_10_1 dobit 10;1;b_11_2;]2 +b_11_1 dobit 11;1;b_12_2;]2 +b_12_1 dobit 12;1;b_13_2;]2 +b_13_1 dobit 13;1;b_14_2;]2 +b_14_1 dobit 14;1;b_15_2;]2 +b_15_1 endbit 15;1;]2 + +b_2_2 dobit 2;2;b_3_3;]3 +b_3_2 dobit 3;2;b_4_3;]3 +b_4_2 dobit 4;2;b_5_3;]3 +b_5_2 dobit 5;2;b_6_3;]3 +b_6_2 dobit 6;2;b_7_3;]3 +b_7_2 dobit 7;2;b_8_3;]3 +b_8_2 dobit 8;2;b_9_3;]3 +b_9_2 dobit 9;2;b_10_3;]3 +b_10_2 dobit 10;2;b_11_3;]3 +b_11_2 dobit 11;2;b_12_3;]3 +b_12_2 dobit 12;2;b_13_3;]3 +b_13_2 dobit 13;2;b_14_3;]3 +b_14_2 dobit 14;2;b_15_3;]3 +b_15_2 endbit 15;2;]3 + +b_3_3 stpbit 3;3;]4 +b_4_3 stpbit 4;3;]4 +b_5_3 stpbit 5;3;]4 +b_6_3 stpbit 6;3;]4 +b_7_3 stpbit 7;3;]4 +b_8_3 stpbit 8;3;]4 +b_9_3 stpbit 9;3;]4 +b_10_3 stpbit 10;3;]4 +b_11_3 stpbit 11;3;]4 +b_12_3 stpbit 12;3;]4 +b_13_3 stpbit 13;3;]4 +b_14_3 stpbit 14;3;]4 +b_15_3 endbit 15;3;]4 + <<< \ No newline at end of file diff --git a/src/Tool.s b/src/Tool.s index b3be07a..1f4315f 100644 --- a/src/Tool.s +++ b/src/Tool.s @@ -52,8 +52,23 @@ _CallTable adrl _TSRender-1 adrl _TSLoadTileSet-1 adrl _TSCreateSpriteStamp-1 + 
adrl _TSAddSprite-1 + adrl _TSMoveSprite-1 + adrl _TSUpdateSprite-1 + adrl _TSRemoveSprite-1 _CTEnd - +_GTEAddSprite MAC + UserTool $1000+GTEToolNum + <<< +_GTEMoveSprite MAC + UserTool $1100+GTEToolNum + <<< +_GTEUpdateSprite MAC + UserTool $1200+GTEToolNum + <<< +_GTERemoveSprite MAC + UserTool $1300+GTEToolNum + <<< ; Helper function to set the data back to the toolset default _SetDataBank sep #$20 lda #^TileStore @@ -285,6 +300,21 @@ _TSAddSprite _TSExit #0;#8 +_TSMoveSprite +:spriteY equ FirstParam+0 +:spriteX equ FirstParam+2 +:spriteSlot equ FirstParam+4 + _TSEntry + + lda :spriteX,s + tax + lda :spriteY,s + tay + lda :spriteSlot,s + jsr _MoveSprite + + _TSExit #0;#6 + _TSUpdateSprite :vbuff equ FirstParam+0 :spriteFlags equ FirstParam+2 @@ -300,6 +330,15 @@ _TSUpdateSprite _TSExit #0;#6 +_TSRemoveSprite +:spriteSlot equ FirstParam+0 + _TSEntry + + lda :spriteSlot,s + jsr _RemoveSprite ; remove, not update; only the slot is passed in A (assumes Sprite.s provides _RemoveSprite - TODO confirm) + + _TSExit #0;#2 + ; Insert the GTE code put Math.s diff --git a/src/blitter/TemplateUtils.s b/src/blitter/TemplateUtils.s index 56b60bf..88852bb 100644 --- a/src/blitter/TemplateUtils.s +++ b/src/blitter/TemplateUtils.s @@ -73,7 +73,7 @@ Counter equ tmp3 ; Patch an 8-bit or 16-bit valueS into the bank. These are a set up unrolled loops to -; quickly patch in a constanct value, or a value from an array into a given set of +; quickly patch in a constant value, or a value from an array into a given set of ; templates. 
; ; Because we have structured everything as parallel code blocks, most updates to the blitter diff --git a/src/blitter/TileProcs.s b/src/blitter/TileProcs.s index 5fe0b29..f40b634 100644 --- a/src/blitter/TileProcs.s +++ b/src/blitter/TileProcs.s @@ -27,6 +27,18 @@ _TBCopyTileDataAndMaskToCBuffV jsr _TBCopyTileDataToCBuffV jmp _TBCopyTileMaskToCBuffV +_CopyTileDataToDP2 +]line equ 0 + lup 8 + ldal tiledata+{]line*4},x + sta tmp_tile_data+{]line*4} + + ldal tiledata+{]line*4}+2,x + sta tmp_tile_data+{]line*4}+2 +]line equ ]line+1 + --^ + rts + _TBCopyTileDataToCBuff ]line equ 0 lup 8 diff --git a/src/blitter/Tiles.s b/src/blitter/Tiles.s index 7a6f197..b1779b0 100644 --- a/src/blitter/Tiles.s +++ b/src/blitter/Tiles.s @@ -84,18 +84,6 @@ CopyNoSprites :tiledisp jmp $0000 ; render the tile -; Let's make a macro helper for the bit test tree -; dobit src_offset,dest,next_target,end_target -dobit MAC - beq last_bit - ldx: ]1,y - stx ]2 - jmp ]3 -last_bit ldx: ]1,y - stx ]2 - jmp ]4 - EOM - ; The sprite code is just responsible for quickly copying all of the sprite data ; into the direct page temp area. @@ -117,83 +105,9 @@ dirty_sprite_dispatch da CopyThreeSprites da CopyFourSprites ; MAX, don't bother with more than 4 sprites per tile -; This is very similar to the code in the dirty tile renderer, but we can't reuse -; because that code draws directly to the graphics screen, and this code draws -; to a temporary buffer that has a different stride. 
- -; ldy TileStore+TS_VBUFF_ARRAY_ADDR,x ; base address of the VBUFF sprite address array for this tile -; -; lsr -; bcc :loop_0_bit_1 -; dobit $0000;sprite_ptr0;:loop_1_bit_1;CopyOneSprite - -;:loop_0_bit_1 lsr -; bcc :loop_0_bit_2 -; dobit $0002;sprite_ptr0;:loop_1_bit_2;CopyOneSprite - -;:loop_0_bit_2 lsr -; bcc :loop_0_bit_3 -; dobit $0004;sprite_ptr0;:loop_1_bit_3;CopyOneSprite - -;:loop_0_bit_3 lsr -; bcc :loop_0_bit_4 -; dobit $0006;sprite_ptr0;:loop_1_bit_4;CopyOneSprite - -;:loop_0_bit_4 lsr -; bcc :loop_0_bit_5 -; dobit $0008;sprite_ptr0;:loop_1_bit_5;CopyOneSprite - -;:loop_0_bit_5 lsr -; bcc :loop_0_bit_6 -; dobit $000A;sprite_ptr0;:loop_1_bit_6;CopyOneSprite - -;:loop_0_bit_6 lsr -; bcc :loop_0_bit_7 -; dobit $000C;sprite_ptr0;:loop_1_bit_7;CopyOneSprite - -;:loop_0_bit_7 lsr -; bcc :loop_0_bit_8 -; dobit $000E;sprite_ptr0;:loop_1_bit_8;CopyOneSprite - -;:loop_0_bit_8 lsr -; bcc :loop_0_bit_9 -; dobit $0010;sprite_ptr0;:loop_1_bit_9;CopyOneSprite - -;:loop_0_bit_9 lsr -; bcc :loop_0_bit_10 -; ldx: $0012,y -; stx spriteIdx -; cmp #0 -; jne :loop_1_bit_10 -; jmp CopyOneSprite - -;:loop_0_bit_10 lsr -; bcc :loop_0_bit_11 -; dobit $0014;sprite_ptr0;:loop_1_bit_11;CopyOneSprite - -;:loop_0_bit_11 lsr -; bcc :loop_0_bit_12 -; dobit $0016;sprite_ptr0;:loop_1_bit_12;CopyOneSprite - -;:loop_0_bit_12 lsr -; bcc :loop_0_bit_13 -; dobit $0018;sprite_ptr0;:loop_1_bit_13;CopyOneSprite - -;:loop_0_bit_13 lsr -; bcc :loop_0_bit_14 -; dobit $001A;sprite_ptr0;:loop_1_bit_14;CopyOneSprite - -;:loop_0_bit_14 lsr -; bcc :loop_0_bit_15 -; dobit $001C;sprite_ptr0;:loop_1_bit_15;CopyOneSprite - -;:loop_0_bit_15 ldx: $001E,y -; stx spriteIdx -; jmp CopyOneSprite - ; We can optimize later, for now just copy the sprite data and mask into its own ; direct page buffer and combine with the tile data later - +; ; We set up direct page pointers to the mask bank and use the bank register for the ; data. 
CopyFourSprites diff --git a/src/blitter/Tiles10000.s b/src/blitter/Tiles10000.s index edc5655..d56861a 100644 --- a/src/blitter/Tiles10000.s +++ b/src/blitter/Tiles10000.s @@ -30,7 +30,6 @@ _TBFastSpriteTile_VH ; Need to update the X-register before calling this _TBApplySpriteData ldx _SPR_X_REG ; set to the unaligned tile block address in the sprite plane - ]line equ 0 lup 8 lda blttmp+{]line*4} diff --git a/src/static/TileStore.s b/src/static/TileStore.s index b586ed6..f03cf02 100644 --- a/src/static/TileStore.s +++ b/src/static/TileStore.s @@ -2,18 +2,15 @@ put ../Defs.s put TileStoreDefs.s - put ../blitter/Template.s ;------------------------------------------------------------------------------------- ; -; Buffer space - - ds 256 + put ../blitter/Template.s ;------------------------------------------------------------------------------------- TileStore ENT - ds {TILE_STORE_SIZE*17} + ds {TILE_STORE_SIZE*TILE_STORE_NUM} ;------------------------------------------------------------------------------------- ; @@ -23,7 +20,7 @@ TileStore ENT DirtyTileCount ENT ds 2 DirtyTiles ENT - ds TILE_STORE_SIZE ; At most this many tiles can possibly be update at once + ds TILE_STORE_SIZE ; At most this many tiles can possibly be updated at once ;------------------------------------------------------------------------------------- ; @@ -373,9 +370,15 @@ ScreenModeWidth ENT ScreenModeHeight ENT dw 200,192,200,176,160,160,160,128,144,192,102,1 -; List of addresses of the VBuff arrays for each Tile Store entry, indexed by sprite index -VBuffArrayAddr ENT - ds MAX_SPRITES*2 +; VBuff arrays for each sprite. We need at least a 3x3 block for each sprite and the shape of the +; array must match the TileStore structure. The TileStore is 41 blocks wide. To keep things simple +; we allocate 8 sprites in the first row and 8 more sprites in the 4th row. 
So we need to allocate a +; total of 6 rows of TileStore space +; +; It is *critical* that this array be placed in a memory location that is greater than the largest +; TileStore offset. +VBuffArray ENT + ds 6*{TILE_STORE_WIDTH*2} ; Convert sprite index to a bit position _SpriteBits ENT diff --git a/src/static/TileStoreDefs.s b/src/static/TileStoreDefs.s index b855619..2ab791a 100644 --- a/src/static/TileStoreDefs.s +++ b/src/static/TileStoreDefs.s @@ -14,24 +14,17 @@ TS_CODE_ADDR_HIGH equ TILE_STORE_SIZE*5 TS_WORD_OFFSET equ TILE_STORE_SIZE*6 ; const value, word offset value for this tile if LDA (dp),y instructions re used TS_BASE_ADDR equ TILE_STORE_SIZE*7 ; const value, because there are two rows of tiles per bank, this is set to $0000 ot $8000. TS_SCREEN_ADDR equ TILE_STORE_SIZE*8 ; cached value of on-screen location of tile. Used for DirtyRender. -;TS_VBUFF_ARRAY_ADDR equ TILE_STORE_SIZE*9 ; const value to an aligned 32-byte array starting at $8000 in TileStore bank - TS_BASE_TILE_COPY equ TILE_STORE_SIZE*9 ; derived from TS_TILE_ID to optimize tile copy to support sprite rendering TS_BASE_TILE_DISP equ TILE_STORE_SIZE*10 ; derived from TS_TILE_ID to optimize base (non-sprite) tile dispatch in the Render function TS_DIRTY_TILE_DISP equ TILE_STORE_SIZE*11 ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function -; Hold values for up to 4 sprites per tile -TS_VBUFF_ADDR_0 equ TILE_STORE_SIZE*12 -TS_VBUFF_ADDR_1 equ TILE_STORE_SIZE*13 -TS_VBUFF_ADDR_2 equ TILE_STORE_SIZE*14 -TS_VBUFF_ADDR_3 equ TILE_STORE_SIZE*15 -TS_VBUFF_ADDR_COUNT equ TILE_STORE_SIZE*16 ; replace usage of TS_VBUFF_ARRAY_ADDR with this later +TILE_STORE_NUM equ 12 ; Need this many parallel arrays ; Sprite data structures. We cache quite a few pieces of information about the sprite ; to make calculations faster, so this is hidden from the caller. 
MAX_SPRITES equ 16 -SPRITE_REC_SIZE equ 52 +SPRITE_REC_SIZE equ 42 ; Mark each sprite as ADDED, UPDATED, MOVED, REMOVED depending on the actions applied to it ; on this frame. Quick note, the same Sprite ID cannot be removed and added in the same frame. @@ -68,6 +61,7 @@ SPRITE_HEIGHT equ {MAX_SPRITES*32} SPRITE_CLIP_WIDTH equ {MAX_SPRITES*34} SPRITE_CLIP_HEIGHT equ {MAX_SPRITES*36} TS_VBUFF_BASE equ {MAX_SPRITES*38} ; Finalized VBUFF address based on the sprite position and tile offsets +VBUFF_ARRAY_ADDR equ {MAX_SPRITES*40} ; Fixed address where this sprite's VBUFF addresses are stored. The array is the same shape as TileStore, but much smaller ;TILE_DATA_OFFSET equ {MAX_SPRITES*2} ;TILE_STORE_ADDR_1 equ {MAX_SPRITES*12} ;TILE_STORE_ADDR_2 equ {MAX_SPRITES*14} diff --git a/src/tiles/FastRenderer.s b/src/tiles/FastRenderer.s index 780d319..298c100 100644 --- a/src/tiles/FastRenderer.s +++ b/src/tiles/FastRenderer.s @@ -6,8 +6,8 @@ ; If there are sprites, then the sprite data is flattened and stored into a direct page buffer ; and then copied into the code field _RenderTileFast -; lda TileStore+TS_VBUFF_ADDR_COUNT,x ; How many sprites are on this tile? -; bne SpriteDispatch ; This is faster if there are no sprites + lda TileStore+TS_SPRITE_FLAG,x ; any sprites on this tile? 
+ bne SpriteDispatch NoSpriteFast lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line @@ -15,7 +15,7 @@ NoSpriteFast ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field lda TileStore+TS_BASE_TILE_DISP,x ; go to the tile copy routine (just basics) stal nsf_patch+1 - lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated) + lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated) plb ; set the code field bank nsf_patch jmp $0000 @@ -24,35 +24,60 @@ nsf_patch jmp $0000 FastTileProcs dw _TBCopyDataFast,_TBCopyDataFast,_TBCopyDataFast,_TBCopyDataFast ; dw _TBCopyDataFast,_TBCopyDataFast,_TBCopyDataVFast,_TBCopyDataVFast -SpriteDispatch - tax - jmp (:,x) ; Dispatch to the other routines -: da NoSpriteFast ; Placeholder - da OneSpriteFast - da TwoSpritesFast - da ThreeSpritesFast - da FourSpritesFast +; NOTE: Inlining the dispatch would eliminate a JSR,RTS,LDX, and JMP (abs,x) because the exit code +; could jump directly to the target address. Net savings of 20 cycles per tile. For a 16x16 +; sprite with a 3x3 block coverage this is 180 cycles per frame per block... This would also +; preserve a register +; +; For comparison, a fast one sprite copy takes 22 cycles per word, so this would save +; about 1/2 block of render time per tile. +; +; Need to determine if the sprite or tile data is on top, as that will decide whether the +; sprite or tile data is copied into the temporary buffer first. Also, if TWO_LAYER is set +; then the mask information must be copied as well....This is the last decision point. 
-; Pointers to sprite data and masks -spritedata_0 equ tmp0 -spritedata_1 equ tmp2 -spritedata_2 equ tmp4 -spritedata_3 equ tmp6 -spritemask_0 equ tmp8 -spritemask_1 equ tmp10 -spritemask_2 equ tmp12 -spritemask_3 equ tmp14 +SpriteDispatch + txy + SpriteBitsToVBuffAddrs OneSpriteFast;OneSpriteFast;OneSpriteFast;OneSpriteFast + sta sprite_ptr0 + ldx TileStore+TS_TILE_ADDR,y + jsr _CopyTileDataToDP2 ; preserves Y + lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line + pha ; and put on the stack for later. Has TileStore bank in high byte. + ldx sprite_ptr0 ; address of sprite vbuff info + lda TileStore+TS_CODE_ADDR_LOW,y ; load the address of the code field + tay +; jmp _TBApplySpriteData2 + +_TBApplySpriteData2 +]line equ 0 + lup 8 + lda blttmp+{]line*4} + andl spritemask+{]line*SPRITE_PLANE_SPAN},x + oral spritedata+{]line*SPRITE_PLANE_SPAN},x + sta: $0004+{]line*$1000},y + + lda blttmp+{]line*4}+2 + andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + rts ; Where there are sprites involved, the first step is to call a routine to copy the ; tile data into a temporary buffer. Then the sprite data is merged and placed into ; the code field. +; +; A = vbuff address +; Y = tile store address OneSpriteFast - tyx - lda TileStore+TS_TILE_ADDR,y - per :-1 - jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer -: - ldx TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data + tax ; address of the sprite data + lda TileStore+TS_BASE_TILE_COPY,y ; copy routine (handles flips and other behaviors) + stal osf_copy+1 +osf_copy jsr $0000 + +; ldx TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line pha ; and put on the stack for later. 
lda TileStore+TS_CODE_ADDR_LOW,y @@ -71,17 +96,17 @@ OneSpriteFast rts TwoSpritesFast - tyx - lda TileStore+TS_TILE_ADDR,y - per :-1 - jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer -: - lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data - sta spritedata_0 - sta spritemask_0 - lda TileStore+TS_VBUFF_ADDR_1,y ; address of the sprite data - sta spritedata_1 - sta spritemask_1 +; tyx +; lda TileStore+TS_TILE_ADDR,y +; per :-1 +; jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer +;: +; lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data +; sta spritedata_0 +; sta spritemask_0 +; lda TileStore+TS_VBUFF_ADDR_1,y ; address of the sprite data +; sta spritedata_1 +; sta spritemask_1 lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line pha ; and put on the stack for later. @@ -89,33 +114,33 @@ TwoSpritesFast tay plb ; set the code field bank - TwoSpritesToCodeField 0 - TwoSpritesToCodeField 1 - TwoSpritesToCodeField 2 - TwoSpritesToCodeField 3 - TwoSpritesToCodeField 4 - TwoSpritesToCodeField 5 - TwoSpritesToCodeField 6 - TwoSpritesToCodeField 7 +; TwoSpritesToCodeField 0 +; TwoSpritesToCodeField 1 +; TwoSpritesToCodeField 2 +; TwoSpritesToCodeField 3 +; TwoSpritesToCodeField 4 +; TwoSpritesToCodeField 5 +; TwoSpritesToCodeField 6 +; TwoSpritesToCodeField 7 rts ThreeSpritesFast FourSpritesFast - tyx - lda TileStore+TS_TILE_ADDR,y - per :-1 - jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer -: - lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data - sta spritedata_0 - sta spritemask_0 - lda TileStore+TS_VBUFF_ADDR_1,y - sta spritedata_1 - sta spritemask_1 - lda TileStore+TS_VBUFF_ADDR_2,y - sta spritedata_2 - sta spritemask_2 +; tyx +; lda TileStore+TS_TILE_ADDR,y +; per :-1 +; jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer +;: +; lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data +; 
sta spritedata_0 +; sta spritemask_0 +; lda TileStore+TS_VBUFF_ADDR_1,y +; sta spritedata_1 +; sta spritemask_1 +; lda TileStore+TS_VBUFF_ADDR_2,y +; sta spritedata_2 +; sta spritemask_2 lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line pha ; and put on the stack for later. @@ -123,13 +148,13 @@ FourSpritesFast tay plb ; set the code field bank - ThreeSpritesToCodeField 0 - ThreeSpritesToCodeField 1 - ThreeSpritesToCodeField 2 - ThreeSpritesToCodeField 3 - ThreeSpritesToCodeField 4 - ThreeSpritesToCodeField 5 - ThreeSpritesToCodeField 6 - ThreeSpritesToCodeField 7 +; ThreeSpritesToCodeField 0 +; ThreeSpritesToCodeField 1 +; ThreeSpritesToCodeField 2 +; ThreeSpritesToCodeField 3 +; ThreeSpritesToCodeField 4 +; ThreeSpritesToCodeField 5 +; ThreeSpritesToCodeField 6 +; ThreeSpritesToCodeField 7 rts \ No newline at end of file