mirror of
https://github.com/lscharen/iigs-game-engine.git
synced 2024-11-25 15:32:59 +00:00
Another checkpoint; converging on the working implementation
This commit is contained in:
parent
7909113a97
commit
78d7dafe14
@ -11,6 +11,7 @@
|
||||
"crossrunner": "C:\\Programs\\Crossrunner\\Crossrunner.exe"
|
||||
},
|
||||
"scripts": {
|
||||
"archive": "%npm_package_config_cadius% EXTRACTFILE ",
|
||||
"test": "npm run build && build-image.bat %npm_package_config_cadius% && %npm_package_config_gsport%",
|
||||
"build": "%npm_package_config_merlin32% -V %npm_package_config_macros% ./src/Master.s",
|
||||
"build:debug": "%npm_package_config_merlin32% -V %npm_package_config_macros% ./src/Debug.s",
|
||||
|
@ -93,7 +93,7 @@ _CoreStartUp
|
||||
jsr EngineReset ; All of the resources are allocated, put the engine in a known state
|
||||
|
||||
jsr InitGraphics ; Initialize all of the graphics-related data
|
||||
; jsr InitSprites ; Initialize the sprite subsystem
|
||||
jsr InitSprites ; Initialize the sprite subsystem
|
||||
jsr InitTiles ; Initialize the tile subsystem
|
||||
|
||||
jsr InitTimers ; Initialize the timer subsystem
|
||||
|
@ -96,7 +96,7 @@ LastKey equ 116
|
||||
LastTick equ 118
|
||||
ForceSpriteFlag equ 120
|
||||
|
||||
VBuffArrayPtr equ 122
|
||||
;VBuffArrayPtr equ 122
|
||||
SpriteRemovedFlag equ 126 ; Indicate if any sprites were removed this frame
|
||||
|
||||
activeSpriteList equ 128 ; 32 bytes for the active sprite list (can persist across frames)
|
||||
@ -149,6 +149,8 @@ _TILE_ID equ 158 ; Copy of the tile descriptor
|
||||
; FREE_SPACE_DP2 equ 160
|
||||
DP2_DIRTY_TILE_COUNT equ 160 ; Local copy of dirty tile count to avoid banking
|
||||
DP2_DIRTY_TILE_CALLBACK equ 162
|
||||
|
||||
SPRITE_VBUFF_PTR equ 224 ; 32 bytes of adjusted pointers to VBuffArray addresses
|
||||
; End direct page values
|
||||
|
||||
; EngineMode definitions
|
||||
@ -242,4 +244,4 @@ ScreenModeWidth EXT
|
||||
ScreenModeHeight EXT
|
||||
_SpriteBits EXT
|
||||
_SpriteBitsNot EXT
|
||||
VBuffArrayAddr EXT
|
||||
VBuffArray EXT
|
||||
|
33
src/Render.s
33
src/Render.s
@ -102,22 +102,22 @@ _Render
|
||||
|
||||
; The _ApplyTilesFast is the same as _ApplyTiles, but we use the _RenderTileFast subroutine
|
||||
_ApplyTilesFast
|
||||
ldx DirtyTileCount
|
||||
|
||||
tdc
|
||||
clc
|
||||
adc #$100 ; move to the next page
|
||||
tcd
|
||||
|
||||
lda DirtyTileCount ; Cache the dirty tile count
|
||||
sta DP2_DIRTY_TILE_COUNT
|
||||
|
||||
stx DP2_DIRTY_TILE_COUNT ; Cache the dirty tile count
|
||||
jsr _PopDirtyTilesFast
|
||||
|
||||
stz DirtyTileCount
|
||||
|
||||
tdc ; Move back to the original direct page
|
||||
sec
|
||||
sbc #$100
|
||||
tcd
|
||||
|
||||
stz DirtyTileCount ; Reset the dirty tile count
|
||||
rts
|
||||
|
||||
; The _ApplyTiles function is responsible for rendering all of the dirty tiles into the code
|
||||
@ -198,17 +198,10 @@ _ApplyDirtyTiles
|
||||
|
||||
; Only render solid tiles and sprites
|
||||
_RenderDirtyTile
|
||||
ldx TileStore+TS_VBUFF_ADDR_COUNT,y ; How many sprites are on this tile?
|
||||
lda TileStore+TS_SPRITE_FLAG,y
|
||||
beq NoSpritesDirty ; This is faster if there are no sprites
|
||||
|
||||
lda TileStore+TS_TILE_ID,y ; Check if the tile has
|
||||
jmp (dirty_dispatch,x)
|
||||
dirty_dispatch
|
||||
da NoSpritesDirty
|
||||
da OneSpriteDirty
|
||||
da TwoSpritesDirty
|
||||
da ThreeSpritesDirty
|
||||
da FourSpritesDirty
|
||||
; TODO: handle sprite drawing
|
||||
|
||||
; The rest of this function handles that non-sprite blit, which is super fast since it blits directly from the
|
||||
; tile data store to the graphics screen with no masking. The only extra work is selecting a blit function
|
||||
@ -218,12 +211,12 @@ dirty_dispatch
|
||||
; Y is set to the top-left address of the tile in SHR screen
|
||||
; A is set to the address of the tile data
|
||||
NoSpritesDirty
|
||||
tyx
|
||||
ldy TileStore+TS_SCREEN_ADDR,x ; Get the on-screen address of this tile
|
||||
lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated)
|
||||
lda TileStore+TS_DIRTY_TILE_DISP,y
|
||||
stal :nsd+1
|
||||
ldx TileStore+TS_SCREEN_ADDR,y ; Get the on-screen address of this tile
|
||||
lda TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated)
|
||||
plb ; set the code field bank
|
||||
jmp (TileStore+TS_DIRTY_TILE_DISP,x) ; go to the tile copy routine (just basics)
|
||||
|
||||
:nsd jmp $0000
|
||||
; Use some temporary space for the spriteIdx array (maximum of 4 entries)
|
||||
|
||||
stkSave equ tmp9
|
||||
@ -240,7 +233,7 @@ ThreeSpritesDirty
|
||||
TwoSpritesDirty
|
||||
|
||||
sta tileAddr
|
||||
sty screenAddr
|
||||
stx screenAddr
|
||||
|
||||
plb
|
||||
tsc
|
||||
|
47
src/Sprite.s
47
src/Sprite.s
@ -19,19 +19,32 @@ InitSprites
|
||||
cpx #$FFFE
|
||||
bne :loop2
|
||||
|
||||
; Clear values in the sprite array
|
||||
; Set the VBuff array addresses for each sprite, since they're static
|
||||
|
||||
; ldx #{MAX_SPRITES-1}*2
|
||||
;:loop3 stz _Sprites+TILE_STORE_ADDR_1,x
|
||||
; dex
|
||||
; dex
|
||||
; bpl :loop3
|
||||
ldx #0
|
||||
lda #VBuffArray
|
||||
:loop3 sta _Sprites+VBUFF_ARRAY_ADDR,x
|
||||
clc
|
||||
adc #4*2 ; skip ahead 4 tiles
|
||||
inx
|
||||
inx
|
||||
cpx #8*2
|
||||
bcc :loop3
|
||||
|
||||
; Now do the second set of sprites (slots 8 through 15), which start three TileStore rows
; further into the VBuffArray. X carries on from the first loop at 8*2, so the terminating
; bound must be 16*2; comparing against 8*2 again would drop out after a single pass and
; leave the VBUFF_ARRAY_ADDR of slots 9-15 uninitialized.
            lda   #VBuffArray+{3*{TILE_STORE_WIDTH*2}}
:loop4      sta   _Sprites+VBUFF_ARRAY_ADDR,x
            clc
            adc   #4*2                           ; skip ahead 4 tiles
            inx
            inx
            cpx   #16*2
            bcc   :loop4
|
||||
|
||||
; Precalculate some bank values
|
||||
jsr _CacheSpriteBanks
|
||||
rts
|
||||
|
||||
|
||||
; _RenderSprites
|
||||
;
|
||||
; The function is responsible for updating all of the rendering information based on any changes
|
||||
@ -59,7 +72,7 @@ InitSprites
|
||||
; a. If it is not marked in the DirtyTile list
|
||||
; * Clear its bit from the TileStore's TS_SPRITE_FLAG
|
||||
; * Add the tile to the DirtyTile list
|
||||
;
|
||||
;t
|
||||
; 2. If a sprite is marked as SPRITE_STATUS_REMOVED, then
|
||||
; A. Clear its bit from the SpriteBits bitmap
|
||||
; B. For each tile the sprite overlaps with:
|
||||
@ -134,6 +147,8 @@ _DoPhase1
|
||||
lda _SpriteBits,y ; Clear from the sprite bitmap
|
||||
sta SpriteRemovedFlag ; Stick a non-zero value here
|
||||
trb SpriteMap
|
||||
lda #SPRITE_STATUS_EMPTY ; Mark as empty so no error if we try to Add a sprite here again
|
||||
sta _Sprites+SPRITE_STATUS,y
|
||||
|
||||
jmp _ClearSpriteFromTileStore ; Clear the tile flags, add to the dirty tile list and done
|
||||
|
||||
@ -154,15 +169,6 @@ _DoPhase1
|
||||
:no_move
|
||||
jmp _MarkDirtySpriteTiles
|
||||
|
||||
; Once all of the sprite values have been calculated, we need to scan the dirty tile list and
|
||||
; collapse the sprite information down to no more than 4 vbuff references per tile. We used to
|
||||
; do this on the fly in the renderer, but that required differentiating between tile with and
|
||||
; without sprites in the core rendering function. By lifting this up, we simplify the core code
|
||||
; and possibly open up some optimization opportunities.
|
||||
_SetTileStoreVBuffAddrs
|
||||
|
||||
|
||||
|
||||
; Dispatch table. It's uninteresting, so it's tucked out of the way
|
||||
phase1 dw :phase1_0
|
||||
dw :phase1_1,:phase1_2,:phase1_3,:phase1_4
|
||||
@ -200,8 +206,8 @@ phase1 dw :phase1_0
|
||||
:phase1_2 ldy activeSpriteList+2
|
||||
jsr _DoPhase1
|
||||
:phase1_1 ldy activeSpriteList
|
||||
jsr _DoPhase1
|
||||
:phase1_0 jmp _SetTileStoreVBuffAddrs
|
||||
jmp _DoPhase1
|
||||
:phase1_0 rts
|
||||
|
||||
; Utility function to calculate the difference in tile positions between a sprite's current
|
||||
; position and its previous position. This gets interesting because the number of tiles
|
||||
@ -299,9 +305,6 @@ _AddSprite
|
||||
lda _SpriteBits,x ; Get the bit flag for this sprite slot
|
||||
tsb SpriteMap ; Mark it in the sprite map bit field
|
||||
|
||||
; txa ; And return the sprite ID
|
||||
; clc ; Mark that the sprite was successfully added
|
||||
|
||||
rts
|
||||
|
||||
; Macro to make the unrolled loop more concise
|
||||
|
149
src/Sprite2.s
149
src/Sprite2.s
@ -66,14 +66,11 @@ _CalcDirtySprite
|
||||
clc
|
||||
lda _Sprites+SPRITE_CLIP_TOP,y
|
||||
adc StartYMod208 ; Adjust for the scroll offset
|
||||
pha ; Cache
|
||||
tax ; Cache
|
||||
and #$FFF8 ; mask first to ensure LSR will clear the carry
|
||||
lsr
|
||||
lsr
|
||||
tax
|
||||
lda TileStoreLookupYTable,x ; Even numbers from [0, 100] (50 elements)
|
||||
sta RowTop
|
||||
pla
|
||||
sta RowTop ; Even numbers from [0, 100] (50 elements)
|
||||
|
||||
; Get the position of the top edge within the tile and then add it to the sprite's height
|
||||
; to calculate the number of tiles that are overlapped. We use the actual width and height
|
||||
@ -90,7 +87,7 @@ _CalcDirtySprite
|
||||
and #$0018
|
||||
sta AreaIndex
|
||||
|
||||
txa ; Get the verical offset in the VBUFF memory
|
||||
txa ; Get the vertical offset in the VBUFF memory
|
||||
asl
|
||||
tax
|
||||
ldal :vbuff_mul,x
|
||||
@ -108,13 +105,19 @@ _CalcDirtySprite
|
||||
adc RowTop
|
||||
sta _Sprites+TS_LOOKUP_INDEX,y ; This is the index into the TileStoreLookup table
|
||||
|
||||
; Create an offset value for loading the calculated VBUFF addresses within the core renderer
|
||||
|
||||
eor #$FFFF
|
||||
sec
|
||||
adc _Sprites+VBUFF_ARRAY_ADDR,y
|
||||
sta tmp1 ; Spill this value to direct page temp space
|
||||
|
||||
; Calculate the final address of the sprite data in the stamp buffer. We have to move earlier
|
||||
; in the buffer based on the horizontal offset and move up for each vertical offset.
|
||||
|
||||
txa
|
||||
and #$0003
|
||||
tax
|
||||
|
||||
adc tmp0 ; add to the vertical offset
|
||||
|
||||
; Subtract this value from the SPRITE_DISP address
|
||||
@ -134,12 +137,17 @@ _CalcDirtySprite
|
||||
and #$000C
|
||||
lsr ; max value = 4 = 0x04
|
||||
ora AreaIndex ; merge into the area index
|
||||
sta _Sprites+TS_COVERAGE_SIZE,y ; Save this value as a key to the coverage size of the sprite
|
||||
|
||||
; No need to copy the TileStore addresses into the Sprite's TILE_STORE_ADDR values. Just
|
||||
; hold a copy of the corner offset into the lookup table and the sprite's size in tiles.
|
||||
; Then, when we need to erase we can just lookup the values in the TileStoreLookup table.
|
||||
; Last task. Since we don't need to use the X-register to cache values; load the direct page 2
|
||||
; offset for the SPRITE_VBUFF_PTR and save it
|
||||
|
||||
tya
|
||||
ora #$100
|
||||
tax
|
||||
lda tmp1
|
||||
sta SPRITE_VBUFF_PTR,x
|
||||
|
||||
sta _Sprites+TS_COVERAGE_SIZE,y
|
||||
mdsOut rts
|
||||
|
||||
|
||||
@ -147,12 +155,6 @@ mdsOut rts
|
||||
; parallel structure to the Tile Store. This allows us to use the same TileStoreLookup offset
|
||||
; to index into the array of 16 sprite VBUFF addresses that are bound to a given tile
|
||||
_MarkDirtySpriteTiles
|
||||
lda VBuffArrayAddr,y ; Get the base address for the TileStore VBuff array for this sprite
|
||||
sta VBuffArrayPtr
|
||||
|
||||
lda _Sprites+TS_VBUFF_BASE,y ; This is the final upper-left cornder for this frame
|
||||
sta VBuffOrigin
|
||||
|
||||
lda _SpriteBits,y
|
||||
sta SpriteBit
|
||||
|
||||
@ -184,10 +186,6 @@ TSSetSprite mac
|
||||
ora TileStore+TS_SPRITE_FLAG,y
|
||||
sta TileStore+TS_SPRITE_FLAG,y
|
||||
|
||||
lda VBuffOrigin
|
||||
adc ]2
|
||||
sta [tmp0],y ; This is *very* carefully constructed....
|
||||
|
||||
lda TileStore+TS_DIRTY,y
|
||||
bne next
|
||||
|
||||
@ -203,31 +201,68 @@ TSSetSprite mac
|
||||
next
|
||||
<<<
|
||||
|
||||
ROW equ TILE_STORE_WIDTH*2
|
||||
COL equ TILE_DATA_SPAN
|
||||
|
||||
:mark1x1
|
||||
ldx _Sprites+VBUFF_ARRAY_ADDR,y ; get the address of this sprite's vbuff values
|
||||
lda _Sprites+TS_VBUFF_BASE,y ; get the starting vbuff address
|
||||
sta: {0*ROW}+{0*COL},x ; Put in the vbuff address
|
||||
|
||||
ldx _Sprites+TS_LOOKUP_INDEX,y
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2};#0
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}
|
||||
rts
|
||||
|
||||
:mark1x2
|
||||
ldx _Sprites+VBUFF_ARRAY_ADDR,y
|
||||
lda _Sprites+TS_VBUFF_BASE,y
|
||||
sta: {0*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {0*ROW}+{1*COL},x
|
||||
|
||||
ldx _Sprites+TS_LOOKUP_INDEX,y
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2
|
||||
rts
|
||||
|
||||
:mark1x3
|
||||
ldx _Sprites+VBUFF_ARRAY_ADDR,y
|
||||
lda _Sprites+TS_VBUFF_BASE,y
|
||||
sta: {0*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {0*ROW}+{1*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {0*ROW}+{2*COL},x
|
||||
|
||||
ldx _Sprites+TS_LOOKUP_INDEX,y
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4;#{0*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4
|
||||
rts
|
||||
|
||||
:mark2x1
|
||||
ldx _Sprites+VBUFF_ARRAY_ADDR,y
|
||||
lda _Sprites+TS_VBUFF_BASE,y
|
||||
sta: {0*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_ROW_BYTES
|
||||
sta: {1*ROW}+{0*COL},x
|
||||
|
||||
ldx _Sprites+TS_LOOKUP_INDEX,y
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0
|
||||
TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0
|
||||
rts
|
||||
|
||||
:mark2x2
|
||||
ldx _Sprites+VBUFF_ARRAY_ADDR,y
|
||||
lda _Sprites+TS_VBUFF_BASE,y
|
||||
sta: {0*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {0*ROW}+{1*COL},x
|
||||
adc #VBUFF_TILE_ROW_BYTES-VBUFF_TILE_COL_BYTES
|
||||
sta: {1*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {1*ROW}+{1*COL},x
|
||||
|
||||
ldx _Sprites+TS_LOOKUP_INDEX,y
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES}
|
||||
@ -236,6 +271,20 @@ next
|
||||
rts
|
||||
|
||||
:mark2x3
|
||||
ldx _Sprites+VBUFF_ARRAY_ADDR,y
|
||||
lda _Sprites+TS_VBUFF_BASE,y
|
||||
sta: {0*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {0*ROW}+{1*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {0*ROW}+{2*COL},x
|
||||
adc #VBUFF_TILE_ROW_BYTES-{2*VBUFF_TILE_COL_BYTES}
|
||||
sta: {1*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {1*ROW}+{1*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {1*ROW}+{2*COL},x
|
||||
|
||||
ldx _Sprites+TS_LOOKUP_INDEX,y
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES}
|
||||
@ -246,6 +295,14 @@ next
|
||||
rts
|
||||
|
||||
:mark3x1
|
||||
ldx _Sprites+VBUFF_ARRAY_ADDR,y
|
||||
lda _Sprites+TS_VBUFF_BASE,y
|
||||
sta: {0*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_ROW_BYTES
|
||||
sta: {1*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_ROW_BYTES
|
||||
sta: {2*ROW}+{0*COL},x
|
||||
|
||||
ldx _Sprites+TS_LOOKUP_INDEX,y
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES}
|
||||
@ -253,6 +310,20 @@ next
|
||||
rts
|
||||
|
||||
:mark3x2
|
||||
ldx _Sprites+VBUFF_ARRAY_ADDR,y
|
||||
lda _Sprites+TS_VBUFF_BASE,y
|
||||
sta: {0*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {0*ROW}+{1*COL},x
|
||||
adc #VBUFF_TILE_ROW_BYTES-VBUFF_TILE_COL_BYTES
|
||||
sta: {1*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {1*ROW}+{1*COL},x
|
||||
adc #VBUFF_TILE_ROW_BYTES-VBUFF_TILE_COL_BYTES
|
||||
sta: {2*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {2*ROW}+{1*COL},x
|
||||
|
||||
ldx _Sprites+TS_LOOKUP_INDEX,y
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES}
|
||||
@ -263,6 +334,26 @@ next
|
||||
rts
|
||||
|
||||
:mark3x3
|
||||
ldx _Sprites+VBUFF_ARRAY_ADDR,y
|
||||
lda _Sprites+TS_VBUFF_BASE,y
|
||||
sta: {0*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {0*ROW}+{1*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {0*ROW}+{2*COL},x
|
||||
adc #VBUFF_TILE_ROW_BYTES-{2*VBUFF_TILE_COL_BYTES}
|
||||
sta: {1*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {1*ROW}+{1*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {1*ROW}+{2*COL},x
|
||||
adc #VBUFF_TILE_ROW_BYTES-{2*VBUFF_TILE_COL_BYTES}
|
||||
sta: {2*ROW}+{0*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {2*ROW}+{1*COL},x
|
||||
adc #VBUFF_TILE_COL_BYTES
|
||||
sta: {2*ROW}+{2*COL},x
|
||||
|
||||
ldx _Sprites+TS_LOOKUP_INDEX,y
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES}
|
||||
TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES}
|
||||
|
216
src/Tiles.s
216
src/Tiles.s
@ -120,14 +120,6 @@ InitTiles
|
||||
; lda TileProcs ; Same for non-dirty, non-sprite base case
|
||||
; stal TileStore+TS_BASE_TILE_DISP,x
|
||||
|
||||
; *** DEPRECATED ***
|
||||
; lda :vbuff ; array of sprite vbuff addresses per tile
|
||||
; stal TileStore+TS_VBUFF_ARRAY_ADDR,x
|
||||
; clc
|
||||
; adc #32
|
||||
; sta :vbuff
|
||||
; *** ********** ***
|
||||
|
||||
; The next set of values are constants that are simply used as cached parameters to avoid needing to
|
||||
; calculate any of these values during tile rendering
|
||||
|
||||
@ -267,3 +259,211 @@ _SetBG0YPos
|
||||
|
||||
stx OldStartY ; First change, so preserve the value
|
||||
:out rts
|
||||
|
||||
; Macro helper for the bit test tree
;
; Usage: dobit bit_position;dest;next;exit
;
;   ]1 = bit position being tested, i.e. the sprite slot whose bit is shifted out
;   ]2 = destination slot index; the loaded value is stored at sprite_ptr0+{]2*4}
;   ]3 = label to continue at when more bits remain set in the accumulator
;   ]4 = label to exit to when this was the last set bit
;
; On entry A holds the remaining sprite bitmap and Y is the index used for the indirect
; load. The per-sprite pointers are laid out one word per sprite slot starting at
; SPRITE_VBUFF_PTR, so the load has to be selected by the bit position (]1) and not by
; the destination slot (]2).
;
; Clobbers X (used to hold the bitmap across the load) and the carry flag.
dobit       mac
            lsr                                  ; shift the next sprite bit into the carry
            bcc   next_bit                       ; bit clear: this sprite is not on the tile
            beq   last_bit                       ; bit set and nothing left: take the exit path
            tax                                  ; keep the remaining bits safe in X
            lda   (SPRITE_VBUFF_PTR+{]1*2}),y
            sta   sprite_ptr0+{]2*4}
            txa                                  ; restore the remaining bits
            jmp   ]3
last_bit    lda   (SPRITE_VBUFF_PTR+{]1*2}),y
            sta   sprite_ptr0+{]2*4}
            jmp   ]4
next_bit
            <<<
|
||||
|
||||
; Specialization for the first sprite which can just return the vbuff address
; in a register if there is only one sprite intersecting the tile
;
; Usage: dobit1 bit_position;dest;next;exit
;
;   ]1 = bit position being tested, i.e. the sprite slot whose bit is shifted out
;   ]2 = destination slot index; used only when more sprites follow
;   ]3 = label to continue at when more bits remain set in the accumulator
;   ]4 = label to exit to when this was the only set bit
;
; When more bits remain, the loaded value is stored to sprite_ptr0+{]2*4} and the bitmap
; is restored to A before jumping to ]3. When this is the only bit, the loaded value is
; left in A (nothing is stored) and control goes to ]4.
;
; The per-sprite pointers are laid out one word per sprite slot starting at
; SPRITE_VBUFF_PTR, so the load is selected by the bit position (]1), not the
; destination slot (]2).
dobit1      mac
            lsr                                  ; shift the next sprite bit into the carry
            bcc   next_bit                       ; bit clear: this sprite is not on the tile
            beq   last_bit                       ; bit set and nothing left: single-sprite exit
            tax                                  ; keep the remaining bits safe in X
            lda   (SPRITE_VBUFF_PTR+{]1*2}),y
            sta   sprite_ptr0+{]2*4}
            txa                                  ; restore the remaining bits
            jmp   ]3
last_bit    lda   (SPRITE_VBUFF_PTR+{]1*2}),y    ; returned in A, not stored
            jmp   ]4
next_bit
            <<<
|
||||
|
||||
; Optimization discussion. In the Sprite2.s file, we calculate the VBUFF address for each tile overlapped
|
||||
; by a sprite:
|
||||
;
|
||||
; 4 lda VBuffOrigin
|
||||
; 3 adc ]2
|
||||
; 7 sta [tmp0],y
|
||||
;
|
||||
; and then in this macro it is loaded again and copied to the direct page. If a sprite is never drawn, this is
|
||||
; wasted work (which is not too often since >4 sprites would need to be overlapping), but still.
|
||||
;
|
||||
; 6 ldy: {]1*TILE_STORE_SIZE},x
|
||||
; 4 sty sprite_ptr0+{]2*4}
|
||||
;
|
||||
; Since we know *exactly* which sprite is being accessed, the _Sprites+TS_VBUFF_BASE,y value can be loaded without
|
||||
; an index
|
||||
;
|
||||
; 5 lda _Sprites+TS_VBUFF_BASE+{]1*2}
|
||||
; 6 adc {]1*TILE_STORE_SIZE},x
|
||||
; 4 sta sprite_ptr0+{]2*4}
|
||||
; 2 tya
|
||||
;
|
||||
; = a savings of at least (24 - 17) = 7 cycles per tile and more if the sprite is skipped.
|
||||
;
|
||||
; The problem is that this still required storing a value for the sprite in the tile store. What is ideal is
|
||||
; if there is a way to know implicitly which relative tile offset we are on for a given sprite and use
|
||||
; that to calculate the offset...
|
||||
;
|
||||
; What do we know
|
||||
; X = current tile
|
||||
; Sprite+TS_LOOKUP_INDEX
|
||||
;
|
||||
; txa
|
||||
; sbc _Sprites+TS_LOOKUP_INDEX+{]1*2}
|
||||
; tay
|
||||
; lda _Sprites+TS_VBUFF_BASE+{]1*2}
|
||||
; adc DisplacementTable,y
|
||||
; sta sprite_ptr0+{]2*4}
|
||||
;
|
||||
; Have the sprite select a table base which holds the offset values, pre-adjusted for the TS_LOOKUP_INDEX. The table
|
||||
; values are fixed. Yes!! This is the solution!! It will only need 288 bytes of total space
|
||||
;
|
||||
; Best implementation will pass the Tile Store index in Y instead of X
|
||||
;
|
||||
; 5 lda _Sprites+VBUFF_TABLE+{]1*2}
|
||||
; 6 sta self_mod
|
||||
; 6 lda $0000,x
|
||||
; 4 sta sprite_ptr0+{]2*4}
|
||||
; 2 tya
|
||||
;
|
||||
; or
|
||||
;
|
||||
; 5 lda _Sprites+VBUFF_TABLE+{]1*2}
|
||||
; 4 sta tmp0
|
||||
; 7 lda (tmp0),y
|
||||
; 4 sta sprite_ptr0+{]2*4}
|
||||
; 2 txa
|
||||
;
|
||||
; Even better, if the VBUFF_TABLE (only 32 bytes) was already stored in the second direct page
|
||||
;
|
||||
; 7 lda (VBUFF_TABLE+{]1*2}),y
|
||||
; 5 adc _Sprites+VBUFF_TABLE+{]1*2}
|
||||
; 4 sta sprite_ptr0+{]2*4}
|
||||
; 2 txa
|
||||
;
|
||||
; Final saving compared to current implementation is (24 - 18) = 6 cycles per tile and we eliminate
|
||||
; the need to pre-calculate
|
||||
;
|
||||
|
||||
; If we find a set bit at the last destination slot (the 4th in this case), store it and exit
;
; Usage: stpbit bit_position;dest;exit
;
;   ]1 = bit position being tested, i.e. the sprite slot whose bit is shifted out
;   ]2 = destination slot index; the loaded value is stored at sprite_ptr0+{]2*4}
;   ]3 = label to exit to once the value has been stored
;
; Any bits still set in A after this one are ignored because control leaves through ]3.
; The pointer load is selected by the bit position (]1), not the destination slot (]2).
stpbit      mac
            lsr                                  ; shift the next sprite bit into the carry
            bcc   next_bit                       ; bit clear: fall through to the next test
            lda   (SPRITE_VBUFF_PTR+{]1*2}),y
            sta   sprite_ptr0+{]2*4}
            jmp   ]3
next_bit
            <<<
|
||||
|
||||
; Last bit test which *must* be set
;
; Usage: endbit bit_position;dest;exit
;
;   ]1 = bit position, i.e. the sprite slot; no test is made because every earlier bit
;        was clear and the bitmap is assumed to be non-zero
;   ]2 = destination slot index; the loaded value is stored at sprite_ptr0+{]2*4}
;   ]3 = label to exit to
;
; The pointer load is selected by the bit position (]1), not the destination slot (]2).
endbit      mac
            lda   (SPRITE_VBUFF_PTR+{]1*2}),y
            sta   sprite_ptr0+{]2*4}
            jmp   ]3
            <<<
|
||||
|
||||
; OPTIMIZATION:
|
||||
;
|
||||
; bit #$00FF ; Optimization to skip the first 8 bits if they are all zeros
|
||||
; bne norm_entry
|
||||
; xba
|
||||
; jmp skip_entry
|
||||
;
|
||||
; Placed at the entry point
|
||||
|
||||
; This is a complex, but fast subroutine that is called from the core tile rendering code. It
|
||||
; Takes a bitmap of sprites in the Accumulator and then extracts the VBuff addresses for the
|
||||
; target TileStore entry and places them in specific direct page locations.
|
||||
;
|
||||
; Inputs:
|
||||
; A = sprite bitmap (assumed to be non-zero)
|
||||
; Y = tile store index
|
||||
; D = second work page
|
||||
; B = vbuff array bank
|
||||
; Output:
|
||||
; X =
|
||||
;
|
||||
; ]1 address of single sprite process
|
||||
; ]2 address of two sprite process
|
||||
; ]3 address of three sprite process
|
||||
; ]4 address of four sprite process
|
||||
|
||||
SpriteBitsToVBuffAddrs mac
|
||||
dobit1 0;0;b_1_1;]1
|
||||
dobit1 1;0;b_2_1;]1
|
||||
dobit1 2;0;b_3_1;]1
|
||||
dobit1 3;0;b_4_1;]1
|
||||
dobit1 4;0;b_5_1;]1
|
||||
dobit1 5;0;b_6_1;]1
|
||||
dobit1 6;0;b_7_1;]1
|
||||
dobit1 7;0;b_8_1;]1
|
||||
dobit1 8;0;b_9_1;]1
|
||||
dobit1 9;0;b_10_1;]1
|
||||
dobit1 10;0;b_11_1;]1
|
||||
dobit1 11;0;b_12_1;]1
|
||||
dobit1 12;0;b_13_1;]1
|
||||
dobit1 13;0;b_14_1;]1
|
||||
dobit1 14;0;b_15_1;]1
|
||||
endbit 15;0;]1
|
||||
|
||||
b_1_1 dobit 1;1;b_2_2;]2
|
||||
b_2_1 dobit 2;1;b_3_2;]2
|
||||
b_3_1 dobit 3;1;b_4_2;]2
|
||||
b_4_1 dobit 4;1;b_5_2;]2
|
||||
b_5_1 dobit 5;1;b_6_2;]2
|
||||
b_6_1 dobit 6;1;b_7_2;]2
|
||||
b_7_1 dobit 7;1;b_8_2;]2
|
||||
b_8_1 dobit 8;1;b_9_2;]2
|
||||
b_9_1 dobit 9;1;b_10_2;]2
|
||||
b_10_1 dobit 10;1;b_11_2;]2
|
||||
b_11_1 dobit 11;1;b_12_2;]2
|
||||
b_12_1 dobit 12;1;b_13_2;]2
|
||||
b_13_1 dobit 13;1;b_14_2;]2
|
||||
b_14_1 dobit 14;1;b_15_2;]2
|
||||
b_15_1 endbit 15;1;]2
|
||||
|
||||
b_2_2 dobit 2;2;b_3_3;]3
|
||||
b_3_2 dobit 3;2;b_4_3;]3
|
||||
b_4_2 dobit 4;2;b_5_3;]3
|
||||
b_5_2 dobit 5;2;b_6_3;]3
|
||||
b_6_2 dobit 6;2;b_7_3;]3
|
||||
b_7_2 dobit 7;2;b_8_3;]3
|
||||
b_8_2 dobit 8;2;b_9_3;]3
|
||||
b_9_2 dobit 9;2;b_10_3;]3
|
||||
b_10_2 dobit 10;2;b_11_3;]3
|
||||
b_11_2 dobit 11;2;b_12_3;]3
|
||||
b_12_2 dobit 12;2;b_13_3;]3
|
||||
b_13_2 dobit 13;2;b_14_3;]3
|
||||
b_14_2 dobit 14;2;b_15_3;]3
|
||||
b_15_2 endbit 15;2;]3
|
||||
|
||||
b_3_3 stpbit 3;3;]4
|
||||
b_4_3 stpbit 4;3;]4
|
||||
b_5_3 stpbit 5;3;]4
|
||||
b_6_3 stpbit 6;3;]4
|
||||
b_7_3 stpbit 7;3;]4
|
||||
b_8_3 stpbit 8;3;]4
|
||||
b_9_3 stpbit 9;3;]4
|
||||
b_10_3 stpbit 10;3;]4
|
||||
b_11_3 stpbit 11;3;]4
|
||||
b_12_3 stpbit 12;3;]4
|
||||
b_13_3 stpbit 13;3;]4
|
||||
b_14_3 stpbit 14;3;]4
|
||||
b_15_3 endbit 15;3;]4
|
||||
<<<
|
41
src/Tool.s
41
src/Tool.s
@ -52,8 +52,23 @@ _CallTable
|
||||
adrl _TSRender-1
|
||||
adrl _TSLoadTileSet-1
|
||||
adrl _TSCreateSpriteStamp-1
|
||||
adrl _TSAddSprite-1
|
||||
adrl _TSMoveSprite-1
|
||||
adrl _TSUpdateSprite-1
|
||||
adrl _TSRemoveSprite-1
|
||||
_CTEnd
|
||||
|
||||
; Caller-side macros for the sprite tool functions. Each one expands to a single UserTool
; invocation whose argument is a per-function constant added to GTEToolNum.
; NOTE(review): the high bytes $10-$13 presumably correspond to the positions of
; _TSAddSprite, _TSMoveSprite, _TSUpdateSprite and _TSRemoveSprite in _CallTable -- confirm
; against the full table, which is only partly visible here.
_GTEAddSprite MAC
|
||||
UserTool $1000+GTEToolNum
|
||||
<<<
|
||||
; Expands to UserTool with $1100+GTEToolNum
_GTEMoveSprite MAC
|
||||
UserTool $1100+GTEToolNum
|
||||
<<<
|
||||
; Expands to UserTool with $1200+GTEToolNum
_GTEUpdateSprite MAC
|
||||
UserTool $1200+GTEToolNum
|
||||
<<<
|
||||
; Expands to UserTool with $1300+GTEToolNum
_GTERemoveSprite MAC
|
||||
UserTool $1300+GTEToolNum
|
||||
<<<
|
||||
; Helper function to set the data back to the toolset default
|
||||
_SetDataBank sep #$20
|
||||
lda #^TileStore
|
||||
@ -285,6 +300,21 @@ _TSAddSprite
|
||||
|
||||
_TSExit #0;#8
|
||||
|
||||
; _TSMoveSprite
;
; Tool entry point that forwards a move request to _MoveSprite.
;
; Stack parameters (offsets from FirstParam):
;   +0 = spriteY
;   +2 = spriteX
;   +4 = spriteSlot
;
; _MoveSprite is called with the slot in A, the X position in X and the Y position in Y.
; The exit macro is invoked with #0 and #6, the latter matching the three word parameters.
_TSMoveSprite
:spriteY        equ   FirstParam+0
:spriteX        equ   FirstParam+2
:spriteSlot     equ   FirstParam+4

                _TSEntry

                lda   :spriteY,s                 ; Y register <- vertical position
                tay
                lda   :spriteX,s                 ; X register <- horizontal position
                tax
                lda   :spriteSlot,s              ; A <- sprite slot (loaded last, as before)
                jsr   _MoveSprite

                _TSExit #0;#6
|
||||
|
||||
_TSUpdateSprite
|
||||
:vbuff equ FirstParam+0
|
||||
:spriteFlags equ FirstParam+2
|
||||
@ -300,6 +330,15 @@ _TSUpdateSprite
|
||||
|
||||
_TSExit #0;#6
|
||||
|
||||
; _TSRemoveSprite
;
; Tool entry point that removes the sprite in the given slot.
;
; Stack parameters (offsets from FirstParam):
;   +0 = spriteSlot
;
; The slot is passed to the internal routine in A. This entry point previously called
; _UpdateSprite, the routine used by the _TSUpdateSprite entry point just before it,
; so a remove request never reached the remove code.
; NOTE(review): _RemoveSprite is assumed to be the name of the internal remove routine in
; Sprite.s; it is not visible in this part of the source -- confirm before merging.
_TSRemoveSprite
:spriteSlot     equ   FirstParam+0

                _TSEntry

                lda   :spriteSlot,s
                jsr   _RemoveSprite

                _TSExit #0;#2
|
||||
|
||||
; Insert the GTE code
|
||||
|
||||
put Math.s
|
||||
|
@ -73,7 +73,7 @@ Counter equ tmp3
|
||||
|
||||
|
||||
; Patch 8-bit or 16-bit values into the bank. These are a set of unrolled loops to
|
||||
; quickly patch in a constanct value, or a value from an array into a given set of
|
||||
; quickly patch in a constant value, or a value from an array into a given set of
|
||||
; templates.
|
||||
;
|
||||
; Because we have structured everything as parallel code blocks, most updates to the blitter
|
||||
|
@ -27,6 +27,18 @@ _TBCopyTileDataAndMaskToCBuffV
|
||||
jsr _TBCopyTileDataToCBuffV
|
||||
jmp _TBCopyTileMaskToCBuffV
|
||||
|
||||
; _CopyTileDataToDP2
;
; Copy 32 bytes (8 lines of 4 bytes) from tiledata,x into tmp_tile_data using long loads.
; The copy is fully unrolled one word at a time; words 0-15 produce the same load/store
; sequence as walking 8 lines with two words each.
;
; Inputs:
;   X = offset of the tile within tiledata
; Clobbers:
;   A
_CopyTileDataToDP2
]word       equ   0
            lup   16
            ldal  tiledata+{]word*2},x
            sta   tmp_tile_data+{]word*2}
]word       equ   ]word+1
            --^
            rts
|
||||
|
||||
_TBCopyTileDataToCBuff
|
||||
]line equ 0
|
||||
lup 8
|
||||
|
@ -84,18 +84,6 @@ CopyNoSprites
|
||||
|
||||
:tiledisp jmp $0000 ; render the tile
|
||||
|
||||
; Let's make a macro helper for the bit test tree
|
||||
; dobit src_offset,dest,next_target,end_target
|
||||
dobit MAC
|
||||
beq last_bit
|
||||
ldx: ]1,y
|
||||
stx ]2
|
||||
jmp ]3
|
||||
last_bit ldx: ]1,y
|
||||
stx ]2
|
||||
jmp ]4
|
||||
EOM
|
||||
|
||||
; The sprite code is just responsible for quickly copying all of the sprite data
|
||||
; into the direct page temp area.
|
||||
|
||||
@ -117,83 +105,9 @@ dirty_sprite_dispatch
|
||||
da CopyThreeSprites
|
||||
da CopyFourSprites ; MAX, don't bother with more than 4 sprites per tile
|
||||
|
||||
; This is very similar to the code in the dirty tile renderer, but we can't reuse
|
||||
; because that code draws directly to the graphics screen, and this code draws
|
||||
; to a temporary buffer that has a different stride.
|
||||
|
||||
; ldy TileStore+TS_VBUFF_ARRAY_ADDR,x ; base address of the VBUFF sprite address array for this tile
|
||||
;
|
||||
; lsr
|
||||
; bcc :loop_0_bit_1
|
||||
; dobit $0000;sprite_ptr0;:loop_1_bit_1;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_1 lsr
|
||||
; bcc :loop_0_bit_2
|
||||
; dobit $0002;sprite_ptr0;:loop_1_bit_2;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_2 lsr
|
||||
; bcc :loop_0_bit_3
|
||||
; dobit $0004;sprite_ptr0;:loop_1_bit_3;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_3 lsr
|
||||
; bcc :loop_0_bit_4
|
||||
; dobit $0006;sprite_ptr0;:loop_1_bit_4;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_4 lsr
|
||||
; bcc :loop_0_bit_5
|
||||
; dobit $0008;sprite_ptr0;:loop_1_bit_5;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_5 lsr
|
||||
; bcc :loop_0_bit_6
|
||||
; dobit $000A;sprite_ptr0;:loop_1_bit_6;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_6 lsr
|
||||
; bcc :loop_0_bit_7
|
||||
; dobit $000C;sprite_ptr0;:loop_1_bit_7;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_7 lsr
|
||||
; bcc :loop_0_bit_8
|
||||
; dobit $000E;sprite_ptr0;:loop_1_bit_8;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_8 lsr
|
||||
; bcc :loop_0_bit_9
|
||||
; dobit $0010;sprite_ptr0;:loop_1_bit_9;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_9 lsr
|
||||
; bcc :loop_0_bit_10
|
||||
; ldx: $0012,y
|
||||
; stx spriteIdx
|
||||
; cmp #0
|
||||
; jne :loop_1_bit_10
|
||||
; jmp CopyOneSprite
|
||||
|
||||
;:loop_0_bit_10 lsr
|
||||
; bcc :loop_0_bit_11
|
||||
; dobit $0014;sprite_ptr0;:loop_1_bit_11;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_11 lsr
|
||||
; bcc :loop_0_bit_12
|
||||
; dobit $0016;sprite_ptr0;:loop_1_bit_12;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_12 lsr
|
||||
; bcc :loop_0_bit_13
|
||||
; dobit $0018;sprite_ptr0;:loop_1_bit_13;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_13 lsr
|
||||
; bcc :loop_0_bit_14
|
||||
; dobit $001A;sprite_ptr0;:loop_1_bit_14;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_14 lsr
|
||||
; bcc :loop_0_bit_15
|
||||
; dobit $001C;sprite_ptr0;:loop_1_bit_15;CopyOneSprite
|
||||
|
||||
;:loop_0_bit_15 ldx: $001E,y
|
||||
; stx spriteIdx
|
||||
; jmp CopyOneSprite
|
||||
|
||||
; We can optimize later, for now just copy the sprite data and mask into its own
|
||||
; direct page buffer and combine with the tile data later
|
||||
|
||||
;
|
||||
; We set up direct page pointers to the mask bank and use the bank register for the
|
||||
; data.
|
||||
CopyFourSprites
|
||||
|
@ -30,7 +30,6 @@ _TBFastSpriteTile_VH
|
||||
; Need to update the X-register before calling this
|
||||
_TBApplySpriteData
|
||||
ldx _SPR_X_REG ; set to the unaligned tile block address in the sprite plane
|
||||
|
||||
]line equ 0
|
||||
lup 8
|
||||
lda blttmp+{]line*4}
|
||||
|
@ -2,18 +2,15 @@
|
||||
|
||||
put ../Defs.s
|
||||
put TileStoreDefs.s
|
||||
put ../blitter/Template.s
|
||||
|
||||
;-------------------------------------------------------------------------------------
|
||||
;
|
||||
; Buffer space
|
||||
|
||||
ds 256
|
||||
put ../blitter/Template.s
|
||||
|
||||
;-------------------------------------------------------------------------------------
|
||||
|
||||
TileStore ENT
|
||||
ds {TILE_STORE_SIZE*17}
|
||||
ds {TILE_STORE_SIZE*TILE_STORE_NUM}
|
||||
|
||||
;-------------------------------------------------------------------------------------
|
||||
;
|
||||
@ -23,7 +20,7 @@ TileStore ENT
|
||||
DirtyTileCount ENT
|
||||
ds 2
|
||||
DirtyTiles ENT
|
||||
ds TILE_STORE_SIZE ; At most this many tiles can possibly be update at once
|
||||
ds TILE_STORE_SIZE ; At most this many tiles can possibly be updated at once
|
||||
|
||||
;-------------------------------------------------------------------------------------
|
||||
;
|
||||
@ -373,9 +370,15 @@ ScreenModeWidth ENT
|
||||
ScreenModeHeight ENT
|
||||
dw 200,192,200,176,160,160,160,128,144,192,102,1
|
||||
|
||||
; List of addresses of the VBuff arrays for each Tile Store entry, indexed by sprite index
|
||||
VBuffArrayAddr ENT
|
||||
ds MAX_SPRITES*2
|
||||
; VBuff arrays for each sprite. We need at least a 3x3 block for each sprite and the shape of the
|
||||
; array must match the TileStore structure. The TileStore is 41 blocks wide. To keep things simple
|
||||
; we allocate 8 sprites in the first row and 8 more sprites in the 4th row. So we need to allocate a
|
||||
; total of 6 rows of TileStore space
|
||||
;
|
||||
; It is *critical* that this array be placed in a memory location that is greater than the largest
|
||||
; TileStore offset.
|
||||
VBuffArray ENT
|
||||
ds 6*{TILE_STORE_WIDTH*2}
|
||||
|
||||
; Convert sprite index to a bit position
|
||||
_SpriteBits ENT
|
||||
|
@ -14,24 +14,17 @@ TS_CODE_ADDR_HIGH equ TILE_STORE_SIZE*5
|
||||
TS_WORD_OFFSET equ TILE_STORE_SIZE*6 ; const value, word offset value for this tile if LDA (dp),y instructions are used
|
||||
TS_BASE_ADDR equ TILE_STORE_SIZE*7 ; const value, because there are two rows of tiles per bank, this is set to $0000 or $8000.
|
||||
TS_SCREEN_ADDR equ TILE_STORE_SIZE*8 ; cached value of on-screen location of tile. Used for DirtyRender.
|
||||
;TS_VBUFF_ARRAY_ADDR equ TILE_STORE_SIZE*9 ; const value to an aligned 32-byte array starting at $8000 in TileStore bank
|
||||
|
||||
TS_BASE_TILE_COPY equ TILE_STORE_SIZE*9 ; derived from TS_TILE_ID to optimize tile copy to support sprite rendering
|
||||
TS_BASE_TILE_DISP equ TILE_STORE_SIZE*10 ; derived from TS_TILE_ID to optimize base (non-sprite) tile dispatch in the Render function
|
||||
TS_DIRTY_TILE_DISP equ TILE_STORE_SIZE*11 ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function
|
||||
|
||||
; Hold values for up to 4 sprites per tile
|
||||
TS_VBUFF_ADDR_0 equ TILE_STORE_SIZE*12
|
||||
TS_VBUFF_ADDR_1 equ TILE_STORE_SIZE*13
|
||||
TS_VBUFF_ADDR_2 equ TILE_STORE_SIZE*14
|
||||
TS_VBUFF_ADDR_3 equ TILE_STORE_SIZE*15
|
||||
TS_VBUFF_ADDR_COUNT equ TILE_STORE_SIZE*16 ; replace usage of TS_VBUFF_ARRAY_ADDR with this later
|
||||
TILE_STORE_NUM equ 12 ; Need this many parallel arrays
|
||||
|
||||
; Sprite data structures. We cache quite a few pieces of information about the sprite
|
||||
; to make calculations faster, so this is hidden from the caller.
|
||||
|
||||
MAX_SPRITES equ 16
|
||||
SPRITE_REC_SIZE equ 52
|
||||
SPRITE_REC_SIZE equ 42
|
||||
|
||||
; Mark each sprite as ADDED, UPDATED, MOVED, REMOVED depending on the actions applied to it
|
||||
; on this frame. Quick note, the same Sprite ID cannot be removed and added in the same frame.
|
||||
@ -68,6 +61,7 @@ SPRITE_HEIGHT equ {MAX_SPRITES*32}
|
||||
SPRITE_CLIP_WIDTH equ {MAX_SPRITES*34}
|
||||
SPRITE_CLIP_HEIGHT equ {MAX_SPRITES*36}
|
||||
TS_VBUFF_BASE equ {MAX_SPRITES*38} ; Finalized VBUFF address based on the sprite position and tile offsets
|
||||
VBUFF_ARRAY_ADDR equ {MAX_SPRITES*40} ; Fixed address where this sprite's VBUFF addresses are stored. The array is the same shape as TileStore, but much smaller
|
||||
;TILE_DATA_OFFSET equ {MAX_SPRITES*2}
|
||||
;TILE_STORE_ADDR_1 equ {MAX_SPRITES*12}
|
||||
;TILE_STORE_ADDR_2 equ {MAX_SPRITES*14}
|
||||
|
@ -6,8 +6,8 @@
|
||||
; If there are sprites, then the sprite data is flattened and stored into a direct page buffer
|
||||
; and then copied into the code field
|
||||
_RenderTileFast
|
||||
; lda TileStore+TS_VBUFF_ADDR_COUNT,x ; How many sprites are on this tile?
|
||||
; bne SpriteDispatch ; This is faster if there are no sprites
|
||||
lda TileStore+TS_SPRITE_FLAG,x ; any sprites on this line?
|
||||
bne SpriteDispatch
|
||||
|
||||
NoSpriteFast
|
||||
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
|
||||
@ -24,35 +24,60 @@ nsf_patch jmp $0000
|
||||
FastTileProcs dw _TBCopyDataFast,_TBCopyDataFast,_TBCopyDataFast,_TBCopyDataFast
|
||||
; dw _TBCopyDataFast,_TBCopyDataFast,_TBCopyDataVFast,_TBCopyDataVFast
|
||||
|
||||
SpriteDispatch
|
||||
tax
|
||||
jmp (:,x) ; Dispatch to the other routines
|
||||
: da NoSpriteFast ; Placeholder
|
||||
da OneSpriteFast
|
||||
da TwoSpritesFast
|
||||
da ThreeSpritesFast
|
||||
da FourSpritesFast
|
||||
; NOTE: Inlining the dispatch would eliminate a JSR,RTS,LDX, and JMP (abs,x) because the exit code
|
||||
; could jump directly to the target address. Net savings of 20 cycles per tile. For a 16x16
|
||||
; sprite with a 3x3 block coverage this is 180 cycles per frame per block... This would also
|
||||
; preserve a register
|
||||
;
|
||||
; For comparison, a fast one sprite copy takes 22 cycles per word, so this would save
|
||||
; about 1/2 block of render time per tile.
|
||||
;
|
||||
; Need to determine if the sprite or tile data is on top, as that will decide whether the
|
||||
; sprite or tile data is copied into the temporary buffer first. Also, if TWO_LAYER is set
|
||||
; then the mask information must be copied as well....This is the last decision point.
|
||||
|
||||
; Pointers to sprite data and masks
|
||||
spritedata_0 equ tmp0
|
||||
spritedata_1 equ tmp2
|
||||
spritedata_2 equ tmp4
|
||||
spritedata_3 equ tmp6
|
||||
spritemask_0 equ tmp8
|
||||
spritemask_1 equ tmp10
|
||||
spritemask_2 equ tmp12
|
||||
spritemask_3 equ tmp14
|
||||
SpriteDispatch
|
||||
txy
|
||||
SpriteBitsToVBuffAddrs OneSpriteFast;OneSpriteFast;OneSpriteFast;OneSpriteFast
|
||||
sta sprite_ptr0
|
||||
ldx TileStore+TS_TILE_ADDR,y
|
||||
jsr _CopyTileDataToDP2 ; preserves Y
|
||||
lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line
|
||||
pha ; and put on the stack for later. Has TileStore bank in high byte.
|
||||
ldx sprite_ptr0 ; address of sprite vbuff info
|
||||
lda TileStore+TS_CODE_ADDR_LOW,y ; load the address of the code field
|
||||
tay
|
||||
; jmp _TBApplySpriteData2
|
||||
|
||||
_TBApplySpriteData2
|
||||
]line equ 0
|
||||
lup 8
|
||||
lda blttmp+{]line*4}
|
||||
andl spritemask+{]line*SPRITE_PLANE_SPAN},x
|
||||
oral spritedata+{]line*SPRITE_PLANE_SPAN},x
|
||||
sta: $0004+{]line*$1000},y
|
||||
|
||||
lda blttmp+{]line*4}+2
|
||||
andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x
|
||||
oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
|
||||
sta: $0001+{]line*$1000},y
|
||||
]line equ ]line+1
|
||||
--^
|
||||
rts
|
||||
|
||||
; Where there are sprites involved, the first step is to call a routine to copy the
|
||||
; tile data into a temporary buffer. Then the sprite data is merged and placed into
|
||||
; the code field.
|
||||
;
|
||||
; A = vbuff address
|
||||
; Y = tile store address
|
||||
OneSpriteFast
|
||||
tyx
|
||||
lda TileStore+TS_TILE_ADDR,y
|
||||
per :-1
|
||||
jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer
|
||||
:
|
||||
ldx TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data
|
||||
tax ; address of the sprite data
|
||||
lda TileStore+TS_BASE_TILE_COPY,y ; copy routine (handles flips and other behaviors)
|
||||
stal osf_copy+1
|
||||
osf_copy jsr $0000
|
||||
|
||||
; ldx TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data
|
||||
lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line
|
||||
pha ; and put on the stack for later.
|
||||
lda TileStore+TS_CODE_ADDR_LOW,y
|
||||
@ -71,17 +96,17 @@ OneSpriteFast
|
||||
rts
|
||||
|
||||
TwoSpritesFast
|
||||
tyx
|
||||
lda TileStore+TS_TILE_ADDR,y
|
||||
per :-1
|
||||
jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer
|
||||
:
|
||||
lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data
|
||||
sta spritedata_0
|
||||
sta spritemask_0
|
||||
lda TileStore+TS_VBUFF_ADDR_1,y ; address of the sprite data
|
||||
sta spritedata_1
|
||||
sta spritemask_1
|
||||
; tyx
|
||||
; lda TileStore+TS_TILE_ADDR,y
|
||||
; per :-1
|
||||
; jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer
|
||||
;:
|
||||
; lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data
|
||||
; sta spritedata_0
|
||||
; sta spritemask_0
|
||||
; lda TileStore+TS_VBUFF_ADDR_1,y ; address of the sprite data
|
||||
; sta spritedata_1
|
||||
; sta spritemask_1
|
||||
|
||||
lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line
|
||||
pha ; and put on the stack for later.
|
||||
@ -89,33 +114,33 @@ TwoSpritesFast
|
||||
tay
|
||||
plb ; set the code field bank
|
||||
|
||||
TwoSpritesToCodeField 0
|
||||
TwoSpritesToCodeField 1
|
||||
TwoSpritesToCodeField 2
|
||||
TwoSpritesToCodeField 3
|
||||
TwoSpritesToCodeField 4
|
||||
TwoSpritesToCodeField 5
|
||||
TwoSpritesToCodeField 6
|
||||
TwoSpritesToCodeField 7
|
||||
; TwoSpritesToCodeField 0
|
||||
; TwoSpritesToCodeField 1
|
||||
; TwoSpritesToCodeField 2
|
||||
; TwoSpritesToCodeField 3
|
||||
; TwoSpritesToCodeField 4
|
||||
; TwoSpritesToCodeField 5
|
||||
; TwoSpritesToCodeField 6
|
||||
; TwoSpritesToCodeField 7
|
||||
|
||||
rts
|
||||
|
||||
ThreeSpritesFast
|
||||
FourSpritesFast
|
||||
tyx
|
||||
lda TileStore+TS_TILE_ADDR,y
|
||||
per :-1
|
||||
jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer
|
||||
:
|
||||
lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data
|
||||
sta spritedata_0
|
||||
sta spritemask_0
|
||||
lda TileStore+TS_VBUFF_ADDR_1,y
|
||||
sta spritedata_1
|
||||
sta spritemask_1
|
||||
lda TileStore+TS_VBUFF_ADDR_2,y
|
||||
sta spritedata_2
|
||||
sta spritemask_2
|
||||
; tyx
|
||||
; lda TileStore+TS_TILE_ADDR,y
|
||||
; per :-1
|
||||
; jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer
|
||||
;:
|
||||
; lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data
|
||||
; sta spritedata_0
|
||||
; sta spritemask_0
|
||||
; lda TileStore+TS_VBUFF_ADDR_1,y
|
||||
; sta spritedata_1
|
||||
; sta spritemask_1
|
||||
; lda TileStore+TS_VBUFF_ADDR_2,y
|
||||
; sta spritedata_2
|
||||
; sta spritemask_2
|
||||
|
||||
lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line
|
||||
pha ; and put on the stack for later.
|
||||
@ -123,13 +148,13 @@ FourSpritesFast
|
||||
tay
|
||||
plb ; set the code field bank
|
||||
|
||||
ThreeSpritesToCodeField 0
|
||||
ThreeSpritesToCodeField 1
|
||||
ThreeSpritesToCodeField 2
|
||||
ThreeSpritesToCodeField 3
|
||||
ThreeSpritesToCodeField 4
|
||||
ThreeSpritesToCodeField 5
|
||||
ThreeSpritesToCodeField 6
|
||||
ThreeSpritesToCodeField 7
|
||||
; ThreeSpritesToCodeField 0
|
||||
; ThreeSpritesToCodeField 1
|
||||
; ThreeSpritesToCodeField 2
|
||||
; ThreeSpritesToCodeField 3
|
||||
; ThreeSpritesToCodeField 4
|
||||
; ThreeSpritesToCodeField 5
|
||||
; ThreeSpritesToCodeField 6
|
||||
; ThreeSpritesToCodeField 7
|
||||
|
||||
rts
|
Loading…
Reference in New Issue
Block a user