Start work on sprite attribute caching

This commit is contained in:
Lucas Scharenbroich 2022-02-04 12:37:05 -06:00
parent 4d7ff46250
commit 89ea425043
11 changed files with 20515 additions and 4022 deletions

View File

@ -1,10 +1,12 @@
; IIgs Game Engine
; IIgs Shooter
TYP $B3 ; S16 file
DSK GTEPacMan
DSK GTEShooter
XPL
; Segment #1 -- Main execution block
ASM Main.s

61
demos/smb/App.s Normal file
View File

@ -0,0 +1,61 @@
; IIgs Game Engine
TYP $B3 ; S16 file
DSK GTETestApp
XPL
; Segment #1 -- Main execution block
ASM Main.s
; Segment #2 -- ROM
ASM rom.s

4
demos/smb/Main.s Normal file
View File

@ -0,0 +1,4 @@
; Stub main segment: it contains no logic and returns to its caller immediately.
REL ; assemble as relocatable output
DSK MAINSEG ; output file name for this segment
rtl ; long return -- nothing else is executed here

16381
demos/smb/rom.s Normal file

File diff suppressed because it is too large Load Diff

12
demos/smb/wrapper.s Normal file
View File

@ -0,0 +1,12 @@
; Wrapper around the SMB ROM code. This takes care of setting any memory locations
; in the SMB ROM address space that interact with the system (like controller inputs).
;
; We also shim many of the LDA/STA instructions that modify the NES I/O to be
; JSRs to small subroutines that enqueue any changes that are handled once
; control returns. The queues are important, because we try to run the game
; logic at 60 fps, but the screen will update significantly slower than that.
;
; By queuing the changes, we can "catch up" to the game logic and prioritize
; audio output at 60 fps, since audio stutter is much more disruptive than slow
; FPS.

View File

@ -208,7 +208,7 @@ App_TileMapBG0
dw $1021,$1021,$1031,$1032,$1021,$1021,$100f,$0010,$0011,$1012,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$100f,$0010,$0011,$1012,$1021,$1021,$0007,$0008,$0007,$0008,$0007,$0008,$0007,$0008,$0007,$0008,$0007,$0008,$0007,$0008,$0007,$0008,$1021,$1021,$1021,$1021
dw $1021,$1021,$1030,$0015,$0015,$0015,$0015,$1033,$1021,$1021,$1021,$1021,$1009,$100a,$1021,$1021,$1021,$1021,$1021,$1021,$001a,$001a,$001a,$001a,$0040,$0040,$001a,$001a,$001a,$001a,$1021,$1021
dw $1021,$1030,$0015,$0036,$0015,$0015,$0036,$0015,$1033,$1021,$1800,$1804,$0800,$0804,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1022,$1023,$1022,$1023,$1022,$1023,$1021,$1021,$1021,$1030,$0015,$0036,$1230,$1021,$1021,$1057,$1056,$1057,$1056,$1057,$1056,$1057,$1056,$1021,$1022,$1023,$1021,$1021,$1057,$1056,$1057,$1056,$1013,$0014,$0015,$1016,$1021,$1021,$1021,$1021
dw $1021,$1030,$0015,$0036,$0015,$0015,$0036,$0015,$1033,$1021,$1800,$1804,$0800,$0804,$5800,$5804,$4800,$4804,$1021,$1021,$1021,$1021,$1021,$1021,$1022,$1023,$1022,$1023,$1022,$1023,$1021,$1021,$1021,$1030,$0015,$0036,$1230,$1021,$1021,$5057,$5056,$5057,$5056,$1057,$1056,$1057,$1056,$1021,$1022,$1023,$1021,$1021,$1057,$1056,$1057,$1056,$1013,$0014,$0015,$1016,$1021,$1021,$1021,$1021
dw $1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1013,$0014,$0015,$1016,$1021,$1021,$1021,$1021,$1022,$1023,$1022,$1023,$1021,$1021,$1021,$1021,$1013,$0014,$0015,$1016,$1021,$1030,$0015,$0036,$0015,$0015,$0036,$0015,$1033,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1013,$0014,$0015,$1016,$1021,$1021,$1022,$1023,$1022,$1023,$1022,$1023,$1021,$1021
dw $1021,$1030,$0015,$0036,$1033,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1022,$1023,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1022,$1023,$1022,$1023,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021
dw $1021,$1030,$0015,$0036,$0015,$0015,$0036,$0015,$1033,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1022,$1023,$1022,$1023,$1022,$1023,$1021,$1021,$1021,$1030,$0015,$0036,$1033,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1022,$1023,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021
@ -216,7 +216,7 @@ App_TileMapBG0
dw $1021,$1030,$0015,$0036,$1033,$1021,$1013,$0014,$0015,$1016,$1021,$1021,$1021,$1021,$0000,$1021,$1022,$1023,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1013,$0014,$0015,$1016,$0005,$0006,$0005,$0006,$0005,$0006,$0005,$0006,$0005,$0006,$0005,$0006,$0005,$0006,$0005,$0006,$0005,$0006,$1021,$1021,$1021,$1021
dw $1021,$1030,$0015,$0036,$0015,$0015,$0036,$0015,$1033,$1021,$1021,$1021,$0005,$0006,$1021,$1021,$1021,$1021,$1021,$1021,$001a,$001a,$001a,$001a,$0040,$0040,$001a,$001a,$001a,$001a,$1021,$1021
dw $1030,$0015,$0015,$0015,$0015,$0015,$0015,$0015,$0015,$1033,$1808,$180C,$0808,$080C,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1024,$0025,$0025,$0025,$0025,$0025,$0025,$1026,$1021,$1030,$0015,$0015,$0015,$0015,$1230,$1021,$1077,$1076,$1077,$1076,$1077,$1076,$1077,$1076,$1024,$0025,$0025,$1026,$1021,$1077,$1076,$1077,$1076,$1013,$0014,$0015,$1016,$1021,$1021,$1021,$1021
dw $1030,$0015,$0015,$0015,$0015,$0015,$0015,$0015,$0015,$1033,$1808,$180C,$0808,$080C,$5808,$580C,$4808,$480C,$1021,$1021,$1021,$1021,$1021,$1024,$0025,$0025,$0025,$0025,$0025,$0025,$1026,$1021,$1030,$0015,$0015,$0015,$0015,$1230,$1021,$5077,$5076,$5077,$5076,$1077,$1076,$1077,$1076,$1024,$0025,$0025,$1026,$1021,$1077,$1076,$1077,$1076,$1013,$0014,$0015,$1016,$1021,$1021,$1021,$1021
dw $1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1013,$0014,$0015,$1016,$1021,$1021,$1021,$1024,$0025,$0025,$0025,$0025,$1026,$1021,$1021,$1021,$1013,$0014,$0015,$1016,$1030,$0015,$0015,$0015,$0015,$0015,$0015,$0015,$0015,$1033,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1013,$0014,$0015,$1016,$1021,$1024,$0025,$0025,$0025,$0025,$0025,$0025,$1026,$1021
dw $1030,$0015,$0015,$0015,$0015,$1033,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1024,$0025,$0025,$1026,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1024,$0025,$0025,$0025,$0025,$1026,$1021,$1021,$1021,$1021,$1021,$1021,$1021
dw $1030,$0015,$0015,$0015,$0015,$0015,$0015,$0015,$0015,$1033,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1024,$0025,$0025,$0025,$0025,$0025,$0025,$1026,$1021,$1030,$0015,$0015,$0015,$0015,$1033,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1024,$0025,$0025,$1026,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021,$1021

File diff suppressed because it is too large Load Diff

View File

@ -184,7 +184,7 @@ EvtLoop
sec
sbc ScreenY1
cmp #8
bcs :vsyncloop
bcs :vsyncloop ; Wait until we're within the top 8 scanlines
lda #1
jsl SetBorderColor
:no_vsync

View File

@ -122,7 +122,12 @@ _ClearSpriteFromTileStore
; This function looks at the sprite list and renders the sprite plane data into the appropriate
; tiles in the code field. There are a few phases to this routine. The assumption is that
; any sprite that needs to be re-drawn has been marked as DIRTY.
; any sprite that needs to be re-drawn has been marked as DIRTY or DAMAGED.
;
; A DIRTY sprite is one that has moved, so it needs to be erased/redrawn in the sprite
; buffer AND the tiles it covers marked for refresh. A DAMAGED sprite shared one or more
; tiles with a DIRTY sprite, so it needs to be redrawn in the sprite buffer (but not erased!)
; and its tiles do NOT need to be marked for refresh.
;
; In the first phase, we run through the list of dirty sprites and erase them from their
; OLD_VBUFF_ADDR. This clears the sprite plane buffers. We also iterate through the
@ -135,7 +140,6 @@ _ClearSpriteFromTileStore
; In the second phase, the sprite is re-drawn into the sprite plane buffers and the appropriate
; Tile Store locations are marked as dirty. It is important to recognize that the sprites themselves
; can be marked dirty, and the underlying tiles in the tile store are independently marked dirty.
forceSpriteFlag ds 2
_RenderSprites
@ -143,7 +147,7 @@ _RenderSprites
; last time that the frame was rendered, then we need to mark all of the sprites as dirty so that
; the tiles on which they were located at the previous frame will be refreshed
;
; OPTIMIZATION NOTE: Shoud check that the sprite actually chanegs position. If the screen scrolles
; OPTIMIZATION NOTE: Should check that the sprite actually changes position. If the screen scrolls
; by +X, but the sprite moves by -X (so its relative position is unchanged), then
; it does NOT need to be marked as dirty.
@ -161,6 +165,32 @@ _RenderSprites
sta forceSpriteFlag
:no_chng_y
; Alternate first phase. _OpenListHead is, essentially, a count of the active sprites. We can use that as an early-out
; test to stop scanning the SPRITE_STATUS values once all active sprites have been accounted for.
; lda _OpenListHead
; beq :exit1
; lsr
; sta tmp0
; lda _Sprites+SPRITE_STATUS
; beq :exit1
; ldy #0
; jsr _ClearSprite
; dec tmp0
; beq :exit1
; lda _Sprites+SPRITE_STATUS+2
; beq :exit1
; ldy #2
; jsr _ClearSprite
; dec tmp0
; beq :exit1
; ...
;:exit1
; First phase, erase all dirty sprites
ldy #0
:loop1 lda _Sprites+SPRITE_STATUS,y ; If the status is zero, that's the sentinel value
@ -281,37 +311,16 @@ _GetTileAt
clc
rts
; X = _Sprites array offset
_EraseSprite
txy
; Y = _Sprites array offset
_EraseSpriteY
lda _Sprites+OLD_VBUFF_ADDR,y
beq :noerase
lda _Sprites+SPRITE_ID,y
and #$1800 ; use bits 11 and 12 to dispatch (oly care about size)
lsr
lsr
xba
tax
jmp (:erase_sprite,x)
ldx _Sprites+SPRITE_DISP,y ; get the dispatch index for this sprite
jmp (:do_erase,x)
:noerase rts
:erase_sprite dw erase_8x8,erase_8x16,erase_16x8,erase_16x16
:do_erase dw _EraseTileSprite8x8,_EraseTileSprite8x16
dw _EraseTileSprite16x8,_EraseTileSprite16x16
erase_8x8
ldx _Sprites+OLD_VBUFF_ADDR,y
jmp _EraseTileSprite8x8 ; erase from the old position
erase_8x16
ldx _Sprites+OLD_VBUFF_ADDR,y
jmp _EraseTileSprite8x16
erase_16x8
ldx _Sprites+OLD_VBUFF_ADDR,y
jmp _EraseTileSprite16x8
erase_16x16
ldx _Sprites+OLD_VBUFF_ADDR,y
jmp _EraseTileSprite16x16
; X = _Sprites array offset
_DrawSprite
@ -768,8 +777,9 @@ _CacheSpriteBanks
SPRITE_PLANE_SPAN equ 256
; X = bank address
; A = bank address
_EraseTileSprite8x8
tax
phb ; Save the bank to switch to the sprite plane
pei SpriteBanks
@ -795,6 +805,7 @@ _EraseTileSprite8x8
rts
_EraseTileSprite8x16
tax
phb ; Save the bank to switch to the sprite plane
pei SpriteBanks
@ -820,6 +831,7 @@ _EraseTileSprite8x16
rts
_EraseTileSprite16x8
tax
phb ; Save the bank to switch to the sprite plane
pei SpriteBanks
@ -849,6 +861,7 @@ _EraseTileSprite16x8
rts
_EraseTileSprite16x16
tax
phb ; Save the bank to switch to the sprite plane
pei SpriteBanks
@ -968,6 +981,8 @@ _AddSprite
jsr _GetSpriteVBuffAddrTmp ; Preserves X-register
sta _Sprites+VBUFF_ADDR,x
jsr _PrecalcAllSpriteInfo ; Cache stuff
txa ; And return the sprite ID
clc ; Mark that the sprite was successfully added
@ -983,6 +998,22 @@ _AddSprite
rts
; Precalculate cached, derived values for a sprite. These exist *only* so that other parts
; of the code, specifically the draw/erase routines, can run more efficiently.
;
; There are variations of this routine based on whether we are adding a new sprite, updating
; its tile information, or changing its position.
;
; Input:  X = sprite index
; Output: _Sprites+SPRITE_DISP,x = 0, 2, 4 or 6, selected by the size bits of SPRITE_ID
; Uses:   A (X is left untouched)
_PrecalcAllSpriteInfo
            lda   _Sprites+SPRITE_ID,x
            and   #$1800                  ; isolate bits 11 and 12 (only the size matters here)
            xba                           ; bring the size bits into the low byte: $0000..$0018
            lsr
            lsr                           ; divide by 4 -> word index usable with jmp (abs,x)
            sta   _Sprites+SPRITE_DISP,x
            rts
; Remove a sprite from the list. Just mark its STATUS as FREE and it will be
; picked up in the next AddSprite.
;
@ -1032,6 +1063,8 @@ _UpdateSpriteXnc
jsr _GetTileAddr ; This applies the TILE_ID_MASK
sta _Sprites+TILE_DATA_OFFSET,x
jsr _PrecalcAllSpriteInfo ; Cache stuff
lda #SPRITE_STATUS_DIRTY ; Content is changing, mark as dirty
sta _Sprites+SPRITE_STATUS,x
@ -1093,12 +1126,13 @@ _MoveSpriteXnc
NUM_BUFF_LINES equ 24
MAX_SPRITES equ 16
SPRITE_REC_SIZE equ 34
SPRITE_REC_SIZE equ 36
SPRITE_STATUS_EMPTY equ 0
SPRITE_STATUS_CLEAN equ 1
; Values stored in a sprite's SPRITE_STATUS field.
SPRITE_STATUS_EMPTY equ 0 ; slot is uninitialized / free (zero also acts as the sentinel when scanning)
SPRITE_STATUS_CLEAN equ 1 ; presumably: sprite is active and unchanged since the last render -- TODO confirm
SPRITE_STATUS_DIRTY equ 2 ; sprite moved or its content changed: erase/redraw it and refresh the tiles it covers
SPRITE_STATUS_FREE equ 4 ; sprite was removed; the slot is picked up by the next AddSprite
SPRITE_STATUS_DAMAGED equ 8 ; shared a tile with a DIRTY sprite: redraw (no erase), tiles not marked for refresh
SPRITE_STATUS equ {MAX_SPRITES*0}
TILE_DATA_OFFSET equ {MAX_SPRITES*2}
@ -1117,6 +1151,7 @@ TILE_STORE_ADDR_7 equ {MAX_SPRITES*26}
TILE_STORE_ADDR_8 equ {MAX_SPRITES*28}
TILE_STORE_ADDR_9 equ {MAX_SPRITES*30}
TILE_STORE_ADDR_10 equ {MAX_SPRITES*32}
SPRITE_DISP equ {MAX_SPRITES*34} ; pre-calculated index for jmp (abs,x) based on sprite size
; Maintain the index of the next open sprite slot. This allows us to have amortized
; constant sprite add performance. A negative value means no slots are available.

View File

@ -84,21 +84,18 @@ mdsOut rts
_MarkDirtySprite
stz _Sprites+TILE_STORE_ADDR_1,x ; Clear the this sprite's dirty tile list in case of an early exit
lda _SpriteBits,x
lda _SpriteBits,x ; Cache its bit flag to mark in the tile slots
sta SpriteBit
; Clip the sprite's extent to the screen so we can assume (mostly) positive values from here on out. Note that
; the sprite width and height are _only_ used in the clip and afterward all calculations use the clip rect
;
; OPTIMIZATION NOTE: These values can be calculated in AddSprite/MoveSprite once and stored in the sprite
; record since the screen size doesn't change.
; record since the screen size doesn't change. An off-screen flag can be set.
lda _Sprites+SPRITE_ID,x ; Get an index into the height/width tables based on the sprite bits
and #$1800
xba
lsr
lsr
tay
ldy _Sprites+SPRITE_DISP,x ; Get an index into the height/width tables based on the sprite bits
; lda _Sprites+IS_OFF_SCREEN,x ; Check if the sprite is visible in the playfield
; bne mdsOut
lda _Sprites+SPRITE_X,x
bpl :pos_x
@ -189,7 +186,8 @@ _MarkDirtySprite
sta ColLeft
; Sneak a pre-calculation here. Calculate the upper-left corner of the sprite in the sprite plane.
; We can reuse this in all of the routines below
; We can reuse this in all of the routines below. This is not the (x,y) of the sprite itself, but
; the corner of the tile it overlaps with
clc
lda TileTop
@ -219,10 +217,10 @@ _MarkDirtySprite
; Dispatch to the calculated sizing
; Begin a list of subroutines to cover all of the valid sprite size combinations. This is all unrolled code,
; maily to be able to do an unrolled fill of the TILE_STORE_ADDR_X values. Thus, it's important that the clipping
; function does its job properly since it allows up to save a lot of time here.
; mainly to be able to do an unrolled fill of the TILE_STORE_ADDR_X values. Thus, it's important that the clipping
; function does its job properly since it allows us to save a lot of time here.
;
; These functional are a trade off of being composable versus fast. Having to pay for multiple JSR/RTS invoations
; These functions are a trade off of being composable versus fast. Having to pay for multiple JSR/RTS invocations
; in the hot sprite path isn't great, but we're at a point of diminishing returns.
;
; There *might* be some speed gained by pushing a list of :mark_R_C addresses onto the stack in the clipping routine

View File

@ -678,10 +678,10 @@ _PushDirtyTileX
lda TileStore+TS_DIRTY,x
bpl :occupied2
lda DirtyTileCount
sta TileStore+TS_DIRTY,x
txa ; any non-negative value will work, this saves work below
sta TileStore+TS_DIRTY,x ; and is 1 cycle fater than loading a constanct value
txa
; txa
ldx DirtyTileCount
sta DirtyTiles,x