Update; tool is compiling while referencing new rederer pipeline

This commit is contained in:
Lucas Scharenbroich 2022-05-18 00:34:25 -05:00
parent 7f6e5d1b1f
commit 01e92a7b62
11 changed files with 14183 additions and 938 deletions

13688
demos/tool/Zelda.TileSet.s Normal file

File diff suppressed because it is too large Load Diff

1
demos/zelda/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
GTEZelda

View File

@ -36,3 +36,6 @@ _GTESetBG0Origin MAC
_GTERender MAC
UserTool $D00+GTEToolNum
<<<
_GTELoadTileSet MAC
UserTool $E00+GTEToolNum
<<<

View File

@ -12,7 +12,9 @@
},
"scripts": {
"test": "npm run build && build-image.bat %npm_package_config_cadius% && %npm_package_config_gsport%",
"build": "%npm_package_config_merlin32% -V %npm_package_config_macros% ./src/Master.s"
"build": "%npm_package_config_merlin32% -V %npm_package_config_macros% ./src/Master.s",
"build:debug": "%npm_package_config_merlin32% -V %npm_package_config_macros% ./src/Debug.s",
"debug": "%npm_package_config_crossrunner% ./src/Debug160 -Source ./src/Debug160_S02__Output.txt -Debug -CompatibilityLayer"
},
"repository": {
"type": "git",

View File

@ -28,12 +28,12 @@ _Render
jsr _ApplyBG0XPosPre
jsr _ApplyBG1XPosPre
jsr _RenderSprites ; Once the BG0 X and Y positions are committed, update sprite data
; jsr _RenderSprites ; Once the BG0 X and Y positions are committed, update sprite data
jsr _UpdateBG0TileMap ; and the tile maps. These subroutines build up a list of tiles
jsr _UpdateBG1TileMap ; that need to be updated in the code field
; jsr _UpdateBG0TileMap ; and the tile maps. These subroutines build up a list of tiles
; jsr _UpdateBG1TileMap ; that need to be updated in the code field
jsr _ApplyTiles ; This function actually draws the new tiles into the code field
jsr _ApplyTilesFast ; This function actually draws the new tiles into the code field
jsr _ApplyBG0XPos ; Patch the code field instructions with exit BRA opcode
jsr _ApplyBG1XPos ; Update the direct page value based on the horizontal position
@ -92,6 +92,73 @@ _Render
stz LastRender ; Mark that a full render was just performed
rts
; The _ApplyTilesFast is the same as _ApplyTiles, but we use the _RenderTileFast subroutine
_ApplyTilesFast
tdc
clc
adc #$100 ; move to the next page
tcd
ldy DirtyTileCount
beq :out
:loop
; Retrieve the offset of the next dirty Tile Store items in the X-register
jsr _PopDirtyTile2
; Call the generic dispatch with the Tile Store record pointer at by the X-register.
phb
jsr _RenderTileFast
plb
; Loop again until the list of dirty tiles is empty
ldy DirtyTileCount
bne :loop
:out
tdc ; Move back to the original direct page
sec
sbc #$100
tcd
rts
; The _ApplyTiles function is responsible for rendering all of the dirty tiles into the code
; field. In this function we switch to the second direct page which holds the temporary
; working buffers for tile rendering.
;
_ApplyTiles
tdc
clc
adc #$100 ; move to the next page
tcd
bra :begin
:loop
; Retrieve the offset of the next dirty Tile Store items in the X-register
jsr _PopDirtyTile2
; Call the generic dispatch with the Tile Store record pointer at by the X-register.
phb
; jsr _RenderTile2
plb
; Loop again until the list of dirty tiles is empty
:begin ldy DirtyTileCount
bne :loop
tdc ; Move back to the original direct page
sec
sbc #$100
tcd
rts
; This is a specialized render function that only updates the dirty tiles *and* draws them
; directly onto the SHR graphics buffer. The playfield is not used at all. In some way, this
; ignores almost all of the capabilities of GTE, but it does provide a convenient way to use
@ -104,10 +171,11 @@ _RenderDirty
lda LastRender ; If the full renderer was last called, we assume that
bne :norecalc ; the scroll positions have likely changed, so recalculate
jsr _RecalcTileScreenAddrs ; them to make sure sprites draw at the correct screen address
; jsr _ClearSpritesFromCodeField ; Restore the tiles to their non-sprite versions
:norecalc
jsr _RenderSprites
jsr _ApplyDirtyTiles
; jsr _RenderSprites
; jsr _ApplyDirtyTiles
lda #1
sta LastRender
@ -135,31 +203,31 @@ _ApplyDirtyTiles
; Only render solid tiles and sprites
_RenderDirtyTile
ldal TileStore+TS_SPRITE_FLAG,x ; This is a bitfield of all the sprites that intersect this tile, only care if non-zero or not
bne dirty_sprite
ldx TileStore+TS_VBUFF_ADDR_COUNT,y ; How many sprites are on this tile?
beq NoSpritesDirty ; This is faster if there are no sprites
lda TileStore+TS_TILE_ID,y ; Check if the tile has
jmp (dirty_dispatch,x)
dirty_dispatch
da NoSpritesDirty
da OneSpriteDirty
da TwoSpritesDirty
da ThreeSpritesDirty
da FourSpritesDirty
; The rest of this function handles that non-sprite blit, which is super fast since it blits directly from the
; tile data store to the graphics screen with no masking. The only extra work is selecting a blit function
; based on the tile flip flags.
pei TileStoreBankAndBank01 ; Special value that has the TileStore bank in LSB and $01 in MSB
plb
lda TileStore+TS_DIRTY_TILE_DISP,x ; load and patch in the appropriate subroutine
stal :tiledisp+1
ldy TileStore+TS_SCREEN_ADDR,x ; Get the on-screen address of this tile
lda TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated)
tax
plb ; set the bank
;
; B is set to Bank 01
; A is set to the tile word offset (0 through 80 in steps of 4)
; Y is set to the top-left address of the tile in SHR screen
; X is set to the address of the tile data
:tiledisp jmp $0000 ; render the tile
; A is set to the address of the tile data
NoSpritesDirty
tyx
ldy TileStore+TS_SCREEN_ADDR,x ; Get the on-screen address of this tile
lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated)
plb ; set the code field bank
jmp (TileStore+TS_DIRTY_TILE_DISP,x) ; go to the tile copy routine (just basics)
; Use some temporary space for the spriteIdx array (maximum of 4 entries)
@ -168,464 +236,17 @@ screenAddr equ tmp10
tileAddr equ tmp11
spriteIdx equ tmp12
; Handler for the sprite path
dirty_sprite
pei TileStoreBankAndTileDataBank ; Special value that has the TileStore bank in LSB and TileData bank in MSB
plb
; Cache a couple of values into the direct page, but preserve the Accumulator
ldy TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated)
sty tileAddr
ldy TileStore+TS_SCREEN_ADDR,x ; Get the on-screen address of this tile
sty screenAddr
; Now do all of the deferred work of actually drawing the sprites. We put considerable effort into
; figuring out if there is only one sprite or more than one since we optimize the former case as it
; is very common and can be done significantly faster.
;
; This is a big, unrolled chunk of code that packs the VBUFF addresses for the sprite positions marked
; in the bitfield into the spriteIdx array and then jumps to an optimized rendering function based on
; the number of sprites on the tile.
;
; After each set bit is identified, we check to see if that was the last one and immediately exit. Since
; a maximum of 4 sprites are processed per tile, this only results in (at most) 4 extra branch instructions.
ldy TileStore+TS_VBUFF_ARRAY_ADDR,x ; base address of the VBUFF sprite address array for this tile
lsr
bcc :loop_0_bit_1
ldx: $0000,y
stx spriteIdx
cmp #0
jne :loop_1_bit_1
jmp BlitOneSprite
:loop_0_bit_1 lsr
bcc :loop_0_bit_2
ldx: $0002,y
stx spriteIdx
cmp #0
jne :loop_1_bit_2
jmp BlitOneSprite
:loop_0_bit_2 lsr
bcc :loop_0_bit_3
ldx: $0004,y
stx spriteIdx
cmp #0
jne :loop_1_bit_3
jmp BlitOneSprite
:loop_0_bit_3 lsr
bcc :loop_0_bit_4
ldx: $0006,y
stx spriteIdx
cmp #0
jne :loop_1_bit_4
jmp BlitOneSprite
:loop_0_bit_4 lsr
bcc :loop_0_bit_5
ldx: $0008,y
stx spriteIdx
cmp #0
jne :loop_1_bit_5
jmp BlitOneSprite
:loop_0_bit_5 lsr
bcc :loop_0_bit_6
ldx: $000A,y
stx spriteIdx
cmp #0
jne :loop_1_bit_6
jmp BlitOneSprite
:loop_0_bit_6 lsr
bcc :loop_0_bit_7
ldx: $000C,y
stx spriteIdx
cmp #0
jne :loop_1_bit_7
jmp BlitOneSprite
:loop_0_bit_7 lsr
bcc :loop_0_bit_8
ldx: $000E,y
stx spriteIdx
cmp #0
jne :loop_1_bit_8
jmp BlitOneSprite
:loop_0_bit_8 lsr
bcc :loop_0_bit_9
ldx: $0010,y
stx spriteIdx
cmp #0
jne :loop_1_bit_9
jmp BlitOneSprite
:loop_0_bit_9 lsr
bcc :loop_0_bit_10
ldx: $0012,y
stx spriteIdx
cmp #0
jne :loop_1_bit_10
jmp BlitOneSprite
:loop_0_bit_10 lsr
bcc :loop_0_bit_11
ldx: $0014,y
stx spriteIdx
cmp #0
jne :loop_1_bit_11
jmp BlitOneSprite
:loop_0_bit_11 lsr
bcc :loop_0_bit_12
ldx: $0016,y
stx spriteIdx
cmp #0
jne :loop_1_bit_12
jmp BlitOneSprite
:loop_0_bit_12 lsr
bcc :loop_0_bit_13
ldx: $0018,y
stx spriteIdx
cmp #0
jne :loop_1_bit_13
jmp BlitOneSprite
:loop_0_bit_13 lsr
bcc :loop_0_bit_14
ldx: $001A,y
stx spriteIdx
cmp #0
jne :loop_1_bit_14
jmp BlitOneSprite
:loop_0_bit_14 lsr
bcc :loop_0_bit_15
ldx: $001C,y
stx spriteIdx
cmp #0
jne :loop_1_bit_15
jmp BlitOneSprite
; If we get to bit 15, then it *must* be a bit that is set
:loop_0_bit_15 ldx: $001E,y
stx spriteIdx
jmp BlitOneSprite
:loop_1_bit_1 lsr
bcc :loop_1_bit_2
ldx: $0002,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_2
jmp BlitTwoSprites
:loop_1_bit_2 lsr
bcc :loop_1_bit_3
ldx: $0004,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_3
jmp BlitTwoSprites
:loop_1_bit_3 lsr
bcc :loop_1_bit_4
ldx: $0006,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_4
jmp BlitTwoSprites
:loop_1_bit_4 lsr
bcc :loop_1_bit_5
ldx: $0008,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_5
jmp BlitTwoSprites
:loop_1_bit_5 lsr
bcc :loop_1_bit_6
ldx: $000A,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_6
jmp BlitTwoSprites
:loop_1_bit_6 lsr
bcc :loop_1_bit_7
ldx: $000C,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_7
jmp BlitTwoSprites
:loop_1_bit_7 lsr
bcc :loop_1_bit_8
ldx: $000E,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_8
jmp BlitTwoSprites
:loop_1_bit_8 lsr
bcc :loop_1_bit_9
ldx: $0010,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_9
jmp BlitTwoSprites
:loop_1_bit_9 lsr
bcc :loop_1_bit_10
ldx: $0012,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_10
jmp BlitTwoSprites
:loop_1_bit_10 lsr
bcc :loop_1_bit_11
ldx: $0014,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_11
jmp BlitTwoSprites
:loop_1_bit_11 lsr
bcc :loop_1_bit_12
ldx: $0016,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_12
jmp BlitTwoSprites
:loop_1_bit_12 lsr
bcc :loop_1_bit_13
ldx: $0018,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_13
jmp BlitTwoSprites
:loop_1_bit_13 lsr
bcc :loop_1_bit_14
ldx: $001A,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_14
jmp BlitTwoSprites
:loop_1_bit_14 lsr
bcc :loop_1_bit_15
ldx: $001C,y
stx spriteIdx+2
cmp #0
jne :loop_2_bit_15
jmp BlitTwoSprites
:loop_1_bit_15 ldx: $001E,y
stx spriteIdx+2
jmp BlitTwoSprites
:loop_2_bit_2 lsr
bcc :loop_2_bit_3
ldx: $0004,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_3
jmp BlitThreeSprites
:loop_2_bit_3 lsr
bcc :loop_2_bit_4
ldx: $0006,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_4
jmp BlitThreeSprites
:loop_2_bit_4 lsr
bcc :loop_2_bit_5
ldx: $0008,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_5
jmp BlitThreeSprites
:loop_2_bit_5 lsr
bcc :loop_2_bit_6
ldx: $000A,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_6
jmp BlitThreeSprites
:loop_2_bit_6 lsr
bcc :loop_2_bit_7
ldx: $000C,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_7
jmp BlitThreeSprites
:loop_2_bit_7 lsr
bcc :loop_2_bit_8
ldx: $000E,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_8
jmp BlitThreeSprites
:loop_2_bit_8 lsr
bcc :loop_2_bit_9
ldx: $0010,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_9
jmp BlitThreeSprites
:loop_2_bit_9 lsr
bcc :loop_2_bit_10
ldx: $0012,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_10
jmp BlitThreeSprites
:loop_2_bit_10 lsr
bcc :loop_2_bit_11
ldx: $0014,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_11
jmp BlitThreeSprites
:loop_2_bit_11 lsr
bcc :loop_2_bit_12
ldx: $0016,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_12
jmp BlitThreeSprites
:loop_2_bit_12 lsr
bcc :loop_2_bit_13
ldx: $0018,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_13
jmp BlitThreeSprites
:loop_2_bit_13 lsr
bcc :loop_2_bit_14
ldx: $001A,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_14
jmp BlitThreeSprites
:loop_2_bit_14 lsr
bcc :loop_2_bit_15
ldx: $001C,y
stx spriteIdx+4
cmp #0
jne :loop_3_bit_15
jmp BlitThreeSprites
:loop_2_bit_15 ldx: $001E,y
stx spriteIdx+4
jmp BlitThreeSprites
:loop_3_bit_3 lsr
bcc :loop_3_bit_4
ldx $0006,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_4 lsr
bcc :loop_3_bit_5
ldx $0008,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_5 lsr
bcc :loop_3_bit_6
ldx $000A,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_6 lsr
bcc :loop_3_bit_7
ldx $000C,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_7 lsr
bcc :loop_3_bit_8
ldx $000E,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_8 lsr
bcc :loop_3_bit_9
ldx $0010,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_9 lsr
bcc :loop_3_bit_10
ldx $0012,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_10 lsr
bcc :loop_3_bit_11
ldx $0014,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_11 lsr
bcc :loop_3_bit_12
ldx $0016,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_12 lsr
bcc :loop_3_bit_13
ldx $0018,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_13 lsr
bcc :loop_3_bit_14
ldx $001A,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_14 lsr
bcc :loop_3_bit_15
ldx $001C,y
stx spriteIdx+6
jmp BlitFourSprites
:loop_3_bit_15 ldx $001E,y
stx spriteIdx+6
jmp BlitFourSprites
; If there are two or more sprites at a tile, we can still be fast, but need to do extra work because
; the VBUFF values need to be read from the direct page. Thus, the direct page cannot be mapped onto
; the graphics screen. We use the stack instead, but have to do extra work to save and restore the
; stack value.
BlitFourSprites
BlitThreeSprites
BlitTwoSprites
FourSpritesDirty
ThreeSpritesDirty
TwoSpritesDirty
sta tileAddr
sty screenAddr
plb
tsc
sta stkSave ; Save the stack on the direct page
@ -806,7 +427,7 @@ BlitTwoSprites
; sprite and renders directly to the screen
;
; NOTE: Expect X-register to already have been set to the correct VBUFF address
BlitOneSprite
OneSpriteDirty
ldy tileAddr ; load the address of this tile's data
lda screenAddr ; Get the on-screen address of this tile

View File

@ -1,5 +1,22 @@
; Basic tile functions
; Copy tileset data from a pointer in memory to the tiledata back
; X = high word
; A = low word
_LoadTileSet
sta tmp0
stx tmp2
ldy #0
tyx
:loop lda [tmp0],y
stal tiledata,x
dex
dex
dey
dey
bne :loop
rts
; Low-level function to take a tile descriptor and return the address in the tiledata
; bank. This is not too useful in the fast-path because the fast-path does more
@ -199,12 +216,12 @@ _SetTile
bra :out
:fast
; lda FastTileProcs,y
; stal TileStore+TS_BASE_TILE_DISP,x
lda FastTileProcs,y
stal TileStore+TS_BASE_TILE_DISP,x
:out
; txa ; Add this tile to the list of dirty tiles to refresh
; jmp _PushDirtyTileX ; on the next call to _ApplyTiles
txa ; Add this tile to the list of dirty tiles to refresh
jmp _PushDirtyTileX ; on the next call to _ApplyTiles
:nochange rts

View File

@ -3,7 +3,6 @@
; Ref: Toolbox Reference, Volume 2, Appendix A
; Ref: IIgs Tech Note #73
; use Load.Macs.s
use Mem.Macs.s
use Misc.Macs.s
use Util.Macs
@ -51,6 +50,7 @@ _CallTable
adrl _TSSetTile-1
adrl _TSSetBG0Origin-1
adrl _TSRender-1
adrl _TSLoadTileSet-1
_CTEnd
; Do nothing when the tool set is installed
@ -223,9 +223,20 @@ xPos equ FirstParam+2
; Render()
_TSRender
_TSEntry
; jsr _Render
jsr _Render
_TSExit #0;#0
; LoadTileSet(Pointer)
_TSLoadTileSet
TSPtr equ FirstParam
_TSEntry
lda TSPtr+2,s
tax
lda TSPtr,s
jsr _LoadTileSet
_TSExit #0;#4
; Insert the GTE code
@ -235,11 +246,17 @@ _TSRender
put Timer.s
put Graphics.s
put Tiles.s
; put Render.s
put Render.s
put tiles/DirtyTileQueue.s
put tiles/FastRenderer.s
put blitter/Horz.s
put blitter/Vert.s
put blitter/BG0.s
put blitter/BG1.s
put blitter/Template.s
put blitter/TemplateUtils.s
put blitter/Tables.s
put blitter/Blitter.s
put blitter/TileProcs.s
put blitter/Tiles00000.s
; put blitter/Tiles.s

View File

@ -4,26 +4,15 @@
_TBFillPEAOpcode
sep #$20
lda #$F4
sta: $0000,y
sta: $0003,y
sta $1000,y
sta $1003,y
sta $2000,y
sta $2003,y
sta $3000,y
sta $3003,y
sta $4000,y
sta $4003,y
sta $5000,y
sta $5003,y
sta $6000,y
sta $6003,y
sta $7000,y
sta $7003,y
]line equ 0
lup 8
sta: $0000+{]line*$1000},y
sta: $0003+{]line*$1000},y
]line equ ]line+1
--^
rep #$20
rts
; Copy tile data into the direct page compositing buffer. The main reason to do this in full passes is
; because we can avoid needing to use both the X and Y registers during the compositing process and
; reserve Y to hold the code field address.

View File

@ -37,16 +37,7 @@
;
; It is simply too slow to try to horizontally reverse the pixel data on the fly. This still allows
; for up to 512 tiles to be stored in a single bank, which should be sufficient.
; Use some temporary space for the spriteIdx array (maximum of 4 entries)
stkSave equ tmp9
screenAddr equ tmp10
tileAddr equ tmp11
spriteIdx equ tmp12
;
; Given an address to a Tile Store record, dispatch to the appropriate tile renderer. The Tile
; Store record contains all of the low-level information that's needed to call the renderer.
;
@ -128,7 +119,7 @@ dirty_sprite_dispatch
; This is very similar to the code in the dirty tile renderer, but we can't reuse
; because that code draws directly to the graphics screen, and this code draws
; to a temporary budder that has a different stride.
; to a temporary buffer that has a different stride.
; ldy TileStore+TS_VBUFF_ARRAY_ADDR,x ; base address of the VBUFF sprite address array for this tile
;
@ -692,138 +683,10 @@ _CopyBG1Tile
; TileStore+TS_SPRITE_ADDR_15
; TileStore+TS_SPRITE_ADDR_16
; TileStore+
;TileStore ENT
; ds TILE_STORE_SIZE*11
; A list of dirty tiles that need to be updated in a given frame
DirtyTileCount ds 2
DirtyTiles ds TILE_STORE_SIZE ; At most this many tiles can possibly be update at once
_ClearDirtyTiles
bra :hop
:loop
jsr _PopDirtyTile
:hop
lda DirtyTileCount
bne :loop
rts
; Append a new dirty tile record
;
; A = result of _GetTileStoreOffset for X, Y
;
; The main purpose of this function is to
;
; 1. Avoid marking the same tile dirty multiple times, and
; 2. Pre-calculating all of the information necessary to render the tile
PushDirtyTile ENT
phb
phk
plb
jsr _PushDirtyTile
plb
rtl
; alternate version that is very slightly slower, but preserves the y-register
_PushDirtyTile
tax
; alternate entry point if the x-register is already set
_PushDirtyTileX
ldal TileStore+TS_DIRTY,x
bne :occupied2
inc ; any non-zero value will work
stal TileStore+TS_DIRTY,x ; and is 1 cycle faster than loading a constant value
txa
ldx DirtyTileCount ; 4
sta DirtyTiles,x ; 6
inx ; 2
inx ; 2
stx DirtyTileCount ; 4 = 18
rts
:occupied2
txa ; Make sure TileStore offset is returned in the accumulator
rts
; Remove a dirty tile from the list and return it in state ready to be rendered. It is important
; that the core rendering functions *only* use _PopDirtyTile to get a list of tiles to update,
; because this routine merges the tile IDs stored in the Tile Store with the Sprite
; information to set the TILE_SPRITE_BIT. This is the *only* place in the entire code base that
; applies this bit to a tile descriptor.
PopDirtyTile ENT
phb
phk
plb
jsr _PopDirtyTile
plb
rtl
_PopDirtyTile
ldy DirtyTileCount
bne _PopDirtyTile2
rts
_PopDirtyTile2 ; alternate entry point
dey
dey
sty DirtyTileCount ; remove last item from the list
ldx DirtyTiles,y ; load the offset into the Tile Store
lda #$FFFF
stal TileStore+TS_DIRTY,x ; clear the occupied backlink
rts
; Run through the dirty tile list and render them into the code field
ApplyTiles ENT
phb
phk
plb
jsr _ApplyTiles
plb
rtl
; The _ApplyTiles function is responsible for rendering all of the dirty tiles into the code
; field. In this function we switch to the second direct page which holds the temporary
; working buffers for tile rendering.
_ApplyTiles
tdc
clc
adc #$100 ; move to the next page
tcd
bra :begin
:loop
; Retrieve the offset of the next dirty Tile Store items in the X-register
jsr _PopDirtyTile2
; Call the generic dispatch with the Tile Store record pointer at by the X-register.
phb
jsr _RenderTile2
plb
; Loop again until the list of dirty tiles is empty
:begin ldy DirtyTileCount
bne :loop
tdc ; Move back to the original direct page
sec
sbc #$100
tcd
rts
; To make processing the tile faster, we do them in chunks of eight. This allows the loop to be
; unrolled, which means we don't have to keep track of the register value and makes it faster to
; clear the dirty tile flag after being processed.
; _ApplyTilesUnrolled
tdc ; Move to the dedicated direct page for tile rendering
clc
adc #$100

View File

@ -0,0 +1,64 @@
; A list of dirty tiles that need to be updated in a given frame
DirtyTileCount ds 2
DirtyTiles ds TILE_STORE_SIZE ; At most this many tiles can possibly be update at once
_ClearDirtyTiles
bra :hop
:loop
jsr _PopDirtyTile
:hop
lda DirtyTileCount
bne :loop
rts
; Append a new dirty tile record
;
; A = result of _GetTileStoreOffset for X, Y
;
; The main purpose of this function is to
;
; 1. Avoid marking the same tile dirty multiple times, and
; 2. Pre-calculating all of the information necessary to render the tile
_PushDirtyTile
tax
; alternate entry point if the x-register is already set
_PushDirtyTileX
ldal TileStore+TS_DIRTY,x
bne :occupied2
inc ; any non-zero value will work
stal TileStore+TS_DIRTY,x ; and is 1 cycle faster than loading a constant value
txa
ldx DirtyTileCount ; 4
sta DirtyTiles,x ; 6
inx ; 2
inx ; 2
stx DirtyTileCount ; 4 = 18
rts
:occupied2
txa ; Make sure TileStore offset is returned in the accumulator
rts
; Remove a dirty tile from the list and return it in state ready to be rendered. It is important
; that the core rendering functions *only* use _PopDirtyTile to get a list of tiles to update,
; because this routine merges the tile IDs stored in the Tile Store with the Sprite
; information to set the TILE_SPRITE_BIT. This is the *only* place in the entire code base that
; applies this bit to a tile descriptor.
_PopDirtyTile
ldy DirtyTileCount
bne _PopDirtyTile2
rts
_PopDirtyTile2 ; alternate entry point
dey
dey
sty DirtyTileCount ; remove last item from the list
ldx DirtyTiles,y ; load the offset into the Tile Store
lda #$FFFF
stal TileStore+TS_DIRTY,x ; clear the occupied backlink
rts

View File

@ -8,9 +8,7 @@
_RenderTileFast
ldx TileStore+TS_VBUFF_ADDR_COUNT,y ; How many sprites are on this tile?
beq NoSpritesFast ; This is faster if there are no sprites
lda TileStore+TS_TILE_ID,y ; Check if the tile has
jmp (fast_dispatch,x)
jmp (fast_dispatch,x) ; Dispatch to the other routines
fast_dispatch
da NoSpritesFast
da OneSpriteFast
@ -31,6 +29,16 @@ NoSpritesFast
; ENGINE_MODE_DYN_TILES are both off.
FastTileProcs dw _TBCopyDataFast,_TBCopyDataFast,_TBCopyDataVFast,_TBCopyDataVFast
; Pointers to sprite data and masks
spritedata_0 equ tmp0
spritedata_1 equ tmp2
spritedata_2 equ tmp4
spritedata_3 equ tmp6
spritemask_0 equ tmp8
spritemask_1 equ tmp10
spritemask_2 equ tmp12
spritemask_3 equ tmp14
; Where there are sprites involved, the first step is to call a routine to copy the
; tile data into a temporary buffer. Then the sprite data is merged and placed into
; the code field.
@ -47,19 +55,15 @@ OneSpriteFast
tay
plb ; set the code field bank
]line equ 0
lup 8
lda blttmp+{]line*4}
andl spritemask+{]line*SPRITE_PLANE_SPAN},x
oral spritedata+{]line*SPRITE_PLANE_SPAN},x
sta: $0004+{]line*$1000},y
OneSpriteToCodeField 0
OneSpriteToCodeField 1
OneSpriteToCodeField 2
OneSpriteToCodeField 3
OneSpriteToCodeField 4
OneSpriteToCodeField 5
OneSpriteToCodeField 6
OneSpriteToCodeField 7
lda blttmp+{]line*4}+2
andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x
oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
rts
TwoSpritesFast
@ -81,25 +85,15 @@ TwoSpritesFast
tay
plb ; set the code field bank
]line equ 0
lup 8
ldy #{]line*SPRITE_PLANE_SPAN}
lda blttmp+{]line*4}
andl [spritemask_1],y
oral [spritedata_1],y
andl [spritemask_0],y
oral [spritedata_0],y
sta: $0004+{]line*$1000},x
TwoSpritesToCodeField 0
TwoSpritesToCodeField 1
TwoSpritesToCodeField 2
TwoSpritesToCodeField 3
TwoSpritesToCodeField 4
TwoSpritesToCodeField 5
TwoSpritesToCodeField 6
TwoSpritesToCodeField 7
ldy #{]line*SPRITE_PLANE_SPAN}+2
lda blttmp+{]line*4}+2
andl [spritemask_1],y
oral [spritedata_1],y
andl [spritemask_0],y
oral [spritedata_0],y
sta: $0001+{]line*$1000},x
]line equ ]line+1
--^
rts
ThreeSpritesFast
@ -125,27 +119,13 @@ FourSpritesFast
tay
plb ; set the code field bank
]line equ 0
lup 8
ldy #{]line*SPRITE_PLANE_SPAN}
lda blttmp+{]line*4}
andl [spritemask_2],y
oral [spritedata_2],y
andl [spritemask_1],y
oral [spritedata_1],y
andl [spritemask_0],y
oral [spritedata_0],y
sta: $0004+{]line*$1000},x
ThreeSpritesToCodeField 0
ThreeSpritesToCodeField 1
ThreeSpritesToCodeField 2
ThreeSpritesToCodeField 3
ThreeSpritesToCodeField 4
ThreeSpritesToCodeField 5
ThreeSpritesToCodeField 6
ThreeSpritesToCodeField 7
ldy #{]line*SPRITE_PLANE_SPAN}+2
lda blttmp+{]line*4}+2
andl [spritemask_2],y
oral [spritedata_2],y
andl [spritemask_1],y
oral [spritedata_1],y
andl [spritemask_0],y
oral [spritedata_0],y
sta: $0001+{]line*$1000},x
]line equ ]line+1
--^
rts