Checkpoint of final tile proc reorg for generalize new infrastructure for different engine modes

This commit is contained in:
Lucas Scharenbroich 2022-06-20 15:55:09 -05:00
parent 12b05139c2
commit 1f9c9b3f5b
22 changed files with 1172 additions and 451 deletions

View File

@ -153,6 +153,7 @@ DP2_DIRTY_TILE_CALLBACK equ 162
; Some pre-defined bank values
DP2_TILEDATA_AND_TILESTORE_BANKS equ 164
DP2_SPRITEDATA_AND_TILESTORE_BANKS equ 166
DP2_TILEDATA_AND_SPRITEDATA_BANKS equ 168
SPRITE_VBUFF_PTR equ 224 ; 32 bytes of adjusted pointers to VBuffArray addresses
; End direct page values
@ -186,16 +187,16 @@ PAD_BUTTON_A equ $02
PAD_KEY_DOWN equ $04
; Tile constants
TILE_ID_MASK equ $01FF
TILE_SPRITE_BIT equ $8000 ; Set if this tile intersects an active sprite
; TILE_RESERVED_BIT equ $8000
TILE_PRIORITY_BIT equ $4000 ; Put tile on top of sprite
TILE_FRINGE_BIT equ $2000
TILE_MASK_BIT equ $1000
TILE_DYN_BIT equ $0800
TILE_FRINGE_BIT equ $2000 ; Unused
TILE_MASK_BIT equ $1000 ; Hint bit used in TWO_LAYER_MODE to optimize rendering
TILE_DYN_BIT equ $0800 ; Is this a Dynamic Tile?
TILE_VFLIP_BIT equ $0400
TILE_HFLIP_BIT equ $0200
TILE_ID_MASK equ $01FF
TILE_CTRL_MASK equ $FE00
TILE_PROC_MASK equ $F800 ; Select tile proc for rendering
; TILE_PROC_MASK equ $F800 ; Select tile proc for rendering
; Sprite constants
SPRITE_HIDE equ $2000

View File

@ -211,8 +211,8 @@ _RenderDirtyTile
; Y is set to the top-left address of the tile in SHR screen
; A is set to the address of the tile data
NoSpritesDirty
lda TileStore+TS_DIRTY_TILE_DISP,y
stal :nsd+1
; lda TileStore+TS_DIRTY_TILE_DISP,y
; stal :nsd+1
ldx TileStore+TS_SCREEN_ADDR,y ; Get the on-screen address of this tile
lda TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated)
plb ; set the code field bank

View File

@ -605,6 +605,11 @@ _CacheSpriteBanks
ldx #$100
sta DP2_TILEDATA_AND_TILESTORE_BANKS,x ; put a reversed copy in the second direct page
lda #>spritedata
and #$FF00
ora #^tiledata
sta DP2_TILEDATA_AND_SPRITEDATA_BANKS,x
lda #>spritedata
and #$FF00
ora #^TileStore

View File

@ -79,6 +79,7 @@ InitTiles
:col equ tmp0
:row equ tmp1
:vbuff equ tmp2
:base equ tmp3
; Initialize the Tile Store
@ -106,12 +107,18 @@ InitTiles
lda EngineMode
bit #ENGINE_MODE_DYN_TILES+ENGINE_MODE_TWO_LAYER
beq :fast
bit #ENGINE_MODE_TWO_LAYER
beq :dyn
; ldal TileProcs
; sta TileStore+TS_BASE_TILE_DISP,x
bra :out
:fast
lda #0 ; Initialize with Tile 0
ldy #FastOverZero
ldy #FastOverZA
jsr _SetTileProcs
:dyn lda #0 ; Initialize with Tile 0
ldy #DynOverZA
jsr _SetTileProcs
:out
@ -135,7 +142,8 @@ InitTiles
sta TileStore+TS_CODE_ADDR_HIGH,x ; High word of the tile address (just the bank)
lda BRowTableLow,y
sta TileStore+TS_BASE_ADDR,x ; May not be needed later if we can figure out the right constant...
sta :base
; sta TileStore+TS_BASE_ADDR,x ; May not be needed later if we can figure out the right constant...
lda :col ; Set the offset values based on the column
asl ; of this tile
@ -145,7 +153,8 @@ InitTiles
tay
lda Col2CodeOffset+2,y
clc
adc TileStore+TS_BASE_ADDR,x
adc :base
; adc TileStore+TS_BASE_ADDR,x
sta TileStore+TS_CODE_ADDR_LOW,x ; Low word of the tile address in the code field
dec :col
@ -167,18 +176,25 @@ InitTiles
; Y = tile row [0, 25] (26 rows)
;
; Registers are not preserved
_SetTile
pha
jsr _GetTileStoreOffset0 ; Get the address of the X,Y tile position
tay
pla
cmp TileStore+TS_TILE_ID,y ; Only set to dirty if the value changed
beq :nochange
oldTileId equ blttmp ; This location is used in _SetTileProcs, too
newTileId equ blttmp+2
procIdx equ blttmp+4
sta TileStore+TS_TILE_ID,y ; Value is different, store it.
_SetTile
sta newTileId
jsr _GetTileStoreOffset0 ; Get the address of the X,Y tile position
tax
lda TileStore+TS_TILE_ID,x
cmp newTileId
bne :changed
rts
:changed sta oldTileId
lda newTileId
sta TileStore+TS_TILE_ID,x ; Value is different, store it.
jsr _GetTileAddr
sta TileStore+TS_TILE_ADDR,y ; Committed to drawing this tile, so get the address of the tile in the tiledata bank for later
sta TileStore+TS_TILE_ADDR,x ; Committed to drawing this tile, so get the address of the tile in the tiledata bank for later
; Set the standard renderer procs for this tile.
;
@ -191,55 +207,81 @@ _SetTile
; functionality. Sometimes it is simple, but in cases of the sprites overlapping Dynamic Tiles and other cases
; it can be more involved.
; Calculate the base tile proc selector from the tile Id
stz procIdx
lda newTileId
clc
bit #TILE_PRIORITY_BIT
beq :low_priority
sec
:low_priority asl procIdx
clc
bit #TILE_ID_MASK
bne :not_zero
sec
:not_zero asl procIdx
clc
bit #TILE_VFLIP_BIT
beq :no_vflip
sec
:no_vflip asl procIdx
; Multiply by 6 to get the correct table entry index
asl procIdx
lda procIdx
asl
adc procIdx
tay
; Now integrate with the engine mode indicator
lda EngineMode
bit #ENGINE_MODE_DYN_TILES+ENGINE_MODE_TWO_LAYER
bne :not_fast
brl _SetTileFast
:nochange rts
brl :setTileFast
:not_fast
lda TileStore+TS_TILE_ID,y
:not_fast bit #ENGINE_MODE_TWO_LAYER
bne :not_dyn
brl :setTileDyn
:not_dyn
lda TileStore+TS_TILE_ID,x
and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value
xba
tax
tay
; ldal DirtyTileProcs,x
; sta TileStore+TS_DIRTY_TILE_DISP,y
; ldal CopyTileProcs,x
; sta TileStore+TS_DIRTY_TILE_COPY,y
lda TileStore+TS_TILE_ID,y ; Get the non-sprite dispatch address
lda TileStore+TS_TILE_ID,x ; Get the non-sprite dispatch address
and #TILE_CTRL_MASK
xba
tax
; ldal TileProcs,x
tay
; ldal TileProcs,y
; sta TileStore+TS_BASE_TILE_DISP,y
jmp _PushDirtyTileY ; on the next call to _ApplyTiles
jmp _PushDirtyTileX ; on the next call to _ApplyTiles
; Specialized check for when the engine is in "Fast" mode. It is a simple decision tree based on whether
; the tile priority bit is set, and whether this is the special tile 0 or not.
_SetTileFast
tyx
lda TileStore+TS_TILE_ID,x
bit #TILE_PRIORITY_BIT
beq :fast_over
:fast_under bit #TILE_ID_MASK
beq :fast_under_zero
ldy #FastUnderNonZero
:setTileFast
lda #FastProcs
lda procIdx
jsr _SetTileProcs
jmp _PushDirtyTileX
:fast_under_zero ldy #FastUnderZero
jsr _SetTileProcs
jmp _PushDirtyTileX
:fast_over bit #TILE_ID_MASK
beq :fast_over_zero
ldy #FastOverNonZero
jsr _SetTileProcs
jmp _PushDirtyTileX
:fast_over_zero ldy #FastOverZero
; Specialized check for when the engine has enabled dynamic tiles. In this case we are no longer
; guaranteed that the opcodes in a tile are PEA instructions. If the old tile and the new tile
; are both Dynamic tiles or both Basic tiles, then we can use an optimized routine. Otherwise
; we must set the opcodes as well as the operands
:setTileDyn
lda #DynProcs
lda procIdx
jsr _SetTileProcs
jmp _PushDirtyTileX
@ -247,83 +289,84 @@ _SetTileFast
jmp _PushDirtyTileY ; on the next call to _ApplyTiles
; X = Tile Store offset
; Y = table address
; A = TILE_ID
; Y = Engine Mode Base Table address
; A = Table proc index
;
; see TileProcTables in static/TileStore.s
bnkPtr equ blttmp
tblPtr equ blttmp+4
stpTmp equ blttmp+8
_SetTileProcs
and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value
xba
sta stpTmp ; save it
; Set a long pointer to this bank
sty tblPtr
clc
adc tblPtr
sta tblPtr
phk
phk
pla
and #$00FF
stz bnkPtr ; pointer to this bank
sta bnkPtr+2
sty tblPtr ; pointer to the table
sta tblPtr+2
; Lookup the base tile procedure
; Lookup the tile procedures
clc
ldy #0
lda [tblPtr],y ; load address of the base tile proc array
adc stpTmp ; add the offset
tay
lda [bnkPtr],y ; load the actual value
stal K_TS_BASE_TILE_DISP,x ; store it in the dispatch table
lda [tblPtr],y
stal K_TS_BASE_TILE_DISP,x
; Lookup the tile copy routine
clc
ldy #2
lda [tblPtr],y ; load address to the tile copy proc array
adc stpTmp
tay
lda [bnkPtr],y
stal K_TS_COPY_TILE_DATA,x
; Finally, load in the last two addresses directly
ldy #4
lda [tblPtr],y
stal K_TS_SPRITE_TILE_DISP,x
ldy #6
ldy #4
lda [tblPtr],y
stal K_TS_ONE_SPRITE,x
rts
; TileProcTables
;
; Tables of tuples used to populate the K_TS_* dispatch arrays for different combinations. Easier to maintain
; than a bunch of conditional code. Each "table" address holds four pointers to routines to handle the four
; combinations of HFLIP and VFLIP bits.
; Tables of tuples used to populate the K_TS_* dispatch arrays for different combinations. This is
; easier to maintain than a bunch of conditional code. Each entry holds three addresses.
;
; First address: A table of routines that render a tile when there is no sprite present
; Second address: A table of routines that copy a tile into the direct page workspace
; Third address: The general sprite routine; currently only used for Over/Under selection
; Fourth address: The specific sprite routine to use when only one sprite intersects the tile
FastOverNonZero dw FastTileProcs,FastTileCopy,FastSpriteOver,_OneSpriteFastOver
FastOverZero dw FastTileProcs0,FastTileCopy0,FastSpriteOver,_OneSpriteFastOver0
FastUnderNonZero dw FastTileProcs,FastTileCopy,FastSpriteUnder,_OneSpriteFastUnder
FastUnderZero dw FastTileProcs0,FastTileCopy0,FastSpriteUnder,_OneSpriteFastUnder0
; First address: Draw a tile directly into the code buffer (no sprites)
; Second address: Draw a tile merged with sprite data from the direct page
; Third address: Specialize routine to draw a tile merged with one sprite
;
; There are unique tuples of routines for all of the different combinations of tile properties
; and engine modes. This is an extensive number of combinations, but it simplifies the development
; and maintenance of the rendering subroutines. Also, the different subroutines can be written
; in any way and can make use of their own subroutines to reduce code size.
;
; Properties:
;
; [MODE] ENGINE_MODE: Fast, Dyn, TwoLayer
; [Z | N] Is Tile 0? : Yes, No
; [A | V] Is VFLIP? : Yes, No
; [Over | Under] Priority? : Yes, No
;
; So eight tuples per engine mode; 24 tuples total. Table name convention
;
; <MODE><Over|Under><Z|N><A|V>
FastProcs
FastOverZA dw _TBConstTile0,GenericOverZero,_OneSpriteFastOver0
FastOverZV dw _TBConstTile0,GenericOverZero,_OneSpriteFastOver0
FastOverNA dw _TBCopyDataFast,GenericOverAFast,_OneSpriteFastOverA
FastOverNV dw _TBCopyDataVFast,GenericOverVFast,_OneSpriteFastOverV
FastUnderZA dw _TBConstTile0,GenericUnderZero,GenericUnderZero
FastUnderZV dw _TBConstTile0,GenericUnderZero,GenericUnderZero
FastUnderNA dw _TBCopyDataFast,GenericUnderAFast,_OneSpriteFastUnderA
FastUnderNV dw _TBCopyDataVFast,GenericUnderVFast,_OneSpriteFastUnderV
; The routines will come from this table when ENGINE_MODE_TWO_LAYER and ENGINE_MODE_DYN_TILES
; are both off.
FastTileProcs dw _TBCopyDataFast,_TBCopyDataFast,_TBCopyDataVFast,_TBCopyDataVFast
FastTileCopy dw _CopyTileDataToDP2,_CopyTileDataToDP2,_CopyTileDataToDP2V,_CopyTileDataToDP2V
FastTileProcs0 dw _TBConstTile0,_TBConstTile0,_TBConstTile0,_TBConstTile0
FastTileCopy0 dw _TBConstTileDataToDP2,_TBConstTileDataToDP2,_TBConstTileDataToDP2,_TBConstTileDataToDP2
DynProcs
DynOverZA
DynOverZV
DynOverNA
DynOverNV
DynUnderZA
DynUnderZV
DynUnderNA
DynUnderNV
; SetBG0XPos
;
@ -387,8 +430,7 @@ last_bit lda (SPRITE_VBUFF_PTR+{]1*2}),y
next_bit
<<<
; Specialization for the first sprite which can just return the vbuff address
; in a register if there is only one sprite intersecting the tile
; Specialization for the first sprite which can optimize its dispatch if it's the only one
; dobit bit_position,dest;next;exit
dobit1 mac
lsr
@ -404,7 +446,8 @@ dobit1 mac
last_bit lda (SPRITE_VBUFF_PTR+{]1*2}),y
clc ; pre-adjust these later
adc _Sprites+TS_VBUFF_BASE+{]1*2}
jmp ]4
sta sprite_ptr0+{]2*4}
jmp (K_TS_ONE_SPRITE,x)
next_bit
<<<
@ -522,6 +565,7 @@ b_15_3 endbit 15;3;]4
; Store some tables in the K bank that will be used exclusively for jmp (abs,x) dispatch
K_TS_BASE_TILE_DISP ds TILE_STORE_SIZE ; draw the tile without a sprite
K_TS_COPY_TILE_DATA ds TILE_STORE_SIZE ; copy the tile into temp storage (used when tile below sprite)
K_TS_COPY_TILE_DATA ds TILE_STORE_SIZE ; copy/merge the tile into temp storage
K_TS_SPRITE_TILE_DISP ds TILE_STORE_SIZE ; select the sprite routine for this tile
K_TS_ONE_SPRITE ds TILE_STORE_SIZE ; specialized sprite routine when only one sprite covers the tile
K_TS_ONE_SPRITE ds TILE_STORE_SIZE ; specialized sprite routine when only one sprite covers the tile
K_TS_APPLY_TILE_DATA ds TILE_STORE_SIZE ; move tile from temp storage into code field

View File

@ -363,8 +363,12 @@ _TSGetSeconds
put Sprite2.s
put SpriteRender.s
put Render.s
put render/Render.s
put render/Fast.s
put render/Sprite1.s
put render/Sprite2.s
put tiles/DirtyTileQueue.s
put tiles/FastRenderer.s
; put tiles/FastRenderer.s
put blitter/Horz.s
put blitter/Vert.s
put blitter/BG0.s

View File

@ -84,10 +84,11 @@ _TBCopyTileMaskToCBuffV
; _TBConstTile
;
; A specialized routine that fills in a tile with a single constant value. It's intended to be used to
; fill in solid colors, so there are no specialized horizontal or verical flipped variants
; fill in solid colors, so there are no specialized horizontally or vertically flipped variants
_TBConstTile0 tax
_TBConstTileX
lda #0
_TBConstTileX sta: $0001,y
sta: $0001,y
sta: $0004,y
sta $1001,y
sta $1004,y
@ -105,7 +106,11 @@ _TBConstTileX sta: $0001,y
sta $7004,y
plb
rts
; jmp _TBFillPEAOpcode
; _TBConstTileSlow0
;
; Variant of _TBConstTile0 that calls _TBFillPEAOpcode first and then tail-jumps into the shared
; constant-fill body at _TBConstTileX.
; NOTE(review): presumably for engine modes where the code field opcodes are not guaranteed to
; already be PEA -- confirm against _TBFillPEAOpcode.
_TBConstTileSlow0
tax ; same entry instruction as _TBConstTile0
jsr _TBFillPEAOpcode
jmp _TBConstTileX ; shared body finishes the tile and returns
_TBConstTileDataToDP2
]line equ 0

View File

@ -59,8 +59,8 @@ CopyNoSprites
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later
lda TileStore+TS_BASE_ADDR+1,x ; load the base address of the code field ($0000 or $8000)
sta _BASE_ADDR+1 ; so we can get by just copying the high byte
; lda TileStore+TS_BASE_ADDR+1,x ; load the base address of the code field ($0000 or $8000)
; sta _BASE_ADDR+1 ; so we can get by just copying the high byte
rep #$20
lda TileStore+TS_BASE_TILE_DISP,x ; Get the address of the renderer for this tile

View File

@ -42,6 +42,7 @@ _TBSolidTile_VH
; register value. This must be restored prior to returning
_TBCopyDataFast
tax
_TBCopyDataFastX
]line equ 0
lup 8
ldal tiledata+{]line*4},x
@ -53,6 +54,10 @@ _TBCopyDataFast
plb
rts
; _TBCopyDataSlow
;
; Variant of _TBCopyDataFast that calls _TBFillPEAOpcode before tail-jumping into the shared
; copy loop at _TBCopyDataFastX.
; NOTE(review): assumes _TBFillPEAOpcode preserves X and Y, which the copy loop relies on -- confirm.
_TBCopyDataSlow
tax ; A = tile data address, moved to X as in _TBCopyDataFast
jsr _TBFillPEAOpcode
jmp _TBCopyDataFastX ; shared body copies the tile data and returns
_TBCopyData
]line equ 0
@ -67,6 +72,7 @@ _TBCopyData
_TBCopyDataVFast
tax
_TBCopyDataVFastX
]src equ 7
]dest equ 0
lup 8
@ -80,6 +86,11 @@ _TBCopyDataVFast
plb
rts
; _TBCopyDataVSlow
;
; Variant of _TBCopyDataVFast that calls _TBFillPEAOpcode before tail-jumping into the shared
; vertically-flipped copy loop at _TBCopyDataVFastX.
; NOTE(review): assumes _TBFillPEAOpcode preserves X and Y, which the copy loop relies on -- confirm.
_TBCopyDataVSlow
tax ; A = tile data address, moved to X as in _TBCopyDataVFast
jsr _TBFillPEAOpcode
jmp _TBCopyDataVFastX ; shared body copies the tile data and returns
_TBCopyDataV
]src equ 7
]dest equ 0

View File

@ -12,6 +12,48 @@ _TBDynamicTile_00
jsr _TBDynamicData
jmp _TBFillLdaDpOpcode
; _TBDynamic
;
; Renders a dynamic tile into the code field. Each of the eight rows receives two 3-byte
; sequences (at offsets $0000 and $0003) of the form LDA dp,x / PHA, where the direct page
; operand is derived from the low 7 bits of the tile's TS_TILE_ID.
;
; In:  X = TileStore offset (used to read TS_TILE_ID)
;      Y = address of the tile in the code field
; Out: one byte is pulled into the data bank register (plb) before returning
; NOTE(review): assumes the caller set the data bank to the code field bank and left the bank
; to restore on the stack -- confirm against the dispatcher.
_TBDynamic
ldal TileStore+TS_TILE_ID,x
and #$007F ; keep the low 7 bits as the direct page operand
ora #$4800 ; high byte $48 is the PHA opcode that follows the operand
]line equ 0 ; render the first column
lup 8
sta: $0004+{]line*$1000},y ; operand at +4, PHA at +5
]line equ ]line+1
--^
inc ; advance to the next word
inc
]line equ 0 ; render the second column
lup 8
sta: $0001+{]line*$1000},y ; operand at +1, PHA at +2
]line equ ]line+1
--^
sep #$20 ; 8-bit accumulator for the single-byte opcode stores
lda #$B5 ; LDA dp,x opcode
sta: $0000,y
sta: $0003,y
sta $1000,y
sta $1003,y
sta $2000,y
sta $2003,y
sta $3000,y
sta $3003,y
sta $4000,y
sta $4003,y
sta $5000,y
sta $5003,y
sta $6000,y
sta $6003,y
sta $7000,y
sta $7003,y
rep #$20 ; back to a 16-bit accumulator
plb
rts
; Primitive to render a dynamic tile
;
; LDA 00,x / PHA where the operand is fixed when the tile is rendered

View File

@ -3,6 +3,48 @@
; This tile type does not explicitly support horizontal or vertical flipping. An appropriate tile
; descriptor should be passed into CopyTileToDyn to put the horizontally or vertically flipped source
; data into the dynamic tile buffer
; _TBDynamicSpriteTile
;
; Renders a dynamic tile merged with the sprite data and mask held in tmp_sprite_data and
; tmp_sprite_mask by expanding CopyDynWord once for each of the 16 words of the tile
; (8 rows x 2 columns).
;
; In:  A = value saved to _X_REG (NOTE(review): not read again in this routine -- confirm its use)
;      X = TileStore offset (used to read TS_JMP_ADDR and TS_TILE_ID)
;      Y = address of the tile in the code field (used by CopyDynWord)
; Out: one byte is pulled into the data bank register (plb) before returning
; Clobbers A and X (CopyDynWord overwrites X on its "mixed" path).
_TBDynamicSpriteTile
sta _X_REG
ldal TileStore+TS_JMP_ADDR,x ; Get the address of the exception handler
sta _JTBL_CACHE
ldal TileStore+TS_TILE_ID,x ; Get the original tile descriptor
and #$007F ; clamp to < (32 * 4)
ora #$B500 ; $B5 is the LDA dp,x opcode
xba ; swap so the opcode is the low byte and the operand the high byte
sta _OP_CACHE ; This is the 2-byte opcode used to load the data
CopyDynWord 0;$0003
CopyDynWord 4;$1003
CopyDynWord 8;$2003
CopyDynWord 12;$3003
CopyDynWord 16;$4003
CopyDynWord 20;$5003
CopyDynWord 24;$6003
CopyDynWord 28;$7003
clc
lda _JTBL_CACHE
adc #32 ; All the snippets are 32 bytes wide and, since we're
sta _JTBL_CACHE ; within one tile, the second column is consecutive
lda _OP_CACHE
adc #$0200 ; advance the operand (high byte) by 2; relies on carry still being clear from the add above
sta _OP_CACHE
CopyDynWord 2;$0000
CopyDynWord 6;$1000
CopyDynWord 10;$2000
CopyDynWord 14;$3000
CopyDynWord 18;$4000
CopyDynWord 22;$5000
CopyDynWord 26;$6000
CopyDynWord 30;$7000
plb
rts
_TBDynamicSpriteTile_00
sty _Y_REG ; This is restored in the macro
@ -53,6 +95,56 @@ _TBDynamicSpriteTile_00
rts
; Create a masked render based on data in the direct page temporary buffer
;
; ]1 : sprite buffer offset
; ]2 : code field offset
;
; Emits one of three code sequences for a single word, selected by the sprite mask word:
;   mask == $0000 : PEA #sprite_data                  (sprite fully covers the word)
;   mask == $FFFF : LDA dp,x / PHA                    (sprite fully transparent)
;   otherwise     : JMP to the exception handler, which is patched with
;                   LDA dp,x / AND #mask / ORA #data / BRA
;
; Reads _JTBL_CACHE and _OP_CACHE; Y must hold the code field address of the tile.
; Clobbers A, and X on the mixed path.
CopyDynWord mac
lda tmp_sprite_mask+{]1} ; load the mask value
bne mixed ; a non-zero value may be mixed
; This is a solid word
lda #$00F4 ; PEA instruction
sta: ]2,y
lda tmp_sprite_data+{]1} ; load the sprite data
sta: ]2+1,y ; PEA operand
bra next
mixed cmp #$FFFF ; All 1's in the mask is a fully transparent sprite word
beq transparent
lda #$004C ; JMP to handler
sta: {]2},y
lda _JTBL_CACHE ; Get the offset to the exception handler for this column
ora #{]2&$F000} ; adjust for the current row offset
sta: {]2}+1,y
tax ; This becomes the new address that we use to patch in
lda _OP_CACHE ; Get the LDA dp,x instruction for this column
sta: $0000,x
lda #$0029 ; AND #SPRITE_MASK
sta: $0002,x
lda tmp_sprite_mask+{]1}
sta: $0003,x
lda #$0009 ; ORA #SPRITE_DATA
sta: $0005,x
lda tmp_sprite_data+{]1}
sta: $0006,x
lda #$0D80 ; branch to the prologue (BRA *+15)
sta: $0008,x
bra next
; This is a transparent word, so just show the dynamic data
transparent
lda #$4800 ; Put the PHA in the third byte
sta: {]2}+1,y
lda _OP_CACHE ; Store the LDA dp,x instruction with operand
sta: {]2},y ; overwrites byte +1 with the operand; the PHA at +2 is kept
next
<<<
; Masked renderer for a dynamic tile with sprite data overlaid.
;
@ -71,7 +163,7 @@ CopyDynSpriteWord MAC
; If MASK == 0, then we can do a PEA. If MASK == $FFFF, then fall back to the simple Dynamic Tile
; code.
ldal spritemask+{]1},x ; load the mask value
bne mixed ; a non-zero value may be mixed
bne mixed ; a non-zero value may be mixed
; This is a solid word
lda #$00F4 ; PEA instruction

148
src/render/Fast.s Normal file
View File

@ -0,0 +1,148 @@
; Collection of render functions used when the engine is in "FAST" mode. In this mode
; there are no dynamic tiles or two-layer tiles enabled, so all of the tiles are composed
; of PEA opcodes. These functions take advantage of this, as well as the fact that masks are
; not needed, to improve rendering speed.
;
; The following functions are defined here
;
; GenericOverAFast : Places data from tmp_sprite_data on top of the TileStore's tile
; GenericUnderAFast : Places the TileStore's tile on top of tmp_sprite_data
; GenericOverAFast
;
; Draws the flattened sprite on top of the tile: every tile word is ANDed with tmp_sprite_mask,
; ORed with tmp_sprite_data and stored into the PEA operands of the code field.
;
; In:  X = TileStore offset
; Out: X is overwritten with the tile data address, Y with the code field address.
;      The data bank is switched to the code field bank for the stores and a second byte
;      (the TileStore bank, per the push comment) is pulled before returning.
GenericOverAFast
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
lda TileStore+TS_TILE_ADDR,x
tax ; X = offset of this tile's data within the tiledata bank
plb ; data bank = code field bank
]line equ 0
lup 8
ldal tiledata+{]line*4},x ; left word of the row
and tmp_sprite_mask+{]line*4}
ora tmp_sprite_data+{]line*4}
sta: $0004+{]line*$1000},y
ldal tiledata+{]line*4}+2,x ; right word of the row
and tmp_sprite_mask+{]line*4}+2
ora tmp_sprite_data+{]line*4}+2
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
plb ; pull the second pushed byte to restore the data bank
rts
; GenericOverVFast
;
; Vertically-flipped variant of GenericOverAFast. The tile rows are read bottom-to-top (]src)
; while the sprite workspace and the code field are walked top-to-bottom (]dest).
;
; In:  X = TileStore offset
; Out: X is overwritten with the tile data address, Y with the code field address.
;      The data bank is switched to the code field bank for the stores and a second byte
;      (the TileStore bank, per the push comment) is pulled before returning.
;
; The destination offsets must use ]dest. The loop never updates ]line, so indexing with it
; would reuse whatever value an earlier loop left behind for all eight rows.
GenericOverVFast
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
lda TileStore+TS_TILE_ADDR,x
tax ; X = offset of this tile's data within the tiledata bank
plb ; data bank = code field bank
]src equ 7
]dest equ 0
lup 8
ldal tiledata+{]src*4},x ; left word of the flipped source row
and tmp_sprite_mask+{]dest*4}
ora tmp_sprite_data+{]dest*4}
sta: $0004+{]dest*$1000},y
ldal tiledata+{]src*4}+2,x ; right word of the flipped source row
and tmp_sprite_mask+{]dest*4}+2
ora tmp_sprite_data+{]dest*4}+2
sta: $0001+{]dest*$1000},y
]src equ ]src-1
]dest equ ]dest+1
--^
plb ; pull the second pushed byte to restore the data bank
rts
; GenericOverZero
;
; Sprite-over-tile renderer that does not read any tile data: the flattened sprite data in
; tmp_sprite_data is copied straight into the code field operands.
; NOTE(review): presumably valid because Tile 0 is all zeros, so no mask is required -- confirm.
;
; In:  X = TileStore offset
; Out: Y = code field address; two bytes are pulled into the data bank register in total.
GenericOverZero
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
plb ; data bank = code field bank
]line equ 0
lup 8
lda tmp_sprite_data+{]line*4}
sta: $0004+{]line*$1000},y
lda tmp_sprite_data+{]line*4}+2
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
plb ; pull the second pushed byte to restore the data bank
rts
; GenericUnderAFast
;
; Draws the tile on top of the flattened sprite: every sprite word from tmp_sprite_data is
; ANDed with the tile mask (stored 32 bytes after the tile data) and ORed with the tile data,
; then stored into the PEA operands of the code field.
;
; In:  X = TileStore offset
; Out: X is overwritten with the tile data address, Y with the code field address.
;      The data bank is switched to the code field bank for the stores and a second byte
;      (the TileStore bank, per the push comment) is pulled before returning.
;
; The ORA must read the tile data (offset +0), not the mask (offset +32). ORing with the mask
; would leave the tile pixels out of the result entirely.
GenericUnderAFast
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
lda TileStore+TS_TILE_ADDR,x
tax ; X = offset of this tile's data within the tiledata bank
plb ; data bank = code field bank
]line equ 0
lup 8
lda tmp_sprite_data+{]line*4}
andl tiledata+{]line*4}+32,x ; keep the sprite only where the tile is transparent
oral tiledata+{]line*4},x ; overlay the tile data
sta: $0004+{]line*$1000},y
lda tmp_sprite_data+{]line*4}+2
andl tiledata+{]line*4}+32+2,x
oral tiledata+{]line*4}+2,x
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
plb ; pull the second pushed byte to restore the data bank
rts
; GenericUnderVFast
;
; Vertically-flipped variant of GenericUnderAFast. The tile data and mask rows are read
; bottom-to-top (]src) while the sprite workspace and the code field are walked
; top-to-bottom (]dest).
;
; In:  X = TileStore offset
; Out: X is overwritten with the tile data address, Y with the code field address.
;      The data bank is switched to the code field bank for the stores and a second byte
;      (the TileStore bank, per the push comment) is pulled before returning.
;
; Two details matter here:
;   * destination offsets use ]dest -- the loop never updates ]line
;   * the ORA reads the tile data (offset +0), not the mask (offset +32)
GenericUnderVFast
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
lda TileStore+TS_TILE_ADDR,x
tax ; X = offset of this tile's data within the tiledata bank
plb ; data bank = code field bank
]src equ 7
]dest equ 0
lup 8
lda tmp_sprite_data+{]dest*4}
andl tiledata+{]src*4}+32,x ; keep the sprite only where the tile is transparent
oral tiledata+{]src*4},x ; overlay the (flipped) tile data
sta: $0004+{]dest*$1000},y
lda tmp_sprite_data+{]dest*4}+2
andl tiledata+{]src*4}+32+2,x
oral tiledata+{]src*4}+2,x
sta: $0001+{]dest*$1000},y
]src equ ]src-1
]dest equ ]dest+1
--^
plb ; pull the second pushed byte to restore the data bank
rts
; GenericUnderZero
;
; Tile-over-sprite renderer that ignores both the tile data and the sprite workspace: all 16
; operand words of the tile in the code field are set to zero.
; NOTE(review): presumably Tile 0 is treated as fully opaque here, hiding the sprite -- confirm.
;
; In:  X = TileStore offset
; Out: Y = code field address; two bytes are pulled into the data bank register in total.
GenericUnderZero
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
plb ; data bank = code field bank
lda #0
]line equ 0
lup 8
sta: $0004+{]line*$1000},y
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
plb ; pull the second pushed byte to restore the data bank
rts

83
src/render/README.txt Normal file
View File

@ -0,0 +1,83 @@
This folder contains the rendering tuples for the different types of tile rendering modes
that are defined by both the engine mode and the specific tile attributes. There are
a *lot* of variants, so they are cataloged here.
The top-level TileRender function is the main entry point that defines the overall tile render
flow as well as the register parameters and calling conventions for each of the modular
components.
There are 5 pluggable functions that make up a rendering mode
1. K_TS_BASE_TILE_DISP
An address to a function that will render a tile into the code field. There are no
sprites to handle in this case.
Arguments:
A: TileData/TileMask address
B: code field bank
Y: address of the tile in the code bank
X: TileStore offset
Return:
None
If additional TileStore properties are needed for the renderer, they can be read using the X
register.
2. K_TS_SPRITE_TILE_DISP
Selects the top-level handler for rendering a tile with a sprite. Currently, this is used to
select between rendering a sprite above the tile, or under the tile based on the value of the
TILE_PRIORITY_BIT.
Arguments:
A: TileStore+TS_SPRITE_FLAG
X: TileStore offset
Return:
Y: TileStore offset
sprite_ptrX direct page values set to the sprite VBuff addresses
The handler routine is responsible for examining the TS_SPRITE_FLAG value and dispatching
to an appropriate routine to handle the number of sprites intersecting the tile.
3. K_TS_ONE_SPRITE
A specialized routine for the single-sprite case. When K_TS_SPRITE_TILE_DISP determines there is only
one sprite to render, it MUST dispatch to this function. The K_TS_ONE_SPRITE routine MAY make use of the
K_TS_COPY_TILE_DATA and K_TS_APPLY_TILE_DATA functions, but is not required to do so.
4. K_TS_COPY_TILE_DATA & K_TS_APPLY_TILE_DATA
A pair of functions that copy tile data (and possibly mask information) into a temporary
direct page space and then render that workspace into the code field.
These functions are used as building blocks by the generic Over/Under multi-sprite
rendering code.
K_TS_COPY_TILE_DATA
Arguments:
B: Set to the TileData bank
Y: Set to the tile address
Return:
X: preserve the X register
K_TS_APPLY_TILE_DATA
Arguments:
B: code field bank
Y: address of the tile in the code bank
Return:
None
Generic Flow
1. Is there a sprite?
No -> Call K_TS_BASE_TILE_DISP to render a tile into the code field
Yes -> Call K_TS_SPRITE_TILE_DISP
Over : Copy tile data + mask to DP, Copy sprite data + mask to DP, render tile to code field
Under : Copy sprite data to DP,

231
src/render/Render.s Normal file
View File

@ -0,0 +1,231 @@
; _RenderTile
;
; If there are no sprites, then we copy the tile data into the code field as fast as possible.
; If there are sprites, then additional work is required.
;
; In: X = TileStore offset of the tile to render
;
; No-sprite path: dispatches through K_TS_BASE_TILE_DISP with
;   A = tile data address, Y = code field address, data bank = code field bank,
;   and one more byte (the TileStore bank, per the push comment) left on the stack for the
;   renderer to pull.
_RenderTile
lda TileStore+TS_SPRITE_FLAG,x ; any sprites on this tile?
bne :sprites
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated)
plb ; set the code field bank
jmp (K_TS_BASE_TILE_DISP,x) ; go to the tile copy routine
; Execute the sprite tree. If there is only one sprite, control will immediately be passed to
; the routine at K_TS_ONE_SPRITE. Otherwise, control is passed to the routines that handle a
; larger number of sprites. These routines need to copy the flattened sprite data and mask into
; the direct page workspace to be used by the K_TS_SPRITE_TILE_DISP routine.
:sprites txy ; keep a copy of the TileStore offset in Y; A still holds TS_SPRITE_FLAG
SpriteBitsToVBuffAddrs $0000;TwoSprites;ThreeSprites;FourSprites
; Dispatch vectors for the two, three and four sprite functions. These just
; flatten the sprite data into the direct page workspace and then pass control
; to the configurable routine which is set in SetTile and knows what to do
; based on the tile properties (over/under, engine mode, etc.)
;
; In: Y = TileStore offset (copied back into X for the indexed jump)
; NOTE(review): assumes the CopyNSpritesDataAndMaskToDP helpers preserve X -- confirm.
TwoSprites tyx
jsr CopyTwoSpritesDataAndMaskToDP
jmp (K_TS_SPRITE_TILE_DISP,x)
ThreeSprites tyx
jsr CopyThreeSpritesDataAndMaskToDP
jmp (K_TS_SPRITE_TILE_DISP,x)
FourSprites tyx
jsr CopyFourSpritesDataAndMaskToDP
jmp (K_TS_SPRITE_TILE_DISP,x)
; Helper functions (and macros)
; CopyTileToDP -- executes the K_TS_COPY_TILE_DATA routine. This may copy just data or data+mask
; information to the direct page
;
; In:  X = TileStore offset
; Out: Y = tile address; the data bank is set from the two bytes of
;      DP2_TILEDATA_AND_TILESTORE_BANKS: the first pull is active during the copy routine and
;      the second pull is left in effect on exit.
_CopyTileToDP mac
ldy TileStore+TS_TILE_ADDR,x ; load the tile address
pei DP2_TILEDATA_AND_TILESTORE_BANKS ; push both bank bytes
plb ; set to the tiledata bank
jsr (K_TS_COPY_TILE_DATA,x) ; preserves X-reg
plb ; pull the second bank byte
<<<
; Subroutine wrapper around the macro
CopyTileToDP
_CopyTileToDP
rts
; CopyTileToDPSprite -- same as above, but returns with the Data Bank set to the sprite data bank
;
; In:  X = TileStore offset
; Out: Y = tile address; the data bank is set from the two bytes of
;      DP2_TILEDATA_AND_SPRITEDATA_BANKS: the first pull is active during the copy routine and
;      the second pull is left in effect on exit.
_CopyTileToDPSprite mac
ldy TileStore+TS_TILE_ADDR,x ; load the tile address
pei DP2_TILEDATA_AND_SPRITEDATA_BANKS ; push both bank bytes
plb ; set to the tiledata bank
jsr (K_TS_COPY_TILE_DATA,x) ; preserves X-reg
plb ; pull the second bank byte (the sprite data bank)
<<<
; Subroutine wrapper around the macro
CopyTileToDPSprite
_CopyTileToDPSprite
rts
; Simple pair of routines that copy just the tile data to the direct page workspace. Data Bank
; must be set to the TileData bank on entry.
;
; In: Y = tile address (offset of the tile within the tiledata bank)
; Copies 8 rows of 2 words each into tmp_tile_data.
;
; Preserves the X-register
CopyTileDataToDP
]line equ 0
lup 8
lda tiledata+{]line*4},y ; left word of the row
sta tmp_tile_data+{]line*4}
lda tiledata+{]line*4}+2,y ; right word of the row
sta tmp_tile_data+{]line*4}+2
]line equ ]line+1
--^
rts
; CopyTileDataToDPV
;
; Vertically-flipped counterpart of CopyTileDataToDP: source rows are read from row 7 down to
; row 0 (]src) and written to tmp_tile_data rows 0 through 7 (]dest).
;
; In: Y = tile address; Data Bank must be set to the TileData bank (absolute,y loads)
; Preserves the X-register
CopyTileDataToDPV
]src equ 7
]dest equ 0
lup 8
lda tiledata+{]src*4},y
sta tmp_tile_data+{]dest*4}
lda tiledata+{]src*4}+2,y
sta tmp_tile_data+{]dest*4}+2
]src equ ]src-1
]dest equ ]dest+1
--^
rts
; Copy both the tile and mask data to the direct page space
;
; The mask is read from 32 bytes past the tile data. Rows are copied in order into
; tmp_tile_data and tmp_tile_mask.
;
; In: Y = tile address; Data Bank must be set to the TileData bank (absolute,y loads)
; Preserves the X-register
_CopyTileDataAndMaskToDP
]line equ 0
lup 8
lda tiledata+{]line*4},y
sta tmp_tile_data+{]line*4}
lda tiledata+{]line*4}+32,y ; mask word for the same position
sta tmp_tile_mask+{]line*4}
lda tiledata+{]line*4}+2,y
sta tmp_tile_data+{]line*4}+2
lda tiledata+{]line*4}+32+2,y
sta tmp_tile_mask+{]line*4}+2
]line equ ]line+1
--^
rts
; _CopyTileDataAndMaskToDPV
;
; Vertically-flipped counterpart of _CopyTileDataAndMaskToDP: data and mask rows are read from
; row 7 down to row 0 (]src) and written to workspace rows 0 through 7 (]dest).
;
; In: Y = tile address; Data Bank must be set to the TileData bank (absolute,y loads)
; Preserves the X-register
_CopyTileDataAndMaskToDPV
]src equ 7
]dest equ 0
lup 8
lda tiledata+{]src*4},y
sta tmp_tile_data+{]dest*4}
lda tiledata+{]src*4}+32,y ; mask word for the same position
sta tmp_tile_mask+{]dest*4}
lda tiledata+{]src*4}+2,y
sta tmp_tile_data+{]dest*4}+2
lda tiledata+{]src*4}+32+2,y
sta tmp_tile_mask+{]dest*4}+2
]src equ ]src-1
]dest equ ]dest+1
--^
rts
; Given a populated tmp_sprite_data buffer to use as a base, merge it with a tile and write to the
; code field. Each sprite word is ANDed with the tile mask (tile data + 32) and ORed with the
; tile data, so the tile ends up on top of the sprite.
;
; In:  Y = TileStore offset
; Out: X = tile data address, Y = code field address; two bytes are pulled into the data bank
;      register in total (code field bank for the stores, then the second pushed byte).
MergeSpriteWithTileFast
ldx TileStore+TS_TILE_ADDR,y
lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
lda TileStore+TS_CODE_ADDR_LOW,y ; load the address of the code field
tay
plb ; data bank = code field bank
]line equ 0
lup 8
lda tmp_sprite_data+{]line*4}
andl tiledata+{]line*4}+32,x ; tile mask
oral tiledata+{]line*4},x ; tile data
sta: $0004+{]line*$1000},y
lda tmp_sprite_data+{]line*4}+2
andl tiledata+{]line*4}+32+2,x
oral tiledata+{]line*4}+2,x
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
plb ; pull the second pushed byte to restore the data bank
rts
; MergeSpriteWithTileSlow
;
; Same merge as MergeSpriteWithTileFast, but finishes by jumping to _FillPEAOpcode instead of
; restoring the bank and returning itself.
; NOTE(review): presumably _FillPEAOpcode writes the PEA opcodes, pulls the remaining bank byte
; and returns to the caller -- confirm, since one pushed byte is still on the stack at the jmp.
;
; In: Y = TileStore offset
MergeSpriteWithTileSlow
ldx TileStore+TS_TILE_ADDR,y
lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
lda TileStore+TS_CODE_ADDR_LOW,y ; load the address of the code field
tay
plb ; data bank = code field bank
]line equ 0
lup 8
lda tmp_sprite_data+{]line*4}
andl tiledata+{]line*4}+32,x ; tile mask
oral tiledata+{]line*4},x ; tile data
sta: $0004+{]line*$1000},y
lda tmp_sprite_data+{]line*4}+2
andl tiledata+{]line*4}+32+2,x
oral tiledata+{]line*4}+2,x
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
jmp _FillPEAOpcode
; Now, implement the generic Two, Three and Four sprite routines for both Over and Under rendering. These
; are fairly involved, so we try to only have a single implementation of them for now without excessive
; specialization.
;
; FourSpriteLine -- composites four sprites onto the word in the accumulator, applying
; sprite 3 first and sprite 0 last. For each sprite the accumulator is ANDed through the long
; indirect pointer and then ORed through the 16-bit indirect pointer (which uses the current
; data bank). The AND [dp],y instruction is hand-assembled as opcode $37.
;
; In: A = word to composite onto, Y = offset of the word within the sprite plane
FourSpriteLine mac
; and [sprite_ptr3],y
db $37,sprite_ptr3
ora (sprite_ptr3),y
; and [sprite_ptr2],y
db $37,sprite_ptr2
ora (sprite_ptr2),y
; and [sprite_ptr1],y
db $37,sprite_ptr1
ora (sprite_ptr1),y
; and [sprite_ptr0],y
db $37,sprite_ptr0
ora (sprite_ptr0),y
<<<
; FourSpritesFast
;
; Copies the tile into the direct page workspace via K_TS_COPY_TILE_DATA, composites four
; sprites onto every word of tmp_tile_data with FourSpriteLine, then hands off to the
; K_TS_APPLY_TILE_DATA routine to move the result into the code field.
;
; In: Y = TileStore offset
; NOTE(review): relies on the K_TS_COPY_TILE_DATA routine preserving X -- confirm.
FourSpritesFast
tyx ; save for after compositing the sprites
ldy TileStore+TS_TILE_ADDR,x
pei DP2_TILEDATA_AND_TILESTORE_BANKS
plb ; set the tiledata bank for the copy routine
jsr (K_TS_COPY_TILE_DATA,x)
plb ; pull the second bank byte
pei DP2_SPRITEDATA_AND_TILESTORE_BANKS
plb ; set the sprite data bank
]line equ 0
lup 8
ldy #{]line*SPRITE_PLANE_SPAN}
lda tmp_tile_data+{]line*4}
FourSpriteLine
sta tmp_tile_data+{]line*4}
ldy #{]line*SPRITE_PLANE_SPAN}+2
lda tmp_tile_data+{]line*4}+2
FourSpriteLine
sta tmp_tile_data+{]line*4}+2
]line equ ]line+1
--^
plb ; pull the second bank byte
jmp (K_TS_APPLY_TILE_DATA,x)

51
src/render/Slow.s Normal file
View File

@ -0,0 +1,51 @@
; Identical routines to those in Fast.s, but also set the opcode. Used to render solid
; tiles when the engine mode has other capabilities turned on
;
; The following functions are defined here
;
; GenericOverSlow : Places data from tmp_sprite_data on top of the TileStore's tile
; GenericUnderSlow : Places the TileStore's tile on top of tmp_sprite_data
; GenericOverSlow
;
; Draws the flattened sprite on top of the tile (tile word AND tmp_sprite_mask OR
; tmp_sprite_data), writes the operands into the code field and then jumps to _FillPEAOpcode.
;
; In: X = TileStore offset
; Out: X is overwritten with the tile data address, Y with the code field address.
;
; The data bank must be switched to the code field bank (plb) before the stores, exactly as
; MergeSpriteWithTileSlow does. Without it the operands land in the caller's data bank and
; both pushed bytes are still on the stack at the jump.
; NOTE(review): assumes _FillPEAOpcode pulls the remaining bank byte and returns -- confirm.
GenericOverSlow
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
lda TileStore+TS_TILE_ADDR,x
tax ; X = offset of this tile's data within the tiledata bank
plb ; data bank = code field bank
]line equ 0
lup 8
ldal tiledata+{]line*4},x
and tmp_sprite_mask+{]line*4}
ora tmp_sprite_data+{]line*4}
sta: $0004+{]line*$1000},y
ldal tiledata+{]line*4}+2,x
and tmp_sprite_mask+{]line*4}+2
ora tmp_sprite_data+{]line*4}+2
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
jmp _FillPEAOpcode
; GenericUnderSlow
;
; Draws the tile on top of the flattened sprite (sprite word AND tile mask OR tile data),
; writes the operands into the code field and then jumps to _FillPEAOpcode.
;
; In: X = TileStore offset
; Out: X is overwritten with the tile data address, Y with the code field address.
;
; Two details matter here:
;   * the data bank must be switched to the code field bank (plb) before the stores, exactly
;     as MergeSpriteWithTileSlow does
;   * the ORA reads the tile data (offset +0), not the mask (offset +32)
; NOTE(review): assumes _FillPEAOpcode pulls the remaining bank byte and returns -- confirm.
GenericUnderSlow
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later. Has TileStore bank in high byte.
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
lda TileStore+TS_TILE_ADDR,x
tax ; X = offset of this tile's data within the tiledata bank
plb ; data bank = code field bank
]line equ 0
lup 8
lda tmp_sprite_data+{]line*4}
andl tiledata+{]line*4}+32,x ; keep the sprite only where the tile is transparent
oral tiledata+{]line*4},x ; overlay the tile data
sta: $0004+{]line*$1000},y
lda tmp_sprite_data+{]line*4}+2
andl tiledata+{]line*4}+32+2,x
oral tiledata+{]line*4}+2,x
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
jmp _FillPEAOpcode

180
src/render/Sprite1.s Normal file
View File

@ -0,0 +1,180 @@
; Specialized routines that can be assigned to K_TS_ONE_SPRITE for rendering a single sprite into
; a tile. There are more variants of this function because having a single sprite in a tile is a very
; common scenario, so we put additional effort into optimizing this case.
;------------------------------
; Section: Above Tile Renderers
; The simplest implementation.  When a sprite is drawn over Tile 0 in FAST mode the sprite
; words can be copied straight into the code field.
;
; In:  A = sprite VBuff address (from the SpriteBitsToVBuffAddrs macro)
;      X = TileStore offset of the tile
_OneSpriteFastOver0
            ldy   TileStore+TS_CODE_ADDR_HIGH,x       ; code field bank (low byte) + TileStore bank (high byte)
            phy                                       ; park both bank bytes on the stack
            ldy   TileStore+TS_CODE_ADDR_LOW,x        ; Y = code field address of this tile
            tax                                       ; X = VBuff address of the sprite
            plb                                       ; data bank = code field bank

]src        equ   0                                   ; running offset into the sprite plane
]dest       equ   0                                   ; running offset into the code field
            lup   8
            ldal  spritedata+]src,x
            sta:  $0004+]dest,y
            ldal  spritedata+]src+2,x
            sta:  $0001+]dest,y
]src        equ   ]src+SPRITE_PLANE_SPAN
]dest       equ   ]dest+$1000
            --^

            plb                                       ; back to the TileStore bank
            rts
; Next implementation: drawing one sprite onto a regular tile.  The tile's pixel data is first
; copied into the tmp_tile_data buffer by one of the CopyTileDataToDP helpers, then the sprite
; is masked / merged on top of it and the result is written into the code field.
;
; The 1-sprite dispatch preserves the X register, so on entry X already holds the TileStore
; offset.  The sprite's VBuff address is read from sprite_ptr0.
;
; NOTE(review): the V entry point presumably selects a vertically flipped copy and the A entry
; point the normal one -- confirm against CopyTileDataToDPV / CopyTileDataToDP.  Both helpers
; must leave X intact for the TileStore reads below.
_OneSpriteFastOverV
            jsr   CopyTileDataToDPV
            bra   _OneSpriteFastOver

_OneSpriteFastOverA
            jsr   CopyTileDataToDP

_OneSpriteFastOver
            ldy   TileStore+TS_CODE_ADDR_HIGH,x       ; code field bank (low byte) + TileStore bank (high byte)
            phy                                       ; park both bank bytes on the stack
            ldy   TileStore+TS_CODE_ADDR_LOW,x        ; Y = code field address of this tile
            ldx   sprite_ptr0                         ; X = VBuff address of the sprite
            plb                                       ; data bank = code field bank

]tile       equ   0                                   ; running offset into tmp_tile_data
]src        equ   0                                   ; running offset into the sprite planes
]dest       equ   0                                   ; running offset into the code field
            lup   8
            lda   tmp_tile_data+]tile
            andl  spritemask+]src,x
            oral  spritedata+]src,x
            sta:  $0004+]dest,y
            lda   tmp_tile_data+]tile+2
            andl  spritemask+]src+2,x
            oral  spritedata+]src+2,x
            sta:  $0001+]dest,y
]tile       equ   ]tile+4
]src        equ   ]src+SPRITE_PLANE_SPAN
]dest       equ   ]dest+$1000
            --^

            plb                                       ; back to the TileStore bank
            rts
; "SLOW" variant for Tile 0: the sprite words are copied into the operand slots of the code
; field and _FillPEAOpcode then writes the opcode bytes as well.
;
; In:  A = sprite VBuff address (from the SpriteBitsToVBuffAddrs macro)
;      X = TileStore offset of the tile
_OneSpriteSlowOver0
            ldy   TileStore+TS_CODE_ADDR_HIGH,x       ; code field bank (low byte) + TileStore bank (high byte)
            phy                                       ; park both bank bytes on the stack
            ldy   TileStore+TS_CODE_ADDR_LOW,x        ; Y = code field address of this tile
            tax                                       ; X = VBuff address of the sprite
            plb                                       ; data bank = code field bank

]row        equ   0
            lup   8
            ldal  spritedata+{]row*SPRITE_PLANE_SPAN},x
            sta:  $0004+{]row*$1000},y
            ldal  spritedata+{]row*SPRITE_PLANE_SPAN}+2,x
            sta:  $0001+{]row*$1000},y
]row        equ   ]row+1
            --^

            jmp   _FillPEAOpcode                      ; sets the opcodes, restores the TileStore bank and returns
; Slow variant for a regular tile: copy the tile into tmp_tile_data, merge one sprite over it,
; write the operands into the code field and then fall into _FillPEAOpcode to set the opcodes.
;
; In:  X = TileStore offset of the tile; sprite_ptr0 = VBuff address of the sprite
_OneSpriteSlowOver
            jsr   CopyTileDataToDP
            ldy   TileStore+TS_CODE_ADDR_HIGH,x       ; code field bank (low byte) + TileStore bank (high byte)
            phy                                       ; park both bank bytes on the stack
            ldy   TileStore+TS_CODE_ADDR_LOW,x        ; Y = code field address of this tile
            ldx   sprite_ptr0                         ; X = VBuff address of the sprite
            plb                                       ; data bank = code field bank

]tile       equ   0                                   ; running offset into tmp_tile_data
]src        equ   0                                   ; running offset into the sprite planes
]dest       equ   0                                   ; running offset into the code field
            lup   8
            lda   tmp_tile_data+]tile
            andl  spritemask+]src,x
            oral  spritedata+]src,x
            sta:  $0004+]dest,y
            lda   tmp_tile_data+]tile+2
            andl  spritemask+]src+2,x
            oral  spritedata+]src+2,x
            sta:  $0001+]dest,y
]tile       equ   ]tile+4
]src        equ   ]src+SPRITE_PLANE_SPAN
]dest       equ   ]dest+$1000
            --^

; Deliberately no jump: execution continues into _FillPEAOpcode so the common case is a bit faster
; Write the PEA opcode ($F4) into both opcode slots (offsets 0 and 3) of each of the tile's
; eight code field lines, then pull the saved bank byte and return.
;
; In:  Y = code field address of the tile, data bank = code field bank,
;      one saved bank byte on top of the stack (pulled here)
_FillPEAOpcode
            sep   #$20                                ; 8-bit accumulator so only the opcode byte is written
            lda   #$F4                                ; PEA

]dest       equ   0                                   ; running offset of the current code field line
            lup   8
            sta:  $0000+]dest,y
            sta:  $0003+]dest,y
]dest       equ   ]dest+$1000
            --^

            rep   #$20                                ; back to a 16-bit accumulator
            plb                                       ; Restore the TileStore bank
            rts
;------------------------------
; Section: Below Tile Renderers
; Drawing under the zero tile is the same as not drawing a sprite for both the fast and slow cases
; One sprite drawn *under* a regular tile (FAST mode).  The tile's data and mask are copied to
; tmp_tile_data / tmp_tile_mask by the helper, then for every word:
;
;     code field operand = (sprite AND tile mask) OR tile data
;
; In:  X = TileStore offset of the tile; sprite_ptr0 = VBuff address of the sprite
;
; NOTE(review): the A / V entry points presumably select the normal and vertically flipped
; tile copy -- confirm against _CopyTileDataAndMaskToDP / _CopyTileDataAndMaskToDPV.  Both
; helpers must leave X intact for the TileStore reads below.
_OneSpriteFastUnderA
            jsr   _CopyTileDataAndMaskToDP
            bra   _OneSpriteFastUnder

_OneSpriteFastUnderV
            jsr   _CopyTileDataAndMaskToDPV

_OneSpriteFastUnder
            lda   TileStore+TS_CODE_ADDR_HIGH,x       ; load the bank of the target code field line
            pha                                       ; and put on the stack for later. Has TileStore bank in high byte.
            ldy   TileStore+TS_CODE_ADDR_LOW,x        ; load the address of the code field
            ldx   sprite_ptr0                         ; X = VBuff address of the sprite
            plb                                       ; data bank = code field bank

]line       equ   0
            lup   8
            ldal  spritedata+{]line*SPRITE_PLANE_SPAN},x
            and   tmp_tile_mask+{]line*4}             ; FIX: mask the sprite with the tile mask (was ORA, which
            ora   tmp_tile_data+{]line*4}             ; produced sprite | mask | data instead of a composite)
            sta:  $0004+{]line*$1000},y
            ldal  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
            and   tmp_tile_mask+{]line*4}+2           ; FIX: AND, not ORA
            ora   tmp_tile_data+{]line*4}+2
            sta:  $0001+{]line*$1000},y
]line       equ   ]line+1
            --^

            plb                                       ; restore the TileStore bank
            rts
; Sprite under Tile 0 in SLOW mode: set up the registers and jump to the tile's base
; (no-sprite) routine.
;
; In:  X = TileStore offset of the tile
; At the jump: A = tile data address, Y = code field address, data bank = code field bank,
;              and one saved bank byte is still on the stack.
_OneSpriteSlowUnder0
            ldy   TileStore+TS_CODE_ADDR_LOW,x        ; Y = code field address of this tile
            lda   TileStore+TS_CODE_ADDR_HIGH,x       ; code field bank (low byte) + TileStore bank (high byte)
            pha                                       ; park both bank bytes on the stack
            lda   TileStore+TS_TILE_ADDR,x            ; A = pre-calculated address of this tile's data
            plb                                       ; data bank = code field bank
            jmp   (K_TS_BASE_TILE_DISP,x)             ; dispatch to the tile copy routine
;--------------------------------
; Helper functions for one Sprite
; Copy the 8 rows (two words each) of a single sprite's data from the sprite plane into
; the tmp_sprite_data buffer.
;
; In:  X = VBuff address of the sprite within spritedata
CopyOneSpriteDataToDP
]src        equ   0                                   ; running offset into the sprite plane
]dest       equ   0                                   ; running offset into tmp_sprite_data
            lup   8
            ldal  spritedata+]src,x
            sta   tmp_sprite_data+]dest
            ldal  spritedata+]src+2,x
            sta   tmp_sprite_data+]dest+2
]src        equ   ]src+SPRITE_PLANE_SPAN
]dest       equ   ]dest+4
            --^
            rts

102
src/render/Sprite2.s Normal file
View File

@ -0,0 +1,102 @@
; Specialized routines for handling two sprites. Like Sprite3.s and Sprite4.s there are four
; variants -- one each for the over / under sprite orders, and one each for whether the mask
; needs to be used or not.
; TwoSpriteLine -- composite two sprites onto the word currently in the accumulator.
; Sprite 1 is applied first and sprite 0 last.  For each sprite the accumulator is AND-ed
; with the word read through the long-indirect form [ptr],y and then OR-ed with the word read
; through (ptr),y.  The AND [dp],y instructions are emitted as raw bytes (opcode $37).
; In:  A = word to composite onto, Y = byte offset within the sprite planes
; Out: A = composited word
TwoSpriteLine mac
db $37,sprite_ptr1 ; and [sprite_ptr1],y
ora (sprite_ptr1),y
db $37,sprite_ptr0 ; and [sprite_ptr0],y
ora (sprite_ptr0),y
<<<
; TwoSpriteData -- flatten the data words of two sprites: start from sprite 1's word, AND it
; with the word read through [sprite_ptr0],y and OR in sprite 0's word.
; In:  Y = byte offset within the sprite planes
; Out: A = combined data word
TwoSpriteData mac
lda (sprite_ptr1),y
db $37,sprite_ptr0 ; and [sprite_ptr0],y
ora (sprite_ptr0),y
<<<
; TwoSpriteMask -- AND together the words read through [sprite_ptr1],y and [sprite_ptr0],y.
; Both instructions are emitted as raw bytes ($B7 = LDA [dp],y, $37 = AND [dp],y).
; In:  Y = byte offset within the sprite planes
; Out: A = combined mask word
TwoSpriteMask mac
db $B7,sprite_ptr1 ; lda [sprite_ptr1],y
db $37,sprite_ptr0 ; and [sprite_ptr0],y
<<<
; Two sprites drawn over a tile.  The tile is copied into tmp_tile_data, both sprites are
; composited onto it in place, and the tile's "apply" routine is then dispatched.
;
; In:  Y = TileStore offset of the tile; sprite_ptr0 / sprite_ptr1 set up by the dispatcher
;
; NOTE(review): CopyTileToDPSprite is not visible here; it presumably returns with the data
; bank set to the sprite data bank (required by the (dp),y reads in TwoSpriteLine) and with
; X preserved -- confirm.
TwoSpritesOver
            phb                                       ; remember the bank we were entered with
            tyx                                       ; X = TileStore offset, kept for the final dispatch
            jsr   CopyTileToDPSprite                  ; tile pixels -> tmp_tile_data

]src        equ   0                                   ; running offset into the sprite planes
]tile       equ   0                                   ; running offset into tmp_tile_data
            lup   8
            ldy   #]src
            lda   tmp_tile_data+]tile
            TwoSpriteLine
            sta   tmp_tile_data+]tile
            ldy   #]src+2
            lda   tmp_tile_data+]tile+2
            TwoSpriteLine
            sta   tmp_tile_data+]tile+2
]src        equ   ]src+SPRITE_PLANE_SPAN
]tile       equ   ]tile+4
            --^

            plb                                       ; back to the bank saved on entry
            jmp   (K_TS_APPLY_TILE_DATA,x)
; Two sprites drawn under a tile (FAST mode).  The two sprites are flattened into
; tmp_sprite_data and MergeSpriteWithTileFast finishes the job.
;
; In:  Y = TileStore offset of the tile; sprite_ptr0 / sprite_ptr1 set up by the dispatcher
;
; The unrolled compositing loop that used to follow the jmp was unreachable (no label, and it
; sat directly after an unconditional jump), so it has been removed.
;
; NOTE(review): the bank byte pushed by phb is still on the stack when control reaches
; MergeSpriteWithTileFast -- confirm that routine pulls it.  CopyTwoSpritesDataToDP reads the
; sprites with (dp),y, which uses the current data bank; confirm the bank is the sprite data
; bank at this point.
TwoSpritesUnderFast
            tyx                                       ; save after compositing the sprites
            phb                                       ; save the current bank
            jsr   CopyTwoSpritesDataToDP              ; copy necessary sprite data to the direct page
            jmp   MergeSpriteWithTileFast
;---------------------------------
; Helper functions for two Sprites
; Flatten the data of two sprites (via the TwoSpriteData macro) into tmp_sprite_data,
; 8 rows of two words each.
;
; In:  sprite_ptr0 / sprite_ptr1 = pointers to the two sprites
; Clobbers A and Y.
CopyTwoSpritesDataToDP
]src        equ   0                                   ; running offset into the sprite planes
]dest       equ   0                                   ; running offset into tmp_sprite_data
            lup   8
            ldy   #]src
            TwoSpriteData
            sta   tmp_sprite_data+]dest
            ldy   #]src+2
            TwoSpriteData
            sta   tmp_sprite_data+]dest+2
]src        equ   ]src+SPRITE_PLANE_SPAN
]dest       equ   ]dest+4
            --^
            rts
; Flatten the data AND mask of the sprites into tmp_sprite_data / tmp_sprite_mask (8 rows of
; two words each), then dispatch through K_TS_SPRITE_TILE_DISP.
;
; All three labels below share this one body, which only reads sprite_ptr0 and sprite_ptr1.
; NOTE(review): the Three/Four entry points therefore ignore sprite_ptr2 / sprite_ptr3 --
; presumably placeholders until dedicated versions exist; confirm.
; NOTE(review): the routine ends by jumping through K_TS_SPRITE_TILE_DISP,x, so X is expected
; to hold the TileStore offset here and the table entry must not lead back to this routine --
; confirm against the dispatch tables.
CopyFourSpritesDataAndMaskToDP
CopyThreeSpritesDataAndMaskToDP
CopyTwoSpritesDataAndMaskToDP
]line equ 0
lup 8
ldy #{]line*SPRITE_PLANE_SPAN}
TwoSpriteData
sta tmp_sprite_data+{]line*4}
TwoSpriteMask
sta tmp_sprite_mask+{]line*4}
ldy #{]line*SPRITE_PLANE_SPAN}+2
TwoSpriteData
sta tmp_sprite_data+{]line*4}+2
TwoSpriteMask
sta tmp_sprite_mask+{]line*4}+2
]line equ ]line+1
--^
jmp (K_TS_SPRITE_TILE_DISP,x)

36
src/render/Sprite3.s Normal file
View File

@ -0,0 +1,36 @@
; ThreeSpriteLine -- composite three sprites onto the word currently in the accumulator.
; Sprite 2 is applied first and sprite 0 last.  For each sprite the accumulator is AND-ed
; with the word read through [ptr],y and then OR-ed with the word read through (ptr),y.
; The AND [dp],y instructions are emitted as raw bytes (opcode $37).
; In:  A = word to composite onto, Y = byte offset within the sprite planes
; Out: A = composited word
ThreeSpriteLine mac
db $37,sprite_ptr2 ; and [sprite_ptr2],y
ora (sprite_ptr2),y
db $37,sprite_ptr1 ; and [sprite_ptr1],y
ora (sprite_ptr1),y
db $37,sprite_ptr0 ; and [sprite_ptr0],y
ora (sprite_ptr0),y
<<<
; Three sprites without extra masking
; Three sprites drawn over a tile (FAST mode).  The tile is copied into tmp_tile_data, the
; three sprites are composited onto it in place, and the result is copied to the code field.
;
; In:  Y = TileStore offset of the tile; sprite_ptr0..2 set up by the dispatcher;
;      data bank = TileStore bank
;
; Stack discipline: DP2_TILEDATA_AND_SPRITEDATA_BANKS holds the tiledata bank in its low byte
; and the spritedata bank in its high byte, so the pei supplies exactly the two bank switches
; needed below.  The third plb at the end needs a byte of its own, so the entry bank is
; saved with phb first.  (Without the phb the final plb pulled a byte of the return address.)
ThreeSpritesFast
            tyx                                       ; save for after compositing the sprites
            phb                                       ; FIX: save the entry (TileStore) bank for the final plb
            ldy   TileStore+TS_TILE_ADDR,x            ; Y = address of the tile data for the copy routine
            pei   DP2_TILEDATA_AND_SPRITEDATA_BANKS
            plb                                       ; set to the tiledata bank
            jsr   (K_TS_COPY_TILE_DATA,x)             ; must preserve X
            plb                                       ; set to the sprite data bank

]line       equ   0
            lup   8
            ldy   #{]line*SPRITE_PLANE_SPAN}
            lda   tmp_tile_data+{]line*4}
            ThreeSpriteLine
            sta   tmp_tile_data+{]line*4}
            ldy   #{]line*SPRITE_PLANE_SPAN}+2
            lda   tmp_tile_data+{]line*4}+2
            ThreeSpriteLine
            sta   tmp_tile_data+{]line*4}+2
]line       equ   ]line+1
            --^

            plb                                       ; restore the bank saved on entry
            jmp   _CopyDP2ToCodeField

0
src/render/Sprite4.s Normal file
View File

View File

@ -388,11 +388,13 @@ OldOneSecVec ENT
ds 4
Timers ENT
ds TIMER_REC_SIZE*MAX_TIMERS
; From the IIgs ref
DefaultPalette ENT
dw $0000,$007F,$0090,$0FF0
dw $000F,$0080,$0f70,$0FFF
dw $0fa9,$0ff0,$00e0,$04DF
dw $0d00,$078f,$0ccc,$0FFF
dw $0000,$0777,$0841,$072C
dw $000F,$0080,$0F70,$0D00
dw $0FA9,$0FF0,$00E0,$04DF
dw $0DAF,$078F,$0CCC,$0FFF
; 0. Full Screen : 40 x 25 320 x 200 (32,000 bytes (100.0%))
; 1. Sword of Sodan : 34 x 24 272 x 192 (26,112 bytes ( 81.6%))

View File

@ -12,13 +12,14 @@ TS_TILE_ADDR equ {TILE_STORE_SIZE*3} ; cached value, the address
TS_CODE_ADDR_LOW equ {TILE_STORE_SIZE*4} ; const value, address of this tile in the code fields
TS_CODE_ADDR_HIGH equ {TILE_STORE_SIZE*5}
TS_WORD_OFFSET equ {TILE_STORE_SIZE*6} ; const value, word offset value for this tile if LDA (dp),y instructions re used
TS_BASE_ADDR equ {TILE_STORE_SIZE*7} ; const value, because there are two rows of tiles per bank, this is set to $0000 ot $8000.
;TS_BASE_ADDR equ {TILE_STORE_SIZE*7} ; const value, because there are two rows of tiles per bank, this is set to $0000 or $8000.
TS_JMP_ADDR equ {TILE_STORE_SIZE*7} ; const value, address of the 32-byte snippet space for this tile
TS_SCREEN_ADDR equ {TILE_STORE_SIZE*8} ; cached value of on-screen location of tile. Used for DirtyRender.
; TODO: Move these arrays into the K bank to support direct dispatch via jmp (abs,x)
TS_BASE_TILE_COPY equ {TILE_STORE_SIZE*9} ; derived from TS_TILE_ID to optimize tile copy to support sprite rendering
; TS_BASE_TILE_COPY equ {TILE_STORE_SIZE*9} ; derived from TS_TILE_ID to optimize tile copy to support sprite rendering
; TS_BASE_TILE_DISP equ {TILE_STORE_SIZE*10} ; derived from TS_TILE_ID to optimize base (non-sprite) tile dispatch in the Render function
TS_DIRTY_TILE_DISP equ {TILE_STORE_SIZE*11} ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function
; TS_DIRTY_TILE_DISP equ {TILE_STORE_SIZE*11} ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function
TILE_STORE_NUM equ 12 ; Need this many parallel arrays

View File

@ -56,11 +56,9 @@ _PushDirtyTileY
:occupied2
tya ; Make sure TileStore offset is returned in the accumulator
rts
; Remove a dirty tile from the list and return it in state ready to be rendered. It is important
; that the core rendering functions *only* use _PopDirtyTile to get a list of tiles to update,
; because this routine merges the tile IDs stored in the Tile Store with the Sprite
; information to set the TILE_SPRITE_BIT. This is the *only* place in the entire code base that
; applies this bit to a tile descriptor.
; that the core rendering functions *only* use _PopDirtyTile to get a list of tiles to update.
_PopDirtyTile
ldy DirtyTileCount
bne _PopDirtyTile2
@ -93,7 +91,6 @@ pdtf_not_empty
cpx #16 ; If there are >= 8 elements, then
bcs full_chunk ; do a full chunk
; stz DP2_DIRTY_TILE_COUNT ; Otherwise, this pass will handle them all
jmp (at_table,x)
at_table da at_exit,at_one,at_two,at_three
da at_four,at_five,at_six,at_seven
@ -109,76 +106,76 @@ full_chunk txa
ldx DirtyTiles+14,y
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
ldy DP2_DIRTY_TILE_COUNT
ldx DirtyTiles+12,y
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
ldy DP2_DIRTY_TILE_COUNT
ldx DirtyTiles+10,y
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
ldy DP2_DIRTY_TILE_COUNT
ldx DirtyTiles+8,y
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
ldy DP2_DIRTY_TILE_COUNT
ldx DirtyTiles+6,y
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
ldy DP2_DIRTY_TILE_COUNT
ldx DirtyTiles+4,y
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
ldy DP2_DIRTY_TILE_COUNT
ldx DirtyTiles+2,y
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
ldy DP2_DIRTY_TILE_COUNT
ldx DirtyTiles+0,y
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
jmp _PopDirtyTilesFast
; These routines just handle between 1 and 7 dirty tiles
at_seven
ldx DirtyTiles+12
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
at_six
ldx DirtyTiles+10
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
at_five
ldx DirtyTiles+8
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
at_four
ldx DirtyTiles+6
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
at_three
ldx DirtyTiles+4
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
at_two
ldx DirtyTiles+2
stz TileStore+TS_DIRTY,x
jsr _RenderTileFast
jsr _RenderTile
at_one
ldx DirtyTiles+0
stz TileStore+TS_DIRTY,x
jmp _RenderTileFast
jmp _RenderTile

View File

@ -1,314 +0,0 @@
; Fast-path tile renderer, used when the engine mode has the second background layer disabled.
; Without that layer the tile mask is never needed, so:
;
;   * no sprites on the tile -> the tile data goes into the code field as fast as possible
;   * sprites on the tile    -> the sprite data is flattened into a direct page buffer and
;                               then copied into the code field
;
; In:  X = TileStore offset of the tile
_RenderTileFast
            lda   TileStore+TS_SPRITE_FLAG,x          ; non-zero when sprites intersect this tile
            bne   :sprites

; No-sprite path.  Also used as the "sprite under Tile 0" handler.
_OneSpriteFastUnder0
_RenderNoSprite
            ldy   TileStore+TS_CODE_ADDR_LOW,x        ; Y = code field address of this tile
            lda   TileStore+TS_CODE_ADDR_HIGH,x       ; code field bank (low byte) + TileStore bank (high byte)
            pha                                       ; park both bank bytes on the stack
            lda   TileStore+TS_TILE_ADDR,x            ; A = pre-calculated address of this tile's data
            plb                                       ; data bank = code field bank
            jmp   (K_TS_BASE_TILE_DISP,x)             ; dispatch to the tile copy routine

:sprites    jmp   (K_TS_SPRITE_TILE_DISP,x)           ; dispatch to the sprite+tile routine
; Optimized routines to render sprites on top of the tile data and update the code field
; assuming that the opcode will never need to be reset, e.g. all of the instructions are
; PEA opcodes, so only the operands need to be set.
;
; Since the sprite is drawn on top of the tile, the first step is to copy the tile data
; into the direct page temporary space, then dispatch to the appropriate sprite rendering
; subroutine
;
; In:  X = TileStore offset of the tile (moved to Y before the macro runs)
; The four macro arguments are the handlers for one, two, three and four sprites.
FastSpriteOver
txy
SpriteBitsToVBuffAddrs OneSpriteFast;TwoSpritesFast;ThreeSpritesFast;FourSpritesFast
; Optimized routines for drawing sprites underneath the tile. In this case, the sprite is drawn first,
; so we have to calculate the sprite dispatch subroutine to copy the sprite data into the direct
; page space and then merge it with the tile data at the end.
;
; In:  X = TileStore offset of the tile (moved to Y before the macro runs)
; NOTE(review): the same handler is passed for all four sprite counts, so presumably only the
; first sprite is drawn under the tile regardless of how many intersect it -- confirm.
FastSpriteUnder
txy
SpriteBitsToVBuffAddrs OneSpriteFastUnder;OneSpriteFastUnder;OneSpriteFastUnder;OneSpriteFastUnder
; This handles sprites with the tile above
; In:  Y = TileStore offset of the tile (copied back to X for the indexed dispatch);
;      A is passed through untouched to the K_TS_ONE_SPRITE handler
OneSpriteFastUnder
tyx
jmp (K_TS_ONE_SPRITE,x)
; General case: one sprite underneath a regular tile.  The sprite is first copied into the
; tmp_tile_data buffer; each word is then masked with the tile's mask (tiledata+32), merged
; with the tile's data (tiledata+0) and written into the code field.
;
; In:  A = VBuff address of the sprite, Y = TileStore offset of the tile
_OneSpriteFastUnder
            tax                                       ; X = VBuff address for the copy helper
            jsr   _CopySpriteDataToDP2                ; preserves Y
            lda   TileStore+TS_CODE_ADDR_HIGH,y       ; code field bank (low byte) + TileStore bank (high byte)
            pha                                       ; park both bank bytes on the stack
            ldx   TileStore+TS_TILE_ADDR,y            ; X = offset of this tile within tiledata
            lda   TileStore+TS_CODE_ADDR_LOW,y
            tay                                       ; Y = code field address of this tile
            plb                                       ; data bank = code field bank

]tile       equ   0                                   ; running offset into the tile / direct page buffers
]dest       equ   0                                   ; running offset into the code field
            lup   8
            lda   tmp_tile_data+]tile
            andl  tiledata+]tile+32,x
            oral  tiledata+]tile,x
            sta:  $0004+]dest,y
            lda   tmp_tile_data+]tile+2
            andl  tiledata+]tile+32+2,x
            oral  tiledata+]tile+2,x
            sta:  $0001+]dest,y
]tile       equ   ]tile+4
]dest       equ   ]dest+$1000
            --^

            plb                                       ; back to the TileStore bank
            rts
; Copy the 8 rows (two words each) of one sprite from the sprite plane into tmp_tile_data.
;
; In:  X = VBuff address of the sprite within spritedata
; Y is not touched.
_CopySpriteDataToDP2
]src        equ   0                                   ; running offset into the sprite plane
]dest       equ   0                                   ; running offset into tmp_tile_data
            lup   8
            ldal  spritedata+]src,x
            sta   tmp_tile_data+]dest
            ldal  spritedata+]src+2,x
            sta   tmp_tile_data+]dest+2
]src        equ   ]src+SPRITE_PLANE_SPAN
]dest       equ   ]dest+4
            --^
            rts
; Where there are sprites involved, the first step is to call a routine to copy the
; tile data into a temporary buffer. Then the sprite data is merged and placed into
; the code field.
;
; A = vbuff address
; Y = tile store address
;
; This entry point only moves the tile store address back into X and dispatches through the
; per-tile K_TS_ONE_SPRITE table; A is passed through unchanged.
OneSpriteFast
tyx
jmp (K_TS_ONE_SPRITE,x)
; Specialization for Tile 0: the sprite words are copied straight into the code field.
;
; In:  A = VBuff address of the sprite, X = TileStore offset of the tile
_OneSpriteFastOver0
            ldy   TileStore+TS_CODE_ADDR_HIGH,x       ; code field bank (low byte) + TileStore bank (high byte)
            phy                                       ; park both bank bytes on the stack
            ldy   TileStore+TS_CODE_ADDR_LOW,x        ; Y = code field address of this tile
            tax                                       ; X = VBuff address of the sprite
            plb                                       ; data bank = code field bank

]src        equ   0                                   ; running offset into the sprite plane
]dest       equ   0                                   ; running offset into the code field
            lup   8
            ldal  spritedata+]src,x
            sta:  $0004+]dest,y
            ldal  spritedata+]src+2,x
            sta:  $0001+]dest,y
]src        equ   ]src+SPRITE_PLANE_SPAN
]dest       equ   ]dest+$1000
            --^

            plb                                       ; back to the TileStore bank
            rts
; General case: one sprite over a regular tile.  The tile is copied into tmp_tile_data through
; the per-tile copy routine, then each word is masked with the sprite mask, merged with the
; sprite data and written into the code field.
;
; In:  A = VBuff address of the sprite, X = TileStore offset of the tile
_OneSpriteFastOver
            sta   sprite_ptr0                         ; keep the VBuff address across the copy call
            ldy   TileStore+TS_TILE_ADDR,x            ; Y = tile data address for the copy routine
            jsr   (K_TS_COPY_TILE_DATA,x)             ; This routine *must* preserve X register

            ldy   TileStore+TS_CODE_ADDR_HIGH,x       ; code field bank (low byte) + TileStore bank (high byte)
            phy                                       ; park both bank bytes on the stack
            ldy   TileStore+TS_CODE_ADDR_LOW,x        ; Y = code field address of this tile
            ldx   sprite_ptr0                         ; X = VBuff address of the sprite
            plb                                       ; data bank = code field bank

]row        equ   0
            lup   8
            lda   tmp_tile_data+{]row*4}
            andl  spritemask+{]row*SPRITE_PLANE_SPAN},x
            oral  spritedata+{]row*SPRITE_PLANE_SPAN},x
            sta:  $0004+{]row*$1000},y
            lda   tmp_tile_data+{]row*4}+2
            andl  spritemask+{]row*SPRITE_PLANE_SPAN}+2,x
            oral  spritedata+{]row*SPRITE_PLANE_SPAN}+2,x
            sta:  $0001+{]row*$1000},y
]row        equ   ]row+1
            --^

            plb                                       ; back to the TileStore bank
            rts
; TwoSpriteLine -- composite two sprites onto the word currently in the accumulator.
; Sprite 1 is applied first and sprite 0 last; each step ANDs with the word read through
; [ptr],y and ORs with the word read through (ptr),y.  The AND [dp],y instructions are emitted
; as raw bytes (opcode $37).
; In:  A = word to composite onto, Y = byte offset within the sprite planes
; Out: A = composited word
TwoSpriteLine mac
; and [sprite_ptr1],y
db $37,sprite_ptr1
ora (sprite_ptr1),y
; and [sprite_ptr0],y
db $37,sprite_ptr0
ora (sprite_ptr0),y
<<<
; Two sprites over a tile (FAST mode).  The tile is copied into tmp_tile_data with the tile
; data bank selected, both sprites are composited in place with the sprite data bank selected,
; and execution then continues into _CopyDP2ToCodeField.
;
; In:  Y = TileStore offset of the tile; sprite_ptr0 / sprite_ptr1 set up by the dispatcher
TwoSpritesFast
            tyx                                       ; X = TileStore offset, needed after the compositing
            ldy   TileStore+TS_TILE_ADDR,x            ; Y = tile data address for the copy routine

            pei   DP2_TILEDATA_AND_TILESTORE_BANKS    ; pushes two bank bytes
            plb                                       ; first pull selects the bank used by the copy routine
            jsr   (K_TS_COPY_TILE_DATA,x)
            plb                                       ; second pull selects the other bank of the pair

            pei   DP2_SPRITEDATA_AND_TILESTORE_BANKS
            plb                                       ; set the sprite data bank

]src        equ   0                                   ; running offset into the sprite planes
]tile       equ   0                                   ; running offset into tmp_tile_data
            lup   8
            ldy   #]src
            lda   tmp_tile_data+]tile
            TwoSpriteLine
            sta   tmp_tile_data+]tile
            ldy   #]src+2
            lda   tmp_tile_data+]tile+2
            TwoSpriteLine
            sta   tmp_tile_data+]tile+2
]src        equ   ]src+SPRITE_PLANE_SPAN
]tile       equ   ]tile+4
            --^

            plb                                       ; restore access to data bank
; No jump here: execution continues into the routine that follows
; Copy the 8 rows (two words each) held in tmp_tile_data into the operand slots of the tile's
; code field lines.
;
; In:  X = TileStore offset of the tile, data bank = TileStore bank
_CopyDP2ToCodeField
            ldy   TileStore+TS_CODE_ADDR_HIGH,x       ; code field bank (low byte) + TileStore bank (high byte)
            phy                                       ; park both bank bytes on the stack
            ldy   TileStore+TS_CODE_ADDR_LOW,x        ; Y = code field address of this tile
            plb                                       ; data bank = code field bank

]tile       equ   0                                   ; running offset into tmp_tile_data
]dest       equ   0                                   ; running offset into the code field
            lup   8
            lda   tmp_tile_data+]tile
            sta:  $0004+]dest,y
            lda   tmp_tile_data+]tile+2
            sta:  $0001+]dest,y
]tile       equ   ]tile+4
]dest       equ   ]dest+$1000
            --^

            plb                                       ; back to the bank held in the top byte of CODE_ADDR_HIGH
            rts
; ThreeSpriteLine -- composite three sprites onto the word currently in the accumulator.
; Sprite 2 is applied first and sprite 0 last; each step ANDs with the word read through
; [ptr],y and ORs with the word read through (ptr),y.  The AND [dp],y instructions are emitted
; as raw bytes (opcode $37).
; In:  A = word to composite onto, Y = byte offset within the sprite planes
; Out: A = composited word
ThreeSpriteLine mac
; and [sprite_ptr2],y
db $37,sprite_ptr2
ora (sprite_ptr2),y
; and [sprite_ptr1],y
db $37,sprite_ptr1
ora (sprite_ptr1),y
; and [sprite_ptr0],y
db $37,sprite_ptr0
ora (sprite_ptr0),y
<<<
; Three sprites over a tile (FAST mode).  The tile is copied into tmp_tile_data, the three
; sprites are composited in place, and _CopyDP2ToCodeField writes the result to the code field.
;
; In:  Y = TileStore offset of the tile; sprite_ptr0..2 set up by the dispatcher
ThreeSpritesFast
            tyx                                       ; X = TileStore offset, needed after the compositing
            ldy   TileStore+TS_TILE_ADDR,x            ; Y = tile data address for the copy routine

            pei   DP2_TILEDATA_AND_TILESTORE_BANKS    ; pushes two bank bytes
            plb                                       ; first pull selects the bank used by the copy routine
            jsr   (K_TS_COPY_TILE_DATA,x)
            plb                                       ; second pull selects the other bank of the pair

            pei   DP2_SPRITEDATA_AND_TILESTORE_BANKS
            plb                                       ; set the sprite data bank

]row        equ   0
            lup   8
            ldy   #{]row*SPRITE_PLANE_SPAN}
            lda   tmp_tile_data+{]row*4}
            ThreeSpriteLine
            sta   tmp_tile_data+{]row*4}
            ldy   #{]row*SPRITE_PLANE_SPAN}+2
            lda   tmp_tile_data+{]row*4}+2
            ThreeSpriteLine
            sta   tmp_tile_data+{]row*4}+2
]row        equ   ]row+1
            --^

            plb                                       ; second byte of the pei pair
            jmp   _CopyDP2ToCodeField
; FourSpriteLine -- composite four sprites onto the word currently in the accumulator.
; Sprite 3 is applied first and sprite 0 last; each step ANDs with the word read through
; [ptr],y and ORs with the word read through (ptr),y.  The AND [dp],y instructions are emitted
; as raw bytes (opcode $37).
; In:  A = word to composite onto, Y = byte offset within the sprite planes
; Out: A = composited word
FourSpriteLine mac
; and [sprite_ptr3],y
db $37,sprite_ptr3
ora (sprite_ptr3),y
; and [sprite_ptr2],y
db $37,sprite_ptr2
ora (sprite_ptr2),y
; and [sprite_ptr1],y
db $37,sprite_ptr1
ora (sprite_ptr1),y
; and [sprite_ptr0],y
db $37,sprite_ptr0
ora (sprite_ptr0),y
<<<
; Four sprites over a tile (FAST mode).  The tile is copied into tmp_tile_data, the four
; sprites are composited in place, and _CopyDP2ToCodeField writes the result to the code field.
;
; In:  Y = TileStore offset of the tile; sprite_ptr0..3 set up by the dispatcher
FourSpritesFast
            tyx                                       ; X = TileStore offset, needed after the compositing
            ldy   TileStore+TS_TILE_ADDR,x            ; Y = tile data address for the copy routine

            pei   DP2_TILEDATA_AND_TILESTORE_BANKS    ; pushes two bank bytes
            plb                                       ; first pull selects the bank used by the copy routine
            jsr   (K_TS_COPY_TILE_DATA,x)
            plb                                       ; second pull selects the other bank of the pair

            pei   DP2_SPRITEDATA_AND_TILESTORE_BANKS
            plb                                       ; set the sprite data bank

]src        equ   0                                   ; running offset into the sprite planes
]tile       equ   0                                   ; running offset into tmp_tile_data
            lup   8
            ldy   #]src
            lda   tmp_tile_data+]tile
            FourSpriteLine
            sta   tmp_tile_data+]tile
            ldy   #]src+2
            lda   tmp_tile_data+]tile+2
            FourSpriteLine
            sta   tmp_tile_data+]tile+2
]src        equ   ]src+SPRITE_PLANE_SPAN
]tile       equ   ]tile+4
            --^

            plb                                       ; second byte of the pei pair
            jmp   _CopyDP2ToCodeField
; Copy the 8 rows (two words each) of a tile's pixel data into tmp_tile_data, top row first.
;
; In:  Y = address of the tile within tiledata
; X is not touched.
_CopyTileDataToDP2
            pei   DP2_TILEDATA_AND_TILESTORE_BANKS    ; Switching banks saves 16 cycles and costs 14, so it is slightly
            plb                                       ; faster -- but the real reason is that it leaves X untouched

]ofs        equ   0                                   ; running byte offset, same for source and destination
            lup   8
            lda   tiledata+]ofs,y
            sta   tmp_tile_data+]ofs
            lda   tiledata+]ofs+2,y
            sta   tmp_tile_data+]ofs+2
]ofs        equ   ]ofs+4
            --^

            plb                                       ; pull the second byte pushed by the pei
            rts
; Copy a tile's pixel data into tmp_tile_data with the row order reversed: source row 7 goes
; to destination row 0, row 6 to row 1, and so on (a vertical flip).
;
; In:  Y = address of the tile within tiledata
; X is not touched.
_CopyTileDataToDP2V
            pei   DP2_TILEDATA_AND_TILESTORE_BANKS    ; Switching banks saves 16 cycles and costs 14, so it is slightly
            plb                                       ; faster -- but the real reason is that it leaves X untouched

]from       equ   28                                  ; byte offset of source row 7
]to         equ   0                                   ; byte offset of destination row 0
            lup   8
            lda   tiledata+]from,y
            sta   tmp_tile_data+]to
            lda   tiledata+]from+2,y
            sta   tmp_tile_data+]to+2
]from       equ   ]from-4
]to         equ   ]to+4
            --^

            plb                                       ; pull the second byte pushed by the pei
            rts