From 1f9c9b3f5bc0135caa9b7c5255e10b0bd6e14290 Mon Sep 17 00:00:00 2001 From: Lucas Scharenbroich Date: Mon, 20 Jun 2022 15:55:09 -0500 Subject: [PATCH] Checkpoint of final tile proc reorg for generalize new infrastructure for different engine modes --- src/Defs.s | 13 +- src/Render.s | 4 +- src/Sprite.s | 5 + src/Tiles.s | 234 ++++++++++++++++----------- src/Tool.s | 6 +- src/blitter/TileProcs.s | 11 +- src/blitter/Tiles.s | 4 +- src/blitter/Tiles00000.s | 11 ++ src/blitter/Tiles00001.s | 42 +++++ src/blitter/Tiles10001.s | 94 ++++++++++- src/render/Fast.s | 148 +++++++++++++++++ src/render/README.txt | 83 ++++++++++ src/render/Render.s | 231 +++++++++++++++++++++++++++ src/render/Slow.s | 51 ++++++ src/render/Sprite1.s | 180 +++++++++++++++++++++ src/render/Sprite2.s | 102 ++++++++++++ src/render/Sprite3.s | 36 +++++ src/render/Sprite4.s | 0 src/static/TileStore.s | 10 +- src/static/TileStoreDefs.s | 7 +- src/tiles/DirtyTileQueue.s | 37 ++--- src/tiles/FastRenderer.s | 314 ------------------------------------- 22 files changed, 1172 insertions(+), 451 deletions(-) create mode 100644 src/render/Fast.s create mode 100644 src/render/README.txt create mode 100644 src/render/Render.s create mode 100644 src/render/Slow.s create mode 100644 src/render/Sprite1.s create mode 100644 src/render/Sprite2.s create mode 100644 src/render/Sprite3.s create mode 100644 src/render/Sprite4.s diff --git a/src/Defs.s b/src/Defs.s index 3f2a9b7..8f05c28 100644 --- a/src/Defs.s +++ b/src/Defs.s @@ -153,6 +153,7 @@ DP2_DIRTY_TILE_CALLBACK equ 162 ; Some pre-defined bank values DP2_TILEDATA_AND_TILESTORE_BANKS equ 164 DP2_SPRITEDATA_AND_TILESTORE_BANKS equ 166 +DP2_TILEDATA_AND_SPRITEDATA_BANKS equ 168 SPRITE_VBUFF_PTR equ 224 ; 32 bytes of adjusted pointers to VBuffArray addresses ; End direct page values @@ -186,16 +187,16 @@ PAD_BUTTON_A equ $02 PAD_KEY_DOWN equ $04 ; Tile constants -TILE_ID_MASK equ $01FF -TILE_SPRITE_BIT equ $8000 ; Set if this tile intersects an active sprite +; 
TILE_RESERVED_BIT equ $8000 TILE_PRIORITY_BIT equ $4000 ; Put tile on top of sprite -TILE_FRINGE_BIT equ $2000 -TILE_MASK_BIT equ $1000 -TILE_DYN_BIT equ $0800 +TILE_FRINGE_BIT equ $2000 ; Unused +TILE_MASK_BIT equ $1000 ; Hint bit used in TWO_LAYER_MODE to optimize rendering +TILE_DYN_BIT equ $0800 ; Is this a Dynamic Tile? TILE_VFLIP_BIT equ $0400 TILE_HFLIP_BIT equ $0200 +TILE_ID_MASK equ $01FF TILE_CTRL_MASK equ $FE00 -TILE_PROC_MASK equ $F800 ; Select tile proc for rendering +; TILE_PROC_MASK equ $F800 ; Select tile proc for rendering ; Sprite constants SPRITE_HIDE equ $2000 diff --git a/src/Render.s b/src/Render.s index 7a7bba6..2c046dd 100644 --- a/src/Render.s +++ b/src/Render.s @@ -211,8 +211,8 @@ _RenderDirtyTile ; Y is set to the top-left address of the tile in SHR screen ; A is set to the address of the tile data NoSpritesDirty - lda TileStore+TS_DIRTY_TILE_DISP,y - stal :nsd+1 +; lda TileStore+TS_DIRTY_TILE_DISP,y +; stal :nsd+1 ldx TileStore+TS_SCREEN_ADDR,y ; Get the on-screen address of this tile lda TileStore+TS_TILE_ADDR,y ; load the address of this tile's data (pre-calculated) plb ; set the code field bank diff --git a/src/Sprite.s b/src/Sprite.s index 6789c69..6632a3b 100644 --- a/src/Sprite.s +++ b/src/Sprite.s @@ -605,6 +605,11 @@ _CacheSpriteBanks ldx #$100 sta DP2_TILEDATA_AND_TILESTORE_BANKS,x ; put a reversed copy in the second direct page + lda #>spritedata + and #$FF00 + ora #^tiledata + sta DP2_TILEDATA_AND_SPRITEDATA_BANKS,x + lda #>spritedata and #$FF00 ora #^TileStore diff --git a/src/Tiles.s b/src/Tiles.s index 917a6c0..0f9f7d9 100644 --- a/src/Tiles.s +++ b/src/Tiles.s @@ -79,6 +79,7 @@ InitTiles :col equ tmp0 :row equ tmp1 :vbuff equ tmp2 +:base equ tmp3 ; Initialize the Tile Store @@ -106,12 +107,18 @@ InitTiles lda EngineMode bit #ENGINE_MODE_DYN_TILES+ENGINE_MODE_TWO_LAYER beq :fast + bit #ENGINE_MODE_TWO_LAYER + beq :dyn ; ldal TileProcs ; sta TileStore+TS_BASE_TILE_DISP,x bra :out :fast lda #0 ; Initialize with Tile 0 - ldy 
#FastOverZero + ldy #FastOverZA + jsr _SetTileProcs + bra :out ; Fast mode is configured; do not fall through into the :dyn setup +:dyn lda #0 ; Initialize with Tile 0 + ldy #DynOverZA jsr _SetTileProcs :out @@ -135,7 +142,8 @@ InitTiles sta TileStore+TS_CODE_ADDR_HIGH,x ; High word of the tile address (just the bank) lda BRowTableLow,y - sta TileStore+TS_BASE_ADDR,x ; May not be needed later if we can figure out the right constant... + sta :base +; sta TileStore+TS_BASE_ADDR,x ; May not be needed later if we can figure out the right constant... lda :col ; Set the offset values based on the column asl ; of this tile @@ -145,7 +153,8 @@ InitTiles tay lda Col2CodeOffset+2,y clc - adc TileStore+TS_BASE_ADDR,x + adc :base +; adc TileStore+TS_BASE_ADDR,x sta TileStore+TS_CODE_ADDR_LOW,x ; Low word of the tile address in the code field dec :col @@ -167,18 +176,25 @@ InitTiles ; Y = tile row [0, 25] (26 rows) ; ; Registers are not preserved -_SetTile - pha - jsr _GetTileStoreOffset0 ; Get the address of the X,Y tile position - tay - pla - - cmp TileStore+TS_TILE_ID,y ; Only set to dirty if the value changed - beq :nochange +oldTileId equ blttmp ; This location is used in _SetTileProcs, too +newTileId equ blttmp+2 +procIdx equ blttmp+4 - sta TileStore+TS_TILE_ID,y ; Value is different, store it. +_SetTile + sta newTileId + jsr _GetTileStoreOffset0 ; Get the address of the X,Y tile position + tax + + lda TileStore+TS_TILE_ID,x + cmp newTileId + bne :changed + rts + +:changed sta oldTileId + lda newTileId + sta TileStore+TS_TILE_ID,x ; Value is different, store it. jsr _GetTileAddr - sta TileStore+TS_TILE_ADDR,y ; Committed to drawing this tile, so get the address of the tile in the tiledata bank for later + sta TileStore+TS_TILE_ADDR,x ; Committed to drawing this tile, so get the address of the tile in the tiledata bank for later ; Set the standard renderer procs for this tile. ; @@ -191,55 +207,81 @@ _SetTile ; functionality. Sometimes it is simple, but in cases of the sprites overlapping Dynamic Tiles and other cases ; it can be more involved. 
+; Calculate the base tile proc selector from the tile Id: bit 2 = PRIORITY, bit 1 = non-zero tile, bit 0 = VFLIP + stz procIdx + lda newTileId + + clc + bit #TILE_PRIORITY_BIT + beq :low_priority + sec +:low_priority rol procIdx ; ROL (not ASL) so the carry flag is shifted into the selector + + clc + bit #TILE_ID_MASK + beq :is_zero ; Tile 0 keeps this bit clear so the 'Z' tuples are selected + sec +:is_zero rol procIdx + + clc + bit #TILE_VFLIP_BIT + beq :no_vflip + sec +:no_vflip rol procIdx + +; Multiply by 6 (three 2-byte addresses per tuple) to get the correct table entry offset + + asl procIdx + lda procIdx + asl + adc procIdx ; carry is clear here because the selector is at most 7 + sta procIdx ; keep the byte offset for the _SetTileProcs calls below + +; Now integrate with the engine mode indicator + lda EngineMode bit #ENGINE_MODE_DYN_TILES+ENGINE_MODE_TWO_LAYER bne :not_fast - brl _SetTileFast -:nochange rts + brl :setTileFast -:not_fast - lda TileStore+TS_TILE_ID,y +:not_fast bit #ENGINE_MODE_TWO_LAYER + bne :not_dyn + brl :setTileDyn + +:not_dyn + lda TileStore+TS_TILE_ID,x and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value xba - tax + tay ; ldal DirtyTileProcs,x ; sta TileStore+TS_DIRTY_TILE_DISP,y ; ldal CopyTileProcs,x ; sta TileStore+TS_DIRTY_TILE_COPY,y - lda TileStore+TS_TILE_ID,y ; Get the non-sprite dispatch address + lda TileStore+TS_TILE_ID,x ; Get the non-sprite dispatch address and #TILE_CTRL_MASK xba - tax -; ldal TileProcs,x + tay +; ldal TileProcs,y ; sta TileStore+TS_BASE_TILE_DISP,y - jmp _PushDirtyTileY ; on the next call to _ApplyTiles + jmp _PushDirtyTileX ; on the next call to _ApplyTiles ; Specialized check for when the engine is in "Fast" mode. If is a simple decision tree based on whether ; the tile priority bit is set, and whether this is the special tile 0 or not. 
-_SetTileFast - tyx - lda TileStore+TS_TILE_ID,x - bit #TILE_PRIORITY_BIT - beq :fast_over -:fast_under bit #TILE_ID_MASK - beq :fast_under_zero - ldy #FastUnderNonZero +:setTileFast + ldy #FastProcs ; Y = base address of the Fast mode tuples + lda procIdx ; A = byte offset of the selected tuple jsr _SetTileProcs jmp _PushDirtyTileX -:fast_under_zero ldy #FastUnderZero - jsr _SetTileProcs - jmp _PushDirtyTileX - -:fast_over bit #TILE_ID_MASK - beq :fast_over_zero - ldy #FastOverNonZero - jsr _SetTileProcs - jmp _PushDirtyTileX - -:fast_over_zero ldy #FastOverZero +; Specialized check for when the engine has enabled dynamic tiles. In this case we are no longer +; guaranteed that the opcodes in a tile are PEA instructions. If the old tile and the new tile +; are both Dynamic tiles or both Basic tiles, then we can use an optimized routine. Otherwise +; we must set the opcodes as well as the operands +:setTileDyn + ldy #DynProcs ; Y = base address of the Dyn mode tuples (NOTE: the DynProcs table has no entries yet) + lda procIdx ; A = byte offset of the selected tuple jsr _SetTileProcs jmp _PushDirtyTileX @@ -247,83 +289,84 @@ _SetTileFast jmp _PushDirtyTileY ; on the next call to _ApplyTiles ; X = Tile Store offset -; Y = table address -; A = TILE_ID +; Y = Engine Mode Base Table address +; A = Table proc index ; ; see TileProcTables in static/TileStore.s bnkPtr equ blttmp tblPtr equ blttmp+4 -stpTmp equ blttmp+8 _SetTileProcs - and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value - xba - sta stpTmp ; save it ; Set a long pointer to this bank + sty tblPtr + clc + adc tblPtr + sta tblPtr + phk phk pla and #$00FF - stz bnkPtr ; pointer to this bank - sta bnkPtr+2 - - sty tblPtr ; pointer to the table sta tblPtr+2 -; Lookup the base tile procedure +; Lookup the tile procedures - clc ldy #0 - lda [tblPtr],y ; load address of the base tile proc array - adc stpTmp ; add the offset - tay - lda [bnkPtr],y ; load the actual value - stal K_TS_BASE_TILE_DISP,x ; store it in the dispatch table + lda [tblPtr],y + stal K_TS_BASE_TILE_DISP,x -; Lookup the tile copy routine - - clc ldy #2 - lda [tblPtr],y ; load address to the tile copy proc array - adc stpTmp - tay - lda [bnkPtr],y - 
stal K_TS_COPY_TILE_DATA,x - -; Finally, load in the last two addresses directly - - ldy #4 lda [tblPtr],y stal K_TS_SPRITE_TILE_DISP,x - ldy #6 + ldy #4 lda [tblPtr],y stal K_TS_ONE_SPRITE,x rts - ; TileProcTables ; -; Tables of tuples used to populate the K_TS_* dispatch arrays for different combinations. Easier to maintain -; than a bunch of conditional code. Each "table" address holds four pointers to routines to handle the four -; combinations of HFLIP and VFLIP bits. +; Tables of tuples used to populate the K_TS_* dispatch arrays for different combinations. This is +; easier to maintain than a bunch of conditional code. Each etry hold three addresses. ; -; First address: A table of routines that render a tile when there is no sprite present -; Second address: A table of routines that copy a tile into the direct page workspace -; Third address: The general sprite routine; currently only used for Over/Under selection -; Fourth address: The specific sprite routine to use when only one sprite intersects the tile -FastOverNonZero dw FastTileProcs,FastTileCopy,FastSpriteOver,_OneSpriteFastOver -FastOverZero dw FastTileProcs0,FastTileCopy0,FastSpriteOver,_OneSpriteFastOver0 -FastUnderNonZero dw FastTileProcs,FastTileCopy,FastSpriteUnder,_OneSpriteFastUnder -FastUnderZero dw FastTileProcs0,FastTileCopy0,FastSpriteUnder,_OneSpriteFastUnder0 +; First address: Draw a tile directly into the code buffer (no sprites) +; Second address: Draw a tile merged with sprite data from the direct page +; Third address: Specialize routine to draw a tile merged with one sprite +; +; There are unique tuples of routines for all of the different combinations of tile properties +; and engine modes. This is an extesive number of combinations, but it simplified the development +; and maintainence of the rendering subroutines. Also, the difference subroutines can be written +; in any way and can make use of their on subroutines to reduce code size. 
+; +; Properties: +; +; [MODE] ENGINE_MODE: Fast, Dyn, TwoLayer +; [Z | N] Is Tile 0? : Yes, No +; [A | V] Is VFLIP? : Yes, No +; [Over | Under] Priority? : Yes, No +; +; So eight tuples per engine mode; 24 tuples total. Table name convention +; +; +FastProcs +FastOverZA dw _TBConstTile0,GenericOverZero,_OneSpriteFastOver0 +FastOverZV dw _TBConstTile0,GenericOverZero,_OneSpriteFastOver0 +FastOverNA dw _TBCopyDataFast,GenericOverAFast,_OneSpriteFastOverA +FastOverNV dw _TBCopyDataVFast,GenericOverVFast,_OneSpriteFastOverV +FastUnderZA dw _TBConstTile0,GenericUnderZero,GenericUnderZero +FastUnderZV dw _TBConstTile0,GenericUnderZero,GenericUnderZero +FastUnderNA dw _TBCopyDataFast,GenericUnderAFast,_OneSpriteFastUnderA +FastUnderNV dw _TBCopyDataVFast,GenericUnderVFast,_OneSpriteFastUnderV -; The routines will come from this table when ENGINE_MODE_TWO_LAYER and ENGINE_MODE_DYN_TILES -; are both off. -FastTileProcs dw _TBCopyDataFast,_TBCopyDataFast,_TBCopyDataVFast,_TBCopyDataVFast -FastTileCopy dw _CopyTileDataToDP2,_CopyTileDataToDP2,_CopyTileDataToDP2V,_CopyTileDataToDP2V - -FastTileProcs0 dw _TBConstTile0,_TBConstTile0,_TBConstTile0,_TBConstTile0 -FastTileCopy0 dw _TBConstTileDataToDP2,_TBConstTileDataToDP2,_TBConstTileDataToDP2,_TBConstTileDataToDP2 +DynProcs +DynOverZA +DynOverZV +DynOverNA +DynOverNV +DynUnderZA +DynUnderZV +DynUnderNA +DynUnderNV ; SetBG0XPos ; @@ -387,8 +430,7 @@ last_bit lda (SPRITE_VBUFF_PTR+{]1*2}),y next_bit <<< -; Specialization for the first sprite which can just return the vbuff address -; in a register if there is only one sprite intersecting the tile +; Specialization for the first sprite which can optimize its dispatch if its the only one ; dobit bit_position,dest;next;exit dobit1 mac lsr @@ -404,7 +446,8 @@ dobit1 mac last_bit lda (SPRITE_VBUFF_PTR+{]1*2}),y clc ; pre-adjust these later adc _Sprites+TS_VBUFF_BASE+{]1*2} - jmp ]4 + sta sprite_ptr0+{]2*4} + jmp (K_TS_ONE_SPRITE,x) next_bit <<< @@ -522,6 +565,7 @@ b_15_3 endbit 
15;3;]4 ; Store some tables in the K bank that will be used exclusively for jmp (abs,x) dispatch K_TS_BASE_TILE_DISP ds TILE_STORE_SIZE ; draw the tile without a sprite -K_TS_COPY_TILE_DATA ds TILE_STORE_SIZE ; copy the tile into temp storage (used when tile below sprite) +K_TS_COPY_TILE_DATA ds TILE_STORE_SIZE ; copy/merge the tile into temp storage K_TS_SPRITE_TILE_DISP ds TILE_STORE_SIZE ; select the sprite routine for this tile -K_TS_ONE_SPRITE ds TILE_STORE_SIZE ; specialized sprite routine when only one sprite covers the tile \ No newline at end of file +K_TS_ONE_SPRITE ds TILE_STORE_SIZE ; specialized sprite routine when only one sprite covers the tile +K_TS_APPLY_TILE_DATA ds TILE_STORE_SIZE ; move tile from temp storage into code field \ No newline at end of file diff --git a/src/Tool.s b/src/Tool.s index 4e6f48e..8863e74 100644 --- a/src/Tool.s +++ b/src/Tool.s @@ -363,8 +363,12 @@ _TSGetSeconds put Sprite2.s put SpriteRender.s put Render.s + put render/Render.s + put render/Fast.s + put render/Sprite1.s + put render/Sprite2.s put tiles/DirtyTileQueue.s - put tiles/FastRenderer.s +; put tiles/FastRenderer.s put blitter/Horz.s put blitter/Vert.s put blitter/BG0.s diff --git a/src/blitter/TileProcs.s b/src/blitter/TileProcs.s index a8a7d10..ad50277 100644 --- a/src/blitter/TileProcs.s +++ b/src/blitter/TileProcs.s @@ -84,10 +84,11 @@ _TBCopyTileMaskToCBuffV ; _TBConstTile ; ; A specialized routine that fills in a tile with a single constant value. 
It's intended to be used to -; fill in solid colors, so there are no specialized horizontal or verical flipped variants +; fill in solid colors, so there are no specialized horizontal or verical flipped variantsConstUnderZero _TBConstTile0 tax +_TBConstTileX lda #0 -_TBConstTileX sta: $0001,y + sta: $0001,y sta: $0004,y sta $1001,y sta $1004,y @@ -105,7 +106,11 @@ _TBConstTileX sta: $0001,y sta $7004,y plb rts -; jmp _TBFillPEAOpcode + +_TBConstTileSlow0 + tax + jsr _TBFillPEAOpcode + jmp _TBConstTileX _TBConstTileDataToDP2 ]line equ 0 diff --git a/src/blitter/Tiles.s b/src/blitter/Tiles.s index b1779b0..5a55228 100644 --- a/src/blitter/Tiles.s +++ b/src/blitter/Tiles.s @@ -59,8 +59,8 @@ CopyNoSprites lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line pha ; and put on the stack for later - lda TileStore+TS_BASE_ADDR+1,x ; load the base address of the code field ($0000 or $8000) - sta _BASE_ADDR+1 ; so we can get by just copying the high byte +; lda TileStore+TS_BASE_ADDR+1,x ; load the base address of the code field ($0000 or $8000) +; sta _BASE_ADDR+1 ; so we can get by just copying the high byte rep #$20 lda TileStore+TS_BASE_TILE_DISP,x ; Get the address of the renderer for this tile diff --git a/src/blitter/Tiles00000.s b/src/blitter/Tiles00000.s index 0d3f6e8..7921c2b 100644 --- a/src/blitter/Tiles00000.s +++ b/src/blitter/Tiles00000.s @@ -42,6 +42,7 @@ _TBSolidTile_VH ; register value. 
This must be restored prior to returning _TBCopyDataFast tax +_TBCopyDataFastX ]line equ 0 lup 8 ldal tiledata+{]line*4},x @@ -53,6 +54,10 @@ _TBCopyDataFast plb rts +_TBCopyDataSlow + tax + jsr _TBFillPEAOpcode + jmp _TBCopyDataFastX _TBCopyData ]line equ 0 @@ -67,6 +72,7 @@ _TBCopyData _TBCopyDataVFast tax +_TBCopyDataVFastX ]src equ 7 ]dest equ 0 lup 8 @@ -80,6 +86,11 @@ _TBCopyDataVFast plb rts +_TBCopyDataVSlow + tax + jsr _TBFillPEAOpcode + jmp _TBCopyDataVFastX + _TBCopyDataV ]src equ 7 ]dest equ 0 diff --git a/src/blitter/Tiles00001.s b/src/blitter/Tiles00001.s index 505abb2..f44aefa 100644 --- a/src/blitter/Tiles00001.s +++ b/src/blitter/Tiles00001.s @@ -12,6 +12,48 @@ _TBDynamicTile_00 jsr _TBDynamicData jmp _TBFillLdaDpOpcode +_TBDynamic + ldal TileStore+TS_TILE_ID,x + and #$007F + ora #$4800 + +]line equ 0 ; render the first column + lup 8 + sta: $0004+{]line*$1000},y +]line equ ]line+1 + --^ + + inc ; advance to the next word + inc + +]line equ 0 ; render the second column + lup 8 + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + + sep #$20 + lda #$B5 + sta: $0000,y + sta: $0003,y + sta $1000,y + sta $1003,y + sta $2000,y + sta $2003,y + sta $3000,y + sta $3003,y + sta $4000,y + sta $4003,y + sta $5000,y + sta $5003,y + sta $6000,y + sta $6003,y + sta $7000,y + sta $7003,y + rep #$20 + + plb + rts ; Primitive to render a dynamic tile ; ; LDA 00,x / PHA where the operand is fixed when the tile is rendered diff --git a/src/blitter/Tiles10001.s b/src/blitter/Tiles10001.s index d834eca..dd63ee4 100644 --- a/src/blitter/Tiles10001.s +++ b/src/blitter/Tiles10001.s @@ -3,6 +3,48 @@ ; This tile type does not explicitly support horizontal or vertical flipping. 
An appropriate tile ; descriptor should be passed into CopyTileToDyn to put the horizontally or vertically flipped source ; data into the dynamic tile buffer +_TBDynamicSpriteTile + sta _X_REG + ldal TileStore+TS_JMP_ADDR,x ; Get the address of the exception handler + sta _JTBL_CACHE + + ldal TileStore+TS_TILE_ID,x ; Get the original tile descriptor + and #$007F ; clamp to < (32 * 4) + ora #$B500 + xba + sta _OP_CACHE ; This is the 2-byte opcode for to load the data + + CopyDynWord 0;$0003 + CopyDynWord 4;$1003 + CopyDynWord 8;$2003 + CopyDynWord 12;$3003 + CopyDynWord 16;$4003 + CopyDynWord 20;$5003 + CopyDynWord 24;$6003 + CopyDynWord 28;$7003 + + clc + lda _JTBL_CACHE + adc #32 ; All the snippets are 32 bytes wide and, since we're + sta _JTBL_CACHE ; within one tile, the second column is consecutive + + lda _OP_CACHE + adc #$0200 + sta _OP_CACHE + + CopyDynWord 2;$0000 + CopyDynWord 6;$1000 + CopyDynWord 10;$2000 + CopyDynWord 14;$3000 + CopyDynWord 18;$4000 + CopyDynWord 22;$5000 + CopyDynWord 26;$6000 + CopyDynWord 30;$7000 + + plb + rts + + _TBDynamicSpriteTile_00 sty _Y_REG ; This is restored in the macro @@ -53,6 +95,56 @@ _TBDynamicSpriteTile_00 rts +; Create a masked render based on data in the direct page temporary buffer +; +; ]1 : sprite buffer offset +; ]2 : code field offset +CopyDynWord mac + lda tmp_sprite_mask+{]1} ; load the mask value + bne mixed ; a non-zero value may be mixed + +; This is a solid word + lda #$00F4 ; PEA instruction + sta: ]2,y + lda tmp_sprite_data+{]1} ; load the sprite data + sta: ]2+1,y ; PEA operand + bra next + +mixed cmp #$FFFF ; All 1's in the mask is a fully transparent sprite word + beq transparent + + lda #$004C ; JMP to handler + sta: {]2},y + lda _JTBL_CACHE ; Get the offset to the exception handler for this column + ora #{]2&$F000} ; adjust for the current row offset + sta: {]2}+1,y + tax ; This becomes the new address that we use to patch in + + lda _OP_CACHE ; Get the LDA dp,x instruction for this column + sta: 
$0000,x + + lda #$0029 ; AND #SPRITE_MASK + sta: $0002,x + lda tmp_sprite_mask+{]1} + sta: $0003,x + + lda #$0009 ; ORA #SPRITE_DATA + sta: $0005,x + lda tmp_sprite_data+{]1} + sta: $0006,x + + lda #$0D80 ; branch to the prologue (BRA *+15) + sta: $0008,x + bra next + +; This is a transparent word, so just show the dynamic data +transparent + lda #$4800 ; Put the PHA in the third byte + sta: {]2}+1,y + lda _OP_CACHE ; Store the LDA dp,x instruction with operand + sta: {]2},y +next + <<< ; Masked renderer for a dynamic tile with sprite data overlaid. ; @@ -71,7 +163,7 @@ CopyDynSpriteWord MAC ; If MASK == 0, then we can do a PEA. If MASK == $FFFF, then fall back to the simple Dynamic Tile ; code. ldal spritemask+{]1},x ; load the mask value - bne mixed ; a non-zero value may be mixed + bne mixed ; a non-zero value may be mixed ; This is a solid word lda #$00F4 ; PEA instruction diff --git a/src/render/Fast.s b/src/render/Fast.s new file mode 100644 index 0000000..1b3c41c --- /dev/null +++ b/src/render/Fast.s @@ -0,0 +1,148 @@ +; Collection of render function used when the engine is in "FAST" mode. In this mode +; there are no dynamic tile or two layer tiles enabled, so all of the tiles are comprised +; of PEA opcodes. These functions take advantage of this as the fact that masks are +; not needed to improve rendering speed. +; +; The following functions are defined here +; +; GenericOverAFast : Places data from tmp_sprite_data on top of the TileStore's tile +; GenericUnderAFast : Places the TileStore's tile on top of tmp_sprite_data + +GenericOverAFast + lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line + pha ; and put on the stack for later. Has TileStore bank in high byte. 
+ ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field + lda TileStore+TS_TILE_ADDR,x + tax + plb + +]line equ 0 + lup 8 + ldal tiledata+{]line*4},x + and tmp_sprite_mask+{]line*4} + ora tmp_sprite_data+{]line*4} + sta: $0004+{]line*$1000},y + + ldal tiledata+{]line*4}+2,x + and tmp_sprite_mask+{]line*4}+2 + ora tmp_sprite_data+{]line*4}+2 + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + + plb + rts + +GenericOverVFast + lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line + pha ; and put on the stack for later. Has TileStore bank in high byte. + ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field + lda TileStore+TS_TILE_ADDR,x + tax + plb + +]src equ 7 +]dest equ 0 + lup 8 + ldal tiledata+{]src*4},x ; tile rows are read bottom-up (]src) for the vertical flip + and tmp_sprite_mask+{]dest*4} ; sprite rows and code field lines are indexed top-down (]dest) + ora tmp_sprite_data+{]dest*4} + sta: $0004+{]dest*$1000},y + + ldal tiledata+{]src*4}+2,x + and tmp_sprite_mask+{]dest*4}+2 + ora tmp_sprite_data+{]dest*4}+2 + sta: $0001+{]dest*$1000},y +]src equ ]src-1 +]dest equ ]dest+1 + --^ + plb + rts + +GenericOverZero + lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line + pha ; and put on the stack for later. Has TileStore bank in high byte. + ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field + plb + +]line equ 0 + lup 8 + lda tmp_sprite_data+{]line*4} + sta: $0004+{]line*$1000},y + + lda tmp_sprite_data+{]line*4}+2 + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + + plb + rts + +GenericUnderAFast + lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line + pha ; and put on the stack for later. Has TileStore bank in high byte. 
+ ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field + lda TileStore+TS_TILE_ADDR,x + tax + plb + +]line equ 0 + lup 8 + lda tmp_sprite_data+{]line*4} + andl tiledata+{]line*4}+32,x ; clear the sprite pixels covered by the tile (mask is 32 bytes after the data) + oral tiledata+{]line*4},x ; then place the tile data on top + sta: $0004+{]line*$1000},y + + lda tmp_sprite_data+{]line*4}+2 + andl tiledata+{]line*4}+32+2,x + oral tiledata+{]line*4}+2,x + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + + plb + rts + +GenericUnderVFast + lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line + pha ; and put on the stack for later. Has TileStore bank in high byte. + ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field + lda TileStore+TS_TILE_ADDR,x + tax + plb + +]src equ 7 +]dest equ 0 + lup 8 + lda tmp_sprite_data+{]dest*4} ; sprite rows and code field lines are indexed top-down (]dest) + andl tiledata+{]src*4}+32,x ; tile mask rows are read bottom-up (]src) for the vertical flip + oral tiledata+{]src*4},x ; then place the flipped tile data on top + sta: $0004+{]dest*$1000},y + + lda tmp_sprite_data+{]dest*4}+2 + andl tiledata+{]src*4}+32+2,x + oral tiledata+{]src*4}+2,x + sta: $0001+{]dest*$1000},y +]src equ ]src-1 +]dest equ ]dest+1 + --^ + + plb + rts + +GenericUnderZero + lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line + pha ; and put on the stack for later. Has TileStore bank in high byte. + ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field + plb + lda #0 + +]line equ 0 + lup 8 + sta: $0004+{]line*$1000},y + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + + plb + rts diff --git a/src/render/README.txt b/src/render/README.txt new file mode 100644 index 0000000..0dbffdf --- /dev/null +++ b/src/render/README.txt @@ -0,0 +1,83 @@ +This folder contains the rendering tuples for the different types of tile rendering modes +that are defined by both the engine mode and the specific tile attributes. There are +a *lot* of variants, so they are cataloged here. 
+ +The top-level TileRender function is the main entry point that defines the overall tile render +flow as well as the register parameters and calling conventions for each of the modular +components. + +There are 5 pluggable functions that make up a rendering mode + +1. K_TS_BASE_TILE_DISP + + An address to a function that will render a tile into the code field. There are no + sprites to handle in this case. + + Arguments: + A: TileData/TileMask address + B: code field bank + Y: address of the tile in the code bank + X: TileStore offset + + Return: + None + + If additional TileStore properties are needed for the renderer, they can be read using the X + register. + +2. K_TS_SPRITE_TILE_DISP + + Selects the top-level handler for rendering a tile with a sprite. Currently, this is used to + select between rendering a sprite above the tile, or under the tile based on the value of the + TILE_PRIORITY_BIT. + + Arguments: + A: TileStore+TS_SPRITE_FLAG + X: TileStore offset + + Return: + Y: TileStore offset + sprite_ptrX direct page values set to the sprite VBuff addresses + + The handler routine is responsible for examining the TS_SPRITE_FLAG value and dispatching + to an appropriate routine to handle the number of sprites intersecting the tile. + +3. K_TS_ONE_SPRITE + + A specialized routine. When K_TS_SPRITE_TILE_DISP determines there is only one sprite to render, + it MUST dispatch to this function. The K_TS_ONE_SPRITE routine MAY make use of the K_TS_COPY_TILE_DATA + and K_TS_APPLY_TILE_DATA functions, but is not required to do so. + +4. K_TS_COPY_TILE_DATA & K_TS_APPLY_TILE_DATA + + A pair of functions that copy tile data (and possibly mask information) into a temporary + direct page space and then render that workspace into the code field. + + These functions are used as building blocks by the generic Over/Under multi-sprite + rendering code. 
+ + K_TS_COPY_TILE_DATA + Arguments: + B: Set to the TileData bank + Y: Set to the tile address + Return: + X: preserve the X register + + K_TS_APPLY_TILE_DATA + Arguments: + B: code field bank + Y: address of the tile in the code bank + Return: + None + + + +Generic Flow + + 1. Is there a sprite? + No -> Call K_TS_BASE_TILE_DISP to render a tile into the code field + + Yes -> Call K_TS_SPRITE_TILE_DISP + + Over : Copy tile data + mask to DP, Copy sprite data + mask to DP, render tile to code field + Under : Copy sprite data to DP, \ No newline at end of file diff --git a/src/render/Render.s b/src/render/Render.s new file mode 100644 index 0000000..d0a6cc5 --- /dev/null +++ b/src/render/Render.s @@ -0,0 +1,231 @@ +; If there are no sprites, then we copy the tile data into the code field as fast as possible. +; If there are sprites, then additional work is required +_RenderTile + lda TileStore+TS_SPRITE_FLAG,x ; any sprites on this line? + bne :sprites + + lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line + pha ; and put on the stack for later. Has TileStore bank in high byte. + ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field + lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated) + plb ; set the code field bank + jmp (K_TS_BASE_TILE_DISP,x) ; go to the tile copy routine + +; Execute the sprite tree. If there is only one sprite, control will immediately be passed to +; the routine at K_TS_ONE_SPRITE. Otherwise, the control passed to the routines with a different +; number of sprites. These routines need to copy the flattened sprite data and mask into the +; direct page workspace to be used by the K_TS_SPRITE_TILE_DISP routine +:sprites txy + SpriteBitsToVBuffAddrs $0000;TwoSprites;ThreeSprites;FourSprites + +; Dispatch vectors for the two, three and four sprite functions. 
These just +; flatten the sprite data into the direct page workspace and then pass control +; to the configurable routine which is set in SetTile and knows what to do +; based on the tile properties (over/under, engine mode, etc.) +TwoSprites tyx + jsr CopyTwoSpritesDataAndMaskToDP + jmp (K_TS_SPRITE_TILE_DISP,x) + +ThreeSprites tyx + jsr CopyThreeSpritesDataAndMaskToDP + jmp (K_TS_SPRITE_TILE_DISP,x) + +FourSprites tyx + jsr CopyFourSpritesDataAndMaskToDP + jmp (K_TS_SPRITE_TILE_DISP,x) + +; Helper functions (and macros) + +; CopyTileToDP -- executes the K_TS_COPY_TILE_DATA routine. This may copy just data or data+mask +; information to the direct page +_CopyTileToDP mac + ldy TileStore+TS_TILE_ADDR,x ; load the tile address + pei DP2_TILEDATA_AND_TILESTORE_BANKS + plb ; set to the tiledata bank + jsr (K_TS_COPY_TILE_DATA,x) ; preserves X-reg + plb + <<< +CopyTileToDP + _CopyTileToDP + rts + +; CopyTileToDPSprite -- same as above, but returns with the Data BAnk set to the sprite data bank +_CopyTileToDPSprite mac + ldy TileStore+TS_TILE_ADDR,x ; load the tile address + pei DP2_TILEDATA_AND_SPRITEDATA_BANKS + plb ; set to the tiledata bank + jsr (K_TS_COPY_TILE_DATA,x) ; preserves X-reg + plb + <<< +CopyTileToDPSprite + _CopyTileToDPSprite + rts + +; Simple pair of routines that copies just the tile data to the direct page workspace. Data Bank +; must be set to the TileData bank in entry. 
+; +; Preserves the X-register +CopyTileDataToDP +]line equ 0 + lup 8 + lda tiledata+{]line*4},y + sta tmp_tile_data+{]line*4} + + lda tiledata+{]line*4}+2,y + sta tmp_tile_data+{]line*4}+2 +]line equ ]line+1 + --^ + rts + +CopyTileDataToDPV +]src equ 7 +]dest equ 0 + lup 8 + lda tiledata+{]src*4},y + sta tmp_tile_data+{]dest*4} + + lda tiledata+{]src*4}+2,y + sta tmp_tile_data+{]dest*4}+2 +]src equ ]src-1 +]dest equ ]dest+1 + --^ + rts + +; Copy both the tile and mask data to the driect page space +_CopyTileDataAndMaskToDP +]line equ 0 + lup 8 + lda tiledata+{]line*4},y + sta tmp_tile_data+{]line*4} + lda tiledata+{]line*4}+32,y + sta tmp_tile_mask+{]line*4} + + lda tiledata+{]line*4}+2,y + sta tmp_tile_data+{]line*4}+2 + lda tiledata+{]line*4}+32+2,y + sta tmp_tile_mask+{]line*4}+2 +]line equ ]line+1 + --^ + rts + +_CopyTileDataAndMaskToDPV +]src equ 7 +]dest equ 0 + lup 8 + lda tiledata+{]src*4},y + sta tmp_tile_data+{]dest*4} + lda tiledata+{]src*4}+32,y + sta tmp_tile_mask+{]dest*4} + + lda tiledata+{]src*4}+2,y + sta tmp_tile_data+{]dest*4}+2 + lda tiledata+{]src*4}+32+2,y + sta tmp_tile_mask+{]dest*4}+2 +]src equ ]src-1 +]dest equ ]dest+1 + --^ + rts + +; Given a populate tmp_sprite_data buffer to use as a base, merge it with a tile and write to the +; code field +MergeSpriteWithTileFast + ldx TileStore+TS_TILE_ADDR,y + lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line + pha ; and put on the stack for later. Has TileStore bank in high byte. 
+ lda TileStore+TS_CODE_ADDR_LOW,y ; load the address of the code field + tay + plb + +]line equ 0 + lup 8 + lda tmp_sprite_data+{]line*4} + andl tiledata+{]line*4}+32,x + oral tiledata+{]line*4},x + sta: $0004+{]line*$1000},y + + lda tmp_sprite_data+{]line*4}+2 + andl tiledata+{]line*4}+32+2,x + oral tiledata+{]line*4}+2,x + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + plb + rts + +MergeSpriteWithTileSlow + ldx TileStore+TS_TILE_ADDR,y + lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line + pha ; and put on the stack for later. Has TileStore bank in high byte. + lda TileStore+TS_CODE_ADDR_LOW,y ; load the address of the code field + tay + plb + +]line equ 0 + lup 8 + lda tmp_sprite_data+{]line*4} + andl tiledata+{]line*4}+32,x + oral tiledata+{]line*4},x + sta: $0004+{]line*$1000},y + + lda tmp_sprite_data+{]line*4}+2 + andl tiledata+{]line*4}+32+2,x + oral tiledata+{]line*4}+2,x + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + jmp _FillPEAOpcode + + + + + +; Now, implement the generic Two, Three and Four sprite routines for both Over and Under rendering. These +; are fairly involved, so we try to only have a single implementation of them for now without excessve +; specialization. 
+
+; FourSpriteLine: for sprite_ptr3 down to sprite_ptr0, AND the accumulator with [sprite_ptrN],y and OR in (sprite_ptrN),y.
+FourSpriteLine mac
+; and [sprite_ptr3],y
+            db    $37,sprite_ptr3
+            ora   (sprite_ptr3),y
+; and [sprite_ptr2],y
+            db    $37,sprite_ptr2
+            ora   (sprite_ptr2),y
+; and [sprite_ptr1],y
+            db    $37,sprite_ptr1
+            ora   (sprite_ptr1),y
+; and [sprite_ptr0],y
+            db    $37,sprite_ptr0
+            ora   (sprite_ptr0),y
+            <<<
+
+FourSpritesFast
+            tyx                                    ; save for after compositing the sprites
+
+            ldy   TileStore+TS_TILE_ADDR,x         ; presumably the tile address consumed by the copy routine -- confirm
+            pei   DP2_TILEDATA_AND_TILESTORE_BANKS ; pushes two bank bytes
+            plb                                    ; first pushed bank, active during the copy
+            jsr   (K_TS_COPY_TILE_DATA,x)
+            plb                                    ; second pushed bank
+
+            pei   DP2_SPRITEDATA_AND_TILESTORE_BANKS
+            plb                                    ; set the sprite data bank
+
+]line       equ   0
+            lup   8
+            ldy   #{]line*SPRITE_PLANE_SPAN}
+            lda   tmp_tile_data+{]line*4}
+            FourSpriteLine
+            sta   tmp_tile_data+{]line*4}
+
+            ldy   #{]line*SPRITE_PLANE_SPAN}+2
+            lda   tmp_tile_data+{]line*4}+2
+            FourSpriteLine
+            sta   tmp_tile_data+{]line*4}+2
+]line       equ   ]line+1
+            --^
+
+            plb                                    ; pull the remaining byte pushed by the second pei
+            jmp   (K_TS_APPLY_TILE_DATA,x)
+
+
diff --git a/src/render/Slow.s b/src/render/Slow.s
new file mode 100644
index 0000000..a6a7c8a
--- /dev/null
+++ b/src/render/Slow.s
@@ -0,0 +1,51 @@
+; Identical routines to those in Fast.s, but also set the opcode. Used to render solid
+; tiles when the engine mode has other capabilities turned on
+;
+; The following functions are defined here
+;
+; GenericOverSlow  : Places data from tmp_sprite_data on top of the TileStore's tile
+; GenericUnderSlow : Places the TileStore's tile on top of tmp_sprite_data
+
+GenericOverSlow
+            lda   TileStore+TS_CODE_ADDR_HIGH,x    ; load the bank of the target code field line
+            pha                                    ; and put on the stack for later. Has TileStore bank in high byte.
+            ldy   TileStore+TS_CODE_ADDR_LOW,x     ; load the address of the code field
+            lda   TileStore+TS_TILE_ADDR,x         ; last TileStore read; must happen before the bank switch
+            tax
+            plb                                    ; FIX: switch to the code field bank; _FillPEAOpcode pulls the second pushed byte
+]line       equ   0
+            lup   8
+            ldal  tiledata+{]line*4},x
+            and   tmp_sprite_mask+{]line*4}        ; clear the tile where the sprite is drawn
+            ora   tmp_sprite_data+{]line*4}        ; and put the sprite on top
+            sta:  $0004+{]line*$1000},y
+
+            ldal  tiledata+{]line*4}+2,x
+            and   tmp_sprite_mask+{]line*4}+2
+            ora   tmp_sprite_data+{]line*4}+2
+            sta:  $0001+{]line*$1000},y
+]line       equ   ]line+1
+            --^
+            jmp   _FillPEAOpcode
+
+GenericUnderSlow
+            lda   TileStore+TS_CODE_ADDR_HIGH,x    ; load the bank of the target code field line
+            pha                                    ; and put on the stack for later. Has TileStore bank in high byte.
+            ldy   TileStore+TS_CODE_ADDR_LOW,x     ; load the address of the code field
+            lda   TileStore+TS_TILE_ADDR,x         ; last TileStore read; must happen before the bank switch
+            tax
+            plb                                    ; FIX: switch to the code field bank; _FillPEAOpcode pulls the second pushed byte
+]line       equ   0
+            lup   8
+            lda   tmp_sprite_data+{]line*4}
+            andl  tiledata+{]line*4}+32,x          ; keep the sprite only where the tile mask allows it
+            oral  tiledata+{]line*4},x             ; FIX: OR in the tile data (was the mask at +32)
+            sta:  $0004+{]line*$1000},y
+
+            lda   tmp_sprite_data+{]line*4}+2
+            andl  tiledata+{]line*4}+32+2,x
+            oral  tiledata+{]line*4}+2,x           ; FIX: OR in the tile data (was the mask at +32+2)
+            sta:  $0001+{]line*$1000},y
+]line       equ   ]line+1
+            --^
+            jmp   _FillPEAOpcode
diff --git a/src/render/Sprite1.s b/src/render/Sprite1.s
new file mode 100644
index 0000000..28d433a
--- /dev/null
+++ b/src/render/Sprite1.s
@@ -0,0 +1,180 @@
+; Specialized routines that can be assigned to K_TS_ONE_SPRITE for rendering a single sprite into
+; a tile. There are more variants of this function because having a single sprite in a tile is a very
+; common scenario, so we put additional effort into optimizing this case.
+
+;------------------------------
+; Section: Above Tile Renderers
+
+; The simplest implementation. When drawing a sprite over Tile 0 in FAST mode, we can just copy the
+; sprite data into the code field directly.
+
+_OneSpriteFastOver0
+            ldy   TileStore+TS_CODE_ADDR_HIGH,x    ; load the bank of the target code field line
+            phy                                    ; and put on the stack for later. Has TileStore bank in high byte.
+            ldy   TileStore+TS_CODE_ADDR_LOW,x     ; load the address of the code field
+            tax                                    ; VBuff address from SpriteBitsToVBuffAddrs macro
+            plb                                    ; set to the code field bank
+
+]line       equ   0
+            lup   8
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN},x
+            sta:  $0004+{]line*$1000},y
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
+            sta:  $0001+{]line*$1000},y
+]line       equ   ]line+1
+            --^
+
+            plb                                    ; Restore the TileStore bank
+            rts
+
+; Next implementation; drawing a sprite onto a regular tile. In this case the tile data is first
+; copied into the direct page buffer (CopyTileDataToDP, or CopyTileDataToDPV for the V entry point)
+; and then combined with the sprite mask and sprite data on its way into the code field.
+
+; The 1-sprite dispatch preserves the X-register, so it already points to the TileStore
+
+_OneSpriteFastOverV
+            jsr   CopyTileDataToDPV                ; NOTE(review): reads tiledata+n,y -- Y and the data bank must be set up by the caller; confirm
+            bra   _OneSpriteFastOver
+
+_OneSpriteFastOverA
+            jsr   CopyTileDataToDP                 ; NOTE(review): same Y / data bank precondition as above; confirm
+
+_OneSpriteFastOver
+            lda   TileStore+TS_CODE_ADDR_HIGH,x    ; load the bank of the target code field line
+            pha                                    ; and put on the stack for later. Has TileStore bank in high byte.
+            ldy   TileStore+TS_CODE_ADDR_LOW,x     ; load the address of the code field
+            ldx   sprite_ptr0                      ; X = sprite address; sprite_ptr0 is not written here, so the caller must have set it
+            plb                                    ; set to the code field bank
+
+]line       equ   0
+            lup   8
+            lda   tmp_tile_data+{]line*4}
+            andl  spritemask+{]line*SPRITE_PLANE_SPAN},x
+            oral  spritedata+{]line*SPRITE_PLANE_SPAN},x
+            sta:  $0004+{]line*$1000},y
+
+            lda   tmp_tile_data+{]line*4}+2
+            andl  spritemask+{]line*SPRITE_PLANE_SPAN}+2,x
+            oral  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
+            sta:  $0001+{]line*$1000},y
+]line       equ   ]line+1
+            --^
+            plb                                    ; pull the second pushed byte (TileStore bank)
+            rts
+
+; This is the "SLOW" variant that fills in the PEA opcode specialized for Tile 0.
+
+_OneSpriteSlowOver0
+            ldy   TileStore+TS_CODE_ADDR_HIGH,x    ; load the bank of the target code field line
+            phy                                    ; and put on the stack for later. Has TileStore bank in high byte.
+            ldy   TileStore+TS_CODE_ADDR_LOW,x     ; load the address of the code field
+            tax                                    ; VBuff address from SpriteBitsToVBuffAddrs macro
+            plb                                    ; set to the code field bank
+
+]line       equ   0
+            lup   8
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN},x
+            sta:  $0004+{]line*$1000},y
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
+            sta:  $0001+{]line*$1000},y
+]line       equ   ]line+1
+            --^
+
+            jmp   _FillPEAOpcode
+
+; Slow variant for regular tile.
+
+_OneSpriteSlowOver
+            jsr   CopyTileDataToDP                 ; NOTE(review): reads tiledata+n,y -- Y and the data bank must be set up by the caller; confirm
+
+            lda   TileStore+TS_CODE_ADDR_HIGH,x    ; load the bank of the target code field line
+            pha                                    ; and put on the stack for later. Has TileStore bank in high byte.
+            ldy   TileStore+TS_CODE_ADDR_LOW,x     ; load the address of the code field
+            ldx   sprite_ptr0
+            plb                                    ; set to the code field bank
+
+]line       equ   0
+            lup   8
+            lda   tmp_tile_data+{]line*4}
+            andl  spritemask+{]line*SPRITE_PLANE_SPAN},x
+            oral  spritedata+{]line*SPRITE_PLANE_SPAN},x
+            sta:  $0004+{]line*$1000},y
+
+            lda   tmp_tile_data+{]line*4}+2
+            andl  spritemask+{]line*SPRITE_PLANE_SPAN}+2,x
+            oral  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
+            sta:  $0001+{]line*$1000},y
+]line       equ   ]line+1
+            --^
+
+; Fall through here to give the common case a small boost
+_FillPEAOpcode
+            sep   #$20                             ; 8-bit accumulator so each store below writes a single byte
+            lda   #$F4                             ; $F4 = PEA opcode
+]line       equ   0
+            lup   8
+            sta:  $0000+{]line*$1000},y            ; opcode byte in front of the operand stored at $0001
+            sta:  $0003+{]line*$1000},y            ; opcode byte in front of the operand stored at $0004
+]line       equ   ]line+1
+            --^
+            rep   #$20                             ; back to a 16-bit accumulator
+
+            plb                                    ; Restore the TileStore bank
+            rts
+
+;------------------------------
+; Section: Below Tile Renderers
+
+; Drawing under the zero tile is the same as not drawing a sprite for both the fast and slow cases
+_OneSpriteFastUnderA
+            jsr   _CopyTileDataAndMaskToDP
+            bra   _OneSpriteFastUnder
+
+_OneSpriteFastUnderV
+            jsr   _CopyTileDataAndMaskToDPV
+
+_OneSpriteFastUnder
+            lda   TileStore+TS_CODE_ADDR_HIGH,x    ; load the bank of the target code field line
+            pha                                    ; and put on the stack for later. Has TileStore bank in high byte.
+            ldy   TileStore+TS_CODE_ADDR_LOW,x     ; load the address of the code field
+            ldx   sprite_ptr0
+            plb                                    ; set to the code field bank
+
+]line       equ   0
+            lup   8
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN},x
+            and   tmp_tile_mask+{]line*4}          ; FIX: mask the sprite with the tile mask (was ora)
+            ora   tmp_tile_data+{]line*4}          ; then lay the tile data on top
+            sta:  $0004+{]line*$1000},y
+
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
+            and   tmp_tile_mask+{]line*4}+2        ; FIX: mask the sprite with the tile mask (was ora)
+            ora   tmp_tile_data+{]line*4}+2
+            sta:  $0001+{]line*$1000},y
+]line       equ   ]line+1
+            --^
+
+            plb                                    ; pull the second pushed byte (TileStore bank)
+            rts
+
+_OneSpriteSlowUnder0
+            lda   TileStore+TS_CODE_ADDR_HIGH,x    ; load the bank of the target code field line
+            pha                                    ; and put on the stack for later. Has TileStore bank in high byte.
+            ldy   TileStore+TS_CODE_ADDR_LOW,x     ; load the address of the code field
+            lda   TileStore+TS_TILE_ADDR,x         ; load the address of this tile's data (pre-calculated)
+            plb                                    ; set the code field bank
+            jmp   (K_TS_BASE_TILE_DISP,x)          ; go to the tile copy routine
+
+;--------------------------------
+; Helper functions for one Sprite
+CopyOneSpriteDataToDP
+]line       equ   0
+            lup   8
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN},x
+            sta   tmp_sprite_data+{]line*4}
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
+            sta   tmp_sprite_data+{]line*4}+2
+]line       equ   ]line+1
+            --^
+            rts
\ No newline at end of file
diff --git a/src/render/Sprite2.s b/src/render/Sprite2.s
new file mode 100644
index 0000000..6f296da
--- /dev/null
+++ b/src/render/Sprite2.s
@@ -0,0 +1,102 @@
+; Specialized routines for handling two sprites. Like Sprite3.s and Sprite4.s there are four
+; variants -- one to handle over / under sprite orders and one each for whether the mask needs
+; to be used or not.
+TwoSpriteLine mac
+            db    $37,sprite_ptr1                  ; and [sprite_ptr1],y
+            ora   (sprite_ptr1),y
+            db    $37,sprite_ptr0                  ; and [sprite_ptr0],y
+            ora   (sprite_ptr0),y
+            <<<
+; TwoSpriteData: A = (sprite_ptr1),y, then AND with [sprite_ptr0],y and OR in (sprite_ptr0),y.
+TwoSpriteData mac
+            lda   (sprite_ptr1),y
+            db    $37,sprite_ptr0                  ; and [sprite_ptr0],y
+            ora   (sprite_ptr0),y
+            <<<
+; TwoSpriteMask: A = [sprite_ptr1],y AND [sprite_ptr0],y.
+TwoSpriteMask mac
+            db    $B7,sprite_ptr1                  ; lda [sprite_ptr1],y
+            db    $37,sprite_ptr0                  ; and [sprite_ptr0],y
+            <<<
+
+TwoSpritesOver
+            tyx                                    ; save after compositing the sprites
+            phb                                    ; save the current bank
+            jsr   CopyTileToDPSprite               ; copy necessary tile data to the direct page
+
+]line       equ   0
+            lup   8
+            ldy   #{]line*SPRITE_PLANE_SPAN}
+            lda   tmp_tile_data+{]line*4}
+            TwoSpriteLine
+            sta   tmp_tile_data+{]line*4}
+
+            ldy   #{]line*SPRITE_PLANE_SPAN}+2
+            lda   tmp_tile_data+{]line*4}+2
+            TwoSpriteLine
+            sta   tmp_tile_data+{]line*4}+2
+]line       equ   ]line+1
+            --^
+
+            plb                                    ; matches the phb at the top
+            jmp   (K_TS_APPLY_TILE_DATA,x)
+
+
+TwoSpritesUnderFast
+            tyx                                    ; keep the TileStore offset in X; the copy below uses Y as its row index
+            jsr   CopyTwoSpritesDataToDP           ; copy necessary sprite data to the direct page. NOTE(review): (sprite_ptrN),y reads use the current data bank -- confirm it is the sprite bank here
+            txy                                    ; FIX: MergeSpriteWithTileFast indexes the TileStore with Y, which the copy clobbered
+            jmp   MergeSpriteWithTileFast          ; FIX: no phb before this -- the merge routine balances only its own push before rts
+; NOTE(review): the loop below is unreachable (it follows the jmp above); it looks like a leftover copy of TwoSpritesOver.
+]line       equ   0
+            lup   8
+            ldy   #{]line*SPRITE_PLANE_SPAN}
+            lda   tmp_tile_data+{]line*4}
+            TwoSpriteLine
+            sta   tmp_tile_data+{]line*4}
+
+            ldy   #{]line*SPRITE_PLANE_SPAN}+2
+            lda   tmp_tile_data+{]line*4}+2
+            TwoSpriteLine
+            sta   tmp_tile_data+{]line*4}+2
+]line       equ   ]line+1
+            --^
+
+            plb
+            jmp   (K_TS_APPLY_TILE_DATA,x)
+
+;---------------------------------
+; Helper functions for two Sprites
+CopyTwoSpritesDataToDP
+]line       equ   0
+            lup   8
+            ldy   #{]line*SPRITE_PLANE_SPAN}
+            TwoSpriteData
+            sta   tmp_sprite_data+{]line*4}
+
+            ldy   #{]line*SPRITE_PLANE_SPAN}+2
+            TwoSpriteData
+            sta   tmp_sprite_data+{]line*4}+2
+]line       equ   ]line+1
+            --^
+            rts
+CopyFourSpritesDataAndMaskToDP
+CopyThreeSpritesDataAndMaskToDP
+CopyTwoSpritesDataAndMaskToDP
+]line       equ   0
+            lup   8
+            ldy   #{]line*SPRITE_PLANE_SPAN}       ; NOTE(review): the Three/Four labels above share this body, which only reads sprite_ptr0 and sprite_ptr1
+            TwoSpriteData
+            sta   tmp_sprite_data+{]line*4}
+            TwoSpriteMask
+            sta   tmp_sprite_mask+{]line*4}
+
+            ldy   #{]line*SPRITE_PLANE_SPAN}+2
+            TwoSpriteData
+            sta   tmp_sprite_data+{]line*4}+2
+            TwoSpriteMask
+            sta   tmp_sprite_mask+{]line*4}+2
+]line       equ   ]line+1
+            --^
+            jmp   (K_TS_SPRITE_TILE_DISP,x)        ; X is not modified by this routine, so it is still the caller's value
+
diff --git a/src/render/Sprite3.s b/src/render/Sprite3.s
new file mode 100644
index 0000000..30bc408
--- /dev/null
+++ b/src/render/Sprite3.s
@@ -0,0 +1,36 @@
+
+ThreeSpriteLine mac
+            db    $37,sprite_ptr2                  ; and [sprite_ptr2],y
+            ora   (sprite_ptr2),y
+            db    $37,sprite_ptr1                  ; and [sprite_ptr1],y
+            ora   (sprite_ptr1),y
+            db    $37,sprite_ptr0                  ; and [sprite_ptr0],y
+            ora   (sprite_ptr0),y
+            <<<
+
+; Three sprites without extra masking
+ThreeSpritesFast
+            tyx                                    ; save for after compositing the sprites
+            phb                                    ; FIX: save the entry bank; pei pushes two bytes but three are pulled below
+            ldy   TileStore+TS_TILE_ADDR,x
+            pei   DP2_TILEDATA_AND_SPRITEDATA_BANKS
+            plb                                    ; set to the tiledata bank
+            jsr   (K_TS_COPY_TILE_DATA,x)
+            plb                                    ; set to the sprite data bank
+
+]line       equ   0
+            lup   8
+            ldy   #{]line*SPRITE_PLANE_SPAN}
+            lda   tmp_tile_data+{]line*4}
+            ThreeSpriteLine
+            sta   tmp_tile_data+{]line*4}
+
+            ldy   #{]line*SPRITE_PLANE_SPAN}+2
+            lda   tmp_tile_data+{]line*4}+2
+            ThreeSpriteLine
+            sta   tmp_tile_data+{]line*4}+2
+]line       equ   ]line+1
+            --^
+
+            plb                                    ; restore the bank saved by the phb at the top
+            jmp   _CopyDP2ToCodeField
diff --git a/src/render/Sprite4.s b/src/render/Sprite4.s
new file mode 100644
index 0000000..e69de29
diff --git a/src/static/TileStore.s b/src/static/TileStore.s
index f88cb96..7b13242 100644
--- a/src/static/TileStore.s
+++ b/src/static/TileStore.s
@@ -388,11 +388,13 @@ OldOneSecVec ENT
             ds    4
 Timers      ENT
             ds    TIMER_REC_SIZE*MAX_TIMERS
+
+; From the IIgs ref
 DefaultPalette ENT
-            dw    $0000,$007F,$0090,$0FF0
-            dw    $000F,$0080,$0f70,$0FFF
-            dw    $0fa9,$0ff0,$00e0,$04DF
-            dw    $0d00,$078f,$0ccc,$0FFF
+            dw    $0000,$0777,$0841,$072C
+            dw    $000F,$0080,$0F70,$0D00
+            dw    $0FA9,$0FF0,$00E0,$04DF
+            dw    $0DAF,$078F,$0CCC,$0FFF
 
 ; 0. Full Screen : 40 x 25 320 x 200 (32,000 bytes (100.0%))
 ; 1. Sword of Sodan : 34 x 24 272 x 192 (26,112 bytes ( 81.6%))
diff --git a/src/static/TileStoreDefs.s b/src/static/TileStoreDefs.s
index c6a41c2..8a5d3d1 100644
--- a/src/static/TileStoreDefs.s
+++ b/src/static/TileStoreDefs.s
@@ -12,13 +12,14 @@ TS_TILE_ADDR equ {TILE_STORE_SIZE*3} ; cached value, the address
 TS_CODE_ADDR_LOW equ {TILE_STORE_SIZE*4} ; const value, address of this tile in the code fields
 TS_CODE_ADDR_HIGH equ {TILE_STORE_SIZE*5}
 TS_WORD_OFFSET equ {TILE_STORE_SIZE*6} ; const value, word offset value for this tile if LDA (dp),y instructions re used
-TS_BASE_ADDR equ {TILE_STORE_SIZE*7} ; const value, because there are two rows of tiles per bank, this is set to $0000 ot $8000.
+;TS_BASE_ADDR equ {TILE_STORE_SIZE*7} ; const value, because there are two rows of tiles per bank, this is set to $0000 or $8000.
+TS_JMP_ADDR equ {TILE_STORE_SIZE*7} ; const value, address of the 32-byte snippet space for this tile
 TS_SCREEN_ADDR equ {TILE_STORE_SIZE*8} ; cached value of on-screen location of tile. Used for DirtyRender.
; TODO: Move these arrays into the K bank to support direct dispatch via jmp (abs,x) -TS_BASE_TILE_COPY equ {TILE_STORE_SIZE*9} ; derived from TS_TILE_ID to optimize tile copy to support sprite rendering +; TS_BASE_TILE_COPY equ {TILE_STORE_SIZE*9} ; derived from TS_TILE_ID to optimize tile copy to support sprite rendering ; TS_BASE_TILE_DISP equ {TILE_STORE_SIZE*10} ; derived from TS_TILE_ID to optimize base (non-sprite) tile dispatch in the Render function -TS_DIRTY_TILE_DISP equ {TILE_STORE_SIZE*11} ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function +; TS_DIRTY_TILE_DISP equ {TILE_STORE_SIZE*11} ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function TILE_STORE_NUM equ 12 ; Need this many parallel arrays diff --git a/src/tiles/DirtyTileQueue.s b/src/tiles/DirtyTileQueue.s index 1dfbe31..459c00e 100644 --- a/src/tiles/DirtyTileQueue.s +++ b/src/tiles/DirtyTileQueue.s @@ -56,11 +56,9 @@ _PushDirtyTileY :occupied2 tya ; Make sure TileStore offset is returned in the accumulator rts + ; Remove a dirty tile from the list and return it in state ready to be rendered. It is important -; that the core rendering functions *only* use _PopDirtyTile to get a list of tiles to update, -; because this routine merges the tile IDs stored in the Tile Store with the Sprite -; information to set the TILE_SPRITE_BIT. This is the *only* place in the entire code base that -; applies this bit to a tile descriptor. +; that the core rendering functions *only* use _PopDirtyTile to get a list of tiles to update. 
_PopDirtyTile ldy DirtyTileCount bne _PopDirtyTile2 @@ -93,7 +91,6 @@ pdtf_not_empty cpx #16 ; If there are >= 8 elements, then bcs full_chunk ; do a full chunk -; stz DP2_DIRTY_TILE_COUNT ; Otherwise, this pass will handle them all jmp (at_table,x) at_table da at_exit,at_one,at_two,at_three da at_four,at_five,at_six,at_seven @@ -109,76 +106,76 @@ full_chunk txa ldx DirtyTiles+14,y stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile ldy DP2_DIRTY_TILE_COUNT ldx DirtyTiles+12,y stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile ldy DP2_DIRTY_TILE_COUNT ldx DirtyTiles+10,y stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile ldy DP2_DIRTY_TILE_COUNT ldx DirtyTiles+8,y stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile ldy DP2_DIRTY_TILE_COUNT ldx DirtyTiles+6,y stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile ldy DP2_DIRTY_TILE_COUNT ldx DirtyTiles+4,y stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile ldy DP2_DIRTY_TILE_COUNT ldx DirtyTiles+2,y stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile ldy DP2_DIRTY_TILE_COUNT ldx DirtyTiles+0,y stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile jmp _PopDirtyTilesFast ; These routines just handle between 1 and 7 dirty tiles at_seven ldx DirtyTiles+12 stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile at_six ldx DirtyTiles+10 stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile at_five ldx DirtyTiles+8 stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile at_four ldx DirtyTiles+6 stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile at_three ldx DirtyTiles+4 stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile at_two ldx DirtyTiles+2 stz TileStore+TS_DIRTY,x - jsr _RenderTileFast + jsr _RenderTile at_one ldx DirtyTiles+0 stz TileStore+TS_DIRTY,x - jmp _RenderTileFast + jmp _RenderTile diff --git a/src/tiles/FastRenderer.s b/src/tiles/FastRenderer.s index 
aae3ffe..e69de29 100644 --- a/src/tiles/FastRenderer.s +++ b/src/tiles/FastRenderer.s @@ -1,314 +0,0 @@ -; If the engine mode has the second background layer disabled, we take advantage of that to -; be more efficient in our rendering. Basically, without the second layer, there is no need -; to use the tile mask information. -; -; If there are no sprites, then we copy the tile data into the code field as fast as possible. -; If there are sprites, then the sprite data is flattened and stored into a direct page buffer -; and then copied into the code field -_RenderTileFast - lda TileStore+TS_SPRITE_FLAG,x ; any sprites on this line? - bne :sprites - -_OneSpriteFastUnder0 -_RenderNoSprite - lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line - pha ; and put on the stack for later. Has TileStore bank in high byte. - ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field - lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated) - plb ; set the code field bank - jmp (K_TS_BASE_TILE_DISP,x) ; go to the tile copy routine -:sprites jmp (K_TS_SPRITE_TILE_DISP,x) ; go to the sprite+tile routine - -; Optimized routines to render sprites on top of the tile data and update the code field -; assuming that the opcode will never need to be reset, e.g. all of the instructions are -; PEA opcodes, so only the operands need to be set. -; -; Since the sprite is drawn on top of the tile, the first step is to copy the tile data -; into the direct page temporary space, then dispatch to the appropriate sprite rendering -; subroutine -FastSpriteOver - txy - SpriteBitsToVBuffAddrs OneSpriteFast;TwoSpritesFast;ThreeSpritesFast;FourSpritesFast - -; Optimized routines for drawing sprites underneath the tile. In this case, the sprite is drawn first, -; so we have to calculate the sprite dispatch subrotine to copy the sprite data into the direct -; page space and then merge it with the tile data at the end. 
-FastSpriteUnder - txy - SpriteBitsToVBuffAddrs OneSpriteFastUnder;OneSpriteFastUnder;OneSpriteFastUnder;OneSpriteFastUnder - -; This handles sprites with the tile above -OneSpriteFastUnder - tyx - jmp (K_TS_ONE_SPRITE,x) - -; General copy -_OneSpriteFastUnder - tax - jsr _CopySpriteDataToDP2 ; preserves Y - - ldx TileStore+TS_TILE_ADDR,y - lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line - pha ; and put on the stack for later. Has TileStore bank in high byte. - lda TileStore+TS_CODE_ADDR_LOW,y ; load the address of the code field - tay - plb - -]line equ 0 - lup 8 - lda tmp_tile_data+{]line*4} - andl tiledata+{]line*4}+32,x - oral tiledata+{]line*4},x - sta: $0004+{]line*$1000},y - - lda tmp_tile_data+{]line*4}+2 - andl tiledata+{]line*4}+32+2,x - oral tiledata+{]line*4}+2,x - sta: $0001+{]line*$1000},y -]line equ ]line+1 - --^ - plb - rts - - -_CopySpriteDataToDP2 -]line equ 0 - lup 8 - ldal spritedata+{]line*SPRITE_PLANE_SPAN},x - sta tmp_tile_data+{]line*4} - - ldal spritedata+{]line*SPRITE_PLANE_SPAN}+2,x - sta tmp_tile_data+{]line*4}+2 -]line equ ]line+1 - --^ - rts - -; Where there are sprites involved, the first step is to call a routine to copy the -; tile data into a temporary buffer. Then the sprite data is merged and placed into -; the code field. -; -; A = vbuff address -; Y = tile store address -OneSpriteFast - tyx - jmp (K_TS_ONE_SPRITE,x) - -; Specialize when the tile is Tile 0 -_OneSpriteFastOver0 - ldy TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line - phy ; and put on the stack for later. Has TileStore bank in high byte. 
- ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field - tax - plb - -]line equ 0 - lup 8 - ldal spritedata+{]line*SPRITE_PLANE_SPAN},x - sta: $0004+{]line*$1000},y - ldal spritedata+{]line*SPRITE_PLANE_SPAN}+2,x - sta: $0001+{]line*$1000},y -]line equ ]line+1 - --^ - plb - rts - -; General copy -_OneSpriteFastOver - sta sprite_ptr0 - ldy TileStore+TS_TILE_ADDR,x ; load the tile address - jsr (K_TS_COPY_TILE_DATA,x) ; This routine *must* preserve X register - - lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line - pha ; and put on the stack for later. Has TileStore bank in high byte. - ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field - ldx sprite_ptr0 - plb - -]line equ 0 - lup 8 - lda tmp_tile_data+{]line*4} - andl spritemask+{]line*SPRITE_PLANE_SPAN},x - oral spritedata+{]line*SPRITE_PLANE_SPAN},x - sta: $0004+{]line*$1000},y - - lda tmp_tile_data+{]line*4}+2 - andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x - oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x - sta: $0001+{]line*$1000},y -]line equ ]line+1 - --^ - plb - rts - -TwoSpriteLine mac -; and [sprite_ptr1],y - db $37,sprite_ptr1 - ora (sprite_ptr1),y -; and [sprite_ptr0],y - db $37,sprite_ptr0 - ora (sprite_ptr0),y - <<< - -TwoSpritesFast - tyx ; save for after compositing the sprites - - ldy TileStore+TS_TILE_ADDR,x - pei DP2_TILEDATA_AND_TILESTORE_BANKS - plb - jsr (K_TS_COPY_TILE_DATA,x) - plb - - pei DP2_SPRITEDATA_AND_TILESTORE_BANKS - plb ; set the sprite data bank - -]line equ 0 - lup 8 - ldy #{]line*SPRITE_PLANE_SPAN} - lda tmp_tile_data+{]line*4} - TwoSpriteLine - sta tmp_tile_data+{]line*4} - - ldy #{]line*SPRITE_PLANE_SPAN}+2 - lda tmp_tile_data+{]line*4}+2 - TwoSpriteLine - sta tmp_tile_data+{]line*4}+2 -]line equ ]line+1 - --^ - - plb ; restore access to data bank - -; Fall through -_CopyDP2ToCodeField - lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line - pha ; and put on the stack for later. 
Has TileStore bank in high byte. - ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field - plb ; Set the CODE_ADDR_HIGH bank - -]line equ 0 - lup 8 - lda tmp_tile_data+{]line*4} - sta: $0004+{]line*$1000},y - lda tmp_tile_data+{]line*4}+2 - sta: $0001+{]line*$1000},y -]line equ ]line+1 - --^ - - plb ; Reset to the bank in the top byte of CODE_ADDR_HIGH - rts - -ThreeSpriteLine mac -; and [sprite_ptr2],y - db $37,sprite_ptr2 - ora (sprite_ptr2),y -; and [sprite_ptr1],y - db $37,sprite_ptr1 - ora (sprite_ptr1),y -; and [sprite_ptr0],y - db $37,sprite_ptr0 - ora (sprite_ptr0),y - <<< - -ThreeSpritesFast - tyx ; save for after compositing the sprites - - ldy TileStore+TS_TILE_ADDR,x - pei DP2_TILEDATA_AND_TILESTORE_BANKS - plb - jsr (K_TS_COPY_TILE_DATA,x) - plb - - pei DP2_SPRITEDATA_AND_TILESTORE_BANKS - plb ; set the sprite data bank - -]line equ 0 - lup 8 - ldy #{]line*SPRITE_PLANE_SPAN} - lda tmp_tile_data+{]line*4} - ThreeSpriteLine - sta tmp_tile_data+{]line*4} - - ldy #{]line*SPRITE_PLANE_SPAN}+2 - lda tmp_tile_data+{]line*4}+2 - ThreeSpriteLine - sta tmp_tile_data+{]line*4}+2 -]line equ ]line+1 - --^ - - plb - jmp _CopyDP2ToCodeField - -FourSpriteLine mac -; and [sprite_ptr3],y - db $37,sprite_ptr3 - ora (sprite_ptr3),y -; and [sprite_ptr2],y - db $37,sprite_ptr2 - ora (sprite_ptr2),y -; and [sprite_ptr1],y - db $37,sprite_ptr1 - ora (sprite_ptr1),y -; and [sprite_ptr0],y - db $37,sprite_ptr0 - ora (sprite_ptr0),y - <<< - -FourSpritesFast - tyx ; save for after compositing the sprites - - ldy TileStore+TS_TILE_ADDR,x - pei DP2_TILEDATA_AND_TILESTORE_BANKS - plb - jsr (K_TS_COPY_TILE_DATA,x) - plb - - pei DP2_SPRITEDATA_AND_TILESTORE_BANKS - plb ; set the sprite data bank - -]line equ 0 - lup 8 - ldy #{]line*SPRITE_PLANE_SPAN} - lda tmp_tile_data+{]line*4} - FourSpriteLine - sta tmp_tile_data+{]line*4} - - ldy #{]line*SPRITE_PLANE_SPAN}+2 - lda tmp_tile_data+{]line*4}+2 - FourSpriteLine - sta tmp_tile_data+{]line*4}+2 -]line equ ]line+1 - --^ - 
- plb - jmp _CopyDP2ToCodeField - -_CopyTileDataToDP2 - pei DP2_TILEDATA_AND_TILESTORE_BANKS ; Setting the bank saves 16 cycles and costs 14, so it's a bit faster, - plb ; but we really do it to preserve the X register -]line equ 0 - lup 8 - lda tiledata+{]line*4},y - sta tmp_tile_data+{]line*4} - - lda tiledata+{]line*4}+2,y - sta tmp_tile_data+{]line*4}+2 -]line equ ]line+1 - --^ - plb - rts - -_CopyTileDataToDP2V - pei DP2_TILEDATA_AND_TILESTORE_BANKS ; Setting the bank saves 16 cycles and costs 14, so it's a bit faster, - plb ; but we really do it to preserve the X register -]src equ 7 -]dest equ 0 - lup 8 - lda tiledata+{]src*4},y - sta tmp_tile_data+{]dest*4} - - lda tiledata+{]src*4}+2,y - sta tmp_tile_data+{]dest*4}+2 -]src equ ]src-1 -]dest equ ]dest+1 - --^ - plb - rts \ No newline at end of file