Add dynamic rendering functions

2025-08-06 03:26:33 +00:00 · 2022-06-21 15:28:58 -05:00
parent a91f39aab8
commit 76a9710114
6 changed files with 315 additions and 26 deletions
--- a/src/Defs.s
+++ b/src/Defs.s
@@ -190,7 +190,7 @@ PAD_KEY_DOWN           equ   $04
 ; TILE_RESERVED_BIT      equ   $8000
 TILE_PRIORITY_BIT      equ   $4000                  ; Put tile on top of sprite
 TILE_FRINGE_BIT        equ   $2000                  ; Unused
-TILE_MASK_BIT          equ   $1000                  ; Hint bit used in TWO_LAYER_MODE to optimize rendering
+TILE_SOLID_BIT         equ   $1000                  ; Hint bit used in TWO_LAYER_MODE to optimize rendering
 TILE_DYN_BIT           equ   $0800                  ; Is this a Dynamic Tile?
 TILE_VFLIP_BIT         equ   $0400
 TILE_HFLIP_BIT         equ   $0200
--- a/src/Tiles.s
+++ b/src/Tiles.s
@@ -197,7 +197,10 @@ _SetTile
                 jsr  _GetTileAddr
                 sta  TileStore+TS_TILE_ADDR,x      ; Committed to drawing this tile, so get the address of the tile in the tiledata bank for later

-; Set the standard renderer procs for this tile.
+; Set the renderer procs for this tile.
+;
+; NOTE: Later on, optimize this to just take the Tile ID & TILE_CTRL_MASK and lookup the right proc
+;       table address from a lookup table....
 ;
 ;  1. The dirty render proc is always set the same.
 ;  2. If BG1 and DYN_TILES are disabled, then the TS_BASE_TILE_DISP is selected from the Fast Renderers, otherwise
@@ -243,23 +246,10 @@ _SetTile
                 brl  :setTileDyn

 :not_dyn
-                 lda  TileStore+TS_TILE_ID,x
-                 and  #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value
-                 xba
-                 tay
-;                 ldal DirtyTileProcs,x
-;                 sta  TileStore+TS_DIRTY_TILE_DISP,y
-
-;                 ldal CopyTileProcs,x
-;                 sta  TileStore+TS_DIRTY_TILE_COPY,y
-
-                 lda  TileStore+TS_TILE_ID,x        ; Get the non-sprite dispatch address
-                 and  #TILE_CTRL_MASK
-                 xba
-                 tay
-;                 ldal TileProcs,y
-;                 sta  TileStore+TS_BASE_TILE_DISP,y
-                 jmp  _PushDirtyTileX               ; on the next call to _ApplyTiles
+                 ldy  #SlowProcs            ; safe for now....
+                 lda  procIdx
+                 jsr  _SetTileProcs
+                 jmp  _PushDirtyTileX

 ; Specialized check for when the engine is in "Fast" mode. If is a simple decision tree based on whether
 ; the tile priority bit is set, and whether this is the special tile 0 or not.
@@ -270,12 +260,22 @@ _SetTile
                 jmp  _PushDirtyTileX

 ; Specialized check for when the engine has enabled dynamic tiles. In this case we are no longer
-; guaranteed that the opcodes in a tile are PEA instructions.  If the old tile and the new tile
-; are both Dynamic tiles or both Basic tiles, then  we can use an optimized routine.  Otherwise
-; we must set the opcodes as well as the operands
+; guaranteed that the opcodes in a tile are PEA instructions.  
 :setTileDyn
+                 lda  #TILE_DYN_BIT
+                 bit  newTileId
+                 beq  :pickSlowProc            ; If the Dynamic bit is not set, select a tile proc that sets opcodes

-;                 ldy  #DynProcs
+                 lda  newTileId                ; Otherwise chose one of the two dynamic tuples
+                 and  #TILE_PRIORITY_BIT
+                 beq  :pickDynProc             ; If the Priority bit is not set, pick the first entry
+                 lda  #1                       ; If the Priority bit is set, pick the other one
+
+:pickDynProc     ldy  #DynProcs
+                 jsr  _SetTileProcs
+                 jmp  _PushDirtyTileX
+
+:pickSlowProc    ldy  #SlowProcs
                 lda  procIdx
                 jsr  _SetTileProcs
                 jmp  _PushDirtyTileX
@@ -375,14 +375,20 @@ SlowUnderNV  dw   CopyTileVSlow,SpriteUnderVSlow,OneSpriteSlowUnderV
 ; that does not need to worry about a second background.  Because dynamic
 ; tiles don't support horizontal or vertical flipping, there are only two 
 ; sets of procedures: one for Over and one for Under.
-;DynOver      dw   _TBDynamicTile,DynamicOver,_OneSpriteDynamicOver
-;DynUnder     dw   _TBDynamicTile,DynamicUnder,_OneSpriteDynamicUnder
+DynProcs                                                                ; Table base; _SetTile loads this address into Y before calling _SetTileProcs
+DynOver      dw   CopyDynamicTile,DynamicOver,OneSpriteDynamicOver      ; First tuple: chosen by _SetTile when TILE_PRIORITY_BIT is clear
+DynUnder     dw   CopyDynamicTile,DynamicUnder,OneSpriteDynamicUnder    ; Second tuple: chosen by _SetTile when TILE_PRIORITY_BIT is set

 ; "Two Layer" procs. These are the most complex procs.  Generally,
 ; all of these methods are implemented by building up the data
 ; and mask into the direct page space and then calling a common
 ; function to create the complex code fragments in the code field.
 ; There is not a lot of opportuinity to optimize these routines.
+;
+; To improve the performance when two-layer rendering is enabled,
+; the TILE_SOLID_BIT hint bit can be set to indicate that a tile
+; has no transparency.  This allows one of the faster routines
+; to be selected.



--- a/src/Tool.s
+++ b/src/Tool.s
@@ -366,6 +366,7 @@ _TSGetSeconds
                put     render/Render.s
                put     render/Fast.s
                put     render/Slow.s
+                put     render/Dynamic.s
                put     render/Sprite1.s
                put     render/Sprite2.s
                put     tiles/DirtyTileQueue.s
--- a/src/blitter/Tiles10001.s
+++ b/src/blitter/Tiles10001.s
@@ -29,7 +29,7 @@ _TBDynamicSpriteTile
                 sta     _JTBL_CACHE                  ; within one tile, the second column is consecutive

                 lda     _OP_CACHE
-                 adc     #$0200
+                 adc     #$0200                       ; Advance to the next word
                 sta     _OP_CACHE

                 CopyDynWord  2;$0000
--- a/src/render/Dynamic.s
+++ b/src/render/Dynamic.s
@@ -0,0 +1,249 @@
+; Rendering functions for Dynamic tiles.  There are no Fast/Slow variants here
+; CopyDynamicTile
+;
+; Rewrite both code-field words on each of the eight tile lines as an
+; `LDA dp,x / PHA` triple whose operand comes from the low 7 bits of the tile ID.
+;
+; In:  X = TileStore offset of the tile
+;      Y = code field address of the tile's first line (lines are $1000 apart)
+; Out: pulls one byte into the data bank register before returning (the routine
+;      itself pushes nothing -- presumably the caller pushed the bank to restore)
+CopyDynamicTile
+            ldal   TileStore+TS_TILE_ID,x
+            and    #$007F                            ; operand byte for the LDA dp,x
+            ora    #$4800                            ; $48 = PHA, lands in the byte after the operand
+
+]line       equ    0                                 ; operand + PHA of the first column
+            lup    8
+            sta:   $0004+{]line*$1000},y
+]line       equ    ]line+1
+            --^
+
+            inc                                      ; the second column reads the following word
+            inc
+
+]line       equ    0                                 ; operand + PHA of the second column
+            lup    8
+            sta:   $0001+{]line*$1000},y
+]line       equ    ]line+1
+            --^
+
+            sep    #$20                              ; byte-wide stores for the opcodes
+            lda    #$B5                              ; $B5 = LDA dp,x
+]line       equ    0                                 ; drop the opcode in front of both operands on every line
+            lup    8
+            sta:   $0000+{]line*$1000},y
+            sta:   $0003+{]line*$1000},y
+]line       equ    ]line+1
+            --^
+            rep    #$20
+            plb
+            rts
+
+; These routines handle the sprites.  They rely on a fairly complicated macro that takes care of
+; populating the code field and snippet space
+; DynamicOver
+;
+; Render a dynamic tile with sprite data drawn over it.  Each of the 16 words is
+; handled by the CopyDynOver macro, which picks a PEA, an LDA dp,x / PHA, or a JMP
+; to a patched exception handler depending on the sprite mask word.
+;
+; In:  X = TileStore offset of the tile
+;      tmp_sprite_data / tmp_sprite_mask = 8 lines x 2 words of sprite data and mask
+; Clobbers A, X, Y, _JTBL_CACHE and _OP_CACHE
+DynamicOver
+            lda     TileStore+TS_TILE_ID,x       ; Start from the tile descriptor
+            and     #$007F                       ; keep the operand below (32 * 4)
+            ora     #$B500                       ; $B5 = LDA dp,x
+            xba                                  ; opcode in the low byte, operand in the high byte
+            sta     _OP_CACHE                    ; 2-byte instruction that loads the first column
+
+            lda     TileStore+TS_JMP_ADDR,x      ; Exception handler address for this tile
+            sta     _JTBL_CACHE
+
+            ldy   TileStore+TS_CODE_ADDR_LOW,x   ; Read everything from TileStore before the bank changes
+            lda   TileStore+TS_CODE_ADDR_HIGH,x
+            pha
+            plb                                  ; data bank = code field bank; one pushed byte stays for the final plb
+
+            CopyDynOver  0;$0003
+            CopyDynOver  4;$1003
+            CopyDynOver  8;$2003
+            CopyDynOver  12;$3003
+            CopyDynOver  16;$4003
+            CopyDynOver  20;$5003
+            CopyDynOver  24;$6003
+            CopyDynOver  28;$7003
+
+            lda     _JTBL_CACHE
+            sec
+            sbc     #32                          ; Snippets are 32 bytes wide, and within one tile the
+            sta     _JTBL_CACHE                  ; second column's snippet is the adjacent one
+
+            lda     _OP_CACHE
+            clc
+            adc     #$0200                       ; Bump the operand byte to the next word
+            sta     _OP_CACHE
+
+            CopyDynOver  2;$0000
+            CopyDynOver  6;$1000
+            CopyDynOver  10;$2000
+            CopyDynOver  14;$3000
+            CopyDynOver  18;$4000
+            CopyDynOver  22;$5000
+            CopyDynOver  26;$6000
+            CopyDynOver  30;$7000
+
+            plb
+            rts
+
+; DynamicUnder
+;
+; Render a dynamic tile on top of sprite data.  Every one of the 16 words becomes a
+; JMP to an exception handler that CopyDynUnder fills with
+;
+;            lda  #DATA        ; sprite word from tmp_sprite_data
+;            and  $80+n,x
+;            ora  n,x
+;            bra  *+16
+;
+; In:  X = TileStore offset of the tile
+;      tmp_sprite_data = 8 lines x 2 words of sprite data
+; Clobbers A, Y, _JTBL_CACHE, _OP_CACHE and _Y_REG
+DynamicUnder
+            lda     TileStore+TS_JMP_ADDR,x      ; Get the address of the exception handler
+            sta     _JTBL_CACHE
+
+            lda     TileStore+TS_TILE_ID,x       ; Get the original tile descriptor
+            and     #$007F                       ; clamp to < (32 * 4)
+            ora     #$3580                       ; $35 = AND dp,x with the operand's high bit set; CopyDynUnder
+            xba                                  ; emits this word as-is and then EORs it with $8020 to get ORA n,x
+            sta     _OP_CACHE                    ; This is the 2-byte AND $80+n,x instruction for the first column
+
+            lda   TileStore+TS_CODE_ADDR_HIGH,x
+            pha
+            ldy   TileStore+TS_CODE_ADDR_LOW,x
+            sty   _Y_REG                         ; CopyDynUnder borrows Y and reloads it from _Y_REG
+            plb
+
+            CopyDynUnder  0;$0003
+            CopyDynUnder  4;$1003
+            CopyDynUnder  8;$2003
+            CopyDynUnder  12;$3003
+            CopyDynUnder  16;$4003
+            CopyDynUnder  20;$5003
+            CopyDynUnder  24;$6003
+            CopyDynUnder  28;$7003
+
+            sec
+            lda     _JTBL_CACHE
+            sbc     #32                          ; All the snippets are 32 bytes wide and, since we're
+            sta     _JTBL_CACHE                  ; within one tile, the second column is consecutive
+
+            clc
+            lda     _OP_CACHE
+            adc     #$0200                       ; Advance to the next word
+            sta     _OP_CACHE
+
+            CopyDynUnder  2;$0000
+            CopyDynUnder  6;$1000
+            CopyDynUnder  10;$2000
+            CopyDynUnder  14;$3000
+            CopyDynUnder  18;$4000
+            CopyDynUnder  22;$5000
+            CopyDynUnder  26;$6000
+            CopyDynUnder  30;$7000
+
+; Now fill in the JMP opcodes (the macro only wrote the JMP operands)
+            sep             #$20
+            lda             #$4C
+            sta:            $0000,y
+            sta:            $0003,y
+            sta             $1000,y
+            sta             $1003,y
+            sta             $2000,y
+            sta             $2003,y
+            sta             $3000,y
+            sta             $3003,y
+            sta             $4000,y
+            sta             $4003,y
+            sta             $5000,y
+            sta             $5003,y
+            sta             $6000,y
+            sta             $6003,y
+            sta             $7000,y
+            sta             $7003,y
+            rep             #$20
+
+            plb
+            rts
+
+; Create a masked render of one code-field word based on the sprite data and mask held
+; in the direct page temporary buffers (tmp_sprite_data / tmp_sprite_mask).
+;
+; If the MASK is $0000, then insert a PEA #DATA (the sprite word is fully opaque)
+; If the MASK is $FFFF, then insert a LDA DP,x / PHA (the sprite word is fully transparent)
+; If mixed, insert a JMP to the exception handler and fill the handler with
+;           LDA DP,x / AND #MASK / ORA #DATA / BRA *+15 (branch into the handler's prologue)
+; 
+; ]1 : sprite buffer offset (byte offset into tmp_sprite_data / tmp_sprite_mask)
+; ]2 : code field offset (relative to Y; bits 12-15 select the line)
+;
+; Expects Y = code field address, _OP_CACHE = LDA dp,x opcode + operand for this column and
+; _JTBL_CACHE = exception handler offset for this column.  Clobbers A; the mixed path also clobbers X.
+CopyDynOver     mac
+                lda   tmp_sprite_mask+{]1}     ; load the mask value
+                bne   mixed                    ; a non-zero value may be mixed
+
+; This is a solid word
+                lda   #$00F4                   ; PEA instruction (the zero high byte is overwritten by the operand below)
+                sta:  ]2,y
+                lda   tmp_sprite_data+{]1}     ; load the sprite data
+                sta:  ]2+1,y                   ; PEA operand
+                bra   next
+
+mixed           cmp   #$FFFF                   ; All 1's in the mask is a fully transparent sprite word
+                beq   transparent
+
+                lda   #$004C                   ; JMP to handler
+                sta:  {]2},y
+                lda   _JTBL_CACHE              ; Get the offset to the exception handler for this column
+                ora   #{]2&$F000}              ; adjust for the current row offset
+                sta:  {]2}+1,y
+                tax                            ; This becomes the new address that we use to patch in
+
+                lda   _OP_CACHE       ; Get the LDA dp,x instruction for this column
+                sta:  $0000,x         ; handler bytes 0-1
+
+                lda   #$0029          ; AND #SPRITE_MASK
+                sta:  $0002,x         ; handler byte 2 (opcode); the zero high byte is overwritten next
+                lda   tmp_sprite_mask+{]1}
+                sta:  $0003,x         ; handler bytes 3-4 (immediate mask)
+
+                lda   #$0009          ; ORA #SPRITE_DATA
+                sta:  $0005,x         ; handler byte 5 (opcode); the zero high byte is overwritten next
+                lda   tmp_sprite_data+{]1}
+                sta:  $0006,x         ; handler bytes 6-7 (immediate data)
+
+                lda   #$0D80          ; branch to the prologue (BRA *+15)
+                sta:  $0008,x         ; handler bytes 8-9
+                bra   next
+
+; This is a transparent word, so just show the dynamic data
+transparent
+                lda   #$4800          ; Put the PHA in the third byte
+                sta:  {]2}+1,y        ; the zero low byte is overwritten by the operand below
+                lda   _OP_CACHE       ; Store the LDA dp,x instruction with operand
+                sta:  {]2},y
+next
+                <<<
+
+; Masked renderer for a dynamic tile on top of the sprite data.  There are no transparent vs
+; solid vs mixed considerations here.  This only sets the JMP address, setting the JMP opcodes
+; must happen elsewhere
+;
+; ]1 : sprite buffer offset (byte offset into tmp_sprite_data)
+; ]2 : code field offset (relative to Y; bits 12-15 select the line)
+;
+; Requirements on the caller:
+;   Y and _Y_REG both hold the code field address (Y is borrowed and reloaded from _Y_REG)
+;   _JTBL_CACHE holds the exception handler offset for this column
+;   _OP_CACHE holds the 2-byte `AND $80+n,x` instruction (opcode $35 in the low byte)
+; Clobbers A.
+CopyDynUnder MAC
+
+; Need to fill in the first 9 bytes of the JMP handler with the following code sequence where
+; the data comes from the sprite buffer and the mask/tile data come from the dynamic tile area
+;
+;            lda  #DATA
+;            and  $80,x
+;            ora  $00,x
+;            bra  *+16
+
+                lda   _JTBL_CACHE     ; Get the offset to the exception handler for this column
+                ora   #{]2&$F000}     ; adjust for the current row offset
+                sta:  ]2+1,y
+                tay                   ; This becomes the new address that we use to patch in
+
+                lda   #$00A9          ; LDA #DATA
+                sta:  $0000,y
+                lda   tmp_sprite_data+{]1}   ; direct page buffer, read un-indexed exactly as it was filled
+                sta:  $0001,y
+
+                lda   _OP_CACHE
+                sta:  $0003,y         ; AND $80,x
+                eor   #$8020          ; Switch the opcode to an ORA and remove the high bit of the operand
+                sta:  $0005,y         ; ORA $00,x
+
+                lda   #$0E80          ; branch to the prologue (BRA *+16)
+                sta:  $0007,y
+
+                ldy   _Y_REG          ; restore original y-register value and move on
+                eom
--- a/src/render/Sprite1.s
+++ b/src/render/Sprite1.s
@@ -151,3 +151,36 @@ OneSpriteSlowUnderV
            plb
            jsr   FillPEAOpcode
            jmp   _OneSpriteFastUnderV
+
+;-------------------------------
+; Dynamic tiles with one sprite.
+
+; Copy the 8 lines x 2 words of sprite data addressed by sprite_ptr0 into the direct page
+; buffer tmp_sprite_data, then continue in DynamicUnder.
+;
+; In:  X = TileStore offset of the tile (preserved for DynamicUnder, which indexes TileStore with it)
+;      sprite_ptr0 = offset of the sprite's data within spritedata
+OneSpriteDynamicUnder
+            phx                                  ; the copy loop needs X for the sprite offset
+            ldx   sprite_ptr0
+]line       equ   0
+            lup   8
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN},x
+            sta   tmp_sprite_data+{]line*4}
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
+            sta   tmp_sprite_data+{]line*4}+2
+]line       equ   ]line+1
+            --^
+            plx                                  ; back to the TileStore offset before handing off
+            jmp   DynamicUnder
+
+; Copy the 8 lines x 2 words of sprite data and sprite mask addressed by sprite_ptr0 into the
+; direct page buffers tmp_sprite_data / tmp_sprite_mask, then continue in DynamicOver.
+;
+; In:  X = TileStore offset of the tile (preserved for DynamicOver, which indexes TileStore with it)
+;      sprite_ptr0 = offset of the sprite within spritedata / spritemask
+OneSpriteDynamicOver
+            phx                                  ; the copy loop needs X for the sprite offset
+            ldx   sprite_ptr0
+]line       equ   0
+            lup   8
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN},x
+            sta   tmp_sprite_data+{]line*4}
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
+            sta   tmp_sprite_data+{]line*4}+2
+
+            ldal  spritemask+{]line*SPRITE_PLANE_SPAN},x
+            sta   tmp_sprite_mask+{]line*4}
+            ldal  spritemask+{]line*SPRITE_PLANE_SPAN}+2,x   ; second mask word comes from the mask, not the data
+            sta   tmp_sprite_mask+{]line*4}+2
+]line       equ   ]line+1
+            --^
+            plx                                  ; back to the TileStore offset before handing off
+            jmp   DynamicOver
+