diff --git a/src/Defs.s b/src/Defs.s
index 8f05c28..4eb1430 100644
--- a/src/Defs.s
+++ b/src/Defs.s
@@ -190,7 +190,7 @@ PAD_KEY_DOWN equ $04
 ; TILE_RESERVED_BIT   equ   $8000
 TILE_PRIORITY_BIT     equ   $4000              ; Put tile on top of sprite
 TILE_FRINGE_BIT       equ   $2000              ; Unused
-TILE_MASK_BIT         equ   $1000              ; Hint bit used in TWO_LAYER_MODE to optimize rendering
+TILE_SOLID_BIT        equ   $1000              ; Hint bit used in TWO_LAYER_MODE to optimize rendering: set when the tile has no transparency
 TILE_DYN_BIT          equ   $0800              ; Is this a Dynamic Tile?
 TILE_VFLIP_BIT        equ   $0400
 TILE_HFLIP_BIT        equ   $0200
diff --git a/src/Tiles.s b/src/Tiles.s
index 565e1df..e27397c 100644
--- a/src/Tiles.s
+++ b/src/Tiles.s
@@ -197,7 +197,10 @@ _SetTile
             jsr   _GetTileAddr
             sta   TileStore+TS_TILE_ADDR,x      ; Committed to drawing this tile, so get the address of the tile in the tiledata bank for later

-; Set the standard renderer procs for this tile.
+; Set the renderer procs for this tile.
+;
+; NOTE: Later on, optimize this to just take the Tile ID & TILE_CTRL_MASK and look up the right proc
+;       table address from a lookup table....
 ;
 ; 1. The dirty render proc is always set the same.
 ; 2. If BG1 and DYN_TILES are disabled, then the TS_BASE_TILE_DISP is selected from the Fast Renderers, otherwise
@@ -243,23 +246,10 @@ _SetTile
             brl   :setTileDyn

 :not_dyn
-            lda   TileStore+TS_TILE_ID,x
-            and   #TILE_VFLIP_BIT+TILE_HFLIP_BIT  ; get the lookup value
-            xba
-            tay
-;            ldal  DirtyTileProcs,x
-;            sta   TileStore+TS_DIRTY_TILE_DISP,y
-
-;            ldal  CopyTileProcs,x
-;            sta   TileStore+TS_DIRTY_TILE_COPY,y
-
-            lda   TileStore+TS_TILE_ID,x        ; Get the non-sprite dispatch address
-            and   #TILE_CTRL_MASK
-            xba
-            tay
-;            ldal  TileProcs,y
-;            sta   TileStore+TS_BASE_TILE_DISP,y
-            jmp   _PushDirtyTileX               ; on the next call to _ApplyTiles
+            ldy   #SlowProcs                    ; Y = address of the Slow proc table; a safe default for now (see NOTE above)
+            lda   procIdx                       ; A = entry to use within that table (procIdx is set outside this hunk)
+            jsr   _SetTileProcs                 ; install the procs selected by Y (table) and A (entry)
+            jmp   _PushDirtyTileX               ; finish by queueing the tile as dirty

 ; Specialized check for when the engine is in "Fast" mode.  If is a simple decision tree based on whether
 ; the tile priority bit is set, and whether this is the special tile 0 or not.
@@ -270,12 +260,22 @@ _SetTile
             jmp   _PushDirtyTileX

 ; Specialized check for when the engine has enabled dynamic tiles. In this case we are no longer
-; guaranteed that the opcodes in a tile are PEA instructions. If the old tile and the new tile
-; are both Dynamic tiles or both Basic tiles, then we can use an optimized routine. Otherwise
-; we must set the opcodes as well as the operands
+; guaranteed that the opcodes in a tile are PEA instructions.
 :setTileDyn
+            lda   #TILE_DYN_BIT                 ; test the Dynamic bit of the new tile ID
+            bit   newTileId
+            beq   :pickSlowProc                 ; If the Dynamic bit is not set, select a tile proc that sets opcodes

-;            ldy   #DynProcs
+            lda   newTileId                     ; Otherwise choose one of the two dynamic tuples
+            and   #TILE_PRIORITY_BIT
+            beq   :pickDynProc                  ; If the Priority bit is not set, pick the first entry (A = 0 here)
+            lda   #1                            ; If the Priority bit is set, pick the other one
+
+:pickDynProc ldy   #DynProcs                    ; Y = address of the dynamic proc table (DynOver, DynUnder)
+            jsr   _SetTileProcs                 ; A = entry (0 or 1), Y = table
+            jmp   _PushDirtyTileX
+
+:pickSlowProc ldy  #SlowProcs                   ; Y = address of the Slow proc table
             lda   procIdx
             jsr   _SetTileProcs
             jmp   _PushDirtyTileX

@@ -375,14 +375,20 @@ SlowUnderNV dw CopyTileVSlow,SpriteUnderVSlow,OneSpriteSlowUnderV
 ; that does not need to worry about a second background.  Because dynamic
 ; tiles don't support horizontal or vertical flipping, there are only two
 ; sets of procedures: one for Over and one for Under.
-;DynOver    dw   _TBDynamicTile,DynamicOver,_OneSpriteDynamicOver
-;DynUnder   dw   _TBDynamicTile,DynamicUnder,_OneSpriteDynamicUnder
+DynProcs
+DynOver     dw   CopyDynamicTile,DynamicOver,OneSpriteDynamicOver    ; first entry: used when TILE_PRIORITY_BIT is clear
+DynUnder    dw   CopyDynamicTile,DynamicUnder,OneSpriteDynamicUnder  ; second entry: used when TILE_PRIORITY_BIT is set

 ; "Two Layer" procs.  These are the most complex procs.  Generally,
 ; all of these methods are implemented by building up the data
 ; and mask into the direct page space and then calling a common
 ; function to create the complex code fragments in the code field.
 ; There is not a lot of opportuinity to optimize these routines.
+;
+; To improve the performance when two-layer rendering is enabled,
+; the TILE_SOLID_BIT hint bit can be set to indicate that a tile
+; has no transparency.  This allows one of the faster routines
+; to be selected.



diff --git a/src/Tool.s b/src/Tool.s
index b92dc7c..9d4da09 100644
--- a/src/Tool.s
+++ b/src/Tool.s
@@ -366,6 +366,7 @@ _TSGetSeconds
             put   render/Render.s
             put   render/Fast.s
             put   render/Slow.s
+            put   render/Dynamic.s
             put   render/Sprite1.s
             put   render/Sprite2.s
             put   tiles/DirtyTileQueue.s
diff --git a/src/blitter/Tiles10001.s b/src/blitter/Tiles10001.s
index dd63ee4..54adc98 100644
--- a/src/blitter/Tiles10001.s
+++ b/src/blitter/Tiles10001.s
@@ -29,7 +29,7 @@ _TBDynamicSpriteTile
             sta   _JTBL_CACHE                   ; within one tile, the second column is consecutive

             lda   _OP_CACHE
-            adc   #$0200
+            adc   #$0200                        ; Advance to the next word
             sta   _OP_CACHE

             CopyDynWord 2;$0000
diff --git a/src/render/Dynamic.s b/src/render/Dynamic.s
new file mode 100644
index 0000000..6cebb99
--- /dev/null
+++ b/src/render/Dynamic.s
@@ -0,0 +1,269 @@
+; Rendering functions for Dynamic tiles. There are no Fast/Slow variants here
+;
+; CopyDynamicTile fills both words of all eight rows of a tile in the code field with
+; an "LDA dp,x / PHA" sequence.  Y is the code field address (rows are $1000 apart) and
+; the direct page operand is the low 7 bits of the tile ID (+2 for the second word).
+;
+; NOTE(review): ends with PLB but pushes nothing itself -- presumably the caller left
+;               the bank to restore on the stack; confirm against the dispatch code.
+CopyDynamicTile
+            ldal  TileStore+TS_TILE_ID,x
+            and   #$007F                        ; keep the 7-bit direct page operand
+            ora   #$4800                        ; high byte = PHA opcode, low byte = operand
+
+]line       equ   0                             ; render the first column
+            lup   8
+            sta:  $0004+{]line*$1000},y
+]line       equ   ]line+1
+            --^
+
+            inc                                 ; advance to the next word
+            inc
+
+]line       equ   0                             ; render the second column
+            lup   8
+            sta:  $0001+{]line*$1000},y
+]line       equ   ]line+1
+            --^
+
+            sep   #$20                          ; 8-bit accumulator: store single opcode bytes
+            lda   #$B5                          ; LDA dp,x opcode in front of every operand
+            sta:  $0000,y
+            sta:  $0003,y
+            sta   $1000,y
+            sta   $1003,y
+            sta   $2000,y
+            sta   $2003,y
+            sta   $3000,y
+            sta   $3003,y
+            sta   $4000,y
+            sta   $4003,y
+            sta   $5000,y
+            sta   $5003,y
+            sta   $6000,y
+            sta   $6003,y
+            sta   $7000,y
+            sta   $7003,y
+            rep   #$20
+            plb
+            rts
+
+; These routines handle the sprites.  They rely on a fairly complicated macro that takes care of
+; populating the code field and snippet space
+;
+; DynamicOver draws the sprite over the dynamic tile.  On entry X indexes TileStore; the sprite
+; words are read from tmp_sprite_data / tmp_sprite_mask.  A, X and Y are overwritten.
+DynamicOver
+            lda   TileStore+TS_JMP_ADDR,x       ; Get the address of the exception handler
+            sta   _JTBL_CACHE
+
+            lda   TileStore+TS_TILE_ID,x        ; Get the original tile descriptor
+            and   #$007F                        ; clamp to < (32 * 4)
+            ora   #$B500                        ; LDA dp,x opcode + operand
+            xba
+            sta   _OP_CACHE                     ; This is the 2-byte LDA dp,x instruction that loads the tile data
+
+            lda   TileStore+TS_CODE_ADDR_HIGH,x
+            pha
+            ldy   TileStore+TS_CODE_ADDR_LOW,x
+            plb
+
+            CopyDynOver  0;$0003
+            CopyDynOver  4;$1003
+            CopyDynOver  8;$2003
+            CopyDynOver  12;$3003
+            CopyDynOver  16;$4003
+            CopyDynOver  20;$5003
+            CopyDynOver  24;$6003
+            CopyDynOver  28;$7003
+
+            sec
+            lda   _JTBL_CACHE
+            sbc   #32                           ; All the snippets are 32 bytes wide and, since we're
+            sta   _JTBL_CACHE                   ; within one tile, the second column is consecutive
+
+            clc
+            lda   _OP_CACHE
+            adc   #$0200                        ; Advance to the next word
+            sta   _OP_CACHE
+
+            CopyDynOver  2;$0000
+            CopyDynOver  6;$1000
+            CopyDynOver  10;$2000
+            CopyDynOver  14;$3000
+            CopyDynOver  18;$4000
+            CopyDynOver  22;$5000
+            CopyDynOver  26;$6000
+            CopyDynOver  30;$7000
+
+            plb
+            rts
+
+; DynamicUnder draws the dynamic tile over the sprite.  Every word is routed through its
+; exception handler, which CopyDynUnder patches to "LDA #sprite / AND dp+$80,x / ORA dp,x".
+; _OP_CACHE therefore holds the AND dp,x opcode ($35) with bit 7 of the operand set; the
+; macro derives the matching ORA dp,x ($15, bit 7 clear) from it with EOR #$8020.
+DynamicUnder
+            lda   TileStore+TS_JMP_ADDR,x       ; Get the address of the exception handler
+            sta   _JTBL_CACHE
+
+            lda   TileStore+TS_TILE_ID,x        ; Get the original tile descriptor
+            and   #$007F                        ; clamp to < (32 * 4)
+            ora   #$3580                        ; Pre-calc the AND $80,x opcode + operand (was $B500 = LDA, which the macro's EOR turned into STA)
+            xba
+            sta   _OP_CACHE                     ; This is the 2-byte AND dp,x instruction used by CopyDynUnder
+
+            lda   TileStore+TS_CODE_ADDR_HIGH,x
+            pha
+            ldy   TileStore+TS_CODE_ADDR_LOW,x
+            sty   _Y_REG                        ; CopyDynUnder clobbers Y and reloads it from _Y_REG
+            plb
+
+            CopyDynUnder  0;$0003
+            CopyDynUnder  4;$1003
+            CopyDynUnder  8;$2003
+            CopyDynUnder  12;$3003
+            CopyDynUnder  16;$4003
+            CopyDynUnder  20;$5003
+            CopyDynUnder  24;$6003
+            CopyDynUnder  28;$7003
+
+            sec
+            lda   _JTBL_CACHE
+            sbc   #32                           ; All the snippets are 32 bytes wide and, since we're
+            sta   _JTBL_CACHE                   ; within one tile, the second column is consecutive
+
+            clc
+            lda   _OP_CACHE
+            adc   #$0200                        ; Advance to the next word
+            sta   _OP_CACHE
+
+            CopyDynUnder  2;$0000
+            CopyDynUnder  6;$1000
+            CopyDynUnder  10;$2000
+            CopyDynUnder  14;$3000
+            CopyDynUnder  18;$4000
+            CopyDynUnder  22;$5000
+            CopyDynUnder  26;$6000
+            CopyDynUnder  30;$7000
+
+; Now fill in the JMP opcodes
+            sep   #$20
+            lda   #$4C
+            sta:  $0000,y
+            sta:  $0003,y
+            sta   $1000,y
+            sta   $1003,y
+            sta   $2000,y
+            sta   $2003,y
+            sta   $3000,y
+            sta   $3003,y
+            sta   $4000,y
+            sta   $4003,y
+            sta   $5000,y
+            sta   $5003,y
+            sta   $6000,y
+            sta   $6003,y
+            sta   $7000,y
+            sta   $7003,y
+            rep   #$20
+
+            plb
+            rts
+
+; Create a masked render based on data in the direct page temporary buffer.
+;
+; If the MASK is $0000, then insert a PEA
+; If the MASK is $FFFF, then insert a LDA DP,x / PHA
+; If mixed, create a snippet of LDA DP,x / AND #MASK / ORA #DATA / PHA
+;
+; ]1 : sprite buffer offset
+; ]2 : code field offset
+;
+; Reads _JTBL_CACHE and _OP_CACHE.  Overwrites A, and X in the mixed case.
+CopyDynOver mac
+            lda   tmp_sprite_mask+{]1}          ; load the mask value
+            bne   mixed                         ; a non-zero value may be mixed
+
+; This is a solid word
+            lda   #$00F4                        ; PEA instruction
+            sta:  ]2,y
+            lda   tmp_sprite_data+{]1}          ; load the sprite data
+            sta:  ]2+1,y                        ; PEA operand
+            bra   next
+
+mixed       cmp   #$FFFF                        ; All 1's in the mask is a fully transparent sprite word
+            beq   transparent
+
+            lda   #$004C                        ; JMP to handler
+            sta:  {]2},y
+            lda   _JTBL_CACHE                   ; Get the offset to the exception handler for this column
+            ora   #{]2&$F000}                   ; adjust for the current row offset
+            sta:  {]2}+1,y
+            tax                                 ; This becomes the new address that we use to patch in
+
+            lda   _OP_CACHE                     ; Get the LDA dp,x instruction for this column
+            sta:  $0000,x
+
+            lda   #$0029                        ; AND #SPRITE_MASK
+            sta:  $0002,x
+            lda   tmp_sprite_mask+{]1}
+            sta:  $0003,x
+
+            lda   #$0009                        ; ORA #SPRITE_DATA
+            sta:  $0005,x
+            lda   tmp_sprite_data+{]1}
+            sta:  $0006,x
+
+            lda   #$0D80                        ; branch to the prologue (BRA *+15)
+            sta:  $0008,x
+            bra   next
+
+; This is a transparent word, so just show the dynamic data
+transparent
+            lda   #$4800                        ; Put the PHA in the third byte
+            sta:  {]2}+1,y
+            lda   _OP_CACHE                     ; Store the LDA dp,x instruction with operand
+            sta:  {]2},y
+next
+            <<<
+
+; Masked renderer for a dynamic tile on top of the sprite data.  There are no transparent vs
+; solid vs mixed considerations here.  This only sets the JMP address, setting the JMP opcodes
+; must happen elsewhere
+;
+; ]1 : sprite buffer offset
+; ]2 : code field offset
+;
+; Y is used as the patch pointer and is reloaded from _Y_REG at the end, so the caller must
+; have stored the code field address there.  _OP_CACHE must hold an AND dp,x instruction.
+CopyDynUnder MAC
+
+; Need to fill in the first 9 bytes of the JMP handler with the following code sequence where
+; the sprite data comes from the tmp_sprite_data buffer
+;
+;    lda  #DATA
+;    and  $80,x
+;    ora  $00,x
+;    bra  *+16
+
+            lda   _JTBL_CACHE                   ; Get the offset to the exception handler for this column
+            ora   #{]2&$F000}                   ; adjust for the current row offset
+            sta:  ]2+1,y
+            tay                                 ; This becomes the new address that we use to patch in
+
+            lda   #$00A9                        ; LDA #DATA
+            sta:  $0000,y
+            lda   tmp_sprite_data+{]1}          ; same buffer and addressing as CopyDynOver (was a long, X-indexed load)
+            sta:  $0001,y
+
+            lda   _OP_CACHE
+            sta:  $0003,y                       ; AND $80,x
+            eor   #$8020                        ; Switch the opcode to an ORA and remove the high bit of the operand
+            sta:  $0005,y                       ; ORA $00,x
+
+            lda   #$0E80                        ; branch to the prologue (BRA *+16)
+            sta:  $0007,y
+
+            ldy   _Y_REG                        ; restore original y-register value and move on
+            eom
\ No newline at end of file
diff --git a/src/render/Sprite1.s b/src/render/Sprite1.s
index 57163c0..1143e9f 100644
--- a/src/render/Sprite1.s
+++ b/src/render/Sprite1.s
@@ -151,3 +151,36 @@ OneSpriteSlowUnderV
             plb
             jsr   FillPEAOpcode
             jmp   _OneSpriteFastUnderV
+
+;-------------------------------
+; Dynamic tiles with one sprite.
+
+OneSpriteDynamicUnder
+            ldx   sprite_ptr0                   ; X = sprite offset.  NOTE(review): DynamicUnder indexes TileStore with X on entry -- confirm X is handled
+]line       equ   0
+            lup   8
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN},x     ; copy both data words of this row into the temp buffer
+            sta   tmp_sprite_data+{]line*4}
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
+            sta   tmp_sprite_data+{]line*4}+2
+]line       equ   ]line+1
+            --^
+            jmp   DynamicUnder                  ; the Under path only needs the data words, not the mask
+
+OneSpriteDynamicOver
+            ldx   sprite_ptr0                   ; X = sprite offset.  NOTE(review): DynamicOver indexes TileStore with X on entry -- confirm X is handled
+]line       equ   0
+            lup   8
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN},x     ; copy both data words of this row
+            sta   tmp_sprite_data+{]line*4}
+            ldal  spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
+            sta   tmp_sprite_data+{]line*4}+2
+
+            ldal  spritemask+{]line*SPRITE_PLANE_SPAN},x     ; copy both mask words of this row
+            sta   tmp_sprite_mask+{]line*4}
+            ldal  spritemask+{]line*SPRITE_PLANE_SPAN}+2,x   ; second mask word comes from spritemask (was read from spritedata)
+            sta   tmp_sprite_mask+{]line*4}+2
+]line       equ   ]line+1
+            --^
+            jmp   DynamicOver                   ; CopyDynOver reads both tmp_sprite_data and tmp_sprite_mask
+