diff --git a/src/Sprite.s b/src/Sprite.s index 2eda837..f18a4a3 100644 --- a/src/Sprite.s +++ b/src/Sprite.s @@ -38,7 +38,7 @@ InitSprites adc #4*2 ; skip ahead 4 tiles inx inx - cpx #8*2 + cpx #16*2 bcc :loop4 ; Precalculate some bank values @@ -153,7 +153,7 @@ _DoPhase1 jmp _ClearSpriteFromTileStore ; Clear the tile flags, add to the dirty tile list and done -; Need to calculate new VBUFF information. The could be reuqired for UPDATED, ADDED or MOVED +; Need to calculate new VBUFF information. This could be required for UPDATED, ADDED or MOVED ; sprites, so we do it unconditionally. :no_clear jsr _CalcDirtySprite @@ -648,6 +648,7 @@ _PrecalcAllSpriteInfo sbc _Sprites+SPRITE_CLIP_TOP,x inc sta _Sprites+SPRITE_CLIP_HEIGHT,x + rts :offscreen @@ -691,14 +692,19 @@ _UpdateSprite tax pla - cmp _Sprites+SPRITE_ID,x ; If the flags changed, need to redraw the sprite - bne :sprite_flag_change ; on the next frame +; Do some work to see if only the H or V bits have changed. If so, merge them into the +; SPRITE_ID + eor _Sprites+SPRITE_ID,x ; If either bit has changed, this will be non-zero + and #SPRITE_VFLIP+SPRITE_HFLIP + bne :sprite_flag_change + tya cmp _Sprites+VBUFF_ADDR,x ; Did the stamp change? bne :sprite_stamp_change rts ; Nothing changed, so just return :sprite_flag_change + eor _Sprites+SPRITE_ID,x ; put the new bits into the value. ---HV--- ^ SPRITE_ID & 00011000 ^ SPRITE_ID = SSSHVSSS sta _Sprites+SPRITE_ID,x ; Keep a copy of the full descriptor tya :sprite_stamp_change diff --git a/src/Sprite2.s b/src/Sprite2.s index 4776a6f..ab43684 100644 --- a/src/Sprite2.s +++ b/src/Sprite2.s @@ -61,15 +61,18 @@ _CalcDirtySprite sta _Sprites+OLD_TS_LOOKUP_INDEX,y ; Add the first visible row of the sprite to the Y-scroll offset to find the first line in the -; code field that needs to be drawn. The range of values is 0 to 199+207 = [0, 406] +; code field that needs to be drawn. The range of values is 0 to 199+207 = [0, 406]. 
This +; value is divided by 8, so the range of lookup values is [0, 50], so 51 possible values. clc lda _Sprites+SPRITE_CLIP_TOP,y adc StartYMod208 ; Adjust for the scroll offset - tax ; Cache + pha ; Cache and #$FFF8 ; mask first to ensure LSR will clear the carry lsr lsr + tax + lda TileStoreLookupYTable,x sta RowTop ; Even numbers from [0, 100] (50 elements) ; Get the position of the top edge within the tile and then add it to the sprite's height @@ -78,7 +81,7 @@ _CalcDirtySprite ; that are intersected, rather than assuming an 8x8 sprite always takes up that amount of ; space. - txa + pla and #$0007 tax ; cache again. This is a bit faster than recalculating @@ -94,7 +97,8 @@ _CalcDirtySprite sta tmp0 ; Add the horizontal position to the horizontal offset to find the first column in the -; code field that needs to be drawn. The range of values is 0 to 159+163 = [0, 322] +; code field that needs to be drawn. The range of values is 0 to 159+163 = [0, 322]. +; This value is divided by 4, so 81 possible values clc lda _Sprites+SPRITE_CLIP_LEFT,y @@ -201,8 +205,8 @@ TSSetSprite mac next <<< -ROW equ TILE_STORE_WIDTH*2 -COL equ TILE_DATA_SPAN +ROW equ TILE_STORE_WIDTH*2 ; This many bytes to the next row in TileStore coordinates +COL equ 2 ; This many bytes for each element :mark1x1 ldx _Sprites+VBUFF_ARRAY_ADDR,y ; get the address of this sprite's vbuff values @@ -264,10 +268,10 @@ COL equ TILE_DATA_SPAN sta: {1*ROW}+{1*COL},x ldx _Sprites+TS_LOOKUP_INDEX,y - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2;#{1*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2 rts :mark2x3 @@ 
-286,12 +290,12 @@ COL equ TILE_DATA_SPAN sta: {1*ROW}+{2*COL},x ldx _Sprites+TS_LOOKUP_INDEX,y - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4;#{0*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2;#{1*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+4;#{1*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2 + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+4 rts :mark3x1 @@ -304,9 +308,9 @@ COL equ TILE_DATA_SPAN sta: {2*ROW}+{0*COL},x ldx _Sprites+TS_LOOKUP_INDEX,y - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 2*{TS_LOOKUP_SPAN*2}+0;#{2*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+0 rts :mark3x2 @@ -325,12 +329,12 @@ COL equ TILE_DATA_SPAN sta: {2*ROW}+{1*COL},x ldx _Sprites+TS_LOOKUP_INDEX,y - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2;#{1*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} - TSSetSprite 2*{TS_LOOKUP_SPAN*2}+0;#{2*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 2*{TS_LOOKUP_SPAN*2}+2;#{2*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} + TSSetSprite 
0*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2 + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+2 rts :mark3x3 @@ -355,13 +359,13 @@ COL equ TILE_DATA_SPAN sta: {2*ROW}+{2*COL},x ldx _Sprites+TS_LOOKUP_INDEX,y - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0;#{0*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2;#{0*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} - TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4;#{0*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0;#{1*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2;#{1*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} - TSSetSprite 1*{TS_LOOKUP_SPAN*2}+4;#{1*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} - TSSetSprite 2*{TS_LOOKUP_SPAN*2}+0;#{2*VBUFF_TILE_ROW_BYTES}+{0*VBUFF_TILE_COL_BYTES} - TSSetSprite 2*{TS_LOOKUP_SPAN*2}+2;#{2*VBUFF_TILE_ROW_BYTES}+{1*VBUFF_TILE_COL_BYTES} - TSSetSprite 2*{TS_LOOKUP_SPAN*2}+4;#{2*VBUFF_TILE_ROW_BYTES}+{2*VBUFF_TILE_COL_BYTES} + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+2 + TSSetSprite 0*{TS_LOOKUP_SPAN*2}+4 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+2 + TSSetSprite 1*{TS_LOOKUP_SPAN*2}+4 + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+0 + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+2 + TSSetSprite 2*{TS_LOOKUP_SPAN*2}+4 rts diff --git a/src/SpriteV1.s b/src/SpriteV1.s new file mode 100644 index 0000000..6d24dbb --- /dev/null +++ b/src/SpriteV1.s @@ -0,0 +1,226 @@ +; Old code that was in Version 1, but is not needed. May be adapted for Version 2. 
+ +; Y = _Sprites array offset +_EraseSpriteY + lda _Sprites+OLD_VBUFF_ADDR,y + beq :noerase + ldx _Sprites+SPRITE_DISP,y ; get the dispatch index for this sprite (32 values) + jmp (:do_erase,x) +:noerase rts +:do_erase dw _EraseTileSprite8x8,_EraseTileSprite8x8,_EraseTileSprite8x8,_EraseTileSprite8x8 + dw _EraseTileSprite8x16,_EraseTileSprite8x16,_EraseTileSprite8x16,_EraseTileSprite8x16 + dw _EraseTileSprite16x8,_EraseTileSprite16x8,_EraseTileSprite16x8,_EraseTileSprite16x8 + dw _EraseTileSprite16x16,_EraseTileSprite16x16,_EraseTileSprite16x16,_EraseTileSprite16x16 + dw _EraseTileSprite8x8,_EraseTileSprite8x8,_EraseTileSprite8x8,_EraseTileSprite8x8 + dw _EraseTileSprite8x16,_EraseTileSprite8x16,_EraseTileSprite8x16,_EraseTileSprite8x16 + dw _EraseTileSprite16x8,_EraseTileSprite16x8,_EraseTileSprite16x8,_EraseTileSprite16x8 + dw _EraseTileSprite16x16,_EraseTileSprite16x16,_EraseTileSprite16x16,_EraseTileSprite16x16 + +; A = bank address +_EraseTileSprite8x8 + tax + phb ; Save the bank to switch to the sprite plane + + pei SpriteBanks + plb ; pop the data bank (low byte) + +]line equ 0 + lup 8 + stz: {]line*SPRITE_PLANE_SPAN}+0,x + stz: {]line*SPRITE_PLANE_SPAN}+2,x +]line equ ]line+1 + --^ + + plb ; pop the mask bank (high byte) + lda #$FFFF +]line equ 0 + lup 8 + sta: {]line*SPRITE_PLANE_SPAN}+0,x + sta: {]line*SPRITE_PLANE_SPAN}+2,x +]line equ ]line+1 + --^ + + plb + rts + +_EraseTileSprite8x16 + tax + phb ; Save the bank to switch to the sprite plane + + pei SpriteBanks + plb ; pop the data bank (low byte) + +]line equ 0 + lup 16 + stz: {]line*SPRITE_PLANE_SPAN}+0,x + stz: {]line*SPRITE_PLANE_SPAN}+2,x +]line equ ]line+1 + --^ + + plb ; pop the mask bank (high byte) + lda #$FFFF +]line equ 0 + lup 16 + sta: {]line*SPRITE_PLANE_SPAN}+0,x + sta: {]line*SPRITE_PLANE_SPAN}+2,x +]line equ ]line+1 + --^ + + plb + rts + +_EraseTileSprite16x8 + tax + phb ; Save the bank to switch to the sprite plane + + pei SpriteBanks + plb ; pop the data bank (low byte) + +]line equ 
0 + lup 8 + stz: {]line*SPRITE_PLANE_SPAN}+0,x + stz: {]line*SPRITE_PLANE_SPAN}+2,x + stz: {]line*SPRITE_PLANE_SPAN}+4,x + stz: {]line*SPRITE_PLANE_SPAN}+6,x +]line equ ]line+1 + --^ + + plb ; pop the mask bank (high byte) + lda #$FFFF +]line equ 0 + lup 8 + sta: {]line*SPRITE_PLANE_SPAN}+0,x + sta: {]line*SPRITE_PLANE_SPAN}+2,x + sta: {]line*SPRITE_PLANE_SPAN}+4,x + sta: {]line*SPRITE_PLANE_SPAN}+6,x +]line equ ]line+1 + --^ + + plb + rts + +_EraseTileSprite16x16 + tax + phb ; Save the bank to switch to the sprite plane + + pei SpriteBanks + plb ; pop the data bank (low byte) + +]line equ 0 + lup 16 + stz: {]line*SPRITE_PLANE_SPAN}+0,x + stz: {]line*SPRITE_PLANE_SPAN}+2,x + stz: {]line*SPRITE_PLANE_SPAN}+4,x + stz: {]line*SPRITE_PLANE_SPAN}+6,x +]line equ ]line+1 + --^ + + plb ; pop the mask bank (high byte) + + lda #$FFFF +]line equ 0 + lup 16 + sta: {]line*SPRITE_PLANE_SPAN}+0,x + sta: {]line*SPRITE_PLANE_SPAN}+2,x + sta: {]line*SPRITE_PLANE_SPAN}+4,x + sta: {]line*SPRITE_PLANE_SPAN}+6,x +]line equ ]line+1 + --^ + + plb + rts + + +; First, if there is only one sprite, then we can skip any overhead and do a single lda/and/ora/sta to put the +; sprite data on the screen. +; +; Second, if there are 4 or less, then we "stack" the sprite data using an unrolled loop that allows each +; sprite to just be a single and/ora pair and the final result is not written to any intermediate memory buffer. +; +; Third, if there are 5 or more sprites, then we assume that the sprites are "dense" and that there will be a +; non-trivial amount of overdraw. In this case we do a series of optimized copies of the sprite data *and* +; masks into a direct page buffer in *reverse order*. Once a mask value becomes zero, then nothing else can +; show through and that value can be skipped. Once all of the mask values are zero, then the render is terminated +; and the data buffer copied to the final destination. 
+; +; Note that these rendering algorithms impose a priority ordering on the sprites where lower sprite IDs are drawn +; underneath higher sprite IDs. +RenderActiveSpriteTiles + cmp #0 ; Is there only one active sprite? If so optimise + bne :many + + ldx vbuff ; load the address to the (adjusted) sprite tile + lda TileStore+TS_SCREEN_ADDR,y + tay + + lda tiledata+0,y + andl spritemask,x + oral spritedata,x + sta 00,s + + lda tiledata+2,y + andl spritemask+2,x + oral spritedata+2,x + sta 02,s + + ... + tsc + adc #320 + tcs + ... + + lda tiledata+{line*4},y + andl spritemask+{line*SPAN},x + oral spritedata+{line*SPAN},x + sta 160,s + + lda tiledata+{line*4}+2,y + andl spritemask+{line*SPAN}+2,x + oral spritedata+{line*SPAN}+2,x + sta 162,s + + rts + + +:many + lda TileStore+TS_SCREEN_ADDR,y + tcs + lda TileStore+TS_TILE_ADDR,y + tay + + ldx count + jmp (:arr,x) + lda tiledata+0,y + ldx vbuff + andl spritemask,x + oral spritedata,x + ldx vbuff+2 + andl spritemask,x + oral spritedata,x + ldx vbuff+4 + andl spritemask,x + oral spritedata,x + ... + sta 00,s + + ldx count + jmp (:arr,x) + lda tiledata+0,y + ldx vbuff + andl spritemask,x + oral spritedata,x + ldx vbuff+2 + andl spritemask,x + oral spritedata,x + ldx vbuff+4 + andl spritemask,x + oral spritedata,x + ... 
+ sta 02,s + + sta 160,s + + sta 162,s + + tsc + adc #320 \ No newline at end of file diff --git a/src/tiles/FastRenderer.s b/src/tiles/FastRenderer.s index 72f61d0..f8654b7 100644 --- a/src/tiles/FastRenderer.s +++ b/src/tiles/FastRenderer.s @@ -58,20 +58,17 @@ OneSpriteFast plb ; jmp _TBApplySpriteData2 - _TBApplySpriteData2 ]line equ 0 lup 8 - lda blttmp+{]line*4} + lda tmp_tile_data+{]line*4} andl spritemask+{]line*SPRITE_PLANE_SPAN},x oral spritedata+{]line*SPRITE_PLANE_SPAN},x -; lda #$FFFF sta: $0004+{]line*$1000},y - lda blttmp+{]line*4}+2 + lda tmp_tile_data+{]line*4}+2 andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x -; lda #$FFFF sta: $0001+{]line*$1000},y ]line equ ]line+1 --^