diff --git a/demos/tool/App.Main.s b/demos/tool/App.Main.s index 32e0f8f..db6741b 100644 --- a/demos/tool/App.Main.s +++ b/demos/tool/App.Main.s @@ -49,9 +49,9 @@ ScreenY equ 2 cmp #'q' beq :exit -; pei ScreenX -; pei ScreenY -; _GTESetBG0Origin + pei ScreenX + pei ScreenY + _GTESetBG0Origin ; _GTERender @@ -119,4 +119,4 @@ GTEStartUp MasterId ds 2 UserId ds 2 -ToolPath str '1/GTETool' \ No newline at end of file +ToolPath str '1/Tool160' \ No newline at end of file diff --git a/demos/tool/build-image.bat b/demos/tool/build-image.bat index 5546b72..462afd6 100644 --- a/demos/tool/build-image.bat +++ b/demos/tool/build-image.bat @@ -14,6 +14,6 @@ REM Cadius does not overwrite files, so clear the root folder first REM Now copy files and folders as needed %CADIUS% ADDFILE %IMAGE% %FOLDER% .\GTEToolDemo -%CADIUS% ADDFILE %IMAGE% %FOLDER% ..\..\src\GTETool +%CADIUS% ADDFILE %IMAGE% %FOLDER% ..\..\src\Tool160 REM Copy in the image assets diff --git a/macros/GTE.Macs.s b/macros/GTE.Macs.s index b60d55b..a89cafa 100644 --- a/macros/GTE.Macs.s +++ b/macros/GTE.Macs.s @@ -26,4 +26,13 @@ _GTEReadControl MAC <<< _GTESetScreenMode MAC UserTool $A00+GTEToolNum - <<< + <<< +_GTESetTile MAC + UserTool $B00+GTEToolNum + <<< +_GTESetBG0Origin MAC + UserTool $C00+GTEToolNum + <<< +_GTERender MAC + UserTool $D00+GTEToolNum + <<< diff --git a/src/Defs.s b/src/Defs.s index ca080a6..468528b 100644 --- a/src/Defs.s +++ b/src/Defs.s @@ -182,6 +182,8 @@ TILE_MASK_BIT equ $1000 TILE_DYN_BIT equ $0800 TILE_VFLIP_BIT equ $0400 TILE_HFLIP_BIT equ $0200 +TILE_CTRL_MASK equ $FE00 +TILE_PROC_MASK equ $F800 ; Select tile proc for rendering ; Sprite constants SPRITE_HIDE equ $2000 @@ -218,7 +220,9 @@ TS_CODE_ADDR_HIGH equ TILE_STORE_SIZE*5 TS_WORD_OFFSET equ TILE_STORE_SIZE*6 ; const value, word offset value for this tile if LDA (dp),y instructions re used TS_BASE_ADDR equ TILE_STORE_SIZE*7 ; const value, because there are two rows of tiles per bank, this is set to $0000 ot $8000. 
TS_SCREEN_ADDR equ TILE_STORE_SIZE*8 ; cached value of on-screen location of tile. Used for DirtyRender. -TS_VBUFF_ARRAY_ADDR equ TILE_STORE_SIZE*9 ; const value to an aligned 32-byte array starting at $8000 in TileStore bank +;TS_VBUFF_ARRAY_ADDR equ TILE_STORE_SIZE*9 ; const value to an aligned 32-byte array starting at $8000 in TileStore bank + +TS_BASE_TILE_COPY equ TILE_STORE_SIZE*9 ; derived from TS_TILE_ID to optimize tile copy to support sprite rendering TS_BASE_TILE_DISP equ TILE_STORE_SIZE*10 ; derived from TS_TILE_ID to optimize base (non-sprite) tile dispatch in the Render function TS_DIRTY_TILE_DISP equ TILE_STORE_SIZE*11 ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function diff --git a/src/Master.s b/src/Master.s index 175f3a7..9c9a012 100644 --- a/src/Master.s +++ b/src/Master.s @@ -1,7 +1,7 @@ ; IIgs Generic Tile Engine User Toolset TYP $BA ; Tool set file - DSK GTETool + DSK Tool160 XPL ; Main toolbox interface and code diff --git a/src/Render.s b/src/Render.s index 4f52d89..d78d13b 100644 --- a/src/Render.s +++ b/src/Render.s @@ -10,15 +10,7 @@ ; ; Everything is composited into the tiles in the playfield and then the screen is rendered in ; a single pass. - -Render ENT - phb - phk - plb - jsr _Render - plb - rtl - +; ; TODO -- actually check the dirty bits and be selective on what gets updated. For example, if ; only the Y position changes, then we should only need to set new values on the ; virtual lines that were brought on screen. If the X position only changes by one @@ -36,13 +28,11 @@ _Render jsr _ApplyBG0XPosPre jsr _ApplyBG1XPosPre - nop jsr _RenderSprites ; Once the BG0 X and Y positions are committed, update sprite data jsr _UpdateBG0TileMap ; and the tile maps. 
These subroutines build up a list of tiles jsr _UpdateBG1TileMap ; that need to be updated in the code field - nop jsr _ApplyTiles ; This function actually draws the new tiles into the code field jsr _ApplyBG0XPos ; Patch the code field instructions with exit BRA opcode @@ -108,58 +98,17 @@ _Render ; the sprite subsystem + tile attributes for single-screen games which should be able to run ; close to 60 fps. ; -; Because we are register starved, there is a lot of inline code to quickly fetch the information -; needed to render sprites appropriately. If there was a way to efficiently maintain an ordered -; and compact array of per-tile VBUFF addresses, rather than the current sparse array, then -; the sprite handling code could be significantly streamlined. A note for anyone attempting -; this optimization: -; -; The _MarkDirtyTiles simply stores a sprite's per-tile VBUFF address and marks the tile -; as being occupied by the sprite with just 4 instructions -; -; sta (vbuff_array_ptr),y -; lda TileStore+TS_SPRITE_FLAG,x -; ora SpriteBit,y -; sta TileStore+TS_SPRITE_FLAG,x -; -; Then, we have an unrolled loop that does repeated tests of -; -; lsr -; bcc *+ -; lda vbuff_array_ptr,y -; sta spriteVBuffArr -; -; The only gain to be had is if the sprites that are marked are in the high bits and there are no low-index -; sprites. Skipping over N bits of the SPRITE_FLAG takes only 5*N cycles. So, on average, we might waste -; 40 cycles looking for the proper bit. -; -; Any improvement to the existing code would need to be able to maintain a data structure and get the final -; values into the spriteVBuffArr for a total cost of under 75 cycles per tile. 
- -RenderDirty ENT - phb - phk - plb - jsr _RenderDirty - plb - rtl - ; In this renderer, we assume that there is no scrolling, so no need to update any information about ; the BG0/BG1 positions _RenderDirty lda LastRender ; If the full renderer was last called, we assume that bne :norecalc ; the scroll positions have likely changed, so recalculate - lda #2 ; blue - jsr _SetBorderColor jsr _RecalcTileScreenAddrs ; them to make sure sprites draw at the correct screen address :norecalc - lda #3 ; purple - jsr _SetBorderColor - jsr _RenderSprites - lda #4 ; dk. green - jsr _SetBorderColor + jsr _RenderSprites jsr _ApplyDirtyTiles + lda #1 sta LastRender rts @@ -669,35 +618,6 @@ dirty_sprite stx spriteIdx+6 jmp BlitFourSprites -DirtyTileProcs dw _TBDirtyTile_00,_TBDirtyTile_0H,_TBDirtyTile_V0,_TBDirtyTile_VH -;DirtyTileSpriteProcs dw _TBDirtySpriteTile_00,_TBDirtySpriteTile_0H,_TBDirtySpriteTile_V0,_TBDirtySpriteTile_VH - -; Blit tiles directly to the screen. -_TBDirtyTile_00 -_TBDirtyTile_0H -]line equ 0 - lup 8 - ldal tiledata+{]line*4},x - sta: $0000+{]line*160},y - ldal tiledata+{]line*4}+2,x - sta: $0002+{]line*160},y -]line equ ]line+1 - --^ - rts - -_TBDirtyTile_V0 -_TBDirtyTile_VH -]src equ 7 -]dest equ 0 - lup 8 - ldal tiledata+{]src*4},x - sta: $0000+{]dest*160},y - ldal tiledata+{]src*4}+2,x - sta: $0002+{]dest*160},y -]src equ ]src-1 -]dest equ ]dest+1 - --^ - rts ; If there are two or more sprites at a tile, we can still be fast, but need to do extra work because ; the VBUFF values need to be read from the direct page. 
Thus, the direct page cannot be mapped onto diff --git a/src/Tiles.s b/src/Tiles.s index 37aaf3a..c50718b 100644 --- a/src/Tiles.s +++ b/src/Tiles.s @@ -8,9 +8,6 @@ ; A = tile descriptor ; ; The address is the TileID * 128 + (HFLIP * 64) -GetTileAddr ENT - jsr _GetTileAddr - rtl _GetTileAddr asl ; Multiply by 2 bit #2*TILE_HFLIP_BIT ; Check if the horizontal flip bit is set @@ -36,6 +33,29 @@ _GetBaseTileAddr rts +; Helper function to get the address offset into the tile cache / tile backing store +; X = tile column [0, 40] (41 columns) +; Y = tile row [0, 25] (26 rows) +_GetTileStoreOffset + phx ; preserve the registers + phy + + jsr _GetTileStoreOffset0 ; offset comes back in A + + ply + plx + rts + +_GetTileStoreOffset0 + tya + asl ; row * 2 = byte index into TileStoreYTable + tay + txa + asl ; column * 2 + clc + adc TileStoreYTable,y ; A = column * 2 + TileStoreYTable entry for this row + rts ; X is unchanged, Y is left holding row * 2 + ; Initialize the tile storage data structures. This takes care of populating the tile records with the ; appropriate constant values. InitTiles @@ -83,11 +103,13 @@ InitTiles ; lda TileProcs ; Same for non-dirty, non-sprite base case ; stal TileStore+TS_BASE_TILE_DISP,x - lda :vbuff ; array of sprite vbuff addresses per tile - stal TileStore+TS_VBUFF_ARRAY_ADDR,x - clc - adc #32 - sta :vbuff +; *** DEPRECATED *** +; lda :vbuff ; array of sprite vbuff addresses per tile +; stal TileStore+TS_VBUFF_ARRAY_ADDR,x +; clc +; adc #32 +; sta :vbuff +; *** ********** *** ; The next set of values are constants that are simply used as cached parameters to avoid needing to ; calculate any of these values during tile rendering @@ -122,3 +144,116 @@ InitTiles dex bpl :loop rts + +; Set a tile value in the tile backing store.
Mark dirty if the value changes +; +; A = tile id +; X = tile column [0, 40] (41 columns) +; Y = tile row [0, 25] (26 rows) +; +; Registers are not preserved +_SetTile + pha ; Keep the tile id while A is used for the offset lookup + jsr _GetTileStoreOffset0 ; Get the address of the X,Y tile position + tax + pla + + cmpl TileStore+TS_TILE_ID,x ; Only set to dirty if the value changed + beq :nochange + + stal TileStore+TS_TILE_ID,x ; Value is different, store it. + jsr _GetTileAddr + stal TileStore+TS_TILE_ADDR,x ; Committed to drawing this tile, so get the address of the tile in the tiledata bank for later + +; Set the standard renderer procs for this tile. +; +; 1. The dirty render proc is always set the same. +; 2. If BG1 and DYN_TILES are disabled, then the TS_BASE_TILE_DISP is selected from the Fast Renderers, otherwise +; it is selected from the full tile rendering functions. +; 3. The copy process is selected based on the flip bits +; +; When a tile overlaps the sprite, it is the responsibility of the Render function to compose the appropriate +; functionality. Sometimes it is simple, but in cases of the sprites overlapping Dynamic Tiles and other cases +; it can be more involved. + + ldal TileStore+TS_TILE_ID,x + and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value + xba + tay ; Y = flip bits moved to the low byte (0/2/4/6), a word index for the proc tables +; lda DirtyTileProcs,y +; stal TileStore+TS_DIRTY_TILE_DISP,x + +; lda CopyTileProcs,y +; stal TileStore+TS_DIRTY_TILE_COPY,x ; NOTE(review): this change defines TS_BASE_TILE_COPY; TS_DIRTY_TILE_COPY is not visible here -- confirm the name before enabling + + lda EngineMode + bit #ENGINE_MODE_DYN_TILES+ENGINE_MODE_TWO_LAYER + beq :fast ; Neither mode bit set: use the fast-renderer path + + ldal TileStore+TS_TILE_ID,x ; Get the non-sprite dispatch address + and #TILE_CTRL_MASK + xba + tay ; Y = control bits moved to the low byte, index for the (disabled) TileProcs lookup +; lda TileProcs,y +; stal TileStore+TS_BASE_TILE_DISP,x + bra :out + +:fast +; lda FastTileProcs,y +; stal TileStore+TS_BASE_TILE_DISP,x +:out + +; txa ; Add this tile to the list of dirty tiles to refresh +; jmp _PushDirtyTileX ; on the next call to _ApplyTiles + +:nochange rts ; NOTE: with the push above commented out, a changed tile is stored but not queued as dirty here + + +; SetBG0XPos +; +; Set the virtual horizontal position of the primary background layer.
In addition to +; updating the direct page state locations, this routine needs to preserve the original +; value as well. This is a bit subtle, because if this routine is called multiple times +; with different values, we need to make sure the *original* value is preserved and not +; continuously overwrite it. +; +; We assume that there is a clean code field in this routine +SetBG0XPos ENT + jsr _SetBG0XPos + rtl + +_SetBG0XPos + cmp StartX + beq :out ; Easy, if nothing changed, then nothing changes + + ldx StartX ; Load the old value (but don't save it yet) + sta StartX ; Save the new position + + lda #DIRTY_BIT_BG0_X + tsb DirtyBits ; Check if the value is already dirty, if so exit + bne :out ; without overwriting the original value + + stx OldStartX ; First change, so preserve the value +:out rts + + +; SetBG0YPos +; +; Set the virtual position of the primary background layer. +SetBG0YPos ENT + jsr _SetBG0YPos + rtl + +_SetBG0YPos + cmp StartY + beq :out ; Easy, if nothing changed, then nothing changes + + ldx StartY ; Load the old value (but don't save it yet) + sta StartY ; Save the new position + + lda #DIRTY_BIT_BG0_Y + tsb DirtyBits ; Check if the value is already dirty, if so exit + bne :out ; without overwriting the original value + + stx OldStartY ; First change, so preserve the value +:out rts diff --git a/src/Tool.s b/src/Tool.s index 0bd14fe..87e5a92 100644 --- a/src/Tool.s +++ b/src/Tool.s @@ -196,6 +196,14 @@ yTile equ FirstParam+2 xTile equ FirstParam+4 _TSEntry + + lda xTile,s ; Valid range [0, 40] (41 columns) + tax + lda yTile,s ; Valid range [0, 25] (26 rows) + tay + lda tileId,s ; Tile id is a stacked parameter too, so read it stack-relative like xTile/yTile + jsr _SetTile + _TSExit #0;#6 ; SetBG0Origin(x, y) @@ -204,11 +212,18 @@ yPos equ FirstParam xPos equ FirstParam+2 _TSEntry + + lda xPos,s + jsr _SetBG0XPos + lda yPos,s + jsr _SetBG0YPos + _TSExit #0;#4 ; Render() _TSRender _TSEntry +; jsr _Render _TSExit #0;#0 @@ -220,6 +235,7 @@ _TSRender put Timer.s put Graphics.s put Tiles.s +; put Render.s put blitter/BG0.s put
blitter/BG1.s put blitter/Template.s diff --git a/src/_FileInformation.txt b/src/_FileInformation.txt index 54452a1..8b4a06a 100644 --- a/src/_FileInformation.txt +++ b/src/_FileInformation.txt @@ -1,2 +1 @@ -Tool=Type(00),AuxType(0000),VersionCreate(70),MinVersion(BE),Access(E3),FolderInfo1(000000000000000000000000000000000000),FolderInfo2(000000000000000000000000000000000000) -GTETool=Type(BA),AuxType(0000),VersionCreate(70),MinVersion(BE),Access(E3),FolderInfo1(000000000000000000000000000000000000),FolderInfo2(000000000000000000000000000000000000) +Tool160=Type(BA),AuxType(0000),VersionCreate(70),MinVersion(BE),Access(E3),FolderInfo1(000000000000000000000000000000000000),FolderInfo2(000000000000000000000000000000000000) diff --git a/src/blitter/Horz.s b/src/blitter/Horz.s index a069313..9eaf5d8 100644 --- a/src/blitter/Horz.s +++ b/src/blitter/Horz.s @@ -3,33 +3,6 @@ ; when the virtual X-position of the play field changes. -; SetBG0XPos -; -; Set the virtual horizontal position of the primary background layer. In addition to -; updating the direct page state locations, this routine needs to preserve the original -; value as well. This is a bit subtle, because if this routine is called multiple times -; with different values, we need to make sure the *original* value is preserved and not -; continuously overwrite it. -; -; We assume that there is a clean code field in this routine -SetBG0XPos ENT - jsr _SetBG0XPos - rtl - -_SetBG0XPos - cmp StartX - beq :out ; Easy, if nothing changed, then nothing changes - - ldx StartX ; Load the old value (but don't save it yet) - sta StartX ; Save the new position - - lda #DIRTY_BIT_BG0_X - tsb DirtyBits ; Check if the value is already dirty, if so exit - bne :out ; without overwriting the original value - - stx OldStartX ; First change, so preserve the value -:out rts - ; Simple function that restores the saved opcode that are stashed in _applyBG0Xpos. 
It is ; very important that opcodes are restored before new ones are inserted, because there is ; only one, fixed storage location and old values will be overwritten if operations are not diff --git a/src/blitter/Tiles.s b/src/blitter/Tiles.s index e0a25a5..c175403 100644 --- a/src/blitter/Tiles.s +++ b/src/blitter/Tiles.s @@ -38,9 +38,6 @@ ; It is simply too slow to try to horizontally reverse the pixel data on the fly. This still allows ; for up to 512 tiles to be stored in a single bank, which should be sufficient. -TILE_CTRL_MASK equ $FE00 -TILE_PROC_MASK equ $F800 ; Select tile proc for rendering - ; Use some temporary space for the spriteIdx array (maximum of 4 entries) stkSave equ tmp9 @@ -48,29 +45,7 @@ screenAddr equ tmp10 tileAddr equ tmp11 spriteIdx equ tmp12 -; On entry -; -; B is set to the correct BG1 data bank -; A is set to the the tile descriptor -; Y is set to the top-left address of the tile in the BG1 data bank -; -; tmp0/tmp1 is reserved -_RenderTileBG1 - pha ; Save the tile descriptor - and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; Only horizontal and vertical flips are supported for BG1 - xba - tax - ldal :actions,x - stal :tiledisp+1 - - pla - and #TILE_ID_MASK ; Mask out the ID and save just that - _Mul128 ; multiplied by 128 - tax -:tiledisp jmp $0000 - -:actions dw _TBSolidBG1_00,_TBSolidBG1_0H,_TBSolidBG1_V0,_TBSolidBG1_VH ; Given an address to a Tile Store record, dispatch to the appropriate tile renderer. The Tile ; Store record contains all of the low-level information that's needed to call the renderer. @@ -317,41 +292,6 @@ CopyTwoSprites --^ ; jmp FinishTile -; Copy a single piece of sprite data into a temporary direct page . 
X = spriteIdx -; -; X register is the offset of the underlying tile data -; Y register is the line offset into the sprite data and mask buffers -; There is a pointer for each sprite on the direct page that can be used -; to access both the data and mask components of a sprite -; The Data Bank reigster points to the sprite data -; -; ldal tiledata,x -; and [spriteIdx],y -; ora (spriteIdx),y -; sta tmp_sprite_data -; -; For multiple sprites, we can chain together the and/ora instructions to stack the sprites -; -; ldal tiledata,x -; and [spriteIdx],y -; ora (spriteIdx),y -; and [spriteIdx+4],y -; ora (spriteIdx+4),y -; and [spriteIdx+8],y -; ora (spriteIdx+8),y -; sta tmp_sprite_data -; -; When the sprites need to be drawn on top of the background, then change the order of operations -; -; lda (spriteIdx),y -; and [spriteIdx+4],y -; ora (spriteIdx+4),y -; and [spriteIdx+8],y -; ora (spriteIdx+8),y -; sta tmp_sprite_data -; andl tiledata+32,x -; oral tiledata,x -; CopyOneSprite clc lda TileStore+TS_VBUFF_ADDR_0,y @@ -770,77 +710,6 @@ _ClearDirtyTiles bne :loop rts -; Helper function to get the address offset into the tile cachce / tile backing store -; X = tile column [0, 40] (41 columns) -; Y = tile row [0, 25] (26 rows) -GetTileStoreOffset ENT - phb - phk - plb - jsr _GetTileStoreOffset - plb - rtl - - -_GetTileStoreOffset - phx ; preserve the registers - phy - - jsr _GetTileStoreOffset0 - - ply - plx - rts - -_GetTileStoreOffset0 - tya - asl - tay - txa - asl - clc - adc TileStoreYTable,y - rts - -; Set a tile value in the tile backing store. Mark dirty if the value changes -; -; A = tile id -; X = tile column [0, 40] (41 columns) -; Y = tile row [0, 25] (26 rows) -; -; Registers are not preserved -_SetTile - pha - jsr _GetTileStoreOffset0 ; Get the address of the X,Y tile position - tax - pla - - cmpl TileStore+TS_TILE_ID,x ; Only set to dirty if the value changed - beq :nochange - - stal TileStore+TS_TILE_ID,x ; Value is different, store it. 
- jsr _GetTileAddr - stal TileStore+TS_TILE_ADDR,x ; Committed to drawing this tile, so get the address of the tile in the tiledata bank for later - - ldal TileStore+TS_TILE_ID,x - and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value - xba - tay - lda DirtyTileProcs,y - stal TileStore+TS_DIRTY_TILE_DISP,x - - ldal TileStore+TS_TILE_ID,x ; Get the non-sprite dispatch address - and #TILE_CTRL_MASK - xba - tay - lda TileProcs,y - stal TileStore+TS_BASE_TILE_DISP,x - -; txa ; Add this tile to the list of dirty tiles to refresh - jmp _PushDirtyTileX ; on the next call to _ApplyTiles - -:nochange rts - ; Append a new dirty tile record ; diff --git a/src/blitter/Tiles00000.s b/src/blitter/Tiles00000.s index 709b54d..854691d 100644 --- a/src/blitter/Tiles00000.s +++ b/src/blitter/Tiles00000.s @@ -36,6 +36,8 @@ _TBSolidTile_VH ; ; This does not increase the FPS by 37% because only a small number of tiles are drawn each frame, but it ; has an impact and can significantly help out when sprites trigger more dirty tile updates than normal. +_TBCopyDataFast + tax _TBCopyData ]line equ 0 lup 8 @@ -47,17 +49,8 @@ _TBCopyData --^ rts -;_TBCopyDataH -;]line equ 0 -; lup 8 -; ldal tiledata+{]line*4}+64,x -; sta: $0004+{]line*$1000},y -; ldal tiledata+{]line*4}+66,x -; sta: $0001+{]line*$1000},y -;]line equ ]line+1 -; --^ -; rts - +_TBCopyDataVFast + tax _TBCopyDataV ]src equ 7 ]dest equ 0 @@ -71,40 +64,3 @@ _TBCopyDataV --^ rts -;_TBCopyDataVH -;]src equ 7 -;]dest equ 0 -; lup 8 -; ldal tiledata+{]src*4}+64,x -; sta: $0004+{]dest*$1000},y -; ldal tiledata+{]src*4}+66,x -; sta: $0001+{]dest*$1000},y -;]src equ ]src-1 -;]dest equ ]dest+1 -; --^ -; rts - -; A simple helper function that fill in all of the opcodes of a tile with the PEA opcode. This is -; a common function since a tile must be explicitly flagged to use a mask, so this routine is used -; quite frequently in a well-designed tile map. 
-_TBFillPEAOpcode - sep #$20 - lda #$F4 - sta: $0000,y - sta: $0003,y - sta $1000,y - sta $1003,y - sta $2000,y - sta $2003,y - sta $3000,y - sta $3003,y - sta $4000,y - sta $4003,y - sta $5000,y - sta $5003,y - sta $6000,y - sta $6003,y - sta $7000,y - sta $7003,y - rep #$20 - rts diff --git a/src/blitter/Tiles10000.s b/src/blitter/Tiles10000.s index 8e761d3..edc5655 100644 --- a/src/blitter/Tiles10000.s +++ b/src/blitter/Tiles10000.s @@ -87,119 +87,6 @@ _TBApplySpriteDataTwo --^ rts -; Copy tile data into the direct page compositing buffer. The main reason to do this in full passes is -; because we can avoid needing to use both the X and Y registers during the compositing process and -; reserve Y to hold the code field address. -; -; Also, we can get away with not setting the bank register, this is a wash in terms of speed, but results -; in simpler, more composable subroutines -_TBCopyTileDataToCBuff -]line equ 0 - lup 8 - ldal tiledata+{]line*4},x - sta blttmp+{]line*4} - - ldal tiledata+{]line*4}+2,x - sta blttmp+{]line*4}+2 -]line equ ]line+1 - --^ - rts - -;_TBCopyTileDataToCBuffH -;]line equ 0 -; lup 8 -; ldal tiledata+{]line*4}+64,x -; sta blttmp+{]line*4} -; -; ldal tiledata+{]line*4}+64+2,x -; sta blttmp+{]line*4}+2 -;]line equ ]line+1 -; --^ -; rts - -_TBCopyTileDataToCBuffV -]src equ 7 -]dest equ 0 - lup 8 - ldal tiledata+{]src*4},x - sta blttmp+{]dest*4} - - ldal tiledata+{]src*4}+2,x - sta blttmp+{]dest*4}+2 -]src equ ]src-1 -]dest equ ]dest+1 - --^ - rts - -;_TBCopyTileDataToCBuffVH -;]src equ 7 -;]dest equ 0 -; lup 8 -; ldal tiledata+{]src*4}+64,x -; sta blttmp+{]dest*4} -; -; ldal tiledata+{]src*4}+64+2,x -; sta blttmp+{]dest*4}+2 -;]src equ ]src-1 -;]dest equ ]dest+1 -; --^ -; rts - - -; Copy tile mask data into the direct page compositing buffer. 
-_TBCopyTileMaskToCBuff -]line equ 0 - lup 8 - ldal tiledata+{]line*4}+32,x - sta blttmp+{]line*4}+32 - - ldal tiledata+{]line*4}+32+2,x - sta blttmp+{]line*4}+32+2 -]line equ ]line+1 - --^ - rts - -;_TBCopyTileMaskToCBuffH -;]line equ 0 -; lup 8 -; ldal tiledata+{]line*4}+32+64,x -; sta blttmp+{]line*4}+32 -; -; ldal tiledata+{]line*4}+32+64+2,x -; sta blttmp+{]line*4}+32+2 -;]line equ ]line+1 -; --^ -; rts - -_TBCopyTileMaskToCBuffV -]src equ 7 -]dest equ 0 - lup 8 - ldal tiledata+{]src*4}+32,x - sta blttmp+{]dest*4}+32 - - ldal tiledata+{]src*4}+32+2,x - sta blttmp+{]dest*4}+32+2 -]src equ ]src-1 -]dest equ ]dest+1 - --^ - rts - -;_TBCopyTileMaskToCBuffVH -;]src equ 7 -;]dest equ 0 -; lup 8 -; ldal tiledata+{]src*4}+32+64,x -; sta blttmp+{]dest*4}+32 -; -; ldal tiledata+{]src*4}+32+64+2,x -; sta blttmp+{]dest*4}+32+2 -;]src equ ]src-1 -;]dest equ ]dest+1 -; --^ -; rts - - ; Copy just the data into the code field from the composite buffer _TBSolidComposite ]line equ 0 diff --git a/src/blitter/TilesBG1.s b/src/blitter/TilesBG1.s index 5345830..9825e11 100644 --- a/src/blitter/TilesBG1.s +++ b/src/blitter/TilesBG1.s @@ -1,4 +1,26 @@ + +; On entry +; +; B is set to the correct BG1 data bank +; A is set to the tile descriptor +; Y is set to the top-left address of the tile in the BG1 data bank +; +; tmp0/tmp1 is reserved +_RenderTileBG1 + pha ; Save the tile descriptor + + and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; Only horizontal and vertical flips are supported for BG1 + xba + tax ; X = flip bits moved to the low byte (0/2/4/6), a word index into :actions + + pla + and #TILE_ID_MASK ; Mask out the ID and save just that + _Mul128 ; multiplied by 128 + jmp (:actions,x) ; Handlers receive the tile data offset in A and start by moving it to X +:actions dw _TBSolidBG1_00,_TBSolidBG1_0H,_TBSolidBG1_V0,_TBSolidBG1_VH + _TBSolidBG1_00 + tax ]line equ 0 lup 8 ldal tiledata+{]line*4},x @@ -10,6 +32,7 @@ _TBSolidBG1_00 rts _TBSolidBG1_0H + tax ]line equ 0 lup 8 ldal tiledata+{]line*4}+64,x @@ -21,6 +44,7 @@ _TBSolidBG1_0H rts _TBSolidBG1_V0 + tax ]src equ 7 ]dest equ 0 lup 8 @@ -34,6 +58,7 @@ _TBSolidBG1_V0 rts
_TBSolidBG1_VH + tax ]src equ 7 ]dest equ 0 lup 8 diff --git a/src/blitter/Vert.s b/src/blitter/Vert.s index 14e4535..c7b7bd6 100644 --- a/src/blitter/Vert.s +++ b/src/blitter/Vert.s @@ -2,28 +2,6 @@ ; of these routines are to adjust tables and patch in new values into the code field ; when the virtual Y-position of the play field changes. - -; SetBG0YPos -; -; Set the virtual position of the primary background layer. -SetBG0YPos ENT - jsr _SetBG0YPos - rtl - -_SetBG0YPos - cmp StartY - beq :out ; Easy, if nothing changed, then nothing changes - - ldx StartY ; Load the old value (but don't save it yet) - sta StartY ; Save the new position - - lda #DIRTY_BIT_BG0_Y - tsb DirtyBits ; Check if the value is already dirty, if so exit - bne :out ; without overwriting the original value - - stx OldStartY ; First change, so preserve the value -:out rts - ; Based on the current value of StartY in the direct page. Set up the dispatch ; information so that the BltRange driver will render the correct code field ; lines in the correct order diff --git a/src/sprites/DirtySpriteProcs.s b/src/sprites/DirtySpriteProcs.s new file mode 100644 index 0000000..65c68e1 --- /dev/null +++ b/src/sprites/DirtySpriteProcs.s @@ -0,0 +1,92 @@ +; Functions to handle rendering sprites into 8x8 tile buffers for dirty tile rendering. Because we +; are rendering directly to the graphics screen instead of the code field, we can map the direct +; page into Bank 01 and use that to avoid writing the merge sprite and tile data to an intermediate +; buffer. + +;DirtyTileSpriteProcs dw _TBDirtySpriteTile_00,_TBDirtySpriteTile_0H,_TBDirtySpriteTile_V0,_TBDirtySpriteTile_VH + +; Optimization Note: The single-sprite blitter seems like it could be made faster by taking advantage of +; the fact that only a single set of sprite data needs to be read, but the extra overhead +; of using the direct page and setting up and restoring registers wipes out the 2 cycle +; per word advantage. 
+; +; A = screen address +; X = address of sprite data +; Y = address of tile data +; B = tile data bank + +_OneDirtySprite_00 +_OneDirtySprite_0H + + phd + sei + clc + tcd + _R0W1 + + _ODS_Line 0,0,$0 + _ODS_Line 1,1,$A0 + tdc + adc #320 + tcd + _ODS_Line 2,2,$0 + _ODS_Line 3,3,$A0 + tdc + adc #320 + tcd + _ODS_Line 4,4,$0 + _ODS_Line 5,5,$A0 + tdc + adc #320 + tcd + _ODS_Line 6,6,$0 + _ODS_Line 7,7,$A0 + + _R0W0 + cli + pld + rts + + +_OneDirtySprite_V0 +_OneDirtySprite_VH + phd + sei + clc + tcd + _R0W1 + + _ODS_Line 0,7,$0 + _ODS_Line 1,6,$A0 + tdc + adc #320 + tcd + _ODS_Line 2,5,$0 + _ODS_Line 3,4,$A0 + tdc + adc #320 + tcd + _ODS_Line 4,3,$0 + _ODS_Line 5,2,$A0 + tdc + adc #320 + tcd + _ODS_Line 6,1,$0 + _ODS_Line 7,0,$A0 + + _R0W0 + cli + pld + rts + + +; Build up from here +_FourDirtySprites + lda TileStore+TS_VBUFF_ADDR_0,y + sta spriteIdx + lda TileStore+TS_VBUFF_ADDR_1,y + sta spriteIdx+4 + lda TileStore+TS_VBUFF_ADDR_2,y + sta spriteIdx+8 + lda TileStore+TS_VBUFF_ADDR_3,y + sta spriteIdx+12 \ No newline at end of file diff --git a/src/sprites/SpriteProcs.s b/src/sprites/SpriteProcs.s new file mode 100644 index 0000000..10d0fc5 --- /dev/null +++ b/src/sprites/SpriteProcs.s @@ -0,0 +1,150 @@ +; Functions to handle rendering sprite information into buffers for updates to the +; code field. Due to lack of parallel structure, the sprites are combined with the +; tile data and then written to a single direct page buffer. 
The data is read from +; this buffer and then applied to the code field + +; Merge a single block of sprite data with a tile +_OneSprite_00 +_OneSprite_H0 + ldx TileStore+TS_VBUFF_ADDR_0,y ; X = offset used to index spritemask/spritedata below + lda TileStore+TS_TILE_ADDR,y + tay ; Y = tile data address (the TileStore offset in Y is no longer needed) + +]line equ 0 + lup 8 + lda tiledata+{]line*TILE_DATA_SPAN},y + andl spritemask+{]line*SPRITE_PLANE_SPAN},x + oral spritedata+{]line*SPRITE_PLANE_SPAN},x + sta tmp_sprite_data+{]line*4} + + lda tiledata+{]line*TILE_DATA_SPAN}+2,y + andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + sta tmp_sprite_data+{]line*4}+2 +]line equ ]line+1 + --^ + rts ; Return here; without it execution falls through into the vertically flipped variant below +_OneSprite_V0 +_OneSprite_VH + ldx TileStore+TS_VBUFF_ADDR_0,y + lda TileStore+TS_TILE_ADDR,y + tay + +]line equ 7 +]dest equ 0 + lup 8 + lda tiledata+{]line*TILE_DATA_SPAN},y + andl spritemask+{]dest*SPRITE_PLANE_SPAN},x + oral spritedata+{]dest*SPRITE_PLANE_SPAN},x + sta tmp_sprite_data+{]dest*4} + + lda tiledata+{]line*TILE_DATA_SPAN}+2,y + andl spritemask+{]dest*SPRITE_PLANE_SPAN}+2,x + oral spritedata+{]dest*SPRITE_PLANE_SPAN}+2,x + sta tmp_sprite_data+{]dest*4}+2 +]line equ ]line-1 +]dest equ ]dest+1 + --^ + rts + + +; Merge two blocks of sprite data.
This is more involved because we need to use the +; direct page pointers to stack the sprite information +_TwoSprite_00 +_TwoSprite_H0 + lda TileStore+TS_VBUFF_ADDR_0,y + sta sprite_0 + lda TileStore+TS_VBUFF_ADDR_1,y + sta sprite_1 + ldx TileStore+TS_TILE_ADDR,y + +; line 0 + lda tiledata+{0*TILE_DATA_SPAN},x + and [sprite_1] + ora (sprite_1) + and [sprite_0] + ora (sprite_0) + sta tmp_sprite_data+{0*4} + + ldy #{0*SPRITE_PLANE_SPAN}+2 + lda tiledata+{0*TILE_DATA_SPAN}+2,x + and [sprite_1],y + ora (sprite_1),y + and [sprite_0],y + ora (sprite_0),y + sta tmp_sprite_data+{0*4}+2 + +; line 1 + ldy #{1*SPRITE_PLANE_SPAN} + lda tiledata+{1*TILE_DATA_SPAN},x + and [sprite_1],y + ora (sprite_1),y + and [sprite_0],y + ora (sprite_0),y + sta tmp_sprite_data+{1*4} + + ldy #{1*SPRITE_PLANE_SPAN}+2 + lda tiledata+{1*TILE_DATA_SPAN}+2,x + and [sprite_1],y + ora (sprite_1),y + and [sprite_0],y + ora (sprite_0),y + sta tmp_sprite_data+{1*4}+2 + + rts + + +; Merge three blocks of sprite data. This is more involved because we need to use the +; direct page pointers to stack the sprite information +_ThreeSprite_00 +_ThreeSprite_H0 + lda TileStore+TS_VBUFF_ADDR_0,y + sta sprite_0 + lda TileStore+TS_VBUFF_ADDR_1,y + sta sprite_1 + lda TileStore+TS_VBUFF_ADDR_2,y + sta sprite_2 + ldx TileStore+TS_TILE_ADDR,y + +; line 0 + lda tiledata+{0*TILE_DATA_SPAN},x + and [sprite_2] + ora (sprite_2) + and [sprite_1] + ora (sprite_1) + and [sprite_0] + ora (sprite_0) + sta tmp_sprite_data+{0*4} + + ldy #{0*SPRITE_PLANE_SPAN}+2 + lda tiledata+{0*TILE_DATA_SPAN}+2,x + and [sprite_2],y + ora (sprite_2),y + and [sprite_1],y + ora (sprite_1),y + and [sprite_0],y + ora (sprite_0),y + sta tmp_sprite_data+{0*4}+2 + +; line 1 + ldy #{1*SPRITE_PLANE_SPAN} + lda tiledata+{1*TILE_DATA_SPAN},x + and [sprite_2],y + ora (sprite_2),y + and [sprite_1],y + ora (sprite_1),y + and [sprite_0],y + ora (sprite_0),y + sta tmp_sprite_data+{1*4} + + ldy #{1*SPRITE_PLANE_SPAN}+2 + lda tiledata+{1*TILE_DATA_SPAN}+2,x + and 
[sprite_2],y + ora (sprite_2),y + and [sprite_1],y + ora (sprite_1),y + and [sprite_0],y + ora (sprite_0),y + sta tmp_sprite_data+{1*4}+2 + + rts \ No newline at end of file diff --git a/src/tiles/DirtyTileProcs.s b/src/tiles/DirtyTileProcs.s new file mode 100644 index 0000000..f3ebe56 --- /dev/null +++ b/src/tiles/DirtyTileProcs.s @@ -0,0 +1,43 @@ +; A collection of tile blitters used in the dirty renderer. These renderers copy data directly +; to the graphics screen. Also, because the dirty renderer assumes that the screen is not moving, +; there is no support for two layer tiles. + +; Address table of the rendering functions +DirtyTileProcs dw _TBDirtyTile_00,_TBDirtyTile_0H,_TBDirtyTile_V0,_TBDirtyTile_VH + +; Normal and horizontally flipped tiles. The horizontal variant is selected by choosing +; an appropriate value for the X register, so these can share the same code. +; +; B = Bank 01 +; X = address of tile data +; Y = screen address +_TBDirtyTile_00 +_TBDirtyTile_0H +]line equ 0 + lup 8 + ldal tiledata+{]line*4},x ; 4 bytes of tile data per row, copied as two words + sta: $0000+{]line*160},y ; destination rows are 160 bytes apart + ldal tiledata+{]line*4}+2,x + sta: $0002+{]line*160},y +]line equ ]line+1 + --^ + rts + +; Vertically flipped tile renderers +; +; B = Bank 01 +; X = address of tile data +; Y = screen address +_TBDirtyTile_V0 +_TBDirtyTile_VH +]line equ 7 +]dest equ 0 + lup 8 + ldal tiledata+{]line*4},x ; source rows are read bottom-up (7..0) + sta: $0000+{]dest*160},y ; while destination rows are written top-down (0..7) + ldal tiledata+{]line*4}+2,x + sta: $0002+{]dest*160},y +]line equ ]line-1 +]dest equ ]dest+1 + --^ + rts \ No newline at end of file diff --git a/src/tiles/FastRenderer.s b/src/tiles/FastRenderer.s new file mode 100644 index 0000000..e405c0b --- /dev/null +++ b/src/tiles/FastRenderer.s @@ -0,0 +1,151 @@ +; If the engine mode has the second background layer disabled, we take advantage of that to +; be more efficient in our rendering. Basically, without the second layer, there is no need +; to use the tile mask information.
+; +; If there are no sprites, then we copy the tile data into the code field as fast as possible. +; If there are sprites, then the sprite data is flattened and stored into a direct page buffer +; and then copied into the code field +_RenderTileFast + ldx TileStore+TS_VBUFF_ADDR_COUNT,y ; How many sprites are on this tile? + beq NoSpritesFast ; This is faster if there are no sprites + + lda TileStore+TS_TILE_ID,y ; Check if the tile has + jmp (fast_dispatch,x) +fast_dispatch + da NoSpritesFast + da OneSpriteFast + da TwoSpritesFast + da ThreeSpritesFast + da FourSpritesFast + +NoSpritesFast + tyx + lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line + pha ; and put on the stack for later. Has addl bank in high byte. + ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field + lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated) + plb ; set the code field bank + jmp (TileStore+TS_BASE_TILE_DISP,x) ; go to the tile copy routine (just basics) + +; The TS_BASE_TILE_DISP routines will come from this table when ENGINE_MODE_TWO_LAYER and +; ENGINE_MODE_DYN_TILES are both off. +FastTileProcs dw _TBCopyDataFast,_TBCopyDataFast,_TBCopyDataVFast,_TBCopyDataVFast + +; Where there are sprites involved, the first step is to call a routine to copy the +; tile data into a temporary buffer. Then the sprite data is merged and placed into +; the code field. +OneSpriteFast + tyx + lda TileStore+TS_TILE_ADDR,y + per :-1 + jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer +: + ldx TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data + lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line + pha ; and put on the stack for later. 
+ lda TileStore+TS_CODE_ADDR_LOW,y + tay + plb ; set the code field bank + +]line equ 0 + lup 8 + lda blttmp+{]line*4} + andl spritemask+{]line*SPRITE_PLANE_SPAN},x + oral spritedata+{]line*SPRITE_PLANE_SPAN},x + sta: $0004+{]line*$1000},y + + lda blttmp+{]line*4}+2 + andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x + oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x + sta: $0001+{]line*$1000},y +]line equ ]line+1 + --^ + rts + +TwoSpritesFast + tyx + lda TileStore+TS_TILE_ADDR,y + per :-1 + jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer +: + lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data + sta spritedata_0 + sta spritemask_0 + lda TileStore+TS_VBUFF_ADDR_1,y ; address of the sprite data + sta spritedata_1 + sta spritemask_1 + + lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line + pha ; and put on the stack for later. + lda TileStore+TS_CODE_ADDR_LOW,y + tay + plb ; set the code field bank + +]line equ 0 + lup 8 + ldy #{]line*SPRITE_PLANE_SPAN} + lda blttmp+{]line*4} + andl [spritemask_1],y + oral [spritedata_1],y + andl [spritemask_0],y + oral [spritedata_0],y + sta: $0004+{]line*$1000},x + + ldy #{]line*SPRITE_PLANE_SPAN}+2 + lda blttmp+{]line*4}+2 + andl [spritemask_1],y + oral [spritedata_1],y + andl [spritemask_0],y + oral [spritedata_0],y + sta: $0001+{]line*$1000},x +]line equ ]line+1 + --^ + rts + +ThreeSpritesFast +FourSpritesFast + tyx + lda TileStore+TS_TILE_ADDR,y + per :-1 + jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer +: + lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data + sta spritedata_0 + sta spritemask_0 + lda TileStore+TS_VBUFF_ADDR_1,y + sta spritedata_1 + sta spritemask_1 + lda TileStore+TS_VBUFF_ADDR_2,y + sta spritedata_2 + sta spritemask_2 + + lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line + pha ; and put on the stack for later. 
+ lda TileStore+TS_CODE_ADDR_LOW,y + tay + plb ; set the code field bank + +]line equ 0 + lup 8 + ldy #{]line*SPRITE_PLANE_SPAN} + lda blttmp+{]line*4} + andl [spritemask_2],y + oral [spritedata_2],y + andl [spritemask_1],y + oral [spritedata_1],y + andl [spritemask_0],y + oral [spritedata_0],y + sta: $0004+{]line*$1000},x + + ldy #{]line*SPRITE_PLANE_SPAN}+2 + lda blttmp+{]line*4}+2 + andl [spritemask_2],y + oral [spritedata_2],y + andl [spritemask_1],y + oral [spritedata_1],y + andl [spritemask_0],y + oral [spritedata_0],y + sta: $0001+{]line*$1000},x +]line equ ]line+1 + --^ + rts \ No newline at end of file diff --git a/src/tiles/TileProcs.s b/src/tiles/TileProcs.s new file mode 100644 index 0000000..a990d1c --- /dev/null +++ b/src/tiles/TileProcs.s @@ -0,0 +1,93 @@ +; A simple helper function that fills in all of the opcodes of a tile with the PEA opcode. This is +; a separate functino because we can often just update the tile data if we know the opcodes are already +; set. When we have to fill the opcodes, this function is used +_TBFillPEAOpcode + sep #$20 + lda #$F4 + sta: $0000,y + sta: $0003,y + sta $1000,y + sta $1003,y + sta $2000,y + sta $2003,y + sta $3000,y + sta $3003,y + sta $4000,y + sta $4003,y + sta $5000,y + sta $5003,y + sta $6000,y + sta $6003,y + sta $7000,y + sta $7003,y + rep #$20 + rts + + +; Copy tile data into the direct page compositing buffer. The main reason to do this in full passes is +; because we can avoid needing to use both the X and Y registers during the compositing process and +; reserve Y to hold the code field address. 
+;
+; Also, we can get away with not setting the bank register, this is a wash in terms of speed, but results
+; in simpler, more composable subroutines
+_TBCopyTileDataAndMaskToCBuff
+            jsr   _TBCopyTileDataToCBuff             ; data words first (X = tile data offset, left unchanged)
+            jmp   _TBCopyTileMaskToCBuff             ; tail call: the mask copier's rts returns to our caller
+
+_TBCopyTileDataAndMaskToCBuffV
+            jsr   _TBCopyTileDataToCBuffV            ; vertically flipped data words first
+            jmp   _TBCopyTileMaskToCBuffV            ; tail call: the mask copier's rts returns to our caller
+
+_TBCopyTileDataToCBuff
+]line       equ   0
+            lup   8
+            ldal  tiledata+{]line*4},x               ; long load, so the data bank register does not matter
+            sta   blttmp+{]line*4}                   ; 4 bytes per line: first word
+
+            ldal  tiledata+{]line*4}+2,x
+            sta   blttmp+{]line*4}+2                 ; second word of the same line
+]line       equ   ]line+1
+            --^
+            rts
+
+_TBCopyTileDataToCBuffV
+]src        equ   7
+]dest       equ   0
+            lup   8
+            ldal  tiledata+{]src*4},x                ; source line counts down 7..0 ...
+            sta   blttmp+{]dest*4}                   ; ... while the buffer line counts up 0..7
+
+            ldal  tiledata+{]src*4}+2,x
+            sta   blttmp+{]dest*4}+2
+]src        equ   ]src-1
+]dest       equ   ]dest+1
+            --^
+            rts
+
+; Copy tile mask data into the direct page compositing buffer.
+_TBCopyTileMaskToCBuff
+]line       equ   0
+            lup   8
+            ldal  tiledata+{]line*4}+32,x            ; mask words are read 32 bytes after the matching data words
+            sta   blttmp+{]line*4}+32                ; and stored 32 bytes into the buffer
+
+            ldal  tiledata+{]line*4}+32+2,x
+            sta   blttmp+{]line*4}+32+2
+]line       equ   ]line+1
+            --^
+            rts
+
+_TBCopyTileMaskToCBuffV
+]src        equ   7
+]dest       equ   0
+            lup   8
+            ldal  tiledata+{]src*4}+32,x             ; mask source line counts down 7..0 ...
+            sta   blttmp+{]dest*4}+32                ; ... while the buffer line counts up 0..7
+
+            ldal  tiledata+{]src*4}+32+2,x
+            sta   blttmp+{]dest*4}+32+2
+]src        equ   ]src-1
+]dest       equ   ]dest+1
+            --^
+            rts
+