Work checkpoint

This commit is contained in:
Lucas Scharenbroich 2022-04-29 12:38:04 -05:00
parent 4d6f11a9ba
commit 7f6e5d1b1f
20 changed files with 742 additions and 442 deletions

View File

@ -49,9 +49,9 @@ ScreenY equ 2
cmp #'q'
beq :exit
; pei ScreenX
; pei ScreenY
; _GTESetBG0Origin
pei ScreenX
pei ScreenY
_GTESetBG0Origin
; _GTERender
@ -119,4 +119,4 @@ GTEStartUp
MasterId ds 2
UserId ds 2
ToolPath str '1/GTETool'
ToolPath str '1/Tool160'

View File

@ -14,6 +14,6 @@ REM Cadius does not overwrite files, so clear the root folder first
REM Now copy files and folders as needed
%CADIUS% ADDFILE %IMAGE% %FOLDER% .\GTEToolDemo
%CADIUS% ADDFILE %IMAGE% %FOLDER% ..\..\src\GTETool
%CADIUS% ADDFILE %IMAGE% %FOLDER% ..\..\src\Tool160
REM Copy in the image assets

View File

@ -26,4 +26,13 @@ _GTEReadControl MAC
<<<
_GTESetScreenMode MAC
UserTool $A00+GTEToolNum
<<<
<<<
_GTESetTile MAC
UserTool $B00+GTEToolNum
<<<
_GTESetBG0Origin MAC
UserTool $C00+GTEToolNum
<<<
_GTERender MAC
UserTool $D00+GTEToolNum
<<<

View File

@ -182,6 +182,8 @@ TILE_MASK_BIT equ $1000
TILE_DYN_BIT equ $0800
TILE_VFLIP_BIT equ $0400
TILE_HFLIP_BIT equ $0200
TILE_CTRL_MASK equ $FE00
TILE_PROC_MASK equ $F800 ; Select tile proc for rendering
; Sprite constants
SPRITE_HIDE equ $2000
@ -218,7 +220,9 @@ TS_CODE_ADDR_HIGH equ TILE_STORE_SIZE*5
TS_WORD_OFFSET equ TILE_STORE_SIZE*6 ; const value, word offset value for this tile if LDA (dp),y instructions are used
TS_BASE_ADDR equ TILE_STORE_SIZE*7 ; const value, because there are two rows of tiles per bank, this is set to $0000 or $8000.
TS_SCREEN_ADDR equ TILE_STORE_SIZE*8 ; cached value of on-screen location of tile. Used for DirtyRender.
TS_VBUFF_ARRAY_ADDR equ TILE_STORE_SIZE*9 ; const value to an aligned 32-byte array starting at $8000 in TileStore bank
;TS_VBUFF_ARRAY_ADDR equ TILE_STORE_SIZE*9 ; const value to an aligned 32-byte array starting at $8000 in TileStore bank
TS_BASE_TILE_COPY equ TILE_STORE_SIZE*9 ; derived from TS_TILE_ID to optimize tile copy to support sprite rendering
TS_BASE_TILE_DISP equ TILE_STORE_SIZE*10 ; derived from TS_TILE_ID to optimize base (non-sprite) tile dispatch in the Render function
TS_DIRTY_TILE_DISP equ TILE_STORE_SIZE*11 ; derived from TS_TILE_ID to optimize dirty tile dispatch in the Render function

View File

@ -1,7 +1,7 @@
; IIgs Generic Tile Engine User Toolset
TYP $BA ; Tool set file
DSK GTETool
DSK Tool160
XPL
; Main toolbox interface and code

View File

@ -10,15 +10,7 @@
;
; Everything is composited into the tiles in the playfield and then the screen is rendered in
; a single pass.
Render ENT
phb
phk
plb
jsr _Render
plb
rtl
;
; TODO -- actually check the dirty bits and be selective on what gets updated. For example, if
; only the Y position changes, then we should only need to set new values on the
; virtual lines that were brought on screen. If the X position only changes by one
@ -36,13 +28,11 @@ _Render
jsr _ApplyBG0XPosPre
jsr _ApplyBG1XPosPre
nop
jsr _RenderSprites ; Once the BG0 X and Y positions are committed, update sprite data
jsr _UpdateBG0TileMap ; and the tile maps. These subroutines build up a list of tiles
jsr _UpdateBG1TileMap ; that need to be updated in the code field
nop
jsr _ApplyTiles ; This function actually draws the new tiles into the code field
jsr _ApplyBG0XPos ; Patch the code field instructions with exit BRA opcode
@ -108,58 +98,17 @@ _Render
; the sprite subsystem + tile attributes for single-screen games which should be able to run
; close to 60 fps.
;
; Because we are register starved, there is a lot of inline code to quickly fetch the information
; needed to render sprites appropriately. If there was a way to efficiently maintain an ordered
; and compact array of per-tile VBUFF addresses, rather than the current sparse array, then
; the sprite handling code could be significantly streamlined. A note for anyone attempting
; this optimization:
;
; The _MarkDirtyTiles simply stores a sprite's per-tile VBUFF address and marks the tile
; as being occupied by the sprite with just 4 instructions
;
; sta (vbuff_array_ptr),y
; lda TileStore+TS_SPRITE_FLAG,x
; ora SpriteBit,y
; sta TileStore+TS_SPRITE_FLAG,x
;
; Then, we have an unrolled loop that does repeated tests of
;
; lsr
; bcc *+
; lda vbuff_array_ptr,y
; sta spriteVBuffArr
;
; The only gain to be had is if the sprites that are marked are in the high bits and there are no low-index
; sprites. Skipping over N bits of the SPRITE_FLAG takes only 5*N cycles. So, on average, we might waste
; 40 cycles looking for the proper bit.
;
; Any improvement to the existing code would need to be able to maintain a data structure and get the final
; values into the spriteVBuffArr for a total cost of under 75 cycles per tile.
RenderDirty ENT
phb
phk
plb
jsr _RenderDirty
plb
rtl
; In this renderer, we assume that there is no scrolling, so no need to update any information about
; the BG0/BG1 positions
_RenderDirty
lda LastRender ; If the full renderer was last called, we assume that
bne :norecalc ; the scroll positions have likely changed, so recalculate
lda #2 ; blue
jsr _SetBorderColor
jsr _RecalcTileScreenAddrs ; them to make sure sprites draw at the correct screen address
:norecalc
lda #3 ; purple
jsr _SetBorderColor
jsr _RenderSprites
lda #4 ; dk. green
jsr _SetBorderColor
jsr _RenderSprites
jsr _ApplyDirtyTiles
lda #1
sta LastRender
rts
@ -669,35 +618,6 @@ dirty_sprite
stx spriteIdx+6
jmp BlitFourSprites
DirtyTileProcs dw _TBDirtyTile_00,_TBDirtyTile_0H,_TBDirtyTile_V0,_TBDirtyTile_VH
;DirtyTileSpriteProcs dw _TBDirtySpriteTile_00,_TBDirtySpriteTile_0H,_TBDirtySpriteTile_V0,_TBDirtySpriteTile_VH
; Blit tiles directly to the screen.
_TBDirtyTile_00
_TBDirtyTile_0H
]line equ 0
lup 8
ldal tiledata+{]line*4},x
sta: $0000+{]line*160},y
ldal tiledata+{]line*4}+2,x
sta: $0002+{]line*160},y
]line equ ]line+1
--^
rts
_TBDirtyTile_V0
_TBDirtyTile_VH
]src equ 7
]dest equ 0
lup 8
ldal tiledata+{]src*4},x
sta: $0000+{]dest*160},y
ldal tiledata+{]src*4}+2,x
sta: $0002+{]dest*160},y
]src equ ]src-1
]dest equ ]dest+1
--^
rts
; If there are two or more sprites at a tile, we can still be fast, but need to do extra work because
; the VBUFF values need to be read from the direct page. Thus, the direct page cannot be mapped onto

View File

@ -8,9 +8,6 @@
; A = tile descriptor
;
; The address is the TileID * 128 + (HFLIP * 64)
GetTileAddr ENT
jsr _GetTileAddr
rtl
_GetTileAddr
asl ; Multiply by 2
bit #2*TILE_HFLIP_BIT ; Check if the horizontal flip bit is set
@ -36,6 +33,29 @@ _GetBaseTileAddr
rts
; Helper function to get the address offset into the tile cache / tile backing store
;
; X = tile column [0, 40] (41 columns)
; Y = tile row [0, 25] (26 rows)
;
; Returns the offset in A. This entry point saves and restores X and Y around the calculation.
_GetTileStoreOffset
phx ; preserve the registers
phy
jsr _GetTileStoreOffset0
ply
plx
rts
; Register-clobbering variant: A = (X * 2) + TileStoreYTable[Y * 2].
; On exit Y holds row * 2; X is unchanged.
_GetTileStoreOffset0
tya
asl ; row * 2 = index into TileStoreYTable
tay
txa
asl ; column * 2
clc
adc TileStoreYTable,y ; add the table entry for this row
rts
; Initialize the tile storage data structures. This takes care of populating the tile records with the
; appropriate constant values.
InitTiles
@ -83,11 +103,13 @@ InitTiles
; lda TileProcs ; Same for non-dirty, non-sprite base case
; stal TileStore+TS_BASE_TILE_DISP,x
lda :vbuff ; array of sprite vbuff addresses per tile
stal TileStore+TS_VBUFF_ARRAY_ADDR,x
clc
adc #32
sta :vbuff
; *** DEPRECATED ***
; lda :vbuff ; array of sprite vbuff addresses per tile
; stal TileStore+TS_VBUFF_ARRAY_ADDR,x
; clc
; adc #32
; sta :vbuff
; *** ********** ***
; The next set of values are constants that are simply used as cached parameters to avoid needing to
; calculate any of these values during tile rendering
@ -122,3 +144,116 @@ InitTiles
dex
bpl :loop
rts
; Set a tile value in the tile backing store. Mark dirty if the value changes
;
; A = tile id
; X = tile column [0, 40] (41 columns)
; Y = tile row [0, 25] (26 rows)
;
; Registers are not preserved
;
; NOTE(review): in this checkpoint every proc-table lookup and the final _PushDirtyTileX jump are
; commented out. As written, only TS_TILE_ID and TS_TILE_ADDR are updated, the Y values
; computed below are never consumed, and a changed tile is not queued as dirty here.
_SetTile
pha
jsr _GetTileStoreOffset0 ; Get the address of the X,Y tile position
tax
pla
cmpl TileStore+TS_TILE_ID,x ; Only set to dirty if the value changed
beq :nochange
stal TileStore+TS_TILE_ID,x ; Value is different, store it.
jsr _GetTileAddr ; NOTE(review): the store below relies on X surviving this call -- confirm
stal TileStore+TS_TILE_ADDR,x ; Committed to drawing this tile, so get the address of the tile in the tiledata bank for later
; Set the standard renderer procs for this tile.
;
; 1. The dirty render proc is always set the same.
; 2. If BG1 and DYN_TILES are disabled, then the TS_BASE_TILE_DISP is selected from the Fast Renderers, otherwise
; it is selected from the full tile rendering functions.
; 3. The copy process is selected based on the flip bits
;
; When a tile overlaps the sprite, it is the responsibility of the Render function to compose the appropriate
; functionality. Sometimes it is simple, but in cases of the sprites overlapping Dynamic Tiles and other cases
; it can be more involved.
ldal TileStore+TS_TILE_ID,x
and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value
xba ; flip bits are $0400/$0200, so after the byte swap this is 0, 2, 4 or 6
tay ; Y = word index into a 4-entry proc table
; lda DirtyTileProcs,y
; stal TileStore+TS_DIRTY_TILE_DISP,x
; lda CopyTileProcs,y
; stal TileStore+TS_DIRTY_TILE_COPY,x
; NOTE(review): no TS_DIRTY_TILE_COPY constant is visible; the tile store field read by the
; fast renderer is TS_BASE_TILE_COPY -- confirm the intended name before re-enabling.
lda EngineMode
bit #ENGINE_MODE_DYN_TILES+ENGINE_MODE_TWO_LAYER
beq :fast ; neither feature enabled: use the fast procs (Y still holds the flip index)
ldal TileStore+TS_TILE_ID,x ; Get the non-sprite dispatch address
and #TILE_CTRL_MASK
xba ; TILE_CTRL_MASK is $FE00, so this yields an even index in [0, $FE]
tay
; lda TileProcs,y
; stal TileStore+TS_BASE_TILE_DISP,x
bra :out
:fast
; lda FastTileProcs,y
; stal TileStore+TS_BASE_TILE_DISP,x
:out
; txa ; Add this tile to the list of dirty tiles to refresh
; jmp _PushDirtyTileX ; on the next call to _ApplyTiles
:nochange rts
; SetBG0XPos
;
; Set the virtual horizontal position of the primary background layer. In addition to
; updating the direct page state locations, this routine needs to preserve the original
; value as well. This is a bit subtle, because if this routine is called multiple times
; with different values, we need to make sure the *original* value is preserved and not
; continuously overwrite it.
;
; We assume that there is a clean code field in this routine
;
; A = new horizontal position. A and X are clobbered when the position changes.
; SetBG0XPos is the long-call (RTL) wrapper around the near routine _SetBG0XPos.
SetBG0XPos ENT
jsr _SetBG0XPos
rtl
_SetBG0XPos
cmp StartX
beq :out ; Easy, if nothing changed, then nothing changes
ldx StartX ; Load the old value (but don't save it yet)
sta StartX ; Save the new position
lda #DIRTY_BIT_BG0_X
tsb DirtyBits ; Check if the value is already dirty, if so exit (Z reflects the bit's state before it is set)
bne :out ; without overwriting the original value
stx OldStartX ; First change, so preserve the value
:out rts
; SetBG0YPos
;
; Set the virtual position of the primary background layer.
;
; A = new vertical position. A and X are clobbered when the position changes.
; Mirrors _SetBG0XPos: the previous StartY is copied to OldStartY only on the first change
; since DIRTY_BIT_BG0_Y was last clear, so repeated calls keep the original value.
; SetBG0YPos is the long-call (RTL) wrapper around the near routine _SetBG0YPos.
SetBG0YPos ENT
jsr _SetBG0YPos
rtl
_SetBG0YPos
cmp StartY
beq :out ; Easy, if nothing changed, then nothing changes
ldx StartY ; Load the old value (but don't save it yet)
sta StartY ; Save the new position
lda #DIRTY_BIT_BG0_Y
tsb DirtyBits ; Check if the value is already dirty, if so exit (Z reflects the bit's state before it is set)
bne :out ; without overwriting the original value
stx OldStartY ; First change, so preserve the value
:out rts

View File

@ -196,6 +196,14 @@ yTile equ FirstParam+2
xTile equ FirstParam+4
; Tool call body: fetch the three word-sized parameters from the caller's stack frame and
; forward them to _SetTile in the registers it expects (A = tile id, X = column, Y = row).
; _TSExit releases 6 bytes of parameters, i.e. all three words live on the stack.
; NOTE(review): tileId is declared above this view (presumably FirstParam) -- confirm.
_TSEntry
lda xTile,s ; Valid range [0, 40] (41 columns)
tax
lda yTile,s ; Valid range [0, 25] (26 rows)
tay
lda tileId,s ; stack-relative like xTile/yTile; a plain "lda tileId" would read memory at that small address instead
jsr _SetTile
_TSExit #0;#6
; SetBG0Origin(x, y)
@ -204,11 +212,18 @@ yPos equ FirstParam
xPos equ FirstParam+2
_TSEntry
lda xPos,s
jsr _SetBG0XPos
lda yPos,s
jsr _SetBG0YPos
_TSExit #0;#4
; Render()
_TSRender
_TSEntry
; jsr _Render
_TSExit #0;#0
@ -220,6 +235,7 @@ _TSRender
put Timer.s
put Graphics.s
put Tiles.s
; put Render.s
put blitter/BG0.s
put blitter/BG1.s
put blitter/Template.s

View File

@ -1,2 +1 @@
Tool=Type(00),AuxType(0000),VersionCreate(70),MinVersion(BE),Access(E3),FolderInfo1(000000000000000000000000000000000000),FolderInfo2(000000000000000000000000000000000000)
GTETool=Type(BA),AuxType(0000),VersionCreate(70),MinVersion(BE),Access(E3),FolderInfo1(000000000000000000000000000000000000),FolderInfo2(000000000000000000000000000000000000)
Tool160=Type(BA),AuxType(0000),VersionCreate(70),MinVersion(BE),Access(E3),FolderInfo1(000000000000000000000000000000000000),FolderInfo2(000000000000000000000000000000000000)

View File

@ -3,33 +3,6 @@
; when the virtual X-position of the play field changes.
; SetBG0XPos
;
; Set the virtual horizontal position of the primary background layer. In addition to
; updating the direct page state locations, this routine needs to preserve the original
; value as well. This is a bit subtle, because if this routine is called multiple times
; with different values, we need to make sure the *original* value is preserved and not
; continuously overwrite it.
;
; We assume that there is a clean code field in this routine
SetBG0XPos ENT
jsr _SetBG0XPos
rtl
_SetBG0XPos
cmp StartX
beq :out ; Easy, if nothing changed, then nothing changes
ldx StartX ; Load the old value (but don't save it yet)
sta StartX ; Save the new position
lda #DIRTY_BIT_BG0_X
tsb DirtyBits ; Check if the value is already dirty, if so exit
bne :out ; without overwriting the original value
stx OldStartX ; First change, so preserve the value
:out rts
; Simple function that restores the saved opcode that are stashed in _applyBG0Xpos. It is
; very important that opcodes are restored before new ones are inserted, because there is
; only one, fixed storage location and old values will be overwritten if operations are not

View File

@ -38,9 +38,6 @@
; It is simply too slow to try to horizontally reverse the pixel data on the fly. This still allows
; for up to 512 tiles to be stored in a single bank, which should be sufficient.
TILE_CTRL_MASK equ $FE00
TILE_PROC_MASK equ $F800 ; Select tile proc for rendering
; Use some temporary space for the spriteIdx array (maximum of 4 entries)
stkSave equ tmp9
@ -48,29 +45,7 @@ screenAddr equ tmp10
tileAddr equ tmp11
spriteIdx equ tmp12
; On entry
;
; B is set to the correct BG1 data bank
; A is set to the the tile descriptor
; Y is set to the top-left address of the tile in the BG1 data bank
;
; tmp0/tmp1 is reserved
_RenderTileBG1
pha ; Save the tile descriptor
and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; Only horizontal and vertical flips are supported for BG1
xba
tax
ldal :actions,x
stal :tiledisp+1
pla
and #TILE_ID_MASK ; Mask out the ID and save just that
_Mul128 ; multiplied by 128
tax
:tiledisp jmp $0000
:actions dw _TBSolidBG1_00,_TBSolidBG1_0H,_TBSolidBG1_V0,_TBSolidBG1_VH
; Given an address to a Tile Store record, dispatch to the appropriate tile renderer. The Tile
; Store record contains all of the low-level information that's needed to call the renderer.
@ -317,41 +292,6 @@ CopyTwoSprites
--^
; jmp FinishTile
; Copy a single piece of sprite data into a temporary direct page . X = spriteIdx
;
; X register is the offset of the underlying tile data
; Y register is the line offset into the sprite data and mask buffers
; There is a pointer for each sprite on the direct page that can be used
; to access both the data and mask components of a sprite
; The Data Bank reigster points to the sprite data
;
; ldal tiledata,x
; and [spriteIdx],y
; ora (spriteIdx),y
; sta tmp_sprite_data
;
; For multiple sprites, we can chain together the and/ora instructions to stack the sprites
;
; ldal tiledata,x
; and [spriteIdx],y
; ora (spriteIdx),y
; and [spriteIdx+4],y
; ora (spriteIdx+4),y
; and [spriteIdx+8],y
; ora (spriteIdx+8),y
; sta tmp_sprite_data
;
; When the sprites need to be drawn on top of the background, then change the order of operations
;
; lda (spriteIdx),y
; and [spriteIdx+4],y
; ora (spriteIdx+4),y
; and [spriteIdx+8],y
; ora (spriteIdx+8),y
; sta tmp_sprite_data
; andl tiledata+32,x
; oral tiledata,x
;
CopyOneSprite
clc
lda TileStore+TS_VBUFF_ADDR_0,y
@ -770,77 +710,6 @@ _ClearDirtyTiles
bne :loop
rts
; Helper function to get the address offset into the tile cachce / tile backing store
; X = tile column [0, 40] (41 columns)
; Y = tile row [0, 25] (26 rows)
GetTileStoreOffset ENT
phb
phk
plb
jsr _GetTileStoreOffset
plb
rtl
_GetTileStoreOffset
phx ; preserve the registers
phy
jsr _GetTileStoreOffset0
ply
plx
rts
_GetTileStoreOffset0
tya
asl
tay
txa
asl
clc
adc TileStoreYTable,y
rts
; Set a tile value in the tile backing store. Mark dirty if the value changes
;
; A = tile id
; X = tile column [0, 40] (41 columns)
; Y = tile row [0, 25] (26 rows)
;
; Registers are not preserved
_SetTile
pha
jsr _GetTileStoreOffset0 ; Get the address of the X,Y tile position
tax
pla
cmpl TileStore+TS_TILE_ID,x ; Only set to dirty if the value changed
beq :nochange
stal TileStore+TS_TILE_ID,x ; Value is different, store it.
jsr _GetTileAddr
stal TileStore+TS_TILE_ADDR,x ; Committed to drawing this tile, so get the address of the tile in the tiledata bank for later
ldal TileStore+TS_TILE_ID,x
and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; get the lookup value
xba
tay
lda DirtyTileProcs,y
stal TileStore+TS_DIRTY_TILE_DISP,x
ldal TileStore+TS_TILE_ID,x ; Get the non-sprite dispatch address
and #TILE_CTRL_MASK
xba
tay
lda TileProcs,y
stal TileStore+TS_BASE_TILE_DISP,x
; txa ; Add this tile to the list of dirty tiles to refresh
jmp _PushDirtyTileX ; on the next call to _ApplyTiles
:nochange rts
; Append a new dirty tile record
;

View File

@ -36,6 +36,8 @@ _TBSolidTile_VH
;
; This does not increase the FPS by 37% because only a small number of tiles are drawn each frame, but it
; has an impact and can significantly help out when sprites trigger more dirty tile updates than normal.
_TBCopyDataFast
tax
_TBCopyData
]line equ 0
lup 8
@ -47,17 +49,8 @@ _TBCopyData
--^
rts
;_TBCopyDataH
;]line equ 0
; lup 8
; ldal tiledata+{]line*4}+64,x
; sta: $0004+{]line*$1000},y
; ldal tiledata+{]line*4}+66,x
; sta: $0001+{]line*$1000},y
;]line equ ]line+1
; --^
; rts
_TBCopyDataVFast
tax
_TBCopyDataV
]src equ 7
]dest equ 0
@ -71,40 +64,3 @@ _TBCopyDataV
--^
rts
;_TBCopyDataVH
;]src equ 7
;]dest equ 0
; lup 8
; ldal tiledata+{]src*4}+64,x
; sta: $0004+{]dest*$1000},y
; ldal tiledata+{]src*4}+66,x
; sta: $0001+{]dest*$1000},y
;]src equ ]src-1
;]dest equ ]dest+1
; --^
; rts
; A simple helper function that fill in all of the opcodes of a tile with the PEA opcode. This is
; a common function since a tile must be explicitly flagged to use a mask, so this routine is used
; quite frequently in a well-designed tile map.
_TBFillPEAOpcode
sep #$20
lda #$F4
sta: $0000,y
sta: $0003,y
sta $1000,y
sta $1003,y
sta $2000,y
sta $2003,y
sta $3000,y
sta $3003,y
sta $4000,y
sta $4003,y
sta $5000,y
sta $5003,y
sta $6000,y
sta $6003,y
sta $7000,y
sta $7003,y
rep #$20
rts

View File

@ -87,119 +87,6 @@ _TBApplySpriteDataTwo
--^
rts
; Copy tile data into the direct page compositing buffer. The main reason to do this in full passes is
; because we can avoid needing to use both the X and Y registers during the compositing process and
; reserve Y to hold the code field address.
;
; Also, we can get away with not setting the bank register, this is a wash in terms of speed, but results
; in simpler, more composable subroutines
_TBCopyTileDataToCBuff
]line equ 0
lup 8
ldal tiledata+{]line*4},x
sta blttmp+{]line*4}
ldal tiledata+{]line*4}+2,x
sta blttmp+{]line*4}+2
]line equ ]line+1
--^
rts
;_TBCopyTileDataToCBuffH
;]line equ 0
; lup 8
; ldal tiledata+{]line*4}+64,x
; sta blttmp+{]line*4}
;
; ldal tiledata+{]line*4}+64+2,x
; sta blttmp+{]line*4}+2
;]line equ ]line+1
; --^
; rts
_TBCopyTileDataToCBuffV
]src equ 7
]dest equ 0
lup 8
ldal tiledata+{]src*4},x
sta blttmp+{]dest*4}
ldal tiledata+{]src*4}+2,x
sta blttmp+{]dest*4}+2
]src equ ]src-1
]dest equ ]dest+1
--^
rts
;_TBCopyTileDataToCBuffVH
;]src equ 7
;]dest equ 0
; lup 8
; ldal tiledata+{]src*4}+64,x
; sta blttmp+{]dest*4}
;
; ldal tiledata+{]src*4}+64+2,x
; sta blttmp+{]dest*4}+2
;]src equ ]src-1
;]dest equ ]dest+1
; --^
; rts
; Copy tile mask data into the direct page compositing buffer.
_TBCopyTileMaskToCBuff
]line equ 0
lup 8
ldal tiledata+{]line*4}+32,x
sta blttmp+{]line*4}+32
ldal tiledata+{]line*4}+32+2,x
sta blttmp+{]line*4}+32+2
]line equ ]line+1
--^
rts
;_TBCopyTileMaskToCBuffH
;]line equ 0
; lup 8
; ldal tiledata+{]line*4}+32+64,x
; sta blttmp+{]line*4}+32
;
; ldal tiledata+{]line*4}+32+64+2,x
; sta blttmp+{]line*4}+32+2
;]line equ ]line+1
; --^
; rts
_TBCopyTileMaskToCBuffV
]src equ 7
]dest equ 0
lup 8
ldal tiledata+{]src*4}+32,x
sta blttmp+{]dest*4}+32
ldal tiledata+{]src*4}+32+2,x
sta blttmp+{]dest*4}+32+2
]src equ ]src-1
]dest equ ]dest+1
--^
rts
;_TBCopyTileMaskToCBuffVH
;]src equ 7
;]dest equ 0
; lup 8
; ldal tiledata+{]src*4}+32+64,x
; sta blttmp+{]dest*4}+32
;
; ldal tiledata+{]src*4}+32+64+2,x
; sta blttmp+{]dest*4}+32+2
;]src equ ]src-1
;]dest equ ]dest+1
; --^
; rts
; Copy just the data into the code field from the composite buffer
_TBSolidComposite
]line equ 0

View File

@ -1,4 +1,26 @@
; On entry
;
; B is set to the correct BG1 data bank
; A is set to the tile descriptor
; Y is set to the top-left address of the tile in the BG1 data bank
;
; tmp0/tmp1 is reserved
;
; Dispatches to one of the four _TBSolidBG1_* routines with A = tile id * 128; each of those
; routines begins with TAX to move that value into X. X is used for the dispatch index here.
_RenderTileBG1
pha ; Save the tile descriptor
and #TILE_VFLIP_BIT+TILE_HFLIP_BIT ; Only horizontal and vertical flips are supported for BG1
xba ; flip bits are $0400/$0200, so after the byte swap this is 0, 2, 4 or 6
tax ; X = word index into :actions
pla
and #TILE_ID_MASK ; Mask out the ID and save just that
_Mul128 ; multiplied by 128
jmp (:actions,x) ; tail-jump: the selected routine's RTS returns to our caller
:actions dw _TBSolidBG1_00,_TBSolidBG1_0H,_TBSolidBG1_V0,_TBSolidBG1_VH
_TBSolidBG1_00
tax
]line equ 0
lup 8
ldal tiledata+{]line*4},x
@ -10,6 +32,7 @@ _TBSolidBG1_00
rts
_TBSolidBG1_0H
tax
]line equ 0
lup 8
ldal tiledata+{]line*4}+64,x
@ -21,6 +44,7 @@ _TBSolidBG1_0H
rts
_TBSolidBG1_V0
tax
]src equ 7
]dest equ 0
lup 8
@ -34,6 +58,7 @@ _TBSolidBG1_V0
rts
_TBSolidBG1_VH
tax
]src equ 7
]dest equ 0
lup 8

View File

@ -2,28 +2,6 @@
; of these routines are to adjust tables and patch in new values into the code field
; when the virtual Y-position of the play field changes.
; SetBG0YPos
;
; Set the virtual position of the primary background layer.
SetBG0YPos ENT
jsr _SetBG0YPos
rtl
_SetBG0YPos
cmp StartY
beq :out ; Easy, if nothing changed, then nothing changes
ldx StartY ; Load the old value (but don't save it yet)
sta StartY ; Save the new position
lda #DIRTY_BIT_BG0_Y
tsb DirtyBits ; Check if the value is already dirty, if so exit
bne :out ; without overwriting the original value
stx OldStartY ; First change, so preserve the value
:out rts
; Based on the current value of StartY in the direct page. Set up the dispatch
; information so that the BltRange driver will render the correct code field
; lines in the correct order

View File

@ -0,0 +1,92 @@
; Functions to handle rendering sprites into 8x8 tile buffers for dirty tile rendering. Because we
; are rendering directly to the graphics screen instead of the code field, we can map the direct
; page into Bank 01 and use that to avoid writing the merged sprite and tile data to an intermediate
; buffer.
;DirtyTileSpriteProcs dw _TBDirtySpriteTile_00,_TBDirtySpriteTile_0H,_TBDirtySpriteTile_V0,_TBDirtySpriteTile_VH
; Optimization Note: The single-sprite blitter seems like it could be made faster by taking advantage of
; the fact that only a single set of sprite data needs to be read, but the extra overhead
; of using the direct page and setting up and restoring registers wipes out the 2 cycle
; per word advantage.
;
; A = screen address
; X = address of sprite data
; Y = address of tile data
; B = tile data bank
;
; Draws one 8-line tile with a single sprite over it. The direct page register is pointed at
; the screen address and advanced by 320 bytes (two 160-byte screen lines) after every pair of
; lines; the third _ODS_Line argument ($0 / $A0) selects the first or second line of the pair.
; The caller's direct page is saved and restored, and interrupts are disabled while it is moved.
; NOTE(review): _R0W1 / _R0W0 and _ODS_Line are macros defined elsewhere; presumably the first
; two switch bank 0/1 read-write mapping and the arguments of the third are
; (screen line, tile line, offset) -- confirm.
_OneDirtySprite_00
_OneDirtySprite_0H
phd ; save the caller's direct page
sei ; no interrupts while the direct page points at screen memory
clc ; cleared once; the ADCs below assume the macros leave carry clear -- TODO confirm
tcd ; direct page = screen address passed in A
_R0W1
_ODS_Line 0,0,$0
_ODS_Line 1,1,$A0
tdc
adc #320 ; step down two screen lines
tcd
_ODS_Line 2,2,$0
_ODS_Line 3,3,$A0
tdc
adc #320
tcd
_ODS_Line 4,4,$0
_ODS_Line 5,5,$A0
tdc
adc #320
tcd
_ODS_Line 6,6,$0
_ODS_Line 7,7,$A0
_R0W0
cli
pld ; restore the caller's direct page
rts
; Vertically flipped variant of the single-sprite dirty blitter. The sequence is the same as
; the unflipped routine except that the second _ODS_Line argument counts down from 7 to 0
; while the first counts up.
; NOTE(review): presumably the second argument is the tile source line, so the tile is read
; bottom-up while the sprite stays upright -- confirm against the _ODS_Line macro.
_OneDirtySprite_V0
_OneDirtySprite_VH
phd ; save the caller's direct page
sei ; no interrupts while the direct page points at screen memory
clc ; cleared once; the ADCs below assume the macros leave carry clear -- TODO confirm
tcd ; direct page = value passed in A
_R0W1
_ODS_Line 0,7,$0
_ODS_Line 1,6,$A0
tdc
adc #320 ; step down two 160-byte screen lines
tcd
_ODS_Line 2,5,$0
_ODS_Line 3,4,$A0
tdc
adc #320
tcd
_ODS_Line 4,3,$0
_ODS_Line 5,2,$A0
tdc
adc #320
tcd
_ODS_Line 6,1,$0
_ODS_Line 7,0,$A0
_R0W0
cli
pld ; restore the caller's direct page
rts
; Build up from here
;
; NOTE(review): work in progress. The visible code only copies the four sprite VBUFF addresses
; of the tile store record at Y into the spriteIdx slots (spaced 4 bytes apart); no compositing
; code or return instruction follows in the visible source.
_FourDirtySprites
lda TileStore+TS_VBUFF_ADDR_0,y
sta spriteIdx
lda TileStore+TS_VBUFF_ADDR_1,y
sta spriteIdx+4
lda TileStore+TS_VBUFF_ADDR_2,y
sta spriteIdx+8
lda TileStore+TS_VBUFF_ADDR_3,y
sta spriteIdx+12

150
src/sprites/SpriteProcs.s Normal file
View File

@ -0,0 +1,150 @@
; Functions to handle rendering sprite information into buffers for updates to the
; code field. Due to lack of parallel structure, the sprites are combined with the
; tile data and then written to a single direct page buffer. The data is read from
; this buffer and then applied to the code field
; Merge a single block of sprite data with a tile
;
; On entry: Y = offset of the tile store record
; On exit: tmp_sprite_data holds 8 lines of 4 bytes, each word computed as
; (tile AND sprite mask) OR sprite data. A, X and Y are clobbered.
_OneSprite_00
_OneSprite_H0
ldx TileStore+TS_VBUFF_ADDR_0,y ; X = address of the sprite data / mask for this tile
lda TileStore+TS_TILE_ADDR,y
tay ; Y = address of the tile data (the tile store offset is no longer available)
]line equ 0
lup 8
lda tiledata+{]line*TILE_DATA_SPAN},y
andl spritemask+{]line*SPRITE_PLANE_SPAN},x
oral spritedata+{]line*SPRITE_PLANE_SPAN},x
sta tmp_sprite_data+{]line*4}
lda tiledata+{]line*TILE_DATA_SPAN}+2,y
andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x
oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
sta tmp_sprite_data+{]line*4}+2
]line equ ]line+1
--^
rts ; return here: the flipped variant that follows expects the tile store offset in Y and must not be entered by fall-through
; Merge a single block of sprite data with a vertically flipped tile.
;
; On entry: Y = offset of the tile store record
; On exit: tmp_sprite_data holds 8 lines of 4 bytes. Tile lines are read from 7 down to 0
; (]line) while the sprite mask/data and the output are indexed from 0 up to 7 (]dest),
; so only the tile is flipped. A, X and Y are clobbered.
_OneSprite_V0
_OneSprite_VH
ldx TileStore+TS_VBUFF_ADDR_0,y ; X = address of the sprite data / mask for this tile
lda TileStore+TS_TILE_ADDR,y
tay ; Y = address of the tile data
]line equ 7
]dest equ 0
lup 8
lda tiledata+{]line*TILE_DATA_SPAN},y
andl spritemask+{]dest*SPRITE_PLANE_SPAN},x
oral spritedata+{]dest*SPRITE_PLANE_SPAN},x
sta tmp_sprite_data+{]dest*4}
lda tiledata+{]line*TILE_DATA_SPAN}+2,y
andl spritemask+{]dest*SPRITE_PLANE_SPAN}+2,x
oral spritedata+{]dest*SPRITE_PLANE_SPAN}+2,x
sta tmp_sprite_data+{]dest*4}+2
]line equ ]line-1
]dest equ ]dest+1
--^
rts
; Merge two blocks of sprite data. This is more involved because we need to use the
; direct page pointers to stack the sprite information
;
; On entry: Y = offset of the tile store record
; Each output word is ((tile AND [sprite_1]) OR (sprite_1)) AND [sprite_0]) OR (sprite_0),
; so sprite_0 is applied last. The [..] form is direct page indirect long and the (..) form is
; direct page indirect; presumably these reach the mask and data planes respectively -- confirm.
;
; NOTE(review): only tile lines 0 and 1 are merged before the RTS; the other six lines of
; tmp_sprite_data are not written by this routine in this checkpoint.
_TwoSprite_00
_TwoSprite_H0
lda TileStore+TS_VBUFF_ADDR_0,y
sta sprite_0
lda TileStore+TS_VBUFF_ADDR_1,y
sta sprite_1
ldx TileStore+TS_TILE_ADDR,y ; X = address of the tile data; Y is reused as the sprite line offset below
; line 0
lda tiledata+{0*TILE_DATA_SPAN},x
and [sprite_1] ; offset 0, so the un-indexed addressing forms are used
ora (sprite_1)
and [sprite_0]
ora (sprite_0)
sta tmp_sprite_data+{0*4}
ldy #{0*SPRITE_PLANE_SPAN}+2
lda tiledata+{0*TILE_DATA_SPAN}+2,x
and [sprite_1],y
ora (sprite_1),y
and [sprite_0],y
ora (sprite_0),y
sta tmp_sprite_data+{0*4}+2
; line 1
ldy #{1*SPRITE_PLANE_SPAN}
lda tiledata+{1*TILE_DATA_SPAN},x
and [sprite_1],y
ora (sprite_1),y
and [sprite_0],y
ora (sprite_0),y
sta tmp_sprite_data+{1*4}
ldy #{1*SPRITE_PLANE_SPAN}+2
lda tiledata+{1*TILE_DATA_SPAN}+2,x
and [sprite_1],y
ora (sprite_1),y
and [sprite_0],y
ora (sprite_0),y
sta tmp_sprite_data+{1*4}+2
rts
; Merge three blocks of sprite data. This is more involved because we need to use the
; direct page pointers to stack the sprite information
;
; On entry: Y = offset of the tile store record
; The sprites are applied in the order sprite_2, sprite_1, sprite_0, each as an AND through the
; [..] (indirect long) pointer followed by an ORA through the (..) (indirect) pointer.
;
; NOTE(review): only tile lines 0 and 1 are merged before the RTS; the other six lines of
; tmp_sprite_data are not written by this routine in this checkpoint.
_ThreeSprite_00
_ThreeSprite_H0
lda TileStore+TS_VBUFF_ADDR_0,y
sta sprite_0
lda TileStore+TS_VBUFF_ADDR_1,y
sta sprite_1
lda TileStore+TS_VBUFF_ADDR_2,y
sta sprite_2
ldx TileStore+TS_TILE_ADDR,y ; X = address of the tile data; Y is reused as the sprite line offset below
; line 0
lda tiledata+{0*TILE_DATA_SPAN},x
and [sprite_2] ; offset 0, so the un-indexed addressing forms are used
ora (sprite_2)
and [sprite_1]
ora (sprite_1)
and [sprite_0]
ora (sprite_0)
sta tmp_sprite_data+{0*4}
ldy #{0*SPRITE_PLANE_SPAN}+2
lda tiledata+{0*TILE_DATA_SPAN}+2,x
and [sprite_2],y
ora (sprite_2),y
and [sprite_1],y
ora (sprite_1),y
and [sprite_0],y
ora (sprite_0),y
sta tmp_sprite_data+{0*4}+2
; line 1
ldy #{1*SPRITE_PLANE_SPAN}
lda tiledata+{1*TILE_DATA_SPAN},x
and [sprite_2],y
ora (sprite_2),y
and [sprite_1],y
ora (sprite_1),y
and [sprite_0],y
ora (sprite_0),y
sta tmp_sprite_data+{1*4}
ldy #{1*SPRITE_PLANE_SPAN}+2
lda tiledata+{1*TILE_DATA_SPAN}+2,x
and [sprite_2],y
ora (sprite_2),y
and [sprite_1],y
ora (sprite_1),y
and [sprite_0],y
ora (sprite_0),y
sta tmp_sprite_data+{1*4}+2
rts

View File

@ -0,0 +1,43 @@
; A collection of tile blitters used in the dirty renderer. These renderers copy data directly
; to the graphics screen. Also, because the dirty render assumes that the screen is not moving,
; there is no support for two layer tiles.
; Address table of the rendering functions
DirtyTileProcs dw _TBDirtyTile_00,_TBDirtyTile_0H,_TBDirtyTile_V0,_TBDirtyTile_VH
; Normal and horizontally flipped tiles. The horizontal variant is selected by choosing
; an appropriate value for the X register, so these can share the same code.
;
; B = Bank 01
; X = address of tile data
; Y = screen address
;
; Copies 8 lines of 4 bytes each. Source lines are 4 bytes apart in tiledata (read with long
; addressing, so B is free to select the destination); destination lines are 160 bytes apart.
_TBDirtyTile_00
_TBDirtyTile_0H
]line equ 0
lup 8
ldal tiledata+{]line*4},x
sta: $0000+{]line*160},y
ldal tiledata+{]line*4}+2,x
sta: $0002+{]line*160},y
]line equ ]line+1
--^
rts
; Vertically flipped tile renderers
;
; B = Bank 01
; X = address of tile data
; Y = screen address
;
; Same copy as the unflipped routine, but the source line (]line) runs from 7 down to 0 while
; the destination line (]dest) runs from 0 up to 7, so the tile appears upside down on screen.
_TBDirtyTile_V0
_TBDirtyTile_VH
]line equ 7
]dest equ 0
lup 8
ldal tiledata+{]line*4},x
sta: $0000+{]dest*160},y
ldal tiledata+{]line*4}+2,x
sta: $0002+{]dest*160},y
]line equ ]line-1
]dest equ ]dest+1
--^
rts

151
src/tiles/FastRenderer.s Normal file
View File

@ -0,0 +1,151 @@
; If the engine mode has the second background layer disabled, we take advantage of that to
; be more efficient in our rendering. Basically, without the second layer, there is no need
; to use the tile mask information.
;
; If there are no sprites, then we copy the tile data into the code field as fast as possible.
; If there are sprites, then the sprite data is flattened and stored into a direct page buffer
; and then copied into the code field
; Entry point of the fast renderer.
;
; On entry: Y = offset of the tile store record
; Dispatches through fast_dispatch using TS_VBUFF_ADDR_COUNT as the table index.
; NOTE(review): the index is used unscaled, so the stored count is presumably already a
; multiple of 2 (0, 2, 4, ...) -- confirm where TS_VBUFF_ADDR_COUNT is written.
_RenderTileFast
ldx TileStore+TS_VBUFF_ADDR_COUNT,y ; How many sprites are on this tile?
beq NoSpritesFast ; This is faster if there are no sprites
lda TileStore+TS_TILE_ID,y ; Load the tile id. NOTE(review): every handler reloads A before using it, so this value looks unused
jmp (fast_dispatch,x)
fast_dispatch
da NoSpritesFast
da OneSpriteFast
da TwoSpritesFast
da ThreeSpritesFast
da FourSpritesFast
; No sprites on this tile: hand the tile straight to its base dispatch routine.
;
; On entry: Y = offset of the tile store record
; On dispatch: A = tile data address, Y = code field address, X = tile store offset,
; B = code field bank.
; NOTE(review): PHA pushes the accumulator at its current width while PLB pulls a single byte;
; if A is 16 bits here one byte stays on the stack ("Has addl bank in high byte") and the
; dispatched routine must remove it before its RTS -- confirm.
NoSpritesFast
tyx
lda TileStore+TS_CODE_ADDR_HIGH,x ; load the bank of the target code field line
pha ; and put on the stack for later. Has addl bank in high byte.
ldy TileStore+TS_CODE_ADDR_LOW,x ; load the address of the code field
lda TileStore+TS_TILE_ADDR,x ; load the address of this tile's data (pre-calculated)
plb ; set the code field bank
jmp (TileStore+TS_BASE_TILE_DISP,x) ; go to the tile copy routine (just basics)
; The TS_BASE_TILE_DISP routines will come from this table when ENGINE_MODE_TWO_LAYER and
; ENGINE_MODE_DYN_TILES are both off.
FastTileProcs dw _TBCopyDataFast,_TBCopyDataFast,_TBCopyDataVFast,_TBCopyDataVFast
; Where there are sprites involved, the first step is to call a routine to copy the
; tile data into a temporary buffer. Then the sprite data is merged and placed into
; the code field.
;
; On entry: Y = offset of the tile store record
; The tile is first copied to blttmp through the TS_BASE_TILE_COPY vector, then each word is
; written to the code field as (blttmp AND sprite mask) OR sprite data. Within each code field
; line (lines are $1000 apart) the first word goes to offset 4 and the second to offset 1.
;
; NOTE(review): the copy routine is entered with A = tile address and X = tile store offset,
; and must leave Y intact -- confirm the TS_BASE_TILE_COPY procs match that.
; NOTE(review): PHA pushes the accumulator at its current width while PLB pulls one byte;
; confirm the stack is balanced before the RTS.
OneSpriteFast
tyx
lda TileStore+TS_TILE_ADDR,y
per :-1 ; push (return address - 1) so the copy routine's RTS resumes at the label below
jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer
:
ldx TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data
lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line
pha ; and put on the stack for later.
lda TileStore+TS_CODE_ADDR_LOW,y
tay ; Y = code field address, used by the stores below
plb ; set the code field bank
]line equ 0
lup 8
lda blttmp+{]line*4}
andl spritemask+{]line*SPRITE_PLANE_SPAN},x
oral spritedata+{]line*SPRITE_PLANE_SPAN},x
sta: $0004+{]line*$1000},y
lda blttmp+{]line*4}+2
andl spritemask+{]line*SPRITE_PLANE_SPAN}+2,x
oral spritedata+{]line*SPRITE_PLANE_SPAN}+2,x
sta: $0001+{]line*$1000},y
]line equ ]line+1
--^
rts
; Two sprites on this tile: copy the tile to blttmp, then merge both sprites through direct
; page pointers while writing into the code field.
;
; On entry: Y = offset of the tile store record
; Each word is ((blttmp AND mask_1) OR data_1) AND mask_0) OR data_0, so sprite 0 is applied last.
; Within each code field line (lines are $1000 apart) the first word goes to offset 4 and the
; second to offset 1.
;
; Y is reloaded with the sprite line offset on every iteration, so the code field address is
; kept in X, which is the register the stores index with.
;
; NOTE(review): the copy routine is entered with A = tile address and X = tile store offset,
; and must leave Y intact -- confirm the TS_BASE_TILE_COPY procs match that.
; NOTE(review): PHA pushes the accumulator at its current width while PLB pulls one byte;
; confirm the stack is balanced before the RTS.
TwoSpritesFast
tyx
lda TileStore+TS_TILE_ADDR,y
per :-1 ; push (return address - 1) so the copy routine's RTS resumes at the label below
jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer
:
lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data
sta spritedata_0
sta spritemask_0
lda TileStore+TS_VBUFF_ADDR_1,y ; address of the sprite data
sta spritedata_1
sta spritemask_1
lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line
pha ; and put on the stack for later.
lda TileStore+TS_CODE_ADDR_LOW,y
tax ; X = code field address (Y is overwritten by the line offsets below)
plb ; set the code field bank
]line equ 0
lup 8
ldy #{]line*SPRITE_PLANE_SPAN}
lda blttmp+{]line*4}
andl [spritemask_1],y
oral [spritedata_1],y
andl [spritemask_0],y
oral [spritedata_0],y
sta: $0004+{]line*$1000},x
ldy #{]line*SPRITE_PLANE_SPAN}+2
lda blttmp+{]line*4}+2
andl [spritemask_1],y
oral [spritedata_1],y
andl [spritemask_0],y
oral [spritedata_0],y
sta: $0001+{]line*$1000},x
]line equ ]line+1
--^
rts
; Three (or four) sprites on this tile: copy the tile to blttmp, then merge the sprites through
; direct page pointers while writing into the code field.
;
; On entry: Y = offset of the tile store record
; Sprites are applied in the order 2, 1, 0, each as AND mask followed by OR data.
; Within each code field line (lines are $1000 apart) the first word goes to offset 4 and the
; second to offset 1.
;
; Y is reloaded with the sprite line offset on every iteration, so the code field address is
; kept in X, which is the register the stores index with.
;
; NOTE(review): FourSpritesFast shares this body, which reads only TS_VBUFF_ADDR_0..2; a fourth
; sprite is not composited.
; NOTE(review): the copy routine is entered with A = tile address and X = tile store offset,
; and must leave Y intact -- confirm the TS_BASE_TILE_COPY procs match that.
; NOTE(review): PHA pushes the accumulator at its current width while PLB pulls one byte;
; confirm the stack is balanced before the RTS.
ThreeSpritesFast
FourSpritesFast
tyx
lda TileStore+TS_TILE_ADDR,y
per :-1 ; push (return address - 1) so the copy routine's RTS resumes at the label below
jmp (TileStore+TS_BASE_TILE_COPY,x) ; Copy the tile data to the temporary buffer
:
lda TileStore+TS_VBUFF_ADDR_0,y ; address of the sprite data
sta spritedata_0
sta spritemask_0
lda TileStore+TS_VBUFF_ADDR_1,y
sta spritedata_1
sta spritemask_1
lda TileStore+TS_VBUFF_ADDR_2,y
sta spritedata_2
sta spritemask_2
lda TileStore+TS_CODE_ADDR_HIGH,y ; load the bank of the target code field line
pha ; and put on the stack for later.
lda TileStore+TS_CODE_ADDR_LOW,y
tax ; X = code field address (Y is overwritten by the line offsets below)
plb ; set the code field bank
]line equ 0
lup 8
ldy #{]line*SPRITE_PLANE_SPAN}
lda blttmp+{]line*4}
andl [spritemask_2],y
oral [spritedata_2],y
andl [spritemask_1],y
oral [spritedata_1],y
andl [spritemask_0],y
oral [spritedata_0],y
sta: $0004+{]line*$1000},x
ldy #{]line*SPRITE_PLANE_SPAN}+2
lda blttmp+{]line*4}+2
andl [spritemask_2],y
oral [spritedata_2],y
andl [spritemask_1],y
oral [spritedata_1],y
andl [spritemask_0],y
oral [spritedata_0],y
sta: $0001+{]line*$1000},x
]line equ ]line+1
--^
rts

93
src/tiles/TileProcs.s Normal file
View File

@ -0,0 +1,93 @@
; A simple helper function that fills in all of the opcodes of a tile with the PEA opcode. This is
; a separate function because we can often just update the tile data if we know the opcodes are already
; set. When we have to fill the opcodes, this function is used
;
; On entry: Y = address of the tile's first code field line; B = code field bank
; Writes the byte $F4 (PEA) at offsets 0 and 3 of each of the 8 lines, which are $1000 apart.
; The accumulator is switched to 8 bits for the stores and back to 16 bits before returning;
; its previous contents are lost.
_TBFillPEAOpcode
sep #$20 ; 8-bit accumulator so only the opcode byte is written
lda #$F4
sta: $0000,y
sta: $0003,y
sta $1000,y
sta $1003,y
sta $2000,y
sta $2003,y
sta $3000,y
sta $3003,y
sta $4000,y
sta $4003,y
sta $5000,y
sta $5003,y
sta $6000,y
sta $6003,y
sta $7000,y
sta $7003,y
rep #$20 ; back to a 16-bit accumulator
rts
; Copy tile data into the direct page compositing buffer. The main reason to do this in full passes is
; because we can avoid needing to use both the X and Y registers during the compositing process and
; reserve Y to hold the code field address.
;
; Also, we can get away with not setting the bank register, this is a wash in terms of speed, but results
; in simpler, more composable subroutines
;
; X = address of tile data. These two wrappers copy the data and then tail-jump to the mask
; copy, so the mask routine's RTS returns to the original caller. The V variant uses the
; vertically flipped copies.
_TBCopyTileDataAndMaskToCBuff
jsr _TBCopyTileDataToCBuff
jmp _TBCopyTileMaskToCBuff
_TBCopyTileDataAndMaskToCBuffV
jsr _TBCopyTileDataToCBuffV
jmp _TBCopyTileMaskToCBuffV
; Copy the 8 lines (4 bytes each) of tile data at tiledata+X into blttmp+0..31.
; X = address of tile data. A is clobbered; X and Y are untouched.
_TBCopyTileDataToCBuff
]line equ 0
lup 8
ldal tiledata+{]line*4},x
sta blttmp+{]line*4}
ldal tiledata+{]line*4}+2,x
sta blttmp+{]line*4}+2
]line equ ]line+1
--^
rts
; Vertically flipped copy of the tile data: source line 7 goes to blttmp line 0, and so on
; down to source line 0 in blttmp line 7.
; X = address of tile data. A is clobbered; X and Y are untouched.
_TBCopyTileDataToCBuffV
]src equ 7
]dest equ 0
lup 8
ldal tiledata+{]src*4},x
sta blttmp+{]dest*4}
ldal tiledata+{]src*4}+2,x
sta blttmp+{]dest*4}+2
]src equ ]src-1
]dest equ ]dest+1
--^
rts
; Copy tile mask data into the direct page compositing buffer.
;
; The mask is read from 32 bytes past the tile data (tiledata+32+X) and stored 32 bytes into
; blttmp, directly after the 32 bytes of tile data.
; X = address of tile data. A is clobbered; X and Y are untouched.
_TBCopyTileMaskToCBuff
]line equ 0
lup 8
ldal tiledata+{]line*4}+32,x
sta blttmp+{]line*4}+32
ldal tiledata+{]line*4}+32+2,x
sta blttmp+{]line*4}+32+2
]line equ ]line+1
--^
rts
; Vertically flipped copy of the tile mask: source mask line 7 goes to blttmp+32 line 0, and so
; on down to source line 0 in line 7.
; X = address of tile data. A is clobbered; X and Y are untouched.
_TBCopyTileMaskToCBuffV
]src equ 7
]dest equ 0
lup 8
ldal tiledata+{]src*4}+32,x
sta blttmp+{]dest*4}+32
ldal tiledata+{]src*4}+32+2,x
sta blttmp+{]dest*4}+32+2
]src equ ]src-1
]dest equ ]dest+1
--^
rts