diff --git a/_FileInformation.txt b/_FileInformation.txt index 74bd862..07f7fc1 100644 --- a/_FileInformation.txt +++ b/_FileInformation.txt @@ -1,2 +1,4 @@ GTETool.SHK=Type(E0),AuxType(8002),VersionCreate(00),MinVersion(87),Access(E3),FolderInfo1(000000000000000000000000000000000000),FolderInfo2(000000000000000000000000000000000000) Tool160.SHK=Type(E0),AuxType(8002),VersionCreate(00),MinVersion(B8),Access(E3),FolderInfo1(000000000000000000000000000000000000),FolderInfo2(000000000000000000000000000000000000) +CCode.SHK=Type(E0),AuxType(8002),VersionCreate(00),MinVersion(9C),Access(E3),FolderInfo1(000000000000000000000000000000000000),FolderInfo2(000000000000000000000000000000000000) +ChrisV.SHK=Type(E0),AuxType(8002),VersionCreate(00),MinVersion(BC),Access(E3),FolderInfo1(000000000000000000000000000000000000),FolderInfo2(000000000000000000000000000000000000) diff --git a/demos/kfest-2022/demo-5/App.Main.s b/demos/kfest-2022/demo-5/App.Main.s index 2a68015..ea64999 100644 --- a/demos/kfest-2022/demo-5/App.Main.s +++ b/demos/kfest-2022/demo-5/App.Main.s @@ -81,6 +81,8 @@ SpriteCount equ 50 ; Load a tileset + pea #0 + pea #511 pea #^tiledata pea #tiledata _GTELoadTileSet @@ -131,28 +133,31 @@ HERO_FRAME_4 equ HERO_SIZE+151 HERO_VBUFF_4 equ VBUFF_SPRITE_START+3*VBUFF_SPRITE_STEP HERO_SLOT equ 1 - pea HERO_FRAME_1 - pea HERO_VBUFF_1 - _GTECreateSpriteStamp +; pea HERO_FRAME_1 +; pea HERO_VBUFF_1 +; _GTECreateSpriteStamp - pea HERO_FRAME_2 - pea HERO_VBUFF_2 - _GTECreateSpriteStamp +; pea HERO_FRAME_2 +; pea HERO_VBUFF_2 +; _GTECreateSpriteStamp - pea HERO_FRAME_3 - pea HERO_VBUFF_3 - _GTECreateSpriteStamp +; pea HERO_FRAME_3 +; pea HERO_VBUFF_3 +; _GTECreateSpriteStamp - pea HERO_FRAME_4 - pea HERO_VBUFF_4 - _GTECreateSpriteStamp +; pea HERO_FRAME_4 +; pea HERO_VBUFF_4 +; _GTECreateSpriteStamp - pea HERO_SLOT ; Put the player in slot 1 - pea HERO_FLAGS - pea HERO_VBUFF_1 ; and use this stamp - pei PlayerX - pei PlayerY - _GTEAddSprite +; pea HERO_SLOT ; Put the player 
in slot 1 +; pea HERO_FLAGS +; pea HERO_VBUFF_1 ; and use this stamp +; pei PlayerX +; pei PlayerY +; _GTEAddSprite + + pea $0000 + _GTERender EvtLoop pha @@ -202,13 +207,13 @@ do_render jsr ApplyCollisions ; Check if we run into things jsr UpdateCameraPos ; Moves the screen - pea HERO_SLOT - pei PlayerX - pei PlayerY - _GTEMoveSprite ; Move the sprite to this local position +; pea HERO_SLOT +; pei PlayerX +; pei PlayerY +; _GTEMoveSprite ; Move the sprite to this local position - pea $0000 - _GTERender +; pea $0000 +; _GTERender ; Update the performance counters @@ -216,11 +221,11 @@ do_render pha _GTEGetSeconds pla - cmp OldOneSecondCounter - beq :noudt - sta OldOneSecondCounter - jsr UdtOverlay - stz frameCount +; cmp OldOneSecondCounter +; beq :noudt +; sta OldOneSecondCounter +; jsr UdtOverlay +; stz frameCount :noudt brl EvtLoop @@ -468,10 +473,10 @@ ApplyCollisions tax :frame - pea HERO_SLOT - pei LastHFlip - phx - _GTEUpdateSprite +; pea HERO_SLOT +; pei LastHFlip +; phx +; _GTEUpdateSprite rts diff --git a/macros/GTE.Macs.s b/macros/GTE.Macs.s index f217d4f..b24a999 100644 --- a/macros/GTE.Macs.s +++ b/macros/GTE.Macs.s @@ -157,6 +157,13 @@ RENDER_BG1_HORZ_OFFSET equ $0002 RENDER_BG1_VERT_OFFSET equ $0004 RENDER_BG1_ROTATION equ $0008 RENDER_PER_SCANLINE equ $0010 +RENDER_WITH_SHADOWING equ $0020 + +; Overlay flags +OVERLAY_MASKED equ $0000 ; Overlay has a mask, so the background must be drawn first +OVERLAY_SOLID equ $8000 ; Overlay covers the scan line and is fully opaque +OVERLAY_ABOVE equ $0000 ; Overlay is drawn above scanline sprites +OVERLAY_BELOW equ $4000 ; Overlay is drawn below scanline sprites ; GetAddress table IDs scanlineHorzOffset equ $0001 diff --git a/src/Defs.s b/src/Defs.s index 2a2d6bd..cbdbb87 100644 --- a/src/Defs.s +++ b/src/Defs.s @@ -174,6 +174,20 @@ RENDER_BG1_HORZ_OFFSET equ $0002 RENDER_BG1_VERT_OFFSET equ $0004 RENDER_BG1_ROTATION equ $0008 RENDER_PER_SCANLINE equ $0010 +RENDER_WITH_SHADOWING equ $0020 + +; Overlay flags 
+OVERLAY_MASKED equ $0000 ; Overlay has a mask, so the background must be drawn first +OVERLAY_SOLID equ $8000 ; Overlay covers the scan line and is fully opaque +OVERLAY_ABOVE equ $0000 ; Overlay is drawn above scanline sprites +OVERLAY_BELOW equ $4000 ; Overlay is drawn below scanline sprites + +OVERLAY_BASE equ 2 ; 2 bytes for the number of overlays +OVERLAY_REC_SIZE equ 10 ; Size of an overlay record (10 bytes) +OVERLAY_FLAGS equ {OVERLAY_BASE+0} +OVERLAY_TOP equ {OVERLAY_BASE+2} +OVERLAY_BOTTOM equ {OVERLAY_BASE+4} +OVERLAY_PROC equ {OVERLAY_BASE+6} ; DirtyBits definitions DIRTY_BIT_BG0_X equ $0001 @@ -185,7 +199,7 @@ DIRTY_BIT_BG1_REFRESH equ $0020 DIRTY_BIT_SPRITE_ARRAY equ $0040 ; GetAddress table IDs -scanlineHorzOffset equ $0001 ; table of 416 wors, a double-array of scanline offset values. Must be 0, 163 +scanlineHorzOffset equ $0001 ; table of 416 words, a double-array of scanline offset values. Must be 0, 163 ; Script definition YIELD equ $8000 diff --git a/src/Render.s b/src/Render.s index 289fd7f..5ed15af 100644 --- a/src/Render.s +++ b/src/Render.s @@ -8,8 +8,15 @@ ; and internal data structure to properly render the play field. Then the update pipeline is ; executed. ; -; Everything is composited into the tiles in the playfield and then the screen is rendered in -; a single pass. +; There are two major rendering modes: a composited mode and a scanline mode. The composited mode +; will render all of the sprites into the playfield tiles, and then perform a single blit to update +; the entire playfield. The scanline mode utilizes shadowing and blits the background scanlines +; on sprite lines first, then draws the sprites and finally exposes the updated scanlines. +; +; The composited mode has the advantages of being able to render sprites behind tile data as well +; as avoiding most overdraw. 
The scanline mode is able to draw sprites correctly even when scanline +; effects are used on the background and has lower overhead, which can make it faster in some cases, +; even with the additional overdraw. ; ; TODO -- actually check the dirty bits and be selective on what gets updated. For example, if ; only the Y position changes, then we should only need to set new values on the @@ -32,13 +39,6 @@ _Render stz SpriteRemovedFlag ; If we remove a sprite, then we need to flag a rebuild for the next frame -; If we are doing per-scanline rendering, use the alternate renderer - - lda #RENDER_PER_SCANLINE - bit RenderFlags - beq *+5 - jmp _RenderScanlines ; Do the scanline-based renderer - jsr _ApplyBG0YPos ; Set stack addresses for the virtual lines to the physical screen lda #RENDER_BG1_ROTATION @@ -61,6 +61,7 @@ _Render jsr _ApplyTiles ; This function actually draws the new tiles into the code field jsr _ApplyBG0XPos ; Patch the code field instructions with exit BRA opcode + lda #RENDER_BG1_ROTATION bit RenderFlags bne :skip_bg1_x @@ -79,8 +80,8 @@ _Render ; optimization that can be done here is that the lines can be rendered in any order ; since it is not shown on-screen yet. 
- ldx Overlays+2 ; Blit the full virtual buffer to the screen - ldy Overlays+4 + ldx Overlays+OVERLAY_TOP ; Blit the full virtual buffer to the screen + ldy Overlays+OVERLAY_BOTTOM jsr _BltRange ; Turn shadowing back on @@ -89,14 +90,9 @@ _Render ; Now render all of the remaining lines in top-to-bottom (or bottom-to-top) order - ldx #0 - ldy Overlays+2 - beq :skip - jsr _BltRange -:skip jsr _DoOverlay - ldx Overlays+4 + ldx Overlays+OVERLAY_BOTTOM cpx ScreenHeight beq :done ldy ScreenHeight @@ -138,14 +134,14 @@ _Render rts _DoOverlay - lda Overlays+6 + lda Overlays+OVERLAY_PROC stal :disp+1 - lda Overlays+7 + lda Overlays+OVERLAY_PROC+1 stal :disp+2 lda ScreenY0 ; pass the address of the first line of the overlay clc - adc Overlays+2 + adc Overlays+OVERLAY_TOP asl tax lda ScreenAddr,x @@ -312,3 +308,157 @@ _ApplyDirtyTiles stz DirtyTileCount ; Reset the dirty tile count rts +; This rendering mode turns off shadowing and draws all of the relevant background lines and then +; draws sprites on top of the background before turning shadowing on and exposing the lines to the +; screen. Even though entire lines are drawn twice, it's so efficient that it is often faster +; than using all of the logic to draw/erase tiles in the TileBuffer, even though fewer visible words +; are touched. +; +; This mode is also necessary if per-scanline rendering is used since sprites would not look correct +; if each line had independent offsets. +_RenderWithShadowing + sta RenderFlags + jsr _DoTimers ; Run any pending timer tasks + + jsr _ApplyBG0YPos ; Set stack addresses for the virtual lines to the physical screen + jsr _ApplyBG1YPos ; Set the y-register values of the blitter + +; _ApplyBG0XPos needs to be split because we have to set the offsets, then draw in any updated tiles, and +; finally patch out the code field. Right now, the BRA operand is getting overwritten by tile data. + + jsr _ApplyBG0XPosPre + jsr _ApplyBG1XPosPre + + jsr _UpdateBG0TileMap ; and the tile maps. 
These subroutines build up a list of tiles + jsr _UpdateBG1TileMap ; that need to be updated in the code field + + jsr _ApplyTiles ; This function actually draws the new tiles into the code field + + jsr _ApplyBG0XPos ; Patch the code field instructions with exit BRA opcode + jsr _ApplyBG1XPos ; Update the direct page value based on the horizontal position + +; At this point, everything in the background has been rendered into the code field. Next, we need +; to create priority lists of scanline ranges. +; +; The objects that need to be reasoned about are +; +; 1. Sprites +; 2. Overlays +; a. Solid High Priority +; b. Solid Low Priority +; c. Masked High Priority +; d. Masked Low Priority +; 3. Background +; +; Notes: +; +; A High Priority overlay is rendered above the sprites +; A Low Priority overlay is rendered below the sprites +; A Solid High Priority overlay obscures everything and is the only thing drawn on the scanline +; +; The order of draw operations is: +; +; 1. Turn off shadowing +; 2. Draw the background for scanlines with (Sprites OR a Masked Low Priority overlay) AND NOT a Solid Low Priority overlay +; 3. Draw the Solid Low Priority overlays +; 4. Draw the Sprites +; 5. Draw the Masked Low Priority overlays +; 6. Turn on shadowing +; 7. Draw, in top-to-bottom order +; a. Background lines not drawn yet +; b. PEI Slam lines with (Sprites OR a Masked Low Priority Overlay) AND NOT a High Priority overlay +; c. High Priority overlays +; +; The work of this routine is to quickly build a sorted list of scanline ranges that can be handed to the appropriate +; sub-renderer + +; jsr BuildShadowSegments +; +; The trick is to create a bit-field mapping for the different actions to define + + lda Overlays + beq :no_ovrly + + jsr _ShadowOff + +; Shadowing is turned off. Render all of the scan lines that need a second pass. One +; optimization that can be done here is that the lines can be rendered in any order +; since it is not shown on-screen yet. 
+ + ldx Overlays+OVERLAY_TOP ; Blit the full virtual buffer to the screen + ldy Overlays+OVERLAY_BOTTOM + jsr _BltRange + +; Turn shadowing back on + + jsr _ShadowOn + +; Now render all of the remaining lines in top-to-bottom (or bottom-to-top) order + + ldx #0 + ldy Overlays+OVERLAY_TOP + beq :skip + jsr _BltRange +:skip + jsr _DoOverlay + + ldx Overlays+OVERLAY_BOTTOM + cpx ScreenHeight + beq :done + ldy ScreenHeight + jsr _BltRange + bra :done + +:no_ovrly + ldx #0 ; Blit the full virtual buffer to the screen + ldy ScreenHeight + jsr _BltRange +:done + + ldx #0 + ldy ScreenHeight + jsr _BltSCB + + lda StartYMod208 ; Restore the fields back to their original state + ldx ScreenHeight + jsr _RestoreBG0Opcodes + + lda StartY + sta OldStartY + lda StartX + sta OldStartX + + lda BG1StartY + sta OldBG1StartY + lda BG1StartX + sta OldBG1StartX + + stz DirtyBits + stz LastRender ; Mark that a full render was just performed + + lda SpriteRemovedFlag ; If any sprite was removed, set the rebuild flag + beq :no_removal + lda #DIRTY_BIT_SPRITE_ARRAY + sta DirtyBits +:no_removal + rts + +; Look at the overlay list and the sprite list and figure out which scanline ranges need to be +; blitted in what order. We try to build all of the scan line segment lists because that +; saves the work of re-scanning the lists. 
+; +; The segment list definitions are: +; +; BLIT_W_SHADOW_OF +BuildShadowSegments +; ldx _SortedHead +; bmi :no_sprite +;:loop +; lda _Sprites+CLIP_TOP,x +; lda _Sprites+SORTED_NEXT,x +; tax +; bpl :loop +; +; lda #0 ; Start at the top of the + + rts diff --git a/src/Sprite.s b/src/Sprite.s index c76888e..d8501a5 100644 --- a/src/Sprite.s +++ b/src/Sprite.s @@ -223,10 +223,11 @@ _DoPhase1 trb SpriteMap lda #SPRITE_STATUS_EMPTY ; Mark as empty so no error if we try to Add a sprite here again sta _Sprites+SPRITE_STATUS,y - jmp _ClearSpriteFromTileStore ; Clear the tile flags, add to the dirty tile list and done - + tyx + jsr _DeleteSprite ; Remove sprite from linked list + txy ; Restore y-register :hidden - jmp _ClearSpriteFromTileStore + jmp _ClearSpriteFromTileStore ; Clear the tile flags, add to the dirty tile list and done :no_clear @@ -406,7 +407,8 @@ _AddSprite lda _SpriteBits,x ; Get the bit flag for this sprite slot tsb SpriteMap ; Mark it in the sprite map bit field - jmp _PrecalcAllSpriteInfo ; Cache sprite property values + jsr _PrecalcAllSpriteInfo ; Cache sprite property values + jmp _InsertSprite ; Insert it into the sorted list ; _SortSprite ; @@ -419,9 +421,9 @@ _AddSprite ; ; X = current sprite index ; -; The sorting strategy is to +; The sorting strategy is to ; -; a) check if the corrent slot's y-pos is greater than the next item. If yes, then search forward +; a) check if the current slot's y-pos is greater than the next item. If yes, then search forward ; b) check if the current slot's y-pos is less than the prev item. If yes, then search in reverse ; c) sprite is in the correct location ; @@ -1022,7 +1024,7 @@ _RemoveSprite ora #SPRITE_STATUS_REMOVED sta _Sprites+SPRITE_STATUS,x - rts + rts ; The _DeleteSprite call is made in _DoPhase1 during the next render ; Update the sprite's flags. We do not allow the size of a sprite to be changed. That requires ; the sprite to be removed and re-added. 
@@ -1112,4 +1114,4 @@ _MoveSprite sta _Sprites+SPRITE_STATUS,x jsr _PrecalcSpritePos ; Can be specialized to only update (x,y) values - jmp _SortSprite ; UPdate the sprite's sorted position + jmp _SortSprite ; Update the sprite's sorted position diff --git a/src/Tiles.s b/src/Tiles.s index 741720e..6acf6f2 100644 --- a/src/Tiles.s +++ b/src/Tiles.s @@ -445,16 +445,16 @@ _SetTileProcs ; TileProcTables ; ; Tables of tuples used to populate the K_TS_* dispatch arrays for different combinations. This is -; easier to maintain than a bunch of conditional code. Each etry hold three addresses. +; easier to maintain than a bunch of conditional code. Each entry holds three addresses. ; ; First address: Draw a tile directly into the code buffer (no sprites) ; Second address: Draw a tile merged with sprite data from the direct page ; Third address: Specialize routine to draw a tile merged with one sprite ; ; There are unique tuples of routines for all of the different combinations of tile properties -; and engine modes. This is an extesive number of combinations, but it simplified the development -; and maintainence of the rendering subroutines. Also, the difference subroutines can be written -; in any way and can make use of their on subroutines to reduce code size. +; and engine modes. This is an extensive number of combinations, but it simplifies the development +; and maintenance of the rendering subroutines. Also, the different subroutines can be written +; in any way and can make use of their own subroutines to reduce code size. ; ; Properties: ; @@ -506,6 +506,12 @@ DynUnder dw CopyDynamicTile,DynamicUnder,OneSpriteDynamicUnder ; the TILE_SOLID_BIT hint bit can be set to indicate that a tile ; has no transparency. 
This allows one of the faster routines ; to be selected from the other Proc tables +; +; FUTURE: An optimization that can be done is to have the snippets +; code layout fixed based on the EngineFlags and then the Two Layer +; routines should only need to update the DATA and MASK operands in +; the snippet at a fixed location rather than rebuild the ~20 bytes +; of data. TwoLyrProcs TwoLyrOverZA dw Tile0TwoLyr,SpriteOver0TwoLyr,OneSpriteOver0TwoLyr TwoLyrOverZV dw Tile0TwoLyr,SpriteOver0TwoLyr,OneSpriteOver0TwoLyr diff --git a/src/Tool.s b/src/Tool.s index 741f674..2fc7ec1 100644 --- a/src/Tool.s +++ b/src/Tool.s @@ -293,7 +293,15 @@ _TSRender _TSEntry lda :flags,s + bit #RENDER_WITH_SHADOWING + beq :no_shadowing + jsr _RenderWithShadowing + bra :done + +:no_shadowing jsr _Render + +:done _TSExit #0;#2 @@ -696,14 +704,15 @@ _TSSetOverlay lda #1 sta Overlays + stz Overlays+OVERLAY_FLAGS lda :top,s - sta Overlays+2 + sta Overlays+OVERLAY_TOP lda :bottom,s - sta Overlays+4 + sta Overlays+OVERLAY_BOTTOM lda :proc,s - sta Overlays+6 + sta Overlays+OVERLAY_PROC lda :proc+2,s - sta Overlays+8 + sta Overlays+OVERLAY_PROC+2 _TSExit #0;#8 diff --git a/src/static/TileStore.s b/src/static/TileStore.s index 79daea1..3170f7c 100644 --- a/src/static/TileStore.s +++ b/src/static/TileStore.s @@ -391,7 +391,7 @@ Timers ENT ds TIMER_REC_SIZE*MAX_TIMERS Overlays ENT dw 0 ; count - ds 8 ; only support one or now (start_line, end_line, function call) + ds 10 ; only support one for now (flags, start_line, end_line, function call) ; From the IIgs ref DefaultPalette ENT