diff --git a/demos/kfest-2022/demo-5/App.Main.s b/demos/kfest-2022/demo-5/App.Main.s index 53b2cd7..6a33765 100644 --- a/demos/kfest-2022/demo-5/App.Main.s +++ b/demos/kfest-2022/demo-5/App.Main.s @@ -62,8 +62,8 @@ SpriteCount equ 50 lda #ENGINE_MODE_USER_TOOL ; Engine in Fast Mode jsr GTEStartUp ; Load and install the GTE User Tool - jsr SoundStartUp - jsr StartMusic +; jsr SoundStartUp +; jsr StartMusic ; Initialize local variables @@ -183,7 +183,7 @@ HERO_SLOT equ 1 pei PlayerY _GTEAddSprite - pea $0000 + pea #RENDER_WITH_SHADOWING _GTERender EvtLoop @@ -272,7 +272,7 @@ do_render ; Exit code Exit - jsr SoundShutDown +; jsr SoundShutDown _GTEShutDown Quit _QuitGS qtRec diff --git a/macros/CORE.MACS.S b/macros/CORE.MACS.S index 29fe055..e939015 100644 --- a/macros/CORE.MACS.S +++ b/macros/CORE.MACS.S @@ -3,7 +3,7 @@ **************************************** _Err mac bcc NoErr - do ]0 ; (DO if true) + do ]0 ; (DO if true) Mu jsr PgmDeath ; this is conditionally compiled if str ]1 ; we pass in an error statement else ; (ELSE) diff --git a/src/Defs.s b/src/Defs.s index cbdbb87..884fcda 100644 --- a/src/Defs.s +++ b/src/Defs.s @@ -290,5 +290,13 @@ StartXMod164Arr EXT LastPatchOffsetArr EXT _SortedHead EXT +_ShadowListCount EXT +_ShadowListTop EXT +_ShadowListBottom EXT +_DirectListCount EXT +_DirectListTop EXT +_DirectListBottom EXT + + ; Tool error codes NO_TIMERS_AVAILABLE equ 10 diff --git a/src/Render.s b/src/Render.s index 5ed15af..fbcf56b 100644 --- a/src/Render.s +++ b/src/Render.s @@ -314,7 +314,7 @@ _ApplyDirtyTiles ; than using all of the logic to draw/erase tiles in the TileBuffer, even though less visible words ; are touched. ; -; This mode is also necessary if per-scanling rendering it used since spritge would not look correct +; This mode is also necessary if per-scanline rendering is used since sprites would not look correct ; if each line had independent offsets.
_RenderWithShadowing sta RenderFlags @@ -330,7 +330,7 @@ _RenderWithShadowing jsr _ApplyBG1XPosPre jsr _UpdateBG0TileMap ; and the tile maps. These subroutines build up a list of tiles - jsr _UpdateBG1TileMap ; that need to be updated in the code field +; jsr _UpdateBG1TileMap ; that need to be updated in the code field jsr _ApplyTiles ; This function actually draws the new tiles into the code field @@ -339,6 +339,17 @@ _RenderWithShadowing ; At this point, everything in the background has been rendered into the code field. Next, we need ; to create priority lists of scanline ranges. + + jsr _BuildShadowList ; Create the ranges based on the sorted sprite y-values + jsr _ComplementList ; Create the complement to identify non-sprite scanlines + + jsr _ShadowOff ; Turn off shadowing and draw all the scanlines with sprites on them + jsr _DrawShadowList + jsr _DrawDirectSprites ; Draw the sprites directly to the Bank $01 graphics buffer (skipping the render-to-tile step) + + jsr _ShadowOn ; Turn shadowing back on + jsr _DrawComplementList ; Alternate drawing scanlines and PEI slam to expose the full frame + ; ; The objects that need to be reasoned about are ; @@ -376,48 +387,50 @@ _RenderWithShadowing ; ; The trick is to create a bit-field mapping for the different actions to define - lda Overlays - beq :no_ovrly - - jsr _ShadowOff +; lda Overlays +; beq :no_ovrly +; +; jsr _ShadowOff ; Shadowing is turned off. Render all of the scan lines that need a second pass. One ; optimization that can be done here is that the lines can be rendered in any order ; since it is not shown on-screen yet.
- ldx Overlays+OVERLAY_TOP ; Blit the full virtual buffer to the screen - ldy Overlays+OVERLAY_BOTTOM - jsr _BltRange +; ldx Overlays+OVERLAY_TOP ; Blit the full virtual buffer to the screen +; ldy Overlays+OVERLAY_BOTTOM +; jsr _BltRange ; Turn shadowing back on - jsr _ShadowOn +; jsr _ShadowOn ; Now render all of the remaining lines in top-to-bottom (or bottom-to-top) order - ldx #0 - ldy Overlays+OVERLAY_TOP - beq :skip - jsr _BltRange -:skip - jsr _DoOverlay +; ldx #0 +; ldy Overlays+OVERLAY_TOP +; beq :skip +; jsr _BltRange +;:skip +; jsr _DoOverlay - ldx Overlays+OVERLAY_BOTTOM - cpx ScreenHeight - beq :done - ldy ScreenHeight - jsr _BltRange - bra :done +; ldx Overlays+OVERLAY_BOTTOM +; cpx ScreenHeight +; beq :done +; ldy ScreenHeight +; jsr _BltRange +; bra :done -:no_ovrly - ldx #0 ; Blit the full virtual buffer to the screen - ldy ScreenHeight - jsr _BltRange -:done +;:no_ovrly - ldx #0 - ldy ScreenHeight - jsr _BltSCB +; ldx #0 ; Blit the full virtual buffer to the screen +; ldy ScreenHeight +; jsr _BltRange + +;:done + + ; ldx #0 + ; ldy ScreenHeight + ; jsr _BltSCB lda StartYMod208 ; Restore the fields back to their original state ldx ScreenHeight @@ -443,7 +456,7 @@ _RenderWithShadowing :no_removal rts -; Look at the overlay list and the sprite list and figure out which scaneline ranges need to be +; Look at the overlay list and the sprite list and figure out which scanline ranges need to be ; blitted in what order. We try to build all of the scan line segments lists because that ; saves the work of re-scanning the lists. ; @@ -462,3 +475,186 @@ BuildShadowSegments ; lda #0 ; Start at the top of the rts + +; Function to iterate through the sprite list and build a merged scanline list of sprites. Once this is +; done, we re-scan the list to build the complement for scanlines that do not need shadowing. 
+_BuildShadowList
+; Out: _ShadowListTop / _ShadowListBottom hold the merged ranges, _ShadowListCount = 2 * number of ranges (0 if no sprites)
+                ldy   #0                                ; This is the index into the list of shadow segments
+
+                ldx   _SortedHead                       ; X = first sprite record of the sorted list
+                bmi   :empty                            ; A negative head means the list is empty
+                bra   :insert
+
+; Start of loop: the next sprite starts below the current segment, so open a new segment
+:advance
+                iny
+                iny
+
+:insert
+                lda   _Sprites+SPRITE_CLIP_TOP,x        ; Load the sprite's top line
+                sta   _ShadowListTop,y                  ; Set the top entry of the list to the sprite top
+
+                lda   _Sprites+SPRITE_CLIP_BOTTOM,x     ; Optimistically set the end of the segment to the bottom of this sprite
+                inc                                     ; Segment bottoms are stored as SPRITE_CLIP_BOTTOM + 1
+
+:replace
+                sta   _ShadowListBottom,y
+:skip
+                lda   _Sprites+SORTED_NEXT,x            ; Is there another sprite in the list?
+                bmi   :no_more_sprites                  ; If not, we can finish up
+
+                tax                                     ; X = next sprite record
+                lda   _ShadowListBottom,y               ; If the bottom of the current segment is _less than_ the top of the next
+                cmp   _Sprites+SPRITE_CLIP_TOP,x        ; sprite, then there is a gap and we create a new entry
+                bcc   :advance
+
+                lda   _Sprites+SPRITE_CLIP_BOTTOM,x     ; Get the bottom value of the next sprite.
+                inc
+                cmp   _ShadowListBottom,y               ; If it extends the segment then replace the value, otherwise skip
+                bcc   :skip
+                bra   :replace
+
+:no_more_sprites
+                iny                                     ; Set the list count to N * 2
+                iny
+:empty
+                sty   _ShadowListCount
+                rts
+
+; Run through the shadow list and make a complementary list, e.g.
+; [[0, 7], [12, 19]] -> [[7, 12], [19, end]]
+; [[2, 10], [20, 40]] -> [[0, 2], [10, 20], [40, end]]
+; Out: _DirectListTop / _DirectListBottom, _DirectListCount = 2 * number of ranges; "end" is ScreenHeight
+_ComplementList
+                ldy   #0                                ; Y = index into the shadow list
+                tyx                                     ; X = index into the direct list
+
+                lda   _ShadowListCount
+                beq   :empty_list                       ; No shadow ranges: the whole screen is one direct range
+
+                lda   _ShadowListTop
+                beq   :loop                             ; First shadow range starts at line 0, so there is no leading direct range
+
+                stz   _DirectListTop                    ; Leading direct range runs from line 0 to the first shadow top
+                sta   _DirectListBottom
+
+                inx
+                inx
+
+:loop
+                lda   _ShadowListBottom,y               ; A direct range starts where a shadow range ends
+                sta   _DirectListTop,x
+
+                iny                                     ; Move to the next shadow list record
+                iny
+                cpy   _ShadowListCount                  ; Are there any other segments to process
+                bcs   :eol
+
+                lda   _ShadowListTop,y
+                sta   _DirectListBottom,x               ; Finish the direct list entry
+
+                inx
+                inx
+                bra   :loop
+
+:eol
+                lda   ScreenHeight                      ; NOTE(review): if the last shadow bottom equals ScreenHeight this closing range is empty -- confirm consumers tolerate top == bottom
+                sta   _DirectListBottom,x
+
+                inx                                     ; Set the count to N * 2
+                inx
+                stx   _DirectListCount
+                rts
+
+:empty_list
+                lda   #2                                ; One range, so the count is 1 * 2 like every other path (was 1)
+                sta   _DirectListCount
+                stz   _DirectListTop                    ; The single range covers line 0 ...
+                lda   ScreenHeight
+                sta   _DirectListBottom                 ; ... up to ScreenHeight
+                rts
+
+; Iterate through the shadow list and call _BltRange on each
+_DrawShadowList
+                ldx   #0
+                bra   :start                            ; Test the count first so an empty list draws nothing
+
+:loop
+                phx                                     ; Save the index
+                lda   _ShadowListTop,x
+                ldy   _ShadowListBottom,x               ; Y = bottom of the range
+                tax                                     ; X = top of the range
+                jsr   _BltRange
+
+                plx
+                inx
+                inx
+:start
+                cpx   _ShadowListCount
+                bcc   :loop
+
+                rts
+
+; Run through the sorted sprite list and draw each entry directly to the graphics screen. We can use compiled sprites here, with limitations.
+; NOTE(review): no IS_OFFSCREEN test is made in this loop; presumably off-screen sprites never appear in the sorted list -- confirm.
+_DrawDirectSprites
+                ldx   _SortedHead
+                bmi   :empty
+
+:loop
+                phx                                     ; Keep the sprite index across the draw call
+                jsr   _DrawStampToScreen
+                plx
+
+                lda   _Sprites+SORTED_NEXT,x            ; Is there another sprite in the list?
+                tax
+                bpl   :loop
+
+:empty
+                rts
+
+; Run through the complement list and alternate between calling _PEISlam and _BltRange to show the full screen
+_DrawComplementList
+; In: _DirectListTop / _DirectListBottom / _DirectListCount as filled in by _ComplementList
+                ldx   #0
+
+                lda   _DirectListCount                  ; Skip empty lists
+                beq   :out
+
+                lda   _DirectListTop                    ; If the first segment starts at 0, begin with _BltRange
+                beq   :blt_range
+
+                lda   #0                                ; Otherwise slam from line 0 down to the first segment
+                bra   :pei_first
+
+:blt_range
+                phx
+                lda   _DirectListTop,x
+                ldy   _DirectListBottom,x               ; Y = bottom of the range
+                tax                                     ; X = top of the range
+                jsr   _BltRange
+                plx
+
+                lda   _DirectListBottom,x               ; Grab a copy of the bottom of the blit range
+                inx
+                inx                                     ; Advance to the next entry
+                cpx   _DirectListCount
+                bcs   :last                             ; Done, so check if there is any remaining part of the screen to slam
+
+:pei_first
+                phx
+                ldy   _DirectListTop,x                  ; Y = top of the next direct segment, where the slam stops
+                tax                                     ; X = first line to slam (A holds the previous bottom, or 0)
+                jsr   _PEISlam
+                plx
+                bra   :blt_range
+
+:last
+                cmp   ScreenHeight                      ; If the bottom on the last segment didn't come to the bottom of the
+                bcs   :out                              ; screen, then expose that range
+                tax
+                ldy   ScreenHeight
+                jsr   _PEISlam
+:out
+                rts
\ No newline at end of file
diff --git a/src/Sprite.s b/src/Sprite.s
index aca2037..db02ce5 100644
--- a/src/Sprite.s
+++ b/src/Sprite.s
@@ -904,7 +904,7 @@ _CacheSpriteBanks
                 rts
-; Precalculate some cached values for a sprite. These are *only* to make other part of code,
+; Precalculate some cached values for a sprite. These are *only* to make other parts of code,
 ; specifically the draw/erase routines more efficient.
;
; X = sprite index
diff --git a/src/SpriteRender.s b/src/SpriteRender.s
index 2f001a1..c0a5d37 100644
--- a/src/SpriteRender.s
+++ b/src/SpriteRender.s
@@ -1,3 +1,105 @@
+; Draw one sprite stamp straight onto the graphics screen in bank $01. No clipping / bounds checking is performed.
+; tmp0 is used as scratch and A, X and Y are overwritten; the data bank is set to $01 for the copy and restored on exit.
+; X = sprite record index
+_DrawStampToScreen
+                clc
+                lda   _Sprites+SPRITE_Y,x
+                adc   ScreenY0                          ; A = sprite line + ScreenY0
+                asl
+                asl
+                asl
+                asl
+                asl
+                sta   tmp0                              ; tmp0 = line * 32
+                asl
+                asl                                     ; A = line * 128
+                clc
+                adc   tmp0                              ; A = line * 160
+                clc
+                adc   #$2000                            ; presumably the base of the graphics screen within the bank -- confirm
+                clc
+                adc   ScreenX0
+                adc   _Sprites+SPRITE_X,x               ; Move to the horizontal address
+                tay                                     ; This is the on-screen address
+
+                lda   _Sprites+SPRITE_HEIGHT,x
+                sta   tmp0                              ; tmp0 = number of rows still to draw
+
+; Sprite is either 8 or 16 pixels wide, so select the entry point
+                lda   _Sprites+SPRITE_WIDTH,x
+                cmp   #4                                ; A width value of 4 takes the two-words-per-row path
+                beq   :narrow
+
+                lda   _Sprites+SPRITE_DISP,x            ; This is the VBUFF address with the correct sprite frame
+                tax                                     ; X = stamp offset from here on (the sprite index is gone)
+                phb
+                pea   $0101                             ; Two $01 bytes; the second plb leaves the data bank at $01
+                plb
+                plb
+                bra   :row16
+:next_row16
+                clc
+                txa
+                adc   #SPRITE_PLANE_SPAN                ; Step the stamp source to its next row
+                tax
+                tya
+                adc   #SHR_LINE_WIDTH                   ; Step the destination to the next screen line
+                tay
+:row16
+                lda:  6,y                               ; For each of the 4 words: screen AND mask, OR data, write back
+                andl  spritemask+6,x
+                oral  spritedata+6,x
+                sta:  6,y
+                lda:  4,y
+                andl  spritemask+4,x
+                oral  spritedata+4,x
+                sta:  4,y
+                lda:  2,y
+                andl  spritemask+2,x
+                oral  spritedata+2,x
+                sta:  2,y
+                lda:  0,y
+                andl  spritemask+0,x
+                oral  spritedata+0,x
+                sta:  0,y
+
+                dec   tmp0                              ; One row done
+                bne   :next_row16
+
+                plb                                     ; Restore the caller's data bank
+                rts
+
+:narrow
+                lda   _Sprites+SPRITE_DISP,x            ; This is the VBUFF address with the correct sprite frame
+                tax                                     ; X = stamp offset from here on
+                phb
+                pea   $0101                             ; Same bank switch as the wide path
+                plb
+                plb
+                bra   :row8
+:next_row8
+                clc
+                txa
+                adc   #SPRITE_PLANE_SPAN                ; Step the stamp source to its next row
+                tax
+                tya
+                adc   #SHR_LINE_WIDTH                   ; Step the destination to the next screen line
+                tay
+:row8
+                lda:  2,y                               ; Two words per row: screen AND mask, OR data, write back
+                andl  spritemask+2,x
+                oral  spritedata+2,x
+                sta:  2,y
+                lda:  0,y
+                andl  spritemask+0,x
+                oral  spritedata+0,x
+                sta:  0,y
+
+                dec   tmp0                              ; One row done
+                bne   :next_row8
+
+                plb                                     ; Restore the caller's data bank
+                rts
 ; Alternate entry point that takes arguments in registers instead of using a _Sprite
 ; record
 ;
diff --git a/src/Tool.s b/src/Tool.s
index 0530b7b..76fc809 100644
--- a/src/Tool.s
+++ b/src/Tool.s
@@ -880,3 +880,4 @@ _TSGetAddress
                 put   blitter/Template.s
                 put   blitter/TemplateUtils.s
                 put   blitter/Blitter.s
+                put   blitter/PEISlammer.s
diff --git a/src/blitter/PEISlammer.s b/src/blitter/PEISlammer.s
index 888335d..a01a437 100644
--- a/src/blitter/PEISlammer.s
+++ b/src/blitter/PEISlammer.s
@@ -6,13 +6,20 @@
 ; slammer, note that page-aligned addresses repeat every 8 scan lines and some lines would need
 ; to be split into two slams to keep the direct page aligned.
 ;
-; At best, this saves 1 cycles per word, or 80 cycles for a full screen -- which is only about
+; At best, this saves 1 cycle per word, or 80 cycles for a full scanline -- which is only about
 ; 12 additional instructions, so this is an optimization that is unlikely to lead to a net
 ; improvement.
 ;
 ; X = first line (inclusive), valid range of 0 to 199
 ; Y = last line (exclusive), valid range >X up to 200
 _PEISlam
+                cpx   #201
+                bcc   *+4
+                brk   $A9
+                cpy   #201
+                bcc   *+4
+                brk   $A8
+
                 lda   ScreenWidth
                 dec
                 stal  :screen_width_1                   ; save the width-1 outside of the direct page
@@ -31,6 +38,11 @@ _PEISlam
                 tay                                     ; get the number of lines in the y register
                 txa
+                clc
+                adc   ScreenY0                          ; Adjust for the origin
+                cmp   #201
+                bcc   *+4
+                brk   $A7
                 asl
                 tax
                 lda   RTable,x                          ; This is the right visible byte, so add one to get the
diff --git a/src/blitter/Template.s b/src/blitter/Template.s
index 427249e..2c1c335 100644
--- a/src/blitter/Template.s
+++ b/src/blitter/Template.s
@@ -232,6 +232,21 @@ epilogue_1 tsc
 ; its passed state, because having the carry bit clear prevents evaluation of
 ; the V bit.
 ;
+; The snippet code is fixed to allow fast updates and the entry point is chosen based on the
+; EngineMode during initializations
+;
+; _snippet
+; lda (00),y ; Entry point for mixed dyn & bg1. Set opcode below.
+; and 80,x
+; lda (00),y ; Entry point for BG1 or Dyn. Change opcode.
+; and #MASK +; ora #DATA +; _2 bcs _alt +; pha +; jmp NEXT +; _alt jmp RTN +; + ; Snippet Samples: ; ; Standard Two-level Mix (23 bytes) diff --git a/src/static/TileStore.s b/src/static/TileStore.s index 3170f7c..5951097 100644 --- a/src/static/TileStore.s +++ b/src/static/TileStore.s @@ -508,7 +508,23 @@ _SpriteBitsNot ENT ; are stored here. A negative value is used as a setinel _SortedHead ENT dw $FFFF -;_SortedTail dw $FFFF + +; Array of screen ranges covered by the sprites. Adjacent sprites are merged. Used in the shadowing renderer +_ShadowListCount ENT + ds 2 +_ShadowListTop ENT + ds {2*{MAX_SPRITES+1}} ; space for all of the sprites + overlay range +_ShadowListBottom ENT + ds {2*{MAX_SPRITES+1}} + +; Complement of the Shadow List. Can have one more segment than that list +_DirectListCount ENT + ds 2 +_DirectListTop ENT + ds {2*{MAX_SPRITES+2}} +_DirectListBottom ENT + ds {2*{MAX_SPRITES+2}} + ; Steps to the different sprite stamps _stamp_step ENT diff --git a/src/static/TileStoreDefs.s b/src/static/TileStoreDefs.s index 356e355..6de5d4b 100644 --- a/src/static/TileStoreDefs.s +++ b/src/static/TileStoreDefs.s @@ -12,7 +12,6 @@ TS_TILE_ADDR equ {TILE_STORE_SIZE*3} ; cached value, the address TS_CODE_ADDR_LOW equ {TILE_STORE_SIZE*4} ; const value, address of this tile in the code fields TS_CODE_ADDR_HIGH equ {TILE_STORE_SIZE*5} TS_WORD_OFFSET equ {TILE_STORE_SIZE*6} ; const value, word offset value for this tile if LDA (dp),y instructions re used -;TS_BASE_ADDR equ {TILE_STORE_SIZE*7} ; const value, because there are two rows of tiles per bank, this is set to $0000 or $8000. TS_JMP_ADDR equ {TILE_STORE_SIZE*7} ; const value, address of the 32-byte snippet space for this tile TS_SCREEN_ADDR equ {TILE_STORE_SIZE*8} ; cached value of on-screen location of tile. Used for DirtyRender. 
@@ -62,7 +61,6 @@ SPRITE_HEIGHT equ {MAX_SPRITES*28} SPRITE_CLIP_WIDTH equ {MAX_SPRITES*30} SPRITE_CLIP_HEIGHT equ {MAX_SPRITES*32} TS_VBUFF_BASE equ {MAX_SPRITES*34} ; Finalized VBUFF address based on the sprite position and tile offsets -;VBUFF_ARRAY_ADDR equ {MAX_SPRITES*36} ; Fixed address where this sprite's VBUFF addresses are stores. The array is the same shape as TileStore, but much smaller SORTED_PREV equ {MAX_SPRITES*36} SORTED_NEXT equ {MAX_SPRITES*38}