mirror of
synced 2025-03-02 10:29:01 +00:00
Add per-scanline offset rendering.
This commit is contained in:
@ -104,6 +104,8 @@ extern pascal void GTEClearBG1Buffer(Word value) inline(0x29A0, tool_dispatcher)
/* GTE Misc. Functions */
extern pascal Word GTEReadControl(void) inline(0x09A0, tool_dispatcher);
extern pascal Word GTEGetSeconds(void) inline(0x14A0, tool_dispatcher);
extern pascal Pointer GTEGetAddress(Word tableId) inline(0x2CA0, tool_dispatcher);
extern pascal void GTESetAddress(Word tableId, Pointer pointer) inline(0x2EA0, tool_dispatcher);
/* GTE Timer Functions */
@ -35,6 +35,7 @@ MaxGlobalX equ 16
MaxGlobalY equ 18
MaxBG0X equ 20
MaxBG0Y equ 22
frameCount equ 24
OldOneSecondCounter equ 26
appTmp0 equ 28
@ -74,6 +75,10 @@ appTmp0 equ 28
jsr BG0SetUp
jsr SetLimits
pea #80
pei MaxBG0Y
lda #193 ; Tile ID of '0'
jsr InitOverlay ; Initialize the status bar
@ -82,6 +87,18 @@ appTmp0 equ 28
sta OldOneSecondCounter
jsr UdtOverlay
; Set up the per-scanline rendering
lda #0
jsr InitOffsets
pea #scanlineHorzOffset
pea #^BG0Offsets
pea #BG0Offsets
pea $0000 ; one regular render to fill the screen with the tilemap
; Set up a very specific test. First, we draw a sprite into the sprite plane, and then
; leave it alone. We are just testing the ability to merge sprite plane data into
; the play field tiles.
@ -102,9 +119,11 @@ EvtLoop
bcc *+5
brl :do_render
inc StartX
pei StartX
pei StartY
lda StartX
jsr SetOffsets
; pei StartX
; pei StartY
; _GTESetBG0Origin
brl :do_render
@ -114,9 +133,11 @@ EvtLoop
bne *+5
brl :do_render
dec StartX
pei StartX
pei StartY
lda StartX
jsr SetOffsets
; pei StartX
; pei StartY
; _GTESetBG0Origin
brl :do_render
@ -144,7 +165,7 @@ EvtLoop
pea $0000
; Update the performance counters
@ -234,7 +255,85 @@ SetLimits
frameCount equ 24
and #$00FF
brl _SetOffsets
and #$00FF
sta appTmp0
ldx #0
ldy #80
jsr _InitRange
ldx #80
ldy #80
jsr _InitRange
ldx #160
ldy #48
jsr _InitRange
lda appTmp0
ldx #160
ldy #48
jsr _SetRange
ldx #80
ldy #80
jsr _SetRange
ldx #0
ldy #80
jsr _SetRange
:loop2 lda BG0Offsets,x
and #$FF00
ora 1,s
sta BG0Offsets,x
beq :done
cpx #416
bcc :loop2
and #$00FF
:loop1 sta BG0Offsets,x
sbc #$0100
beq :done
cpx #416
bcc :loop1
BG0Offsets ds 416
PUT ../StartUp.s
PUT ../../shell/Overlay.s
@ -132,6 +132,9 @@ _GTEGetAddress MAC
_GTECompileSpriteStamp MAC
UserTool $2D00+GTEToolNum
_GTESetAddress MAC
UserTool $2E00+GTEToolNum
; EngineMode definitions
; Script definition
@ -251,13 +251,13 @@ EngineReset
bne :loop
; Set the scanline tables to reasonable default values
ldx #{416*2}-2
lda #0
sta StartXMod164Arr,x
bpl :sxm_loop
; ldx #{416*2}-2
; lda #0
; sta StartXMod164Arr,x
; dex
; dex
; bpl :sxm_loop
@ -102,7 +102,9 @@ RenderFlags equ 124 ; Flags passed to the Render() function
BG1Scaling equ 126
activeSpriteList equ 128 ; 32 bytes for the active sprite list (can persist across frames)
; tiletmp equ 178 ; 16 bytes of temp storage for the tile renderers
; Free space from 160 to 192
blttmp equ 192 ; 32 bytes of local cache/scratch space for blitter
tmp8 equ 224 ; another 16 bytes of temporary space to be used as scratch
@ -282,8 +284,8 @@ VBuffHorzTableSelect EXT
ScalingTables EXT
StartXMod164Arr EXT
LastPatchOffsetArr EXT
;StartXMod164Arr EXT
;LastPatchOffsetArr EXT
_SortedHead EXT
_ShadowListCount EXT
@ -294,6 +296,7 @@ _DirectListTop EXT
_DirectListBottom EXT
StartXMod164Tbl EXT
LastOffsetTbl EXT
; Tool error codes
@ -169,7 +169,17 @@ _RenderScanlines
; jsr _ApplyTiles ; This function actually draws the new tiles into the code field
jsr _ScanlineBG0XPos ; Patch the code field instructions with exit BRA opcode
jsr _ApplyScanlineBG0XPos ; Patch the code field instructions with exit BRA opcode
jsr _BuildShadowList ; Create the rages based on the sorted sprite y-values
jsr _ShadowOff ; Turn off shadowing and draw all the scanlines with sprites on them
jsr _DrawShadowList
jsr _DrawDirectSprites ; Draw the sprites directly to the Bank $01 graphics buffer (skipping the render-to-tile step)
jsr _ShadowOn ; Turn shadowing back on
; jsr _DrawComplementList ; Alternate drawing scanlines and PEI slam to expose the full frame
jsr _DrawFinalPass
; jsr _ApplyBG1XPos ; Update the direct page value based on the horizontal position
@ -209,11 +219,11 @@ _RenderScanlines
; jsr _BltRange
; bra :done
ldx #0 ; Blit the full virtual buffer to the screen
ldy ScreenHeight
jsr _BltRange
; ldx #0 ; Blit the full virtual buffer to the screen
; ldy ScreenHeight
; jsr _BltRange
; ldx #0
; ldy ScreenHeight
@ -99,6 +99,7 @@ _CallTable
adrl _TSGetAddress-1
adrl _TSCompileSpriteStamp-1
adrl _TSSetAddress-1
_GTEAddSprite MAC
@ -302,6 +303,12 @@ _TSRender
bra :done
beq :no_scanline
jsr _RenderScanlines
bra :done
jsr _Render
@ -865,27 +872,45 @@ _TSSetBG1Scale
sta BG1Scaling
_TSExit #0;#2
; Pointer GetAddress(tblId)
:output equ FirstParam+0
:tblId equ FirstParam+4
:tblId equ FirstParam+0
:output equ FirstParam+2
lda #0
sta :output,s
sta :output+2,s
lda :value,s
lda :tblId,s
cmp #scanlineHorzOffset
bne :out
lda #StartXMod164Arr
lda StartXMod164Tbl
sta :output,s
lda #^StartXMod164Arr
lda StartXMod164Tbl+2
sta :output+2,s
_TSExit #0;#2
; SetAddress(tblId, Pointer)
:ptr equ FirstParam+0
:tblId equ FirstParam+4
lda :tblId,s
cmp #scanlineHorzOffset
bne :out
lda :ptr,s
sta StartXMod164Tbl
lda :ptr+2,s
sta StartXMod164Tbl+2
_TSExit #0;#6
; CompileSpriteStamp(spriteId, vbuffAddr)
:vbuff equ FirstParam
@ -916,7 +941,7 @@ _TSCompileSpriteStamp
put Sprite2.s
put SpriteRender.s
put Render.s
put blitter/Scanline.s
; put blitter/Scanline.s
put render/Render.s
put render/Fast.s
put render/Slow.s
@ -50,11 +50,7 @@ _RestoreBG0Opcodes
:virt_line_x2 equ tmp1
:lines_left_x2 equ tmp2
:draw_count_x2 equ tmp3
:exit_offset equ tmp4
:stk_save equ tmp5
phb ; Save data bank
sta :virt_line_x2 ; Keep track of it
@ -66,6 +62,14 @@ _RestoreBG0Opcodes
lda LastPatchOffset ; If zero, there are no saved opcodes
sta :exit_offset
:virt_line_x2 equ tmp1
:lines_left_x2 equ tmp2
:draw_count_x2 equ tmp3
:exit_offset equ tmp4
:stk_save equ tmp5
phb ; Save data bank
sta :stk_save
@ -144,18 +148,8 @@ _ApplyBG0XPosPre
:stk_save equ tmp0
:virt_line_x2 equ tmp1
:lines_left_x2 equ tmp2
:draw_count_x2 equ tmp3
:exit_offset equ tmp4
:entry_offset equ tmp5
:exit_bra equ tmp6
:exit_address equ tmp7
:base_address equ tmp8
:opcode equ tmp9
:odd_entry_offset equ tmp10
; If there are saved opcodes that have not been restored, do not run this routine
lda LastPatchOffset
@ -232,6 +226,21 @@ _ApplyBG0XPos
; +-----------+
lda StartXMod164
; Alternate entry point if the virt_line_x2 and lines_left_x2 and XMod164 values are passed in externally
:stk_save equ tmp0
:virt_line_x2 equ tmp1
:lines_left_x2 equ tmp2
:draw_count_x2 equ tmp3
:exit_offset equ tmp4
:entry_offset equ tmp5
:exit_bra equ tmp6
:exit_address equ tmp7
:base_address equ tmp8
:opcode equ tmp9
:odd_entry_offset equ tmp10
bit #$0001
jne :odd_case ; Specialized routines for even/odd cases
@ -368,7 +377,7 @@ _ApplyBG0XPos
sbc #164
lda Col2CodeOffset-1,x
lda Col2CodeOffset-1,x ; Odd offset to get the value in the high byte
and #$FF00
ora #$00AF
sta :opcode
@ -445,361 +454,148 @@ _ApplyBG0XPos
:virt_line_x2 equ tmp1
:lines_left_x2 equ tmp2
:exit_offset equ tmp4
; Avoid local var collisions
:virt_line_pos_x2 equ tmp11
:total_left_x2 equ tmp12
:current_count_x2 equ tmp13
:ptr equ tmp14
sta :virt_line_pos_x2
sta :total_left_x2
lda StartXMod164Tbl
sta :ptr
lda StartXMod164Tbl+2
sta :ptr+2
; Patch out the ranges from the StartXMod164Tbl array starting at the first virtual line
lda [:ptr],y
and #$FF00 ; Determine how many sequential lines to restore
min :total_left_x2 ; Don't draw more than the number of lines that are left to process
sta :current_count_x2 ; Save a copy for later
sta :lines_left_x2 ; Set the parameter
sty :virt_line_x2 ; Set the parameter
lda LastOffsetTbl,y
sta :exit_offset
jsr _RestoreBG0OpcodesAlt
lda :virt_line_pos_x2
adc :current_count_x2
cmp #208*2 ; Do the modulo check in this loop
bcc *+5
sbc #208*2
sta :virt_line_pos_x2
lda :total_left_x2
sbc :current_count_x2
sta :total_left_x2
bne :loop
; This is a variant of the above routine that allows each x-position to be set independently from a table of values. This is
; quite a bit slower than the other routine since we cannot store constant values for each line.
; We still want to perform operation in blocks of 16 to avoid repeatedly setting the data bank register for each line. In
; order to accomplish this, the even/odd cases are split into separate code blocks and the unrolled loop will patch up
; all of the memory locations on each line, rather than doing each patch one at a time. This may actually be more efficient
; since it eliminates several jmp (abs,x) / tax instructions and removed some register reloading.
; This routine operates at a higher level and does not try to be super optimized for the case where every line has a different
; set of parameters. Instead, we optimize for the case where there are a few large ranges of the screen moving at different
; rates, e.g. a fixed status bar area on top, a slow-scrolling area in the middle and a fast area in the foreground.
; The two unrolled loop elements are:
; The table that drives this is dense and has the following format for each word
; Even:
; lda: $0000,x ; Load from X = BTableLow + exit_offset
; sta: OPCODE_SAVE,y ; Save the two byte in another area of the line code
; lda :exit_bra[n]
; sta $0000,x ; Replace the two bytes with a BRA instruction to exit the blitter
; lda :opcode[n]
; sta: CODE_ENTRY_OPCODE,y ; CODE_ENTRY_OPCODE and CODE_ENTRY are adjacent -- could make this a single 16-bit store
; Bits 0 - 7: X mod 164 value
; Bits 8 - 15: Number of scanlines to persist this mod value
; Odd:
; Same as above, plus...
; lda :odd_entry_offset[n] ; [8-bit] Get back into the code after fixing up the odd edge
; sta: ODD_ENTRY,y
; lda: $0001,x ; Save the high word in case the last instruction is PEA and we need to load the top byte
:stk_save equ tmp0
; So, if the first 10 entries has a mod value of 5, they would look like: $0905, $0805, $0705, ... $0105, $0005
; This allows the code to start at an arbitrary location and immediately sync up with the modulo list. It also allows
; the code to easily skip ranges of constant values using the existing _ApplyBG0XPos function as a subroutine.
; Copies of the local variables in _ApplyBG0XPos
:virt_line_x2 equ tmp1
:lines_left_x2 equ tmp2
:draw_count_x2 equ tmp3
:exit_offset equ tmp4
:entry_offset equ tmp5
:exit_bra equ tmp6
:exit_address equ tmp7
:base_address equ tmp8
:opcode equ tmp9
:odd_entry_offset equ tmp10
; If there are saved opcodes that have not been restored, do not run this routine
lda LastPatchOffset
beq :ok
; Avoid local var collision with _ApplyBG0XPos
:virt_line_pos_x2 equ tmp11
:total_left_x2 equ tmp12
:current_count_x2 equ tmp13
:ptr equ tmp14
lda StartXMod164Tbl
sta :ptr
lda StartXMod164Tbl+2
sta :ptr+2
ora :ptr
bne *+3 ; null pointer check
; In this routine, basically every horizontal parameter is based off of the :virt_line_x2 index
lda StartYMod208 ; This is the base line of the virtual screen
sta :virt_line_x2 ; Keep track of it
sta :virt_line_pos_x2
lda ScreenHeight
sta :lines_left_x2
; Sketch out the core structural elements of the loop + bank management
phb ; Save the existing bank
sta :stk_save
sta :total_left_x2
; Patch out the ranges from the StartXMod164Tbl array starting at the first virtual line
ldx :virt_line_x2
and #$001E
eor #$FFFF
adc #2*16 ; 2 * (16 - virt_line % 16). This get us aligned to 16-line boundaries
min :lines_left_x2 ; Make sure we handle cases where lines_left < aligned remainder
sta :draw_count_x2 ; We are drawing this many lines on this iteration starting at _virt_line_x2
ldal BTableHigh,x ; Set the bank
jsr :DoScanlineRange ; Patch in the code field for this range (Bank is set)
lda :draw_count_x2
clc ; advance to the virtual line after the segment we just
adc :virt_line_x2 ; filled in
sta :virt_line_x2
lda :lines_left_x2 ; subtract the number of lines we just completed
sbc :draw_count_x2
sta :lines_left_x2
jne :loop
lda :stk_save
; Run through and build an array of scanline data and place it in temporary zero page space. Need a total of 48 bytes.
; First step, run though and create the tables for the copy routine
lda StartXMod164Tbl,x
bit #$0001
; bne :bsd_odd
lda CodeFieldEvenBRA-2,y ; The exit point comes after the left edge (reverse order due to stack)
sta :exit_bra,x
lda Col2CodeOffset-2,y
sta :exit_offset,x
ldx :virt_line_x2
; First, calculate the exit point
ldal StartXMod164Tbl,x ; Get the origin for this line
bit #$0001
bne :is_odd ; Quickly switch to specialized even/odd routines
; For even offsets, the index is x - 2
; For odd offsets, the index is x - 1
; So, for both we can do (x - 1) & $FFFE = dec / and #$FFFE = lsr / asl + clears the carry
; This is an even-aligned line
; dec ; Move to the previous address for entry (a - 1) % 164
; dec ; Optimization: Could eliminate this with a double-width table for CodeFieldEvenBRA
; bpl *+5
; lda #162
lda CodeFieldEvenBRA-2,y
sta :exit_bra ; Store are exit_offset +
lda Col2CodeOffset-2,y
sta :exit_offset
; tya
; adc ScreenWidth
; cmp #164 ; Keep the value in range
; bcc *+5
; sbc #164
; tay
lda Col2CodeOffset-2-1,y ; -2 for even case , -1 to load value into high byte
and #$FF00
; sta :entry_offset
ora #$004C ; set the entry_jmp opcode to JMP
sta :opcode
; stz :odd_entry_offset ; mark as an even case
ldal BTableLow,x ; Get the address of the code field line
tay ; Save it to use as the base address
adc :exit_offset ; Add some offsets to get the base address in the code field line
lda [:ptr],y
; This is the core even patch loop. The y-register tracks the base address of the starting line. Set the x-register
; based on the per-line exit_offset and everything else references other data
and #$FF00 ; Determine how many sequential lines have this mod value
min :total_left_x2 ; Don't draw more than the number of lines that are left to process
sta :current_count_x2 ; Save a copy for later
; tya
; adc :exit_offset+{]line*2}
; tax
; lda: {]line*$1000},x
; sta: OPCODE_SAVE+{]line*$1000},y
; lda :exit_bra+{]line*2} ; Copy this value into all of the lines
; sta: {]line*$1000},x
; lda :entry_offset+{]line*2} ; Pre-merged with the appropriate opcode + offset
; sta: CODE_ENTRY_OPCODE+{]line*$1000},y
sta :lines_left_x2 ; Set the parameter
sty :virt_line_x2 ; Set the parameter
txa ; Put the X mod 164 value in the accumulator
and #$00FF
jsr _ApplyBG0XPosAlt
bra :prep_complete
; This is an odd-aligned line
dec ; Remove the least-significant byte (must stay positive)
lda CodeFieldOddBRA,y
sta :exit_bra
lda Col2CodeOffset,y
sta :exit_offset
lda :exit_offset ; Get the direct address in the code field that was overwritten
ldy :virt_line_pos_x2
sta LastOffsetTbl,y ; Stash it for use by the per-scanline resotre function
adc ScreenWidth
cmp #164 ; Keep the value in range
bcc *+5
sbc #164
lda Col2CodeOffset,y
sta :entry_offset ; Will be used to load the data
lda Col2CodeOffset-2,y
sta :odd_entry_offset ; will be the actual location to jump to
lda #$00AF ; set the entry_jmp opcode to LDAL
sta :opcode
ldal BTableLow,x ; Get the address of the code field line
tay ; Save it to use as the base address
adc :exit_offset ; Add some offsets to get the base address in the code field line
adc :current_count_x2
cmp #208*2 ; Do the modulo check in this loop
bcc *+5
sbc #208*2
sta :virt_line_pos_x2
; sta :exit_address
; sty :base_address
; ldy :base_address
; ldx :exit_address ; Save from this location (not needed in fast mode)
; SaveOpcode ; X = :exit_address on return
lda: $0000,x
sta: OPCODE_SAVE+$0000,y
; txy ; ldy :exit_address -- starting at this address
; ldx :draw_count_x2 ; Do this many lines
lda :exit_bra ; Copy this value into all of the lines
; SetConst ; All registers are preserved
sta: $0000,x
; Next, patch in the CODE_ENTRY value, which is the low byte of a JMP instruction. This is an
; 8-bit operation and, since the PEA code is bank aligned, we use the entry_offset value directly
sep #$20
lda :entry_offset
; ldy :base_address
; SetCodeEntry ; All registers are preserved
sta: CODE_ENTRY+$0000,y
; Now, patch in the opcode
lda :opcode
; SetCodeEntryOpcode ; All registers are preserved
sta: CODE_ENTRY_OPCODE+$0000,y
; If this is an odd entry, also set the odd_entry low byte and save the operand high byte
lda :odd_entry_offset
jeq :not_odd
; SetOddCodeEntry ; All registers are preserved
sta: ODD_ENTRY+$0000,y
; SaveHighOperand :exit_address ; Only used once, so "inline" it
ldx :exit_address
lda: $0002,x
sta: OPCODE_HIGH_SAVE+$0000,y
rep #$21 ; clear the carry
lda :virt_line_x2 ; advance to the virtual line after
adc :draw_count_x2 ; filled in
sta :virt_line_x2
lda :lines_left_x2 ; subtract the number of lines we just completed
lda :total_left_x2
sbc :draw_count_x2
sta :lines_left_x2
jne :loop
sbc :current_count_x2
sta :total_left_x2
bne :loop
; DoEvenRange
; Does all the core operations for an even range (16-bit accumulator and registers)
; X = number of lines * 2, 0 to 32
; Y = starting line * $1000
; A = code field location * $1000
;
; Structure: a computed jump into an unrolled sequence of per-line patch steps (x16 down
; to x01). The accumulator is doubled, added to the address of dispTbl and written over
; the operand of the `jmp` at `patch`, so the macro modifies its own code on every use.
;
; NOTE(review): the register list above says X holds the line count, but the dispatch math
; below operates on the accumulator and no transfer is visible in this listing -- confirm
; which register actually carries the count on entry.
DoEvenRange mac
asl ; mult the offset by 2 and clear the carry at the same time
adc #dispTbl
; Overwrite the 16-bit operand of the jmp at `patch` (patch+1 skips the opcode byte)
stal patch+1
patch jmp $0000
; Dispatch table: each entry is a 3-byte jmp followed by one pad byte, so entries are 4 bytes
; apart. There are 16 entries (bottom, x01..x15); x16 has no entry of its own and sits
; directly after the table -- presumably index 16 lands on it by falling off the end of the
; table; verify against the caller's range of counts.
dispTbl jmp bottom
db 1
jmp x01
db 1
jmp x02
db 1
jmp x03
db 1
jmp x04
db 1
jmp x05
db 1
jmp x06
db 1
jmp x07
db 1
jmp x08
db 1
jmp x09
db 1
jmp x10
db 1
jmp x11
db 1
jmp x12
db 1
jmp x13
db 1
jmp x14
db 1
jmp x15
db 1
; Line 16 (code field offset $F000): uses slot $1E of the per-line exit_offset / exit_bra /
; entry_offset arrays.
x16 tya
adc :exit_offset+$1E
lda: $F000,x
sta: OPCODE_SAVE+$F000,y
lda :exit_bra+$1E
sta: $F000,x
lda :entry_offset+$1E ; Pre-merged with the appropriate opcode + offset
; Line 15 (code field offset $E000).
; NOTE(review): this step reads :exit_offset+$1E while its other two operands use slot $1C;
; this looks like a copy/paste slip from the x16 step -- confirm whether $1C was intended.
x15 tya
adc :exit_offset+$1E
lda: $E000,x
sta: OPCODE_SAVE+$E000,y
lda :exit_bra+$1C
sta: $E000,x
lda :entry_offset+$1C
; Remaining lines: load the word at offset 2 from X in each line's $1000-spaced block and
; store it to that line's OPCODE_HIGH_SAVE slot.
; NOTE(review): x14 through x11 show only the load with no matching store, unlike x10..x01
; -- confirm whether the stores are missing or these steps are unfinished.
; NOTE(review): only the x01 step uses the `lda:` / `sta:` forms; the colon suffix is
; presumably there to force absolute addressing for the $0002 operand -- confirm against the
; assembler documentation.
x14 lda $D002,x
x13 lda $C002,x
x12 lda $B002,x
x11 lda $A002,x
x10 lda $9002,x
sta OPCODE_HIGH_SAVE+$9000,y
x09 lda $8002,x
sta OPCODE_HIGH_SAVE+$8000,y
x08 lda $7002,x
sta OPCODE_HIGH_SAVE+$7000,y
x07 lda $6002,x
sta OPCODE_HIGH_SAVE+$6000,y
x06 lda $5002,x
sta OPCODE_HIGH_SAVE+$5000,y
x05 lda $4002,x
sta OPCODE_HIGH_SAVE+$4000,y
x04 lda $3002,x
sta OPCODE_HIGH_SAVE+$3000,y
x03 lda $2002,x
sta OPCODE_HIGH_SAVE+$2000,y
x02 lda $1002,x
sta OPCODE_HIGH_SAVE+$1000,y
x01 lda: $0002,x
sta: OPCODE_HIGH_SAVE+$0000,y
; Target of dispatch entry 0 (nothing to do) and the end of the macro body
bottom <<<
; SaveHighOperand
; Save the high byte of the 3-byte code field instruction into the odd handler at the end
@ -108,6 +108,10 @@ TileStoreLookup ENT
Col2CodeOffset ENT
lup 82
dw CODE_TOP+{{81-]step}*PER_TILE_SIZE}
]step equ ]step+1
lup 82 ; Make is a double-length table so we can add the ScreenWidth without testing for wrap-around
dw CODE_TOP+{{81-]step}*PER_TILE_SIZE}
]step equ ]step+1
@ -386,6 +390,10 @@ BG1YOffsetTable ENT
; Per-scanline offsets for BG0
StartXMod164Tbl ENT
dw 0,0
LastOffsetTbl ENT
ds 416
; Other Toolset variables
OneSecondCounter ENT
@ -566,7 +574,7 @@ Scale15 dw $003C,$003C,$003C,$003E,$003E,$003E,$003E,$0040,$0040,$0040,$0040,$
StartXMod164Arr ENT
ds 416*2
LastPatchOffsetArr ENT
ds 416*2
;StartXMod164Arr ENT
; ds 416*2
;LastPatchOffsetArr ENT
; ds 416*2
Reference in New Issue
Block a user