Get the PEI Slammer working for exposing shadowed writes

This commit is contained in:
Lucas Scharenbroich 2021-07-19 22:42:51 -05:00
parent d9e3ee14e8
commit c5eb84ea37
8 changed files with 322 additions and 129 deletions

View File

@ -15,7 +15,8 @@
"debug": "%npm_package_config_crossrunner% src\\GTETestApp -Source src\\GTETestApp_S02_MAINSEG_Output.txt -Debug -CompatibilityLayer", "debug": "%npm_package_config_crossrunner% src\\GTETestApp -Source src\\GTETestApp_S02_MAINSEG_Output.txt -Debug -CompatibilityLayer",
"build": "%npm_package_config_merlin32% -V %npm_package_config_macros% src\\App.s", "build": "%npm_package_config_merlin32% -V %npm_package_config_macros% src\\App.s",
"build:watch": "watch \"npm run build\" src", "build:watch": "watch \"npm run build\" src",
"build:assets": "node ./tools/pngtoiigs.js ./assets/donut-plains-2-8-color.png ./emu/bg1a.bin --start-index 6 && node ./tools/pngtoiigs.js ./assets/donut-plains-2-8-color-shift.png ./emu/bg1b.bin --start-index 6 && node ./tools/pngtoiigs.js ./assets/donut-plains-1-6-color.png ./emu/fg1.bin" "build:assets-smw": "node ./tools/pngtoiigs.js ./assets/donut-plains-2-8-color.png ./emu/bg1a.bin --start-index 6 && node ./tools/pngtoiigs.js ./assets/donut-plains-2-8-color-shift.png ./emu/bg1b.bin --start-index 6 && node ./tools/pngtoiigs.js ./assets/donut-plains-1-6-color.png ./emu/fg1.bin",
"build:assets-fatdog": "node ./tools/pngtoiigs.js ./assets/armada-7-color.png ./emu/bg1a.bin --start-index 8 && node ./tools/pngtoiigs.js ./assets/armada-7-color-shift.png ./emu/bg1b.bin --start-index 8 && node ./tools/pngtoiigs.js ./assets/armada-7-color-shuffle.png ./emu/fg1.bin --start-index 1"
}, },
"repository": { "repository": {
"type": "git", "type": "git",

View File

@ -63,9 +63,11 @@ Demo
lda OneSecondCounter lda OneSecondCounter
sta oldOneSecondCounter sta oldOneSecondCounter
stz frameCount stz frameCount
:loop :loop
lda #1 lda #1
jsr MoveLeft jsr MoveLeft
inc frameCount inc frameCount
ldal KBD_STROBE_REG ldal KBD_STROBE_REG
@ -74,6 +76,8 @@ Demo
and #$007F and #$007F
cmp #'s' cmp #'s'
bne :nokey bne :nokey
pla
rts rts
:nokey :nokey
@ -109,3 +113,23 @@ FPSStr str 'FPS'

View File

@ -42,7 +42,7 @@ SHR_PALETTES equ $E19E00
tiledata ext tiledata ext
; Feature flags ; Feature flags
NO_INTERRUPTS equ 0 ; turn off for crossrunner debugging NO_INTERRUPTS equ 1 ; turn off for crossrunner debugging
; Typical init ; Typical init
@ -1284,6 +1284,8 @@ qtRec adrl $0000

View File

@ -61,9 +61,6 @@ Render
; byte, then we may have to change the CODE_ENTRY values or restore/set new OPCODE ; byte, then we may have to change the CODE_ENTRY values or restore/set new OPCODE
; values, but not both. ; values, but not both.
jsr ShadowOff
jsr ShadowOn
; It's important to do _ApplyBG0YPos first because it calculates the value of StartY % 208 which is ; It's important to do _ApplyBG0YPos first because it calculates the value of StartY % 208 which is
; used in all of the other loops ; used in all of the other loops
@ -72,7 +69,21 @@ Render
jsr _ApplyBG1YPos ; Adjust the index values into the BG1 bank buffer jsr _ApplyBG1YPos ; Adjust the index values into the BG1 bank buffer
jsr _ApplyBG1XPos ; Adjust the direct page pointers to the BG1 bank jsr _ApplyBG1XPos ; Adjust the direct page pointers to the BG1 bank
; The code fields are locked in now and ready to be rendered
jsr ShadowOff
ldx #0 ; Blit the full virtual buffer to the screen ldx #0 ; Blit the full virtual buffer to the screen
ldy #8
jsr _BltRange
jsr ShadowOn
ldx #0 ; Expose the top 8 rows
ldy #8
jsr _PEISlam
ldx #8 ; Blit the full virtual buffer to the screen
ldy ScreenHeight ldy ScreenHeight
jsr _BltRange jsr _BltRange
@ -90,8 +101,3 @@ Render

View File

@ -62,22 +62,61 @@ _ApplyBG1XPos
lda BlitterDP ; blitter direct page space and fill in the addresses lda BlitterDP ; blitter direct page space and fill in the addresses
tcd tcd
tya
ldx #162 ldx #162
:loop :loop
tya
clc
adc affine,x
sta 00,x ; store the value sta 00,x ; store the value
dec dey
dec dey
bpl *+6 bpl :nowrap
tya
clc clc
adc #164 adc #164
tay
:nowrap
dex dex
dex dex
bpl :loop bpl :loop
pld pld
rts rts
; Per-column offset table: 82 word entries (164 bytes), indexed by the same even
; X values (0..162) that _ApplyBG1XPos uses when it adds affine,x to each address.
affine      ds    164

; SetAffine
;
; Fill the affine table using a fixed-point (8-bit fraction) accumulator so that the
; stored offset advances by 256 (one BG1 line) every 256/A entries on average.
;
; In:   A = step size; only the low byte is used (pitch of A / 256 lines per entry)
; Out:  affine table filled with multiples of 256
;       A = step size (low byte), X = 164, Y = last offset written
; Note: the 16-bit immediates below mean this must be assembled and called with
;       16-bit accumulator and index registers.
;
; Stack frame inside the loop:
;   1,s = fractional accumulator (0..255)
;   3,s = step size (0..255)
SetAffine
            ldx   #0                ; X = byte index into the affine table
            ldy   #0                ; Y = current offset (multiple of 256)

            and   #$00FF
            pha                     ; 3,s = step size
            pea   $0000             ; 1,s = fractional accumulator, starts at zero

:loop       lda   1,s
            clc
            adc   3,s               ; both terms are < 256, so the sum is < 512
            cmp   #256              ; carry set => the fraction overflowed into a whole line

; Save the remainder *before* touching Y.  The previous code loaded Y into the
; accumulator first, which replaced the sum and reset the fraction to zero on
; every overflow, discarding the remainder.  AND and STA leave the carry from
; the CMP intact, so it can still be tested afterwards.
            and   #$00FF            ; keep only the fractional part
            sta   1,s

            tya                     ; TYA does not affect the carry either
            bcc   :store
            clc
            adc   #256              ; Move to next BG1 line
            tay

:store      sta   affine,x          ; A == Y on both paths
            inx
            inx
            cpx   #164
            bcc   :loop

            pla                     ; drop the fractional accumulator
            pla                     ; drop the step size
            rts
_ClearBG1Buffer _ClearBG1Buffer
phb phb
pha pha
@ -266,6 +305,35 @@ CopyBG1YTableToBG1Addr

View File

@ -10,33 +10,33 @@
; on the SHR screen or the current value of StartY ; on the SHR screen or the current value of StartY
_BltRange _BltRange
:exit_ptr equ tmp0 :exit_ptr equ tmp0
:jmp_low_save equ tmp2 :jmp_low_save equ tmp2
phb ; preserve the bank register phb ; preserve the bank register
clc` clc`
dey dey
tya ; Get the address of the line that we want to return from tya ; Get the address of the line that we want to return from
adc StartY ; and create a pointer to it adc StartY ; and create a pointer to it
asl asl
tay tay
lda BTableLow,y lda BTableLow,y
sta :exit_ptr sta :exit_ptr
lda BTableHigh,y lda BTableHigh,y
sta :exit_ptr+2 sta :exit_ptr+2
txa ; get the first line (0 - 199) txa ; get the first line (0 - 199)
adc StartY ; add in the virtual offset (0, 207) -- max value of 406 adc StartY ; add in the virtual offset (0, 207) -- max value of 406
asl asl
tax ; this is the offset into the blitter table tax ; this is the offset into the blitter table
sep #$20 ; 8-bit Acc sep #$20 ; 8-bit Acc
lda BTableHigh,x ; patch in the bank lda BTableHigh,x ; patch in the bank
sta blt_entry+3 sta blt_entry+3
lda BTableLow+1,x ; patch in the page lda BTableLow+1,x ; patch in the page
sta blt_entry+2 sta blt_entry+2
; The way we patch the exit code is subtle, but very fast. The CODE_EXIT offset points to ; The way we patch the exit code is subtle, but very fast. The CODE_EXIT offset points to
; an JMP/JML instruction that transitions to the next line after all of the code has been ; an JMP/JML instruction that transitions to the next line after all of the code has been
@ -45,49 +45,53 @@ _BltRange
; The trick we use is to patch the low byte to force the code to jump to a special return ; The trick we use is to patch the low byte to force the code to jump to a special return
; function (jml blt_return) in the *next* code field line. ; function (jml blt_return) in the *next* code field line.
ldy #CODE_EXIT+1 ; this is a JMP or JML instruction that points to the next line. ldy #CODE_EXIT+1 ; this is a JMP or JML instruction that points to the next line.
lda [:exit_ptr],y lda [:exit_ptr],y
sta :jmp_low_save sta :jmp_low_save
lda #FULL_RETURN ; this is the offset of the return code lda #FULL_RETURN ; this is the offset of the return code
sta [:exit_ptr],y ; patch out the low byte of the JMP/JML sta [:exit_ptr],y ; patch out the low byte of the JMP/JML
; Now we need to set up the Bank, Stack Pointer and Direct Page registers for calling into ; Now we need to set up the Bank, Stack Pointer and Direct Page registers for calling into
; the code field ; the code field
lda StartX lda StartX
bit #$01 bit #$01
beq :primary beq :primary
lda BG1AltBank lda BG1AltBank
bra :alt bra :alt
:primary lda BG1DataBank :primary lda BG1DataBank
:alt :alt
pha pha
plb plb
rep #$20 rep #$20
phd ; Save the application direct page phd ; Save the application direct page
lda BlitterDP ; Set the direct page to the blitter data lda BlitterDP ; Set the direct page to the blitter data
tcd tcd
sei ; disable interrupts sei ; disable interrupts
_R0W1 _R0W1
tsc ; save the stack pointer tsc ; save the stack pointer
stal stk_save+1 stal stk_save+1
blt_entry jml $000000 ; Jump into the blitter code $XX/YY00 blt_entry jml $000000 ; Jump into the blitter code $XX/YY00
blt_return _R0W0 blt_return _R0W0
stk_save lda #0000 ; load the stack stk_save lda #0000 ; load the stack
tcs tcs
cli ; re-enable interrupts cli ; re-enable interrupts
pld ; restore the direct page pld ; restore the direct page
sep #$20 sep #$20
ldy #CODE_EXIT+1 ldy #CODE_EXIT+1
lda :jmp_low_save lda :jmp_low_save
sta [:exit_ptr],y sta [:exit_ptr],y
rep #$20 rep #$20
plb ; restore the bank plb ; restore the bank
rts rts
; Placeholder for actual sprite drawing. The implementation will be simple because
; we don't do anything sprite related; just call function pointers provided to us.
;
; Currently a no-op: it returns immediately without reading or changing any
; register or memory.
_RenderSprites
rts

View File

@ -10,82 +10,119 @@
; 12 additional instructions, so this is an optimization that is unlikely to lead to a net ; 12 additional instructions, so this is an optimization that is unlikely to lead to a net
; improvement. ; improvement.
; ;
; A = base address of top-left edge of the screen ; X = first line (inclusive), valid range of 0 to 199
; Y = number of scanlines to blit ; Y = last line (exclusive), valid range >X up to 200
; X = width of the screen in bytes _PEISlam
PEISlam lda ScreenWidth
stx :screen_width ; save the width dec
sta :screen_width_1 ; save the width-1 outside of the direct page
phd ; save the current direct page and assign the base lda #:pei_end ; patch the PEI entry address
tcd ; screen address to the direct page register and #$FFFE ; should always be even, but....
clc sec
adc :screen_width ; screen address of the right edge (will go in stack) sbc ScreenWidth
tax ; but cache in x register for a bit.... sta :inner+1
tsc phx
sta :stk_save ; save the stack pointer to restore later tya
sec
sbc 1,s
ply
tay ; get the number of lines in the y register
lda #:pei_end ; patch the PEI entry address txa
sec asl
sbc :screen_width tax
sta :inner+1 lda RTable,x ; This is the right visible byte, so add one to get the
tax ; left visible byte (cache in x-reg)
sec
sbc ScreenWidth
inc
clc ; clear before the loop -- nothing in the loop affect the carry bit phd ; save the current direct page and assign the base
brl :outer ; hop into the entry point. tcd ; screen address to the direct page register
]dp equ 158 tsc
lup 80 ; A full width screen is 160 bytes / 80 words sta :stk_save ; save the stack pointer to restore later
pei ]dp
]dp equ ]dp-2 clc ; clear before the loop -- nothing in the loop affect the carry bit
--^ brl :outer ; hop into the entry point.
]dp equ 158
lup 80 ; A full width screen is 160 bytes / 80 words
pei ]dp
]dp equ ]dp-2
--^
:pei_end :pei_end
tdc ; Move to the next line tdc ; Move to the next line
adc #160 adc #160
tcd tcd
adc :screen_width adc :screen_width_1
tcs tcs
dey ; decrement the total counter, if zero then we're done dey ; decrement the total counter, if zero then we're done
beq :exit beq :exit
dex ; decrement the inner counter. Both counters are set dex ; decrement the inner counter. Both counters are set
beq :restore ; up so that they fall-through by default to save a cycle beq :restore ; up so that they fall-through by default to save a cycle
; per loop iteration. ; per loop iteration.
:inner jmp $0000 ; 25 cycles of overhead per line. A full width slam executes all :inner jmp $0000 ; 25 cycles of overhead per line. A full width slam executes all
; 80 of the PEI instructions which we expect to take 7 cycles ; 80 of the PEI instructions which we expect to take 7 cycles
; since the direct page is not aligned. So total overhead is ; since the direct page is not aligned. So total overhead is
; 25 / (25 + 7 * 80) = 4.27% of execution ; 25 / (25 + 7 * 80) = 4.27% of execution
; ;
; Without the interrupt breaks, we could remove the dex/beq test ; Without the interrupt breaks, we could remove the dex/beq test
; and save 4 cycles per loop which takes the overhead down to ; and save 4 cycles per loop which takes the overhead down to
; only 3.6% ; only 3.6%
:restore :restore
tsx ; save the current stack tsx ; save the current stack
_R0W0 ; restore the execution environment and _R0W0 ; restore the execution environment and
lda :stk_save ; give a few cycles to catch some interrupts lda :stk_save ; give a few cycles to catch some interrupts
tcs tcs
cli ; fall through here -- saves a BRA instruction cli ; fall through here -- saves a BRA instruction
:outer :outer
sei sei
txs ; set the stack address to the right edge txs ; set the stack address to the right edge
ldx #8 ; Enable interrupts at least once every 8 lines ldx #8 ; Enable interrupts at least once every 8 lines
_R1W1 _R1W1
bra :inner bra :inner
:exit :exit
_R0W0 _R0W0
lda :stk_save lda :stk_save
tcs tcs
cli cli
pld
rts
:stk_save ds 2
:screen_width_1 ds 2
pld
rts
:stk_save ds 2
:screen_width ds 2

View File

@ -186,6 +186,57 @@ CopyTile
sta $7001,y sta $7001,y
rts rts
; Dynamic tile primitive
;
; Writes a pair of "LDA dp,x / PHA" instructions ($B5 nn $48) at eight locations
; spaced $1000 bytes apart, starting at offset Y (presumably one location per tile
; line -- confirm against the code field layout).  The direct page operand is fixed
; at the time the tile is rendered.
;
; Bytes written at each location:
;
;   +0  $B5        LDA (id+2),x
;   +1  id+2
;   +2  $48        PHA
;   +3  $B5        LDA (id),x
;   +4  id
;   +5  $48        PHA
;
; In:  A = dynamic tile id (must be an 8-bit value)
;      Y = offset of the first location
; The stores do not overlap, so the order in which they are issued is irrelevant.
:DynTile
            and   #$00FF
            ora   #$4800            ; word = operand (low byte) + PHA opcode (high byte)

; Second instruction pair: operand at +4, PHA at +5
            sta   $7004,y
            sta   $6004,y
            sta   $5004,y
            sta   $4004,y
            sta   $3004,y
            sta   $2004,y
            sta   $1004,y
            sta:  $0004,y

            inc                     ; first pair reads from id+2
            inc

; First instruction pair: operand at +1, PHA at +2
            sta   $7001,y
            sta   $6001,y
            sta   $5001,y
            sta   $4001,y
            sta   $3001,y
            sta   $2001,y
            sta   $1001,y
            sta:  $0001,y

; Finally drop in the two LDA dp,x opcodes with byte-wide stores
            sep   #$20
            lda   #$B5

            sta   $7003,y
            sta   $6003,y
            sta   $5003,y
            sta   $4003,y
            sta   $3003,y
            sta   $2003,y
            sta   $1003,y
            sta:  $0003,y

            sta   $7000,y
            sta   $6000,y
            sta   $5000,y
            sta   $4000,y
            sta   $3000,y
            sta   $2000,y
            sta   $1000,y
            sta:  $0000,y

            rep   #$20
            rts