Break up large source code files to help with dependency ordering

This commit is contained in:
Lucas Scharenbroich 2022-04-25 11:30:24 -05:00
parent 76180b6feb
commit 29d70dc567
3 changed files with 645 additions and 0 deletions

88
src/Math.s Normal file
View File

@ -0,0 +1,88 @@
; Math-y functions
mx %00
; Mod164 -- reduce the 16-bit accumulator modulo 164
;
; In:   A = unsigned 16-bit value (the file is assembled with mx %00)
; Out:  A = A mod 164; the quotient is not produced
;       X and Y are not touched
;
; 164 = $A4 = %1010_0100
;
; Binary long division: the divisor is tried at every shift position from 164<<8 down
; to 164<<0 and subtracted whenever it fits.  CMP leaves the carry set when A >= operand,
; so the SBC that follows it never needs a SEC.
Mod164      cmp   #$A400              ; 164 << 8
            bcc   :try7
            sbc   #$A400
:try7       cmp   #$5200              ; 164 << 7
            bcc   :try6
            sbc   #$5200
:try6       cmp   #$2900              ; 164 << 6
            bcc   :try5
            sbc   #$2900
:try5       cmp   #$1480              ; 164 << 5
            bcc   :try4
            sbc   #$1480
:try4       cmp   #$0A40              ; 164 << 4
            bcc   :try3
            sbc   #$0A40
:try3       cmp   #$0520              ; 164 << 3
            bcc   :try2
            sbc   #$0520
:try2       cmp   #$0290              ; 164 << 2
            bcc   :try1
            sbc   #$0290
:try1       cmp   #$0148              ; 164 << 1
            bcc   :try0
            sbc   #$0148
:try0       cmp   #$00A4              ; 164
            bcc   :done
            sbc   #$00A4
:done       rts
; Mod208 -- reduce the 16-bit accumulator modulo 208
;
; In:   A = unsigned 16-bit value (the file is assembled with mx %00)
; Out:  A = A mod 208; the quotient is not produced
;       X and Y are not touched
;
; 208 = $D0 = %1101_0000
;
; Same shift-and-subtract scheme as Mod164: try 208<<8 down to 208<<0 and subtract each
; one that fits.  CMP leaves the carry set when A >= operand, so no SEC is needed.
;
; There are probably faster hacks to divide a 16-bit unsigned value by 208
; https://www.drdobbs.com/parallel/optimizing-integer-division-by-a-constan/184408499
; https://embeddedgurus.com/stack-overflow/2009/06/division-of-integers-by-constants/
Mod208      cmp   #$D000              ; 208 << 8
            bcc   :try7
            sbc   #$D000
:try7       cmp   #$6800              ; 208 << 7
            bcc   :try6
            sbc   #$6800
:try6       cmp   #$3400              ; 208 << 6
            bcc   :try5
            sbc   #$3400
:try5       cmp   #$1A00              ; 208 << 5
            bcc   :try4
            sbc   #$1A00
:try4       cmp   #$0D00              ; 208 << 4
            bcc   :try3
            sbc   #$0D00
:try3       cmp   #$0680              ; 208 << 3
            bcc   :try2
            sbc   #$0680
:try2       cmp   #$0340              ; 208 << 2
            bcc   :try1
            sbc   #$0340
:try1       cmp   #$01A0              ; 208 << 1
            bcc   :try0
            sbc   #$01A0
:try0       cmp   #$00D0              ; 208
            bcc   :done
            sbc   #$00D0
:done       rts

237
src/blitter/Rotation.s Normal file
View File

@ -0,0 +1,237 @@
; Support rotating the BG1 graphics by leveraging the fact that a rotation function can be decomposed
; into an addition of two functions parameterized by the angle of rotation: pixel = *(f(x, a) + f(y, a))
;
; We pre-build a number of rotation tables and then populate the direct page values and Y-register values
; for each line of the blitter, such that a single lda (00),y instruction fetches the appropriate data
;
; This is about as fast of a rotation as we can do.
;
; When possible, off-screen locations are calculated to produce an address of $FFFE, so that the last two bytes
; of the BG1 data buffer provide the "fill value".
ANGLEBNK ext
; Long-call entry point (returns with RTL) for _ApplyBG1XPosAngle.  Sets the data bank
; register to the program bank for the duration of the call and restores the caller's
; data bank before returning.  Registers are passed through to the worker unchanged.
ApplyBG1XPosAngle ENT
phb ; save the caller's data bank
phk ; data bank = program bank
plb
jsr _ApplyBG1XPosAngle
plb ; restore the caller's data bank
rtl
; _ApplyBG1XPosAngle
;
; Copies 82 words from a table in the ANGLEBNK bank into offsets $00..$A3 of the direct
; page whose address is stored in BlitterDP.
;
; In:   Y = 16-bit address of the source table (the bank byte is taken from ANGLEBNK)
; Out:  nothing; A, X and Y are clobbered, the direct page register is restored.
;       $FC..$FF of the blitter direct page are overwritten with the source pointer.
_ApplyBG1XPosAngle

; Disabled: start-offset calculation based on BG1StartX mod 164, kept for reference
;            phy
;            lda   BG1StartX
;            jsr   Mod164
;            sta   BG1StartXMod164
;            lda   #162
;            sec
;            sbc   StartXMod164
;            bpl   *+6
;            clc
;            adc   #164
;            clc
;            adc   BG1StartXMod164
;            cmp   #164
;            bcc   *+5
;            sbc   #164
;            clc
;            adc   1,s
;            tay                      ; cache the value
;            pla                      ; pop the value

            phd                       ; keep the caller's direct page
            lda   BlitterDP           ; read while the caller's direct page is still active
            tcd                       ; from here on, direct page = blitter direct page

            lda   #^ANGLEBNK          ; build the long pointer [$FC] = ANGLEBNK bank : Y
            sta   $FE
            sty   $FC

            ldx   #162                ; last word offset; X (destination) and Y (source)
            txy                       ; count down in lock-step
:copy       lda   [$FC],y
            sta   $00,x
            dey
            dey
            dex
            dex
            bpl   :copy               ; falls out once X steps below zero

            pld                       ; back to the caller's direct page
            rts
; Long-call entry point (returns with RTL) for _ApplyBG1YPosAngle.  Sets the data bank
; register to the program bank for the duration of the call and restores the caller's
; data bank before returning.  Registers are passed through to the worker unchanged.
ApplyBG1YPosAngle ENT
phb ; save the caller's data bank
phk ; data bank = program bank
plb
jsr _ApplyBG1YPosAngle
plb ; restore the caller's data bank
rtl
; Walks ScreenHeight lines in segments and, for each segment, passes the matching position
; in the angle table to CopyAngleYTableToBG1Addr.
;
; Y = address of the angle table to use (saved in :angle_tbl)
;
; BTableLow / BTableHigh are looked up once per segment to get the address (-> Y) and the
; bank (-> data bank register) for that segment.  On exit the data bank is set back to
; the program bank.  Uses tmp0-tmp4; A, X and Y are clobbered.
_ApplyBG1YPosAngle
:virt_line equ tmp0 ; current line of the virtual screen
:lines_left equ tmp1 ; lines that still have to be processed
:draw_count equ tmp2 ; number of lines handled by the current segment
:ytbl_idx equ tmp3 ; running line index used to select the table entry
:angle_tbl equ tmp4 ; table address that was passed in Y
sty :angle_tbl
lda BG1StartY
jsr Mod208
sta BG1StartYMod208
sta :ytbl_idx ; Start copying from the first entry in the table
lda StartYMod208 ; This is the base line of the virtual screen
sta :virt_line ; Keep track of it
lda ScreenHeight
sta :lines_left
:loop
lda :virt_line
asl ; word index into BTableLow / BTableHigh
tax
ldal BTableLow,x ; Get the address of the first code field line
tay
sep #$20 ; 8-bit accumulator so that only a single byte is pushed
ldal BTableHigh,x
pha ; push the bank on the stack
plb ; data bank = bank from BTableHigh; only direct page and long accesses follow
rep #$20
lda :virt_line ; A = 16 - (virt_line & 15)
and #$000F
eor #$FFFF
inc
clc
adc #16
min :lines_left ; macro defined elsewhere -- presumably clamps A to :lines_left
sta :draw_count ; Do this many lines
asl
tax ; X = line count * 2, as expected by CopyAngleYTableToBG1Addr
lda :ytbl_idx ; Read from this location (duplicate every 4 lines)
lsr
lsr
asl ; (index / 4) * 2 = byte offset of the table word
clc
adc :angle_tbl
sec
sbc #ANGLEBNK ; make it relative to ANGLEBNK; SaveBG1AngleValues indexes ANGLEBNK+n,x
jsr CopyAngleYTableToBG1Addr ; or CopyBG1YTableToBG1Addr2
lda :virt_line ; advance to the virtual line after the segment we just
clc ; filled in
adc :draw_count
sta :virt_line
lda :ytbl_idx ; advance the index into the YTable
adc :draw_count ; no CLC here: uses the carry left by the :virt_line add above
sta :ytbl_idx
lda :lines_left ; subtract the number of lines we just completed
sec
sbc :draw_count
sta :lines_left
jne :loop ; macro defined elsewhere -- presumably a long branch-if-not-zero
phk ; data bank = program bank again
plb
rts
; Unrolled copy routine to move y_angle entries into BG1_ADDR position with an additional
; shift. This has to be split into two steps: SaveBG1AngleValues first stages the table
; values in BG1YCache, then ApplyBG1OffsetValues (defined elsewhere) is called with the
; caller's data bank restored.
;
; A = index into the array (x2)
; Y = starting line * $1000
; X = number of lines (x2)
;
; Y is not modified here or in SaveBG1AngleValues, so ApplyBG1OffsetValues receives it unchanged.
CopyAngleYTableToBG1Addr
phx ; keep the line count for ApplyBG1OffsetValues
phb ; keep the data bank selected by the caller
phk ; restore access to this bank
plb
jsr SaveBG1AngleValues ; consumes A and X
plb ; back to the caller's data bank
plx ; x is used directly in this routine
jsr ApplyBG1OffsetValues
rts
; Copies words from the angle table into BG1YCache, one cache word per line.
;
; A = byte offset of the first table word, relative to ANGLEBNK (moved into X for the loads)
; X = number of lines * 2 (0 to 32); selects the entry point through :tbl
;
; Every table word feeds four consecutive cache slots: ANGLEBNK+00 -> BG1YCache+00..+06,
; ANGLEBNK+02 -> +08..+14, ANGLEBNK+04 -> +16..+22, ANGLEBNK+06 -> +24..+30.  Entering at
; :xNN fills slots NN-1 down to 0.  The :doNN stubs move A into X and branch into the
; unrolled body; they are placed on both sides of it, presumably to keep each BRA in range.
SaveBG1AngleValues
jmp (:tbl,x)
:tbl da :none
da :do01,:do02,:do03,:do04
da :do05,:do06,:do07,:do08
da :do09,:do10,:do11,:do12
da :do13,:do14,:do15,:do16
:do15 tax
bra :x15
:do14 tax
bra :x14
:do13 tax
bra :x13
:do12 tax
bra :x12
:do11 tax
bra :x11
:do10 tax
bra :x10
:do09 tax
bra :x09
:do08 tax
bra :x08
:do16 tax ; 16 lines: falls straight into the top of the unrolled body
ldal ANGLEBNK+06,x
sta BG1YCache+30
:x15 ldal ANGLEBNK+06,x
sta BG1YCache+28
:x14 ldal ANGLEBNK+06,x
sta BG1YCache+26
:x13 ldal ANGLEBNK+06,x
sta BG1YCache+24
:x12 ldal ANGLEBNK+04,x
sta BG1YCache+22
:x11 ldal ANGLEBNK+04,x
sta BG1YCache+20
:x10 ldal ANGLEBNK+04,x
sta BG1YCache+18
:x09 ldal ANGLEBNK+04,x
sta BG1YCache+16
:x08 ldal ANGLEBNK+02,x
sta BG1YCache+14
:x07 ldal ANGLEBNK+02,x
sta BG1YCache+12
:x06 ldal ANGLEBNK+02,x
sta BG1YCache+10
:x05 ldal ANGLEBNK+02,x
sta BG1YCache+08
:x04 ldal ANGLEBNK+00,x
sta BG1YCache+06
:x03 ldal ANGLEBNK+00,x
sta BG1YCache+04
:x02 ldal ANGLEBNK+00,x
sta BG1YCache+02
:x01 ldal ANGLEBNK+00,x
sta BG1YCache+00
:none rts ; also the target for a line count of zero
:do07 tax
bra :x07
:do06 tax
bra :x06
:do05 tax
bra :x05
:do04 tax
bra :x04
:do03 tax
bra :x03
:do02 tax
bra :x02
:do01 tax
bra :x01

320
src/blitter/TemplateUtils.s Normal file
View File

@ -0,0 +1,320 @@
; Utility functions related to patching and manipulating the blitter template code
mx %00
; _RecalcTileScreenAddrs
;
; Recomputes the on-screen address recorded for every tile store entry, taking the
; StartX and StartY values into consideration.  The routine exists to support the dirty
; tile rendering mode, so the tiles *must* be aligned with the playfield:
; StartX % 4 == 0 and StartY % 8 == 0.  If these conditions are not met the screen will
; not render correctly.
;
; Uses tmp0-tmp3 as scratch; A, X and Y are clobbered.
_RecalcTileScreenAddrs
NextColPtr   equ   tmp0
RowAddrPtr   equ   tmp1
OnScreenAddr equ   tmp2
Counter      equ   tmp3

             jsr   _OriginToTileStore     ; X, Y <- (col, row) of the tile in the playfield's upper-left corner

; Add the returned offsets to the NextCol and TileStoreYTable array addresses and keep the
; results on the direct page, which frees both index registers.
             clc
             txa
             adc   #NextCol
             sta   NextColPtr
             tya
             adc   #TileStoreYTable       ; no CLC: uses the carry left by the add above
             sta   RowAddrPtr

; On-screen address of the upper-left corner of the playfield
             lda   ScreenY0
             asl                          ; word index into ScreenAddr
             tax
             lda   ScreenAddr,x           ; address of the left edge of the physical screen
             clc
             adc   ScreenX0
             sta   OnScreenAddr

; Visit all MAX_TILES tile store entries, 41 columns per row
             lda   #MAX_TILES
             sta   Counter
             ldy   #0
:each_tile
             lda   (NextColPtr),y         ; recalculated every time because the wrap-around
             clc                          ; can happen at any column
             adc   (RowAddrPtr)
             tax                          ; NOTE: Try to rework to use new TileStore2DLookup array
             lda   OnScreenAddr
             stal  TileStore+TS_SCREEN_ADDR,x
             clc
             adc   #4                     ; screen address of the next tile in this row
             iny
             iny
             cpy   #2*41                  ; all 41 columns of this row done?
             bcc   :same_row
             inc   RowAddrPtr             ; step to the next row entry (with wrap-around)
             inc   RowAddrPtr
             ldy   #0                     ; restart at the first column
             clc
             adc   #{8*160}-{4*41}        ; down by 8*160 bytes, back by the 4*41 bytes just stepped
:same_row
             sta   OnScreenAddr           ; remember the updated on-screen address
             dec   Counter
             bne   :each_tile
             rts
; Patch 8-bit or 16-bit values into the bank. These are a set of unrolled loops to
; quickly patch in a constant value, or a value from an array, into a given set of
; templates.
;
; Because we have structured everything as parallel code blocks, most updates to the blitter
; reduce to storing a constant value and have an amortized cost of just a single store.
;
; The utility of these routines is that they also handle setting just a range of lines
; within a single bank.
;
; X = number of lines * 2, 0 to 32
; Y = starting line * $1000
; A = value
;
; Set M to 0 or 1
;
; The jump table enters the run of stores so that exactly X/2 of them execute, always
; ending with the $0000,y store.  Every "sta abs,y" is 3 bytes, hence the table step of 3.
SetConst ; Need a blank line here, otherwise the :tbl local variable resolves backwards
jmp (:tbl,x)
:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09
da :bottom-12,:bottom-15,:bottom-18,:bottom-21
da :bottom-24,:bottom-27,:bottom-30,:bottom-33
da :bottom-36,:bottom-39,:bottom-42,:bottom-45
da :bottom-48
:top sta $F000,y
sta $E000,y
sta $D000,y
sta $C000,y
sta $B000,y
sta $A000,y
sta $9000,y
sta $8000,y
sta $7000,y
sta $6000,y
sta $5000,y
sta $4000,y
sta $3000,y
sta $2000,y
sta $1000,y
sta: $0000,y ; "sta:" forces the 3-byte absolute form so the table offsets stay valid
:bottom rts
; SetDPAddrs
;
; Y = offset
;
; A is NOT an input: it is overwritten immediately and holds $0100 on return.
;
; Initializes a bank of direct page offsets.  Lines $F000 down to $8000 receive $0800
; down to $0100, and lines $7000 down to $0000 receive the same $0800..$0100 sequence again.
SetDPAddrs
lda #$0800
sta $F000,y
lda #$0700
sta $E000,y
lda #$0600
sta $D000,y
lda #$0500
sta $C000,y
lda #$0400
sta $B000,y
lda #$0300
sta $A000,y
lda #$0200
sta $9000,y
lda #$0100
sta: $8000,y
lda #$0800 ; second group of eight lines restarts the sequence
sta $7000,y
lda #$0700
sta $6000,y
lda #$0600
sta $5000,y
lda #$0500
sta $4000,y
lda #$0400
sta $3000,y
lda #$0300
sta $2000,y
lda #$0200
sta $1000,y
lda #$0100
sta: $0000,y
rts
; SetAbsAddrs
;
; A = absolute address (largest)
; Y = offset
; X = number of lines * 2 (it indexes the two-byte entries of :tbl), 0 to 32
;
; Stores a value and decrements by $1000 for each line
;
; The jump table enters the unrolled run so that exactly X/2 stores execute, always ending
; with the $0000,y store.  Every line except the last is "sta abs,y" + "sbc #imm16"
; (6 bytes), the last is only the store (3 bytes); that gives the offsets 3, 9, 15, ... 93.
SetAbsAddrs sec ; carry set once; it stays set as long as no subtraction borrows
jmp (:tbl,x)
:tbl da :bottom-00,:bottom-03,:bottom-09,:bottom-15
da :bottom-21,:bottom-27,:bottom-33,:bottom-39
da :bottom-45,:bottom-51,:bottom-57,:bottom-63
da :bottom-69,:bottom-75,:bottom-81,:bottom-87
da :bottom-93
:top sta $F000,y
sbc #$1000
sta $E000,y
sbc #$1000
sta $D000,y
sbc #$1000
sta $C000,y
sbc #$1000
sta $B000,y
sbc #$1000
sta $A000,y
sbc #$1000
sta $9000,y
sbc #$1000
sta $8000,y
sbc #$1000
sta $7000,y
sbc #$1000
sta $6000,y
sbc #$1000
sta $5000,y
sbc #$1000
sta $4000,y
sbc #$1000
sta $3000,y
sbc #$1000
sta $2000,y
sbc #$1000
sta $1000,y
sbc #$1000
sta: $0000,y ; "sta:" forces the 3-byte absolute form so the table offsets stay valid
:bottom rts
; Fill up a full bank with blitter templates. Currently we can fit 16 lines per bank, so need
; a total of 13 banks to hold the 208 lines for full-screen support
;
; X = low word of bank table
; A = high word of bank table
; Y = index * 4 of the bank to initialize
;
; The template is copied into each of the 16 lines ($1000 bytes apart) of the selected
; bank, the lines are chained to one another, and the last line is patched with a JML
; into the bank that follows in the table (wrapping from the 13th entry back to the first).
;
; NOTE(review): :bankArray+2 and :target+2 are written as well, so tmp0, tmp2 and tmp4 are
; presumably far enough apart to hold long pointers -- confirm against the tmp definitions.
BuildBank
:bankArray equ tmp0 ; long pointer to the bank table (X = low word, A = high word)
:target equ tmp2 ; long pointer to the line currently being built
:nextBank equ tmp4 ; middle and bank bytes of the next bank's address
stx :bankArray
sta :bankArray+2
stz :target ; start with the line at offset $0000
iny ; Y+2 = high word of the selected table entry
iny
lda [:bankArray],y
sta :target+2
iny ; move to the next item
iny
iny ; middle byte
cpy #4*13 ; if greater than the array length, wrap back to zero
bcc :ok
ldy #1 ; middle byte of the first table entry
:ok lda [:bankArray],y ; Get the middle and high bytes of the address
sta :nextBank
:next
jsr :BuildLine2 ; copy and patch the template for the line at :target
lda :target
clc
adc #$1000
sta :target
bcc :next ; carry set = :target wrapped back to $0000, all 16 lines are built
phb
pei :target+1 ; pushes the middle and bank bytes of :target
plb ; discard the middle byte
plb ; data bank = target bank
; Change the patched value to one of DP_ENTRY, TWO_LYR_ENTRY or ONE_LYR_ENTRY based on the capabilities
; that the engine needs.
lda #$F000+{DP_ENTRY} ; Set the address from each line to the next
ldy #CODE_EXIT+1
ldx #15*2 ; 15 lines: SetAbsAddrs starts at the $E000 line and works down to $0000
jsr SetAbsAddrs
ldy #DP_ADDR
jsr SetDPAddrs
ldy #$F000+CODE_EXIT ; Patch the last line with a JML to go to the next bank
lda #{$005C+{DP_ENTRY}*256} ; $5C is the JML opcode; the second byte is presumably the low byte of DP_ENTRY
sta [:target],y
ldy #$F000+CODE_EXIT+2
lda :nextBank ; middle and bank bytes of the JML operand
sta [:target],y
ldy #$8000+CODE_EXIT ; Patch one line per bank to enable interrupts
lda #{$004C+{ENABLE_INT}*256} ; $4C is the JMP opcode; only the opcode and one operand byte are rewritten
sta [:target],y
plb
rts
; This is the relocation subroutine, it is responsible for copying the template to a
; memory location and patching up the necessary instructions.
;
; X = low word of address (must be a multiple of $1000)
; A = high word of address (bank)
;
; :BuildLine2 is the entry point used above; it expects :target to be set up already.
; NOTE(review): nothing in this chunk references :BuildLine itself.
:BuildLine
stx :target
sta :target+2
:BuildLine2
lda #CODE_LEN ; round up to an even number of bytes
inc
and #$FFFE
beq :nocopy
dec
dec
tay ; Y = offset of the last word to copy
:loop lda base,y ; copy the template, one word at a time, from the top down
sta [:target],y
dey
dey
bpl :loop
:nocopy lda #0 ; copy is complete, now patch up the addresses
sep #$20 ; 8-bit accumulator: the patches below are single bytes
ldx #0
lda :target+2 ; patch in the bank for the absolute long addressing mode
:dobank ldy BankPatches,x ; Y = offset within the line of a byte that receives the bank
sta [:target],y
inx
inx
cpx #BankPatchNum
bcc :dobank
ldx #0
:dopage ldy PagePatches,x ; patch the page addresses by adding the page offset to each
lda [:target],y
clc
adc :target+1 ; middle byte of :target = page offset of this line
sta [:target],y
inx
inx
cpx #PagePatchNum
bcc :dopage
:out
rep #$20 ; back to a 16-bit accumulator for the caller
rts