From 29d70dc567a597d2a811a4449587c0c8ebf1be60 Mon Sep 17 00:00:00 2001
From: Lucas Scharenbroich
Date: Mon, 25 Apr 2022 11:30:24 -0500
Subject: [PATCH] Break up large source code files to help with dependency ordering

---
 src/Math.s                  |  88 ++++++++++
 src/blitter/Rotation.s      | 237 ++++++++++++++++++++++++++
 src/blitter/TemplateUtils.s | 320 ++++++++++++++++++++++++++++++++++++
 3 files changed, 645 insertions(+)
 create mode 100644 src/Math.s
 create mode 100644 src/blitter/Rotation.s
 create mode 100644 src/blitter/TemplateUtils.s

diff --git a/src/Math.s b/src/Math.s
new file mode 100644
index 0000000..91ca035
--- /dev/null
+++ b/src/Math.s
@@ -0,0 +1,88 @@
+; Math-y functions: unrolled 16-bit unsigned modulo-by-constant routines (Mod164 and Mod208)
+
+            mx    %00                  ; assemble with 16-bit accumulator and index registers
+
+; Special subroutine to divide the accumulator by 164 and return remainder in the Accumulator
+; Method: conditionally subtract 164*256, 164*128, ... 164*1 in turn (nine compare/subtract steps).
+; 164 = $A4 = 1010_0100
+Mod164      cmp   #%1010010000000000   ; carry is set when A is at or above 164*256
+            bcc   *+5                  ; below: skip the 2-byte bcc plus the 3-byte sbc that follows
+            sbc   #%1010010000000000   ; carry is already set by the cmp, so no sec is needed
+
+            cmp   #%0101001000000000
+            bcc   *+5
+            sbc   #%0101001000000000
+
+            cmp   #%0010100100000000
+            bcc   *+5
+            sbc   #%0010100100000000
+
+            cmp   #%0001010010000000
+            bcc   *+5
+            sbc   #%0001010010000000
+
+            cmp   #%0000101001000000
+            bcc   *+5
+            sbc   #%0000101001000000
+
+            cmp   #%0000010100100000
+            bcc   *+5
+            sbc   #%0000010100100000
+
+            cmp   #%0000001010010000
+            bcc   *+5
+            sbc   #%0000001010010000
+
+            cmp   #%0000000101001000
+            bcc   *+5
+            sbc   #%0000000101001000
+
+            cmp   #%0000000010100100   ; final step: plain 164
+            bcc   *+5
+            sbc   #%0000000010100100
+            rts                        ; A = original A mod 164
+
+; Special subroutine to divide the accumulator by 208 and return remainder in the Accumulator
+; Same conditional-subtract scheme as Mod164, using 208*256 down to 208*1.
+; 208 = $D0 = 1101_0000
+;
+; There are probably faster hacks to divide a 16-bit unsigned value by 208
+; https://www.drdobbs.com/parallel/optimizing-integer-division-by-a-constan/184408499
+; https://embeddedgurus.com/stack-overflow/2009/06/division-of-integers-by-constants/
+
+Mod208      cmp   #%1101000000000000   ; carry is set when A is at or above 208*256
+            bcc   *+5                  ; below: skip over the 3-byte sbc
+            sbc   #%1101000000000000
+
+            cmp   #%0110100000000000
+            bcc   *+5
+            sbc   #%0110100000000000
+
+            cmp   #%0011010000000000
+            bcc   *+5
+            sbc   #%0011010000000000
+
+            cmp   #%0001101000000000
+            bcc   *+5
+            sbc   #%0001101000000000
+
+            cmp   #%0000110100000000
+            bcc   *+5
+            sbc   #%0000110100000000
+
+            cmp   #%0000011010000000
+            bcc   *+5
+            sbc   #%0000011010000000
+
+            cmp   #%0000001101000000
+            bcc   *+5
+            sbc   #%0000001101000000
+
+            cmp   #%0000000110100000
+            bcc   *+5
+            sbc   #%0000000110100000
+
+            cmp   #%0000000011010000   ; final step: plain 208
+            bcc   *+5
+            sbc   #%0000000011010000
+            rts                        ; A = original A mod 208
diff --git a/src/blitter/Rotation.s b/src/blitter/Rotation.s
new file mode 100644
index 0000000..e91cc9a
--- /dev/null
+++ b/src/blitter/Rotation.s
@@ -0,0 +1,237 @@
+; Support rotating the BG1 graphics by leveraging the fact that a rotation function can be decomposed
+; into an addition of two functions parameterized by the angle of rotation: pixel = *(f(x, a) + f(y, a))
+;
+; We pre-build a number of rotation tables and then populate the direct page values and Y-register values
+; for each line of the blitter, such that a single lda (00),y instruction fetches the appropriate data
+;
+; This is about as fast of a rotation as we can do.
+;
+; When possible, off-screen locations are calculated to produce an address of $FFFE, so that the last two bytes
+; of the BG1 data buffer provide the "fill value".
+
+ANGLEBNK    ext                       ; external symbol; used below as the bank/base of the angle tables
+ApplyBG1XPosAngle ENT                 ; long-call entry point: wraps _ApplyBG1XPosAngle
+            phb                       ; save the caller's data bank
+            phk                       ; data bank = program bank
+            plb
+            jsr   _ApplyBG1XPosAngle
+            plb                       ; restore the caller's data bank
+            rtl
+
+_ApplyBG1XPosAngle                    ; In: Y = low word of a table address in bank ^ANGLEBNK (stored to $fc below)
+;            phy
+
+;            lda   BG1StartX
+;            jsr   Mod164
+;            sta   BG1StartXMod164
+
+;            lda   #162
+;            sec
+;            sbc   StartXMod164
+;            bpl   *+6
+;            clc
+;            adc   #164
+;            clc
+;            adc   BG1StartXMod164
+;            cmp   #164
+;            bcc   *+5
+;            sbc   #164
+
+;            clc
+;            adc   1,s
+;            tay                       ; cache the value
+
+;            pla                       ; pop the value
+            phd                       ; save the direct page because we are going to switch to the
+            lda   BlitterDP           ; blitter direct page space and fill in the addresses
+            tcd
+
+            lda   #^ANGLEBNK          ; bank byte of the long pointer at $fc..$fe
+            sta   $fe
+            sty   $fc                 ; Store in the new direct page
+            ldy   #162                ; copy words at offsets 162, 160, ... 0
+            tyx
+:loop
+            lda   [$fc],y             ; read a table word through the long pointer
+            sta   00,x                ; store the value
+            dey
+            dey
+            dex
+            dex
+            bpl   :loop               ; loop until X goes negative
+            pld                       ; restore the caller's direct page
+            rts
+
+ApplyBG1YPosAngle ENT                 ; long-call entry point: wraps _ApplyBG1YPosAngle
+            phb                       ; save the caller's data bank
+            phk                       ; data bank = program bank
+            plb
+            jsr   _ApplyBG1YPosAngle
+            plb                       ; restore the caller's data bank
+            rtl
+
+_ApplyBG1YPosAngle                    ; In: Y = angle table address (saved to :angle_tbl)
+:virt_line  equ   tmp0
+:lines_left equ   tmp1
+:draw_count equ   tmp2
+:ytbl_idx   equ   tmp3
+:angle_tbl  equ   tmp4
+
+            sty   :angle_tbl
+
+            lda   BG1StartY
+            jsr   Mod208
+            sta   BG1StartYMod208
+            sta   :ytbl_idx           ; Start copying from the first entry in the table
+
+            lda   StartYMod208        ; This is the base line of the virtual screen
+            sta   :virt_line          ; Keep track of it
+
+            lda   ScreenHeight
+            sta   :lines_left
+
+:loop
+            lda   :virt_line
+            asl
+            tax
+            ldal  BTableLow,x         ; Get the address of the first code field line
+            tay
+
+            sep   #$20                ; 8-bit accumulator so only one byte is pushed
+            ldal  BTableHigh,x
+            pha                       ; push the bank on the stack
+            plb                       ; data bank = bank of this code field line
+            rep   #$20
+
+            lda   :virt_line          ; compute 16 - (virt_line mod 16)
+            and   #$000F
+            eor   #$FFFF
+            inc
+            clc
+            adc   #16
+            min   :lines_left         ; macro defined elsewhere; presumably A = min(A, :lines_left) -- TODO confirm
+
+            sta   :draw_count         ; Do this many lines
+            asl
+            tax                       ; X = number of lines * 2
+
+            lda   :ytbl_idx           ; Read from this location (duplicate every 4 lines)
+            lsr
+            lsr
+            asl                       ; (index / 4) * 2 = word offset
+            clc
+            adc   :angle_tbl
+            sec
+            sbc   #ANGLEBNK           ; make it relative to ANGLEBNK; the unrolled loads add ANGLEBNK back
+            jsr   CopyAngleYTableToBG1Addr ; or CopyBG1YTableToBG1Addr2
+
+            lda   :virt_line          ; advance to the virtual line after the segment we just
+            clc                       ; filled in
+            adc   :draw_count
+            sta   :virt_line
+
+            lda   :ytbl_idx           ; advance the index into the YTable
+            adc   :draw_count         ; no clc here: relies on carry being clear after the add above
+            sta   :ytbl_idx
+
+            lda   :lines_left         ; subtract the number of lines we just completed
+            sec
+            sbc   :draw_count
+            sta   :lines_left
+
+            jne   :loop               ; repeat while lines remain
+
+            phk                       ; data bank = program bank again before returning
+            plb
+            rts
+
+; Unrolled copy routine to move y_angle entries into BG1_ADDR position with an additional
+; shift. This has to be split into two
+;
+; A = index into the array (x2)
+; Y = starting line * $1000
+; X = number of lines (x2)
+CopyAngleYTableToBG1Addr
+            phx                       ; keep the line count for the second step
+            phb                       ; keep the data bank set by the caller
+
+            phk                       ; restore access to this bank
+            plb
+            jsr   SaveBG1AngleValues  ; step 1: copy table entries into BG1YCache
+
+            plb                       ; back to the caller's data bank
+            plx                       ; x is used directly in this routine
+            jsr   ApplyBG1OffsetValues ; step 2: routine defined outside this file
+            rts
+
+SaveBG1AngleValues                    ; In: X = number of lines * 2 (index into :tbl), A = table offset (moved to X)
+            jmp   (:tbl,x)
+:tbl        da    :none
+            da    :do01,:do02,:do03,:do04
+            da    :do05,:do06,:do07,:do08
+            da    :do09,:do10,:do11,:do12
+            da    :do13,:do14,:do15,:do16
+:do15       tax                       ; each :doNN moves the table offset into X, then enters the chain at :xNN
+            bra   :x15
+:do14       tax
+            bra   :x14
+:do13       tax
+            bra   :x13
+:do12       tax
+            bra   :x12
+:do11       tax
+            bra   :x11
+:do10       tax
+            bra   :x10
+:do09       tax
+            bra   :x09
+:do08       tax
+            bra   :x08
+:do16       tax                       ; 16 lines: falls straight through the whole chain
+            ldal  ANGLEBNK+06,x
+            sta   BG1YCache+30
+:x15        ldal  ANGLEBNK+06,x
+            sta   BG1YCache+28
+:x14        ldal  ANGLEBNK+06,x
+            sta   BG1YCache+26
+:x13        ldal  ANGLEBNK+06,x
+            sta   BG1YCache+24
+:x12        ldal  ANGLEBNK+04,x       ; the source offset changes only every fourth cache entry
+            sta   BG1YCache+22
+:x11        ldal  ANGLEBNK+04,x
+            sta   BG1YCache+20
+:x10        ldal  ANGLEBNK+04,x
+            sta   BG1YCache+18
+:x09        ldal  ANGLEBNK+04,x
+            sta   BG1YCache+16
+:x08        ldal  ANGLEBNK+02,x
+            sta   BG1YCache+14
+:x07        ldal  ANGLEBNK+02,x
+            sta   BG1YCache+12
+:x06        ldal  ANGLEBNK+02,x
+            sta   BG1YCache+10
+:x05        ldal  ANGLEBNK+02,x
+            sta   BG1YCache+08
+:x04        ldal  ANGLEBNK+00,x
+            sta   BG1YCache+06
+:x03        ldal  ANGLEBNK+00,x
+            sta   BG1YCache+04
+:x02        ldal  ANGLEBNK+00,x
+            sta   BG1YCache+02
+:x01        ldal  ANGLEBNK+00,x
+            sta   BG1YCache+00
+:none       rts
+:do07       tax
+            bra   :x07
+:do06       tax
+            bra   :x06
+:do05       tax
+            bra   :x05
+:do04       tax
+            bra   :x04
+:do03       tax
+            bra   :x03
+:do02       tax
+            bra   :x02
+:do01       tax
+            bra   :x01
diff --git a/src/blitter/TemplateUtils.s b/src/blitter/TemplateUtils.s
new file mode 100644
index 0000000..121f3af
--- /dev/null
+++ b/src/blitter/TemplateUtils.s
@@ -0,0 +1,320 @@
+; Utility functions related to patching and manipulating the blitter template code
+
+            mx    %00                 ; assemble with 16-bit accumulator and index registers
+
+; Generalized routine that calculates the on-screen address of the tiles and takes the
+; StartX and StartY values into consideration.  This routine really exists to support
+; the dirty tile rendering mode and the tiles *must* be aligned with the playfield.
+; That is, StartX % 4 == 0 and StartY % 8 == 0.  If these conditions are not met, then
+; screen will not render correctly.
+_RecalcTileScreenAddrs
+NextColPtr  equ   tmp0
+RowAddrPtr  equ   tmp1
+OnScreenAddr equ  tmp2
+Counter     equ   tmp3
+
+            jsr   _OriginToTileStore  ; Get the (col,row) of the tile in the upper-left corner of the playfield
+
+; Manually add the offsets to the NextCol and TileStoreYTable array address and put in a direct page
+; location so we can free up the registers.
+
+            clc
+            txa                       ; X is used as the offset into NextCol
+            adc   #NextCol
+            sta   NextColPtr
+
+            tya                       ; Y is used as the offset into TileStoreYTable
+            adc   #TileStoreYTable    ; no clc here: relies on carry being clear after the add above
+            sta   RowAddrPtr
+
+; Calculate the on-screen address of the upper-left corner of the playfield
+
+            lda   ScreenY0            ; Calculate the address of the first byte
+            asl                       ; of the playfield (old comment said "right side"; heading above says upper-left -- TODO confirm)
+            tax
+            lda   ScreenAddr,x        ; This is the address for the left edge of the physical screen
+            clc
+            adc   ScreenX0
+            sta   OnScreenAddr
+
+; Now, loop through the tile store
+
+            lda   #MAX_TILES
+            sta   Counter
+            ldy   #0
+:tsloop
+            lda   (NextColPtr),y      ; Need to recalculate each time since the wrap-around could
+            clc                       ; happen anywhere
+            adc   (RowAddrPtr)        ; add the entry for the current row
+            tax                       ; NOTE: Try to rework to use new TileStore2DLookup array
+
+            lda   OnScreenAddr
+            stal  TileStore+TS_SCREEN_ADDR,x
+
+            clc
+            adc   #4                  ; Go to the next tile
+
+            iny
+            iny
+            cpy   #2*41               ; If we've done 41 columns, move to the next line
+            bcc   :nohop
+
+            inc   RowAddrPtr          ; Advance the row address (with wrap-around)
+            inc   RowAddrPtr
+            ldy   #0                  ; Reset the column counter
+            clc                       ; the cpy above left carry set
+            adc   #{8*160}-{4*41}     ; down 8 lines of 160 bytes, back the 41*4 bytes just advanced
+:nohop
+            sta   OnScreenAddr        ; Save the updated on-screen address
+            dec   Counter
+            bne   :tsloop
+
+            rts
+
+
+; Patch 8-bit or 16-bit values into the bank. These are a set of unrolled loops to
+; quickly patch in a constant value, or a value from an array into a given set of
+; templates.
+;
+; Because we have structured everything as parallel code blocks, most updates to the blitter
+; reduce to storing a constant value and have an amortized cost of just a single store.
+;
+; The utility of these routines is that they also handle setting just a range of lines
+; within a single bank.
+;
+; X = number of lines * 2, 0 to 32
+; Y = starting line * $1000
+; A = value
+;
+; Set M to 0 or 1
+SetConst                              ; Need a blank line here, otherwise the :tbl local variable resolves backwards
+            jmp   (:tbl,x)            ; enter the store chain so that exactly X/2 stores execute
+:tbl        da    :bottom-00,:bottom-03,:bottom-06,:bottom-09
+            da    :bottom-12,:bottom-15,:bottom-18,:bottom-21
+            da    :bottom-24,:bottom-27,:bottom-30,:bottom-33
+            da    :bottom-36,:bottom-39,:bottom-42,:bottom-45
+            da    :bottom-48
+:top        sta   $F000,y             ; every store must be 3 bytes: the table steps back 3 bytes per line
+            sta   $E000,y
+            sta   $D000,y
+            sta   $C000,y
+            sta   $B000,y
+            sta   $A000,y
+            sta   $9000,y
+            sta   $8000,y
+            sta   $7000,y
+            sta   $6000,y
+            sta   $5000,y
+            sta   $4000,y
+            sta   $3000,y
+            sta   $2000,y
+            sta   $1000,y
+            sta:  $0000,y             ; "sta:" presumably forces a 3-byte absolute store here -- confirm in Merlin docs
+:bottom     rts
+
+; SetDPAddrs
+;
+; A = not an input: it is overwritten with the constants $0800 down to $0100 below
+; Y = offset
+;
+; Initializes a bank of direct page offsets
+SetDPAddrs
+            lda   #$0800              ; upper eight lines ($F000 to $8000) get $0800 down to $0100
+            sta   $F000,y
+            lda   #$0700
+            sta   $E000,y
+            lda   #$0600
+            sta   $D000,y
+            lda   #$0500
+            sta   $C000,y
+            lda   #$0400
+            sta   $B000,y
+            lda   #$0300
+            sta   $A000,y
+            lda   #$0200
+            sta   $9000,y
+            lda   #$0100
+            sta:  $8000,y
+
+            lda   #$0800              ; lower eight lines ($7000 to $0000) repeat the same sequence
+            sta   $7000,y
+            lda   #$0700
+            sta   $6000,y
+            lda   #$0600
+            sta   $5000,y
+            lda   #$0500
+            sta   $4000,y
+            lda   #$0400
+            sta   $3000,y
+            lda   #$0300
+            sta   $2000,y
+            lda   #$0200
+            sta   $1000,y
+            lda   #$0100
+            sta:  $0000,y
+            rts
+
+; SetAbsAddrs
+;
+; A = absolute address (largest)
+; Y = offset
+; X = number of lines * 2 (word index into :tbl; BuildBank passes 15*2)
+;
+; Stores a value and decrements by $1000 for each line
+SetAbsAddrs sec                       ; set carry once; each sbc below leaves it set as long as A does not borrow
+            jmp   (:tbl,x)
+:tbl        da    :bottom-00,:bottom-03,:bottom-09,:bottom-15
+            da    :bottom-21,:bottom-27,:bottom-33,:bottom-39
+            da    :bottom-45,:bottom-51,:bottom-57,:bottom-63
+            da    :bottom-69,:bottom-75,:bottom-81,:bottom-87
+            da    :bottom-93
+:top        sta   $F000,y             ; each sta/sbc pair is 6 bytes; the last sta alone is 3 (hence -03, -09, ...)
+            sbc   #$1000
+            sta   $E000,y
+            sbc   #$1000
+            sta   $D000,y
+            sbc   #$1000
+            sta   $C000,y
+            sbc   #$1000
+            sta   $B000,y
+            sbc   #$1000
+            sta   $A000,y
+            sbc   #$1000
+            sta   $9000,y
+            sbc   #$1000
+            sta   $8000,y
+            sbc   #$1000
+            sta   $7000,y
+            sbc   #$1000
+            sta   $6000,y
+            sbc   #$1000
+            sta   $5000,y
+            sbc   #$1000
+            sta   $4000,y
+            sbc   #$1000
+            sta   $3000,y
+            sbc   #$1000
+            sta   $2000,y
+            sbc   #$1000
+            sta   $1000,y
+            sbc   #$1000
+            sta:  $0000,y
+:bottom     rts
+
+; Fill up a full bank with blitter templates.  Currently we can fit 16 lines per bank, so need
+; a total of 13 banks to hold the 208 lines for full-screen support
+;
+; X = low word, A = high word of the bank table address (stored to :bankArray / :bankArray+2)
+; Y = index * 4 of the bank to initialize
+BuildBank
+
+:bankArray  equ   tmp0
+:target     equ   tmp2
+:nextBank   equ   tmp4
+
+            stx   :bankArray
+            sta   :bankArray+2
+
+            stz   :target             ; start at offset $0000 of the target bank
+            iny
+            iny
+            lda   [:bankArray],y      ; word at entry+2 supplies the target bank
+            sta   :target+2
+
+            iny                       ; move to the next item
+            iny
+            iny                       ; middle byte
+            cpy   #4*13               ; if greater than the array length, wrap back to zero
+            bcc   :ok
+            ldy   #1                  ; wrapped: offset 1 of the table
+:ok         lda   [:bankArray],y      ; Get the middle and high bytes of the address
+            sta   :nextBank
+
+:next
+            jsr   :BuildLine2         ; copy and patch one template at :target
+            lda   :target
+            clc
+            adc   #$1000
+            sta   :target
+            bcc   :next               ; carry set means the offset wrapped past $F000: all 16 lines done
+
+            phb                       ; save the data bank (restored before rts)
+            pei   :target+1           ; pushes the bytes at :target+1 and :target+2
+            plb                       ; discards the :target+1 byte
+            plb                       ; data bank = :target+2, the bank just built
+
+; Change the patched value to one of DP_ENTRY, TWO_LYR_ENTRY or ONE_LYR_ENTRY based on the capabilities
+; that the engine needs.
+
+            lda   #$F000+{DP_ENTRY}   ; Set the address from each line to the next
+            ldy   #CODE_EXIT+1
+            ldx   #15*2               ; 15 lines: the $F000 line is patched separately below
+            jsr   SetAbsAddrs
+
+            ldy   #DP_ADDR
+            jsr   SetDPAddrs
+
+            ldy   #$F000+CODE_EXIT    ; Patch the last line with a JML to go to the next bank
+            lda   #{$005C+{DP_ENTRY}*256}
+            sta   [:target],y
+            ldy   #$F000+CODE_EXIT+2
+            lda   :nextBank
+            sta   [:target],y
+
+            ldy   #$8000+CODE_EXIT    ; Patch one line per bank to enable interrupts
+            lda   #{$004C+{ENABLE_INT}*256}
+            sta   [:target],y
+
+            plb                       ; restore the data bank saved above
+            rts
+
+; This is the relocation subroutine, it is responsible for copying the template to a
+; memory location and patching up the necessary instructions.
+;
+; X = low word of address (must be a multiple of $1000)
+; A = high word of address (bank)
+:BuildLine
+            stx   :target
+            sta   :target+2
+
+:BuildLine2                           ; entry used by BuildBank, with :target already set
+            lda   #CODE_LEN           ; round up to an even number of bytes
+            inc
+            and   #$FFFE
+            beq   :nocopy
+            dec
+            dec
+            tay                       ; Y = offset of the last word to copy
+:loop       lda   base,y              ; copy the template a word at a time, last word first
+            sta   [:target],y
+
+            dey
+            dey
+            bpl   :loop
+
+:nocopy     lda   #0                  ; copy is complete, now patch up the addresses
+            sep   #$20                ; 8-bit accumulator for the byte-sized patches
+
+            ldx   #0
+            lda   :target+2           ; patch in the bank for the absolute long addressing mode
+:dobank     ldy   BankPatches,x       ; each table word is an offset to patch
+            sta   [:target],y
+            inx
+            inx
+            cpx   #BankPatchNum
+            bcc   :dobank
+
+            ldx   #0
+:dopage     ldy   PagePatches,x       ; patch the page addresses by adding the page offset to each
+            lda   [:target],y
+            clc
+            adc   :target+1           ; high byte of the :target offset
+            sta   [:target],y
+            inx
+            inx
+            cpx   #PagePatchNum
+            bcc   :dopage
+
+:out
+            rep   #$20                ; back to the 16-bit accumulator
+            rts
\ No newline at end of file