diff --git a/macros/APP.MACS.S b/macros/APP.MACS.S index 5c12199..9ef741c 100644 --- a/macros/APP.MACS.S +++ b/macros/APP.MACS.S @@ -48,6 +48,57 @@ _R1W1 mac ; Read Bank 0 / Write Bank 1 stal STATE_REG <<< +_PushReg mac ; Used to save/restore registers when calling subroutines. + pha + phx + phy + <<< + +_PullReg mac + ply + plx + pla + <<< + +_PushReg2 mac ; Variation to also save the P-register to preserve m/x + pha + phx + phy + php + <<< + +_PullReg2 mac + plp + ply + plx + pla + <<< + +jne mac + beq *+5 + jmp ]1 + <<< + +jeq mac + bne *+5 + jmp ]1 + <<< + +jcc mac + bcs *+5 + jmp ]1 + <<< + +jcs mac + bcc *+5 + jmp ]1 + <<< + +min mac + cmp ]1 + bcc mout + lda ]1 +mout <<< **************************************** * Basic Error Macro * **************************************** @@ -70,3 +121,11 @@ NoErr eom + + + + + + + + diff --git a/src/App.Main.s b/src/App.Main.s index 5c3e5e1..cf3a234 100644 --- a/src/App.Main.s +++ b/src/App.Main.s @@ -33,6 +33,7 @@ KBD_REG equ $E0C000 KBD_STROBE_REG equ $E0C010 VBL_STATE_REG equ $E0C019 +SHADOW_SCREEN equ $012000 SHR_SCREEN equ $E12000 SHR_SCB equ $E19D00 @@ -292,7 +293,13 @@ DoFrame ; Set the Y-Position within the virtual buffer lda #0 ; Set the virtual Y-position - jsr SetYPos + jsr SetBG0YPos + + lda #0 ; Set the virtual X-position + jsr SetBG0XPos + + jsr Render ; Render the play field + rts ; Just load the screen width here. 
This is not semantically right; we actually are taking the nummber ; of tiles in the width of the playfield, multiplying by two to get the number of words and then @@ -333,11 +340,8 @@ DoFrame jsr SetConst rep #$30 - ldy #$7000 ; Set the return after line 200 (Bank 13, line 8) - jsr SetReturn - -; jsr BltDispatch ; Execute the blit - +; ldy #$7000 ; Set the return after line 200 (Bank 13, line 8) +; jsr SetReturn plb ; set the bank back to the code field ldx ScreenWidth ; This is the word to exit from @@ -640,7 +644,9 @@ qtRec adrl $0000 put App.Init.s put App.Msg.s put font.s + put Render.s put blitter/Blitter.s + put blitter/Horz.s put blitter/PEISlammer.s put blitter/Tables.s put blitter/Template.s @@ -677,6 +683,14 @@ qtRec adrl $0000 + + + + + + + + diff --git a/src/Render.s b/src/Render.s index e15bb2a..db6784f 100644 --- a/src/Render.s +++ b/src/Render.s @@ -46,12 +46,14 @@ ; edges of the rendered play field. +; The render function is the point of commitment -- most of the APIs that set sprites and +; update coordinates are lazy; they simply save the value and set a dirty flag in the +; DirtyBits word. +; +; This function examines the dirty bits and actually performs the work to update the code field +; and internal data structure to properly render the play field. Then the update pipeline is +; executed. 
Render - jsr ShadowOff - jsr ShadowOn rts - - - diff --git a/src/blitter/Blitter.s b/src/blitter/Blitter.s index 6914efd..c3631cb 100644 --- a/src/blitter/Blitter.s +++ b/src/blitter/Blitter.s @@ -8,117 +8,76 @@ ; The lines are based on the appearance of lines in the play field, so blitting lines 0 through ; 19 will draw the first 20 lines on the play field, regardless of where the playfield is physically ; on the SHR screen or the current value of StartY -exit_ptr equ tmp0 +exit_ptr equ tmp0 +jmp_low_save equ tmp2 BltRange - clc` + clc ; Clear carry ahead of the adc StartY that follows - tya ; Get the address of the line that we want to return from - adc StartY ; and create a pointer to it - asl - tay - lda BTableLow,y - sta exit_ptr - lda BTableHigh,y - sta exit_ptr+2 + tya ; Get the address of the line that we want to return from + adc StartY ; and create a pointer to it + asl + tay + lda BTableLow,y + sta exit_ptr + lda BTableHigh,y + sta exit_ptr+2 - txa ; get the first line (0 - 199) - adc StartY ; add in the virtual offset (0, 207) -- max value of 406 - asl - tax ; this is the offset into the blitter table + txa ; get the first line (0 - 199) + adc StartY ; add in the virtual offset (0, 207) -- max value of 406 + asl + tax ; this is the offset into the blitter table - sep #$20 ; 8-bit Acc - lda BTableHigh,x ; patch in the bank - sta blt_entry+3 + sep #$20 ; 8-bit Acc + lda BTableHigh,x ; patch in the bank + sta blt_entry+3 - lda BTableLow+1,x ; patch in the page - sta blt_entry+2 + lda BTableLow+1,x ; patch in the page + sta blt_entry+2 ; The way we patch the exit code is subtle, but very fast. The CODE_EXIT offset points to ; an JMP/JML instruction that transitions to the next line after all of the code has been -; executed. Since every code field line is bank-aligned, we know that the low-byte of the -; operand is always $00. +; executed. ; ; The trick we use is to patch the low byte to force the code to jump to a special return -; function (jml blt_return) in the *next* code field line. 
When it's time to restore the -; code, we can unconditionally store a $00 value to set things back to normal. -; -; This is the ideal situation -- patch/restore in a single 8-bit lda #imm / sta instruction -; pair with no need to preserve the data +; function (jml blt_return) in the *next* code field line. - ldy #CODE_EXIT+1 ; this is a JMP or JML instruction that points to the next line. - lda #FULL_RETURN ; this is the offset of the return code - sta [exit_ptr],y ; patch out the low byte of the JMP/JML - rep #$20 + ldy #CODE_EXIT+1 ; this is a JMP or JML instruction that points to the next line. + lda [exit_ptr],y + sta jmp_low_save + lda #FULL_RETURN ; this is the offset of the return code + sta [exit_ptr],y ; patch out the low byte of the JMP/JML ; Now we need to set up the Bank, Stack Pointer and Direct Page registers for calling into ; the code field - pei BG1DataBank-1 ; Set the data bank for BG1 data - plb - plb - - phd ; Save the application direct page - lda BlitterDP ; Set the direct page to the blitter data - tcd - - sei ; disable interrupts - _R0W1 - tsc ; save the stack pointer - stal stk_save+1 - -blt_entry jml $000000 ; Jump into the blitter code $XX/YYZZ - -blt_return _R0W0 -stk_save lda #0000 ; load the stack - tcs - cli ; re-enable interrupts - pld ; restore the direct page - - sep #$20 - ldy #CODE_EXIT+1 - lda #00 - sta [exit_ptr],y - rep #$20 - - rts - -; This subroutine is used to set up the BltDispatch code based on the current state of -; the machine and/or the state of the engine. The tasks it performs are -; -; 1. Set the blt_entry low byte based on the graphics engine configuration -BltSetup - sep #$20 ; Only need 8-bits for this - lda EngineMode - bit #$01 ; Are both background layers enabled? 
- beq :oneLyr - lda #entry_2-base - bra :twoLyr -:oneLyr lda #entry_3-base -:twoLyr sta blt_entry+1 ; set the low byte of the JML - rep #$20 - rts - - - - - - - - - - - - - - - - - + lda BG1DataBank ; Set the data bank for BG1 data + pha + plb + rep #$20 + phd ; Save the application direct page + lda BlitterDP ; Set the direct page to the blitter data + tcd + sei ; disable interrupts + _R0W1 + tsc ; save the stack pointer + stal stk_save+1 +blt_entry jml $000000 ; Jump into the blitter code $XX/YY00 +blt_return _R0W0 +stk_save lda #0000 ; load the stack + tcs + cli ; re-enable interrupts + pld ; restore the direct page + sep #$20 + ldy #CODE_EXIT+1 + lda jmp_low_save + sta [exit_ptr],y + rep #$20 + rts diff --git a/src/blitter/DirectPage.s b/src/blitter/DirectPage.s index d636032..3a50634 100644 --- a/src/blitter/DirectPage.s +++ b/src/blitter/DirectPage.s @@ -8,15 +8,23 @@ ScreenX1 equ 10 ScreenTileHeight equ 12 ; Height of the playfield in 8x8 blocks ScreenTileWidth equ 14 ; Width of the playfield in 8x8 blocks -StartY equ 16 ; Which code buffer line displays first on screen. Range = 0 to 207 -EngineMode equ 18 ; Defined the mode/capabilities that are enabled +StartX equ 16 ; Which code buffer byte is the left edge of the screen. Range = 0 to 167 +StartY equ 18 ; Which code buffer line is the top of the screen. 
Range = 0 to 207 +EngineMode equ 20 ; Defined the mode/capabilities that are enabled ; bit 0: 0 = Single Background, 1 = Parallax -DirtyBits equ 20 ; Identify values that have changed between frames +DirtyBits equ 22 ; Identify values that have changed between frames -BG1DataBank equ 22 ; Data bank that holds BG1 layer data -BlitterDP equ 23 ; Direct page address the holder blitter data +BG1DataBank equ 24 ; Data bank that holds BG1 layer data +BlitterDP equ 25 ; Direct page address the holder blitter data -bstk equ 224 ; 16-byte stack to push bank addresses +OldStartX equ 27 ; StartX value saved by SetBG0XPos on the first change; starts at 27 because BlitterDP is a 16-bit word occupying 25-26 +OldStartY equ 29 ; Counterpart slot for StartY, placed directly after the OldStartX word (27-28) + +bstk equ 208 ; 16-byte stack to push bank addresses + +tmp8 equ 224 +tmp9 equ 226 +tmp10 equ 228 tmp0 equ 240 ; 16 bytes of temporary space to be used as scratch tmp1 equ 242 @@ -35,3 +43,8 @@ DIRTY_BIT_BG0_Y equ $0002 + + + + + diff --git a/src/blitter/Horz.s b/src/blitter/Horz.s new file mode 100644 index 0000000..10b01e8 --- /dev/null +++ b/src/blitter/Horz.s @@ -0,0 +1,339 @@ +; Subroutines that deal with the horizontal scrolling. The primary function of +; these routines are to adjust tables and patch in new values into the code field +; when the virtual X-position of the play field changes. + + +; SetBG0XPos +; +; Set the virtual horizontal position of the primary background layer. In addition to +; updating the direct page state locations, this routine needs to preserve the original +; value as well. This is a bit subtle, because if this routine is called multiple times +; with different values, we need to make sure the *original* value is preserved and not +; continuously overwrite it. 
+; +; We assume that there is a clean code field in this routine +SetBG0XPos + cmp StartX + beq :out ; Easy, if nothing changed, then nothing changes + + ldx StartX ; Load the old value (but don't save it yet) + sta StartX ; Save the new position + + lda #DIRTY_BIT_BG0_X + tsb DirtyBits ; Check if the value is already dirty, if so exit + bne :out ; without overwriting the original value + + stx OldStartX ; First change, so preserve the value +:out rts + +; Based on the current value of StartX in the direct page, patch up the code fields +; to render the correct data. Note that we do *not* do the OpcodeRestore in this +; routine. The reason is that the restore *must* be applied using the (StartX, StartY) +; values from the previous frame, which requires logic that is not relevant to setting +; up the code field. +_ApplyBG0XPos + +:virt_line equ tmp1 +:lines_left equ tmp2 +:draw_count equ tmp3 +:exit_offset equ tmp4 +:entry_offset equ tmp5 +:exit_bra equ tmp6 +:exit_address equ tmp7 +:base_address equ tmp8 +:draw_count_x2 equ tmp9 + +; This code is fairly succinct. See the corresponding code in Vert.s for more detailed comments. + + lda StartY ; This is the base line of the virtual screen + sta :virt_line ; Keep track of it + + lda ScreenHeight + sta :lines_left + +; Calculate the exit and entry offsets into the code fields. This is a bit tricky, because odd-aligned +; rendering causes the left and right edges to move in a staggered fashion. +; +; ... +----+----+----+----+----+- ... -+----+----+----+----+----+ +; | 04 | 06 | 08 | 0A | 0C | | 44 | 46 | 48 | 4A | +; ... +----+----+----+----+----+- ... -+----+----+----+----+----+ +; | | +; +---- screen width --------------+ +; entry | | exit +; +; Here is an example of a screen 64 bytes wide. 
When everything is aligned to an even offset +; then the entry point is column $08 and the exit point is column $48 +; +; If we move the screen forward one byte (which means the pointers move backwards) then the low-byte +; of column $06 will be on the right edge of the screen and the high-byte of column $46 will left-edge +; of the screen. Since the one-byte edges are handled specially, the exit point shifts one column, but +; the entry point does not. +; +; ... +----+----+----+----+----+- ... -+----+----+----+----+----+ +; | 04 | 06 | 08 | 0A | 0C | | 44 | 46 | 48 | 4A | +; ... +----+----+----+----+----+- ... -+----+----+----+----+----+ +; | | | | +; +--|------ screen width -------|--+ +; entry | | exit +; +; When the screen is moved one more byte forward, then the entry point will move to the +; next column. +; +; ... +----+----+----+----+----+- ... -+----+----+----+----+----+ +; | 04 | 06 | 08 | 0A | 0C | | 44 | 46 | 48 | 4A | +; ... +----+----+----+----+----+- ... -+----+----+----+----+----+ +; | | +; +------ screen width ------------+ +; entry | | exit +; +; So, in short, the entry tile position is rounded up from the x-position and the exit +; tile position is rounded down. + + lda StartX ; This is the starting byte offset (0 - 163) + inc ; round up to calculate the entry column + and #$FFFE + tax + lda Col2CodeOffset,X ; This is an offset from the base page boundary + sta :entry_offset + + lda StartX ; Repeat with adding the screen width + clc ; to calculate the exit column + adc ScreenWidth + bit #$0001 ; Check if odd or even + bne :isOdd + + and #$FFFE + tax + lda CodeFieldEvenBRA,x + sta :exit_bra + bra :wasEven +:isOdd + and #$FFFE + tax + lda CodeFieldOddBRA,x + sta :exit_bra +:wasEven + lda Col2CodeOffset,X + sta :exit_offset + +; Main loop that +; +; 1. Saves the opcodes in the code field +; 2. Writes the BRA instruction to exit the code field +; 3. 
Writes the JMP entry point to enter the code field + +:loop + lda :virt_line + asl ; This will clear the carry bit + tax + ldal BTableLow,x ; Get the address of the first code field line + tay ; Save it to use as the base address + adc :exit_offset ; Add some offsets to get the base address in the code field line + sta :exit_address + sty :base_address + + sep #$20 + ldal BTableHigh,x + pha + plb ; This is the bank that will receive the updates + rep #$20 + + lda :virt_line + and #$000F + eor #$FFFF + inc + clc + adc #16 + min :lines_left + + sta :draw_count ; Do this many lines + asl + sta :draw_count_x2 + +; First step is to set the BRA instruction to exit the code field at the proper location. There +; are two sub-steps to do here; we need to save the 16-bit value that exists at the location and +; then overwrite it with the branch instruction. +; +; Special note, the SaveOpcode function stores the opcode *within* the code field as it is +; used in odd-aligned cases to determine how to draw the 8-bit value on the left edge of the +; screen + + ; y is already set to :base_address + tax ; :draw_count_x2 + lda :exit_address ; Save from this location + jsr SaveOpcode + + ldx :draw_count_x2 ; Do this many lines + lda :exit_bra ; Copy this value into all of the lines + ldy :exit_address ; starting at this address + jsr SetConst + +; Next, patch in the CODE_ENTRY value, which is the low byte of a JMP instruction. 
This is an +; 8-bit operation and, since the PEA code is bank aligned, we use the entry_offset value directly + + sep #$20 + ldx :draw_count_x2 + lda :entry_offset + ldy :base_address + jsr SetCodeEntry + rep #$20 + +; Do the end of the loop -- update the virtual line counter and reduce the number +; of lines left to render + + lda :virt_line ; advance to the virtual line after the segment we just + clc ; filled in + adc :draw_count + sta :virt_line + + lda :lines_left ; subtract the number of lines we just completed + sec + sbc :draw_count + sta :lines_left + + jne :loop + + phk + plb + rts + +; SaveOpcode +; +; Save the values to the restore location. This should only be used to patch the +; code field since the save location is fixed. +; +; X = number of lines * 2, 0 to 32 +; Y = starting line * $1000 +; A = code field location * $1000 +SaveOpcode + jmp (:tbl,x) + +:tbl da :bottom + da :do01,:do02,:do03,:do04 + da :do05,:do06,:do07,:do08 + da :do09,:do10,:do11,:do12 + da :do13,:do14,:do15,:do16 + +:do15 tax + bra :x15 +:do14 tax + bra :x14 +:do13 tax + bra :x13 +:do12 tax + bra :x12 +:do11 tax + bra :x11 +:do10 tax + bra :x10 +:do09 tax + bra :x09 +:do08 tax + bra :x08 +:do07 tax + bra :x07 +:do06 tax + bra :x06 +:do05 tax + bra :x05 +:do04 tax + bra :x04 +:do03 tax + bra :x03 +:do02 tax + bra :x02 +:do01 tax + bra :x01 +:do16 tax +:x16 lda $F000,x + sta OPCODE_SAVE+$F000,y +:x15 lda $E000,x + sta OPCODE_SAVE+$E000,y +:x14 lda $D000,x + sta OPCODE_SAVE+$D000,y +:x13 lda $C000,x + sta OPCODE_SAVE+$C000,y +:x12 lda $B000,x + sta OPCODE_SAVE+$B000,y +:x11 lda $A000,x + sta OPCODE_SAVE+$A000,y +:x10 lda $9000,x + sta OPCODE_SAVE+$9000,y +:x09 lda $8000,x + sta OPCODE_SAVE+$8000,y +:x08 lda $7000,x + sta OPCODE_SAVE+$7000,y +:x07 lda $6000,x + sta OPCODE_SAVE+$6000,y +:x06 lda $5000,x + sta OPCODE_SAVE+$5000,y +:x05 lda $4000,x + sta OPCODE_SAVE+$4000,y +:x04 lda $3000,x + sta OPCODE_SAVE+$3000,y +:x03 lda $2000,x + sta OPCODE_SAVE+$2000,y +:x02 lda $1000,x + sta 
OPCODE_SAVE+$1000,y +:x01 lda: $0000,x + sta: OPCODE_SAVE+$0000,y +:bottom rts + +; SetCodeEntry +; +; Patch in the low byte at the CODE_ENTRY. Must be called with 8-bit accumulator +; +; X = number of lines * 2, 0 to 32 +; Y = starting line * $1000 +; A = address low byte +SetCodeEntry + jmp (:tbl,x) +:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09 + da :bottom-12,:bottom-15,:bottom-18,:bottom-21 + da :bottom-24,:bottom-27,:bottom-30,:bottom-33 + da :bottom-36,:bottom-39,:bottom-42,:bottom-45 + da :bottom-48 +:top sta CODE_ENTRY+$F000,y + sta CODE_ENTRY+$E000,y + sta CODE_ENTRY+$D000,y + sta CODE_ENTRY+$C000,y + sta CODE_ENTRY+$B000,y + sta CODE_ENTRY+$A000,y + sta CODE_ENTRY+$9000,y + sta CODE_ENTRY+$8000,y + sta CODE_ENTRY+$7000,y + sta CODE_ENTRY+$6000,y + sta CODE_ENTRY+$5000,y + sta CODE_ENTRY+$4000,y + sta CODE_ENTRY+$3000,y + sta CODE_ENTRY+$2000,y + sta CODE_ENTRY+$1000,y + sta: CODE_ENTRY+$0000,y +:bottom rts + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/blitter/Tables.s b/src/blitter/Tables.s index 6b88a8a..d030ae1 100644 --- a/src/blitter/Tables.s +++ b/src/blitter/Tables.s @@ -14,7 +14,7 @@ ; ldx Col2CodeOffset,y ; sta $0001,x ; -; This table is necessary, because due to the data being draw via stack instructions, the +; This table is necessary, because due to the data being drawn via stack instructions, the ; tile order is reversed. 
PER_TILE_SIZE equ 3 @@ -216,8 +216,3 @@ BlitBuff ds 4*13 BTableHigh ds 208*2*2 BTableLow ds 208*2*2 - - - - - diff --git a/src/blitter/Template.s b/src/blitter/Template.s index fa98e4a..c328bc8 100644 --- a/src/blitter/Template.s +++ b/src/blitter/Template.s @@ -2,17 +2,21 @@ mx %00 -DP_ADDR equ entry_1-base+1 ; offset to patch in the direct page for dynamic tiles -BG1_ADDR equ entry_2-base+1 ; offset to patch in the Y-reg for BG1 (dp),y addressing -STK_ADDR equ entry_3-base+1 ; offset to patch in the stack (SHR) right edge address +DP_ADDR equ entry_1-base+1 ; offset to patch in the direct page for dynamic tiles +BG1_ADDR equ entry_2-base+1 ; offset to patch in the Y-reg for BG1 (dp),y addressing +STK_ADDR equ entry_3-base+1 ; offset to patch in the stack (SHR) right edge address -CODE_ENTRY equ entry_jmp-base+1 ; low byte of the page-aligned jump address +DP_ENTRY equ entry_1-base +TWO_LYR_ENTRY equ entry_2-base +ONE_LYR_ENTRY equ entry_3-base + +CODE_ENTRY equ entry_jmp-base+1 ; low byte of the page-aligned jump address CODE_TOP equ loop-base CODE_LEN equ top-base CODE_EXIT equ even_exit-base -OPCODE_SAVE equ odd_exit-base+1 ; spot to save the code field opcode when patching exit BRA -FULL_RETURN equ full_return-base ; offset that returns from the blitter -ENABLE_INT equ enable_int-base ; offset that re-enable interrupts and continues +OPCODE_SAVE equ odd_exit-base+1 ; spot to save the code field opcode when patching exit BRA +FULL_RETURN equ full_return-base ; offset that returns from the blitter +ENABLE_INT equ enable_int-base ; offset that re-enable interrupts and continues LINES_PER_BANK equ 16 ; Locations that need the page offset added @@ -56,17 +60,17 @@ BankPatchNum equ *-BankPatches ; usually only be executed once during app initialization. It doesn't get called ; with any significant frequency. 
-SetScreenRect sty ScreenHeight ; Save the screen height and width +SetScreenRect sty ScreenHeight ; Save the screen height and width stx ScreenWidth - tax ; Temp save of the accumulator + tax ; Temp save of the accumulator and #$00FF sta ScreenY0 clc adc ScreenHeight sta ScreenY1 - txa ; Restore the accumulator + txa ; Restore the accumulator xba and #$00FF sta ScreenX0 @@ -74,31 +78,31 @@ SetScreenRect sty ScreenHeight ; Save the screen height adc ScreenWidth sta ScreenX1 - lda ScreenHeight ; Divide the height in scanlines by 8 to get the number tiles + lda ScreenHeight ; Divide the height in scanlines by 8 to get the number tiles lsr lsr lsr sta ScreenTileHeight - lda ScreenWidth ; Divide width in bytes by 4 to get the number of tiles + lda ScreenWidth ; Divide width in bytes by 4 to get the number of tiles lsr lsr sta ScreenTileWidth - lda ScreenY0 ; Calculate the address of the first byte - asl ; of the right side of the playfield + lda ScreenY0 ; Calculate the address of the first byte + asl ; of the right side of the playfield tax - lda ScreenAddr,x ; This is the address for the left edge of the physical screen + lda ScreenAddr,x ; This is the address for the left edge of the physical screen clc adc ScreenX1 dec - pha ; Save for second loop + pha ; Save for second loop ldx #0 ldy ScreenHeight jsr :loop - pla ; Reset the address and continue filling in the - ldy ScreenHeight ; second half of the table + pla ; Reset the address and continue filling in the + ldy ScreenHeight ; second half of the table :loop clc sta RTable,x adc #160 @@ -113,7 +117,7 @@ FillScreen lda #0 jsr ClearToColor ldy ScreenY0 -]yloop +:yloop tya asl a tax @@ -127,16 +131,16 @@ FillScreen lda #0 lsr tay lda #$FFFF -]xloop stal $E10000,x +:xloop stal SHR_SCREEN,x inx inx dey - bne ]xloop + bne :xloop ply iny cpy ScreenY1 - bcc ]yloop + bcc :yloop rts ; Set the starting line of the virtual buffer that will be displayed on the first physical line @@ -194,22 +198,10 @@ FillScreen lda #0 ; 
Input: A = line number [0, 207] ; Output: A = low word, X = high word GetBlitLineAddress - pha ; save the value - - and #$FFF0 ; Divide by 16 to get the bank number of this line and - lsr ; then multiply by 4 to get the offset. So just divide by 4. - lsr - tax - lda BlitBuff+2,x ; This is the high word of the bank address - tax - - pla ; Pop the value and multiply the lower 4 bits by 4096 to get - and #$000F ; the line offset within the bank - xba asl - asl - asl - asl ; This is the page of the line + tay + lda BTableLow,y + ldx BTableHigh,y rts @@ -217,25 +209,25 @@ lines_left ds 2 start_mod_16 ds 2 tblptr ds 2 stksave ds 2 -SetYPos sta StartY ; Save the position +SetYPos sta StartY ; Save the position lda ScreenHeight sta lines_left - lda StartY ; Now figure out exactly how many banks we cross by - and #$000F ; calculating ((StartY % 16) + ScreenHeight) / 16 + lda StartY ; Now figure out exactly how many banks we cross by + and #$000F ; calculating ((StartY % 16) + ScreenHeight) / 16 sta start_mod_16 clc adc ScreenHeight - and #$00F0 ; Just keep the relevant nibble + and #$00F0 ; Just keep the relevant nibble lsr lsr lsr - tax ; Keep the value pre-multiplied by 2 + tax ; Keep the value pre-multiplied by 2 ldy #0 - jsr PushBanks ; Push the bank bytes on the stack + jsr PushBanks ; Push the bank bytes on the stack brl :out ; Start of the main body of the function. We need to get a pointer to the correct offset of @@ -253,23 +245,23 @@ SetYPos sta StartY ; Save the position :prologue lda start_mod_16 beq :body - _Mul4096 ; Save the offset into the code bank of the - tay ; first line. + _Mul4096 ; Save the offset into the code bank of the + tay ; first line. - lda #16 ; Now figure out how many lines to execute. 
Usually - sec ; this will just be the lines to the end of the code - sbc start_mod_16 ; bank, but if the total screen height is smaller than - cmp ScreenHeight ; the number of lines in the code bank, we need to clamp - bcc :min_1 ; the maximum value + lda #16 ; Now figure out how many lines to execute. Usually + sec ; this will just be the lines to the end of the code + sbc start_mod_16 ; bank, but if the total screen height is smaller than + cmp ScreenHeight ; the number of lines in the code bank, we need to clamp + bcc :min_1 ; the maximum value lda ScreenHeight -:min_1 sta tmp4 ; save for updating the counters +:min_1 sta tmp4 ; save for updating the counters asl - tax ; do this many lines - lda tblptr ; starting at this address + tax ; do this many lines + lda tblptr ; starting at this address - plb ; Set the code field bank - jsr CopyFromArray2 ; Copy the right screen edge addresses + plb ; Set the code field bank + jsr CopyFromArray2 ; Copy the right screen edge addresses lda lines_left sec @@ -289,8 +281,8 @@ SetYPos sta StartY ; Save the position ldy #0 ldx tblptr -:body0 plb ; Set the code field bank - jsr CopyFromArray2Top ; to bypass the need to set the X register +:body0 plb ; Set the code field bank + jsr CopyFromArray2Top ; to bypass the need to set the X register txa clc @@ -302,22 +294,22 @@ SetYPos sta StartY ; Save the position sbc #16 sta lines_left - cmp #16 ; Repeat the test here to we can skip some - bcs :body0 ; redundant setup and spill the X register - stx tblptr ; back into tblptr when done + cmp #16 ; Repeat the test here to we can skip some + bcs :body0 ; redundant setup and spill the X register + stx tblptr ; back into tblptr when done :epilogue lda lines_left beq :out - asl ; Y is still zero + asl ; Y is still zero tax lda tblptr - plb ; Set the code field bank - jsr CopyFromArray2 ; to bypass the need to set the X register + plb ; Set the code field bank + jsr CopyFromArray2 ; to bypass the need to set the X register -:out lda stksave 
; put the stack back +:out lda stksave ; put the stack back tcs - phk ; Need to restore the current bank + phk ; Need to restore the current bank plb rts @@ -374,11 +366,11 @@ Mod208 cmp #%1101000000000000 ; Patch out the final JMP to jump to the long JML return code ; ; Y = starting line * $1000 -SetReturn lda #$0280 ; BRA *+4 +SetReturn lda #$0280 ; BRA *+4 sta CODE_EXIT,y rts -ResetReturn lda #$004C ; JMP $XX00 +ResetReturn lda #$004C ; JMP $XX00 sta CODE_EXIT,y rts @@ -389,7 +381,7 @@ SetNextLine lda #$F000+{entry_3-base} jmp SetAbsAddrs ; Copy a series of bank bytes onto the direct page, which we will later point the stack -; at, and are use to iterate among the different code banks. +; at and use to iterate among the different code banks. ; ; Y = starting index * 4 ; X = number of bank @@ -400,7 +392,7 @@ PushBanks sep #$20 da :bottom-25,:bottom-30,:bottom-35,:bottom-40 da :bottom-45,:bottom-50,:bottom-55,:bottom-60 da :bottom-65 -:top lda: BlitBuff+48,y ; These are all 8-bit loads and stores +:top lda: BlitBuff+48,y ; These are all 8-bit loads and stores sta bstk+13 lda: BlitBuff+44,y sta bstk+12 @@ -446,7 +438,7 @@ PushBanks sep #$20 ; A = value ; ; Set M to 0 or 1 -SetConst ; Need a blnk line here, otherwise the :tbl local variable resolveds backwards +SetConst ; Need a blank line here, otherwise the :tbl local variable resolveds backwards jmp (:tbl,x) :tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09 da :bottom-12,:bottom-15,:bottom-18,:bottom-21 @@ -479,49 +471,77 @@ SetConst ; Need a blnk line here, ; X = number of lines * 2, 0 to 32 ; Y = starting line * $1000 ; A = store location * $1000 -SaveOpcode pha ; save the accumulator - ldal :tbl,x - dec - plx ; put the accumulator into X - pha ; push the address into the stack - rts ; and jump +SaveOpcode0 + jmp (:tbl,x) -:tbl da :bottom-00,:bottom-06,:bottom-12,:bottom-18 - da :bottom-24,:bottom-30,:bottom-36,:bottom-42 - da :bottom-48,:bottom-54,:bottom-60,:bottom-66 - da 
:bottom-72,:bottom-78,:bottom-84,:bottom-90 - da :bottom-96 -:top lda $F000,y +:tbl da :bottom + da :do01,:do02,:do03,:do04 + da :do05,:do06,:do07,:do08 + da :do09,:do10,:do11,:do12 + da :do13,:do14,:do15,:do16 + +:do15 tax + bra :x15 +:do14 tax + bra :x14 +:do13 tax + bra :x13 +:do12 tax + bra :x12 +:do11 tax + bra :x11 +:do10 tax + bra :x10 +:do09 tax + bra :x09 +:do08 tax + bra :x08 +:do07 tax + bra :x07 +:do06 tax + bra :x06 +:do05 tax + bra :x05 +:do04 tax + bra :x04 +:do03 tax + bra :x03 +:do02 tax + bra :x02 +:do01 tax + bra :x01 +:do16 tax +:x16 lda $F000,y sta $F000,x - lda $E000,y +:x15 lda $E000,y sta $E000,x - lda $D000,y +:x14 lda $D000,y sta $D000,x - lda $C000,y +:x13 lda $C000,y sta $C000,x - lda $B000,y +:x12 lda $B000,y sta $B000,x - lda $A000,y +:x11 lda $A000,y sta $A000,x - lda $9000,y +:x10 lda $9000,y sta $9000,x - lda $8000,y +:x09 lda $8000,y sta $8000,x - lda $7000,y +:x08 lda $7000,y sta $7000,x - lda $6000,y +:x07 lda $6000,y sta $6000,x - lda $5000,y +:x06 lda $5000,y sta $5000,x - lda $4000,y +:x05 lda $4000,y sta $4000,x - lda $3000,y +:x04 lda $3000,y sta $3000,x - lda $2000,y +:x03 lda $2000,y sta $2000,x - lda $1000,y +:x02 lda $1000,y sta $1000,x - lda: $0000,y +:x01 lda: $0000,y sta: $0000,x :bottom rts @@ -533,50 +553,77 @@ SaveOpcode pha ; save the accumulator ; X = number of lines * 2, 0 to 32 ; Y = starting line * $1000 ; A = store location * $1000 -RestoreOpcode pha ; save the accumulator - ldal :tbl,x - dec - plx ; put the accumulator into X - pha ; push the address into the stack - rts ; and jump +RestoreOpcode + jmp (:tbl,x) -:tbl da :bottom-00,:bottom-06,:bottom-12,:bottom-18 - da :bottom-24,:bottom-30,:bottom-36,:bottom-42 - da :bottom-48,:bottom-54,:bottom-60,:bottom-66 - da :bottom-72,:bottom-78,:bottom-84,:bottom-90 - da :bottom-96 +:tbl da :bottom + da :do01,:do02,:do03,:do04 + da :do05,:do06,:do07,:do08 + da :do09,:do10,:do11,:do12 + da :do13,:do14,:do15,:do16 -:top lda $F000,x +:do15 tax + bra :x15 +:do14 tax + 
bra :x14 +:do13 tax + bra :x13 +:do12 tax + bra :x12 +:do11 tax + bra :x11 +:do10 tax + bra :x10 +:do09 tax + bra :x09 +:do08 tax + bra :x08 +:do07 tax + bra :x07 +:do06 tax + bra :x06 +:do05 tax + bra :x05 +:do04 tax + bra :x04 +:do03 tax + bra :x03 +:do02 tax + bra :x02 +:do01 tax + bra :x01 +:do16 tax +:x16 lda $F000,x sta $F000,y - lda $E000,x +:x15 lda $E000,x sta $E000,y - lda $D000,x +:x14 lda $D000,x sta $D000,y - lda $C000,x +:x13 lda $C000,x sta $C000,y - lda $B000,x +:x12 lda $B000,x sta $B000,y - lda $A000,x +:x11 lda $A000,x sta $A000,y - lda $9000,x +:x10 lda $9000,x sta $9000,y - lda $8000,x +:x09 lda $8000,x sta $8000,y - lda $7000,x +:x08 lda $7000,x sta $7000,y - lda $6000,x +:x07 lda $6000,x sta $6000,y - lda $5000,x +:x06 lda $5000,x sta $5000,y - lda $4000,x +:x05 lda $4000,x sta $4000,y - lda $3000,x +:x04 lda $3000,x sta $3000,y - lda $2000,x +:x03 lda $2000,x sta $2000,y - lda $1000,x +:x02 lda $1000,x sta $1000,y - lda: $0000,x +:x01 lda: $0000,x sta: $0000,y :bottom rts @@ -587,12 +634,12 @@ RestoreOpcode pha ; save the accumulator ; X = number of lines * 2, 0 to 32 ; Y = starting line * $1000 ; A = array address -CopyFromArray2 pha ; save the accumulator +CopyFromArray2 pha ; save the accumulator ldal :tbl,x dec - plx ; put the accumulator into X - pha ; push the address into the stack - rts ; and jump + plx ; put the accumulator into X + pha ; push the address into the stack + rts ; and jump :tbl da bottomCFA2-00,bottomCFA2-06,bottomCFA2-12,bottomCFA2-18 da bottomCFA2-24,bottomCFA2-30,bottomCFA2-36,bottomCFA2-42 @@ -729,8 +776,8 @@ SetAbsAddrs sec sta: $0000,y :bottom rts -; Full up a full bank with blitter templates. Currently we can fit 16 lines per bank, so need -; a total of 13 banks to hold the 208 lines to full-screen support +; Fill up a full bank with blitter templates. 
Currently we can fit 16 lines per bank, so need +; a total of 13 banks to hold the 208 lines for full-screen support ; ; A = high word of bank table ; Y = index * 4 of the bank to initialize @@ -747,13 +794,13 @@ BuildBank lda [bankArray],y sta target+2 - iny ; move to the next item + iny ; move to the next item iny - iny ; middle byte - cpy #4*13 ; if greater than the array length, wrap back to zero + iny ; middle byte + cpy #4*13 ; if greater than the array length, wrap back to zero bcc :ok ldy #1 -:ok lda [bankArray],y ; Get the middle and high bytes of the address +:ok lda [bankArray],y ; Get the middle and high bytes of the address sta nextBank :next @@ -769,13 +816,13 @@ BuildBank plb plb - lda #$F000+{entry_3-base} ; Set the address from each line to the next + lda #$F000+{ONE_LYR_ENTRY} ; Set the address from each line to the next ldy #CODE_EXIT+1 ldx #15*2 jsr SetAbsAddrs - ldy #$F000+CODE_EXIT ; Patch the last line with a JML to go to the next bank - lda #{$005C+{entry_3-base}*256} + ldy #$F000+CODE_EXIT ; Patch the last line with a JML to go to the next bank + lda #{$005C+{ONE_LYR_ENTRY}*256} sta [target],y ldy #$F000+CODE_EXIT+2 lda nextBank @@ -794,7 +841,7 @@ BuildLine sta target+2 BuildLine2 - lda #CODE_LEN ; round up to an even number of bytes + lda #CODE_LEN ; round up to an even number of bytes inc and #$FFFE beq :nocopy @@ -808,11 +855,11 @@ BuildLine2 dey bpl :loop -:nocopy lda #0 ; copy is complete, now patch up the addresses +:nocopy lda #0 ; copy is complete, now patch up the addresses sep #$20 ldx #0 - lda target+2 ; patch in the bank for the absolute long addressing mode + lda target+2 ; patch in the bank for the absolute long addressing mode :dobank ldy BankPatches,x sta [target],y inx @@ -821,7 +868,7 @@ BuildLine2 bcc :dobank ldx #0 -:dopage ldy PagePatches,x ; patch the page addresses by adding the page offset to each +:dopage ldy PagePatches,x ; patch the page addresses by adding the page offset to each lda [target],y clc adc target+1 
@@ -841,71 +888,71 @@ BuildLine2 ; ; The 'base' location is always assumed to be on a 4kb ($1000) boundary base -entry_1 ldx #0000 ; Used for LDA 00,x addressing -entry_2 ldy #0000 ; Used for LDA (00),y addressing -entry_3 lda #0000 ; Sets screen address (right edge) +entry_1 ldx #0000 ; Used for LDA 00,x addressing +entry_2 ldy #0000 ; Used for LDA (00),y addressing +entry_3 lda #0000 ; Sets screen address (right edge) tcs long_0 entry_jmp jmp $0100 - dfb $00 ; if the screen is odd-aligned, then the opcode is set to - ; $AF to convert to a LDA long instruction. This puts the - ; first two bytes of the instruction field in the accumulator - ; and falls through to the next instruction. + dfb $00 ; if the screen is odd-aligned, then the opcode is set to + ; $AF to convert to a LDA long instruction. This puts the + ; first two bytes of the instruction field in the accumulator + ; and falls through to the next instruction. - ; We structure the line so that the entry point only needs to - ; update the low-byte of the address, the means it takes only - ; an amortized 4-cycles per line to set the entry point break + ; We structure the line so that the entry point only needs to + ; update the low-byte of the address, the means it takes only + ; an amortized 4-cycles per line to set the entry point break -right_odd bit #$000B ; Check the bottom nibble to quickly identify a PEA instruction - beq r_is_pea ; This costs 6 cycles in the fast-path +right_odd bit #$000B ; Check the bottom nibble to quickly identify a PEA instruction + beq r_is_pea ; This costs 6 cycles in the fast-path - bit #$0040 ; Check bit 6 to distinguish between JMP and all of the LDA variants + bit #$0040 ; Check bit 6 to distinguish between JMP and all of the LDA variants bne r_is_jmp long_1 stal *+4-base - dfb $00,$00 ; this here to avoid needing a BRA instruction back. So the fast-path - ; gets a 1-cycle penalty, but we save 3 cycles here. 
+ dfb $00,$00 ; this here to avoid needing a BRA instruction back. So the fast-path + ; gets a 1-cycle penalty, but we save 3 cycles here. -r_is_pea xba ; fast code for PEA +r_is_pea xba ; fast code for PEA sep #$30 pha rep #$30 -odd_entry jmp $0100 ; unconditionally jump into the "next" instruction in the - ; code field. This is OK, even if the entry point was the - ; last instruction, because there is a JMP at the end of - ; the code field, so the code will simply jump to that - ; instruction directly. - ; - ; As with the original entry point, because all of the - ; code field is page-aligned, only the low byte needs to - ; be updated when the scroll position changes +odd_entry jmp $0100 ; unconditionally jump into the "next" instruction in the + ; code field. This is OK, even if the entry point was the + ; last instruction, because there is a JMP at the end of + ; the code field, so the code will simply jump to that + ; instruction directly. + ; + ; As with the original entry point, because all of the + ; code field is page-aligned, only the low byte needs to + ; be updated when the scroll position changes -r_is_jmp sep #$41 ; Set the C and V flags which tells a snippet to push only the low byte +r_is_jmp sep #$41 ; Set the C and V flags which tells a snippet to push only the low byte long_2 ldal entry_jmp+1-base long_3 stal *+5-base - dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 16 cycles, is there a better way?) + dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 16 cycles, is there a better way?) ; The next labels are special, in that they are entry points into special subroutines. They are special ; because they are within the first 256 bytes of each code field, which allows them to be selectable ; by patching the low byte of the JMP instructions. 
; Return to caller -- the even_exit JMP from the previous line will jump here when a render is complete -full_return jml blt_return ; Full exit +full_return jml blt_return ; Full exit -; Re-enable interrupts and contniue -- the even_exit JMP fro the previous line will jump here every +; Re-enable interrupts and continue -- the even_exit JMP from the previous line will jump here every ; 8 or 16 lines in order to give the system some extra time to handle interrupts. -enable_int ldal stk_save ; restore the stack +enable_int ldal stk_save ; restore the stack tcs - sep #$20 ; 8-bit mode - ldal STATE_REG ; Read Bank 0 / Write Bank 0 + sep #$20 ; 8-bit mode + ldal STATE_REG ; Read Bank 0 / Write Bank 0 and #$CF stal STATE_REG cli - nop ; Give a couple of cycles + nop ; Give a couple of cycles sei ldal STATE_REG - ora #$10 ; Read Bank 0 / Write Bank 1 + ora #$10 ; Read Bank 0 / Write Bank 1 stal STATE_REG rep #$20 bra entry_1 @@ -917,19 +964,19 @@ enable_int ldal stk_save ; restore the stack ; page-crossing penalty of the branch. ds 166 -loop_exit_1 jmp odd_exit-base ; +0 Alternate exit point depending on whether the left edge is -loop_exit_2 jmp even_exit-base ; +3 odd-aligned +loop_exit_1 jmp odd_exit-base ; +0 Alternate exit point depending on whether the left edge is +loop_exit_2 jmp even_exit-base ; +3 odd-aligned -loop lup 82 ; +6 Set up 82 PEA instructions, which is 328 pixels and consumes 246 bytes - pea $0000 ; This is 41 8x8 tiles in width. Need to have N+1 tiles for screen overlap +loop lup 82 ; +6 Set up 82 PEA instructions, which is 328 pixels and consumes 246 bytes + pea $0000 ; This is 41 8x8 tiles in width. 
Need to have N+1 tiles for screen overlap --^ -loop_back jmp loop-base ; +252 Ensure execution continues to loop around -loop_exit_3 jmp even_exit-base ; +255 +loop_back jmp loop-base ; +252 Ensure execution continues to loop around +loop_exit_3 jmp even_exit-base ; +255 -odd_exit lda #0000 ; This operand field is *always* used to hold the original 2 bytes of the code field - ; that are replaced by the needed BRA instruction to exit the code field. When the - ; left edge is odd-aligned, we are able to immediately load the value and perform - ; similar logic to the right_odd code path above +odd_exit lda #0000 ; This operand field is *always* used to hold the original 2 bytes of the code field + ; that are replaced by the needed BRA instruction to exit the code field. When the + ; left edge is odd-aligned, we are able to immediately load the value and perform + ; similar logic to the right_odd code path above left_odd bit #$000B beq l_is_pea @@ -944,14 +991,14 @@ l_is_pea xba pha rep #$30 bra even_exit -l_is_jmp sep #$01 ; Set the C flag (V is always cleared at this point) which tells a snippet to push only the high byte +l_is_jmp sep #$01 ; Set the C flag (V is always cleared at this point) which tells a snippet to push only the high byte long_5 ldal entry_jmp+1-base long_6 stal *+5-base - dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 13 cycles, is there a better way?) + dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 13 cycles, is there a better way?) ; JMP opcode = $4C, JML opcode = $5C -even_exit jmp $1000 ; Jump to the next line. - ds 1 ; space so that the last line in a bank can be patched into a JML +even_exit jmp $1000 ; Jump to the next line. + ds 1 ; space so that the last line in a bank can be patched into a JML ; Special epilogue: skip a number of bytes and jump back into the code field. 
This is useful for ; large, floating panels in the attract mode of a game, or to overlay solid @@ -961,8 +1008,8 @@ epilogue_1 tsc sec sbc #0 tcs - jmp $0000 ; This jumps back into the code field -:out jmp $0000 ; This jumps to the next epilogue chain element + jmp $0000 ; This jumps back into the code field +:out jmp $0000 ; This jumps to the next epilogue chain element ds 1 ; These are the special code snippets -- there is a 1:1 relationship between each snippet space @@ -1029,4 +1076,23 @@ top + + + + + + + + + + + + + + + + + + + diff --git a/src/blitter/Vert.s b/src/blitter/Vert.s index d54c773..e30a9bc 100644 --- a/src/blitter/Vert.s +++ b/src/blitter/Vert.s @@ -8,18 +8,191 @@ ; Set the virtual position of the primary background layer. In addition to ; updating the direct page state locations, this routine needs to SetBG0YPos - cmp StartY - beq :nochange - sta StartY ; Save the position - lda #DIRTY_BIT_BG0_Y ; Mark that it has changed - tsb DirtyBits -:nochange - rts + cmp StartY + beq :out ; Easy, if nothing changed, then nothing changes + + ldx StartY ; Load the old value (but don't save it yet) + sta StartY ; Save the new position + + lda #DIRTY_BIT_BG0_Y + tsb DirtyBits ; Check if the value is already dirty, if so exit + bne :out ; without overwriting the original value + + stx OldStartY ; First change, so preserve the value +:out rts ; Based on the current value of StartY in the direct page. Set up the dispatch -; information so that the BltDispatch driver will render the correct code field -; lines in the the correct order +; information so that the BltRange driver will render the correct code field +; lines in the correct order _ApplyBG0YPos +:rtbl_idx equ tmp0 +:virt_line equ tmp1 +:lines_left equ tmp2 +:draw_count equ tmp3 + +; First task is to fill in the STK_ADDR values by copying them from the RTable array. We +; copy from RTable[i] into BlitField[StartY+i]. 
As with all of this code, the difficult part +; is decomposing the update across banks + + stz :rtbl_idx ; Start copying from the first entry in the table + + lda StartY ; This is the base line of the virtual screen + sta :virt_line ; Keep track of it + +; copy a range of addresses from the table into the destination bank. If we restrict ourselves to +; rectangular playfields, this can be optimized to just subtracting a constant value. See the +; Templates::SetScreenAddrs subroutine. + + lda ScreenHeight + sta :lines_left + +; This is the verbose part -- figure out how many lines to draw. We don't want to artificially limit +; the height of the visible screen (for example, doing an animated wipe while scrolling), so the screen +; height could be anything from 1 to 200. +; +; For larger values, we want to break things up on 16-line boundaries based on the virt_line value. So, +; +; draw_count = min(lines_left, 16 - (virt_line % 16)) +; +; Note that almost everything in this loop can be done with 8-bit operations since the values are +; all under 200. The one exception is the virt_line value which could exceed 256. This will be +; a later optimization and might save around 10 cycles per iteration, or up to ~120 cycles per frame +; and ~2,500 per second. This is ~1% of our total CPU budget and is *just* enough cycles to be +; interesting.... 
Another 8 cycles could be removed by doing all calculations pre-multiplied by 2 +; to avoid several 'asl' instructions +:loop + lda :virt_line + asl + tax + ldal BTableLow,x ; Get the address of the first code field line + tay + + sep #$20 + ldal BTableHigh,x + pha + plb ; This is the bank that will receive the updates + rep #$20 + + lda :virt_line + and #$000F + eor #$FFFF + inc + clc + adc #16 + min :lines_left + + sta :draw_count ; Do this many lines + asl + tax + + lda :rtbl_idx ; Read from this location in the RTable + asl + + jsr CopyRTableToStkAddr + + lda :virt_line ; advance to the virtual line after the segment we just + clc ; filled in + adc :draw_count + sta :virt_line + + lda :rtbl_idx ; advance the index into the RTable + adc :draw_count + sta :rtbl_idx + + lda :lines_left ; subtract the number of lines we just completed + sec + sbc :draw_count + sta :lines_left + + jne :loop + + phk + plb + rts + +; Unrolled copy routine to move RTable entries into STK_ADDR position. +; +; A = index into the RTable array (x2) +; Y = starting line * $1000 +; X = number of lines (x2) +CopyRTableToStkAddr + jmp (:tbl,x) +:tbl da :none + da :do01,:do02,:do03,:do04 + da :do05,:do06,:do07,:do08 + da :do09,:do10,:do11,:do12 + da :do13,:do14,:do15,:do16 +:do15 tax + bra :x15 +:do14 tax + bra :x14 +:do13 tax + bra :x13 +:do12 tax + bra :x12 +:do11 tax + bra :x11 +:do10 tax + bra :x10 +:do09 tax + bra :x09 +:do08 tax + bra :x08 +:do07 tax + bra :x07 +:do06 tax + bra :x06 +:do05 tax + bra :x05 +:do04 tax + bra :x04 +:do03 tax + bra :x03 +:do02 tax + bra :x02 +:do01 tax + bra :x01 +:do16 tax + ldal RTable+30,x + sta STK_ADDR+$F000,y +:x15 ldal RTable+28,x + sta STK_ADDR+$E000,y +:x14 ldal RTable+26,x + sta STK_ADDR+$D000,y +:x13 ldal RTable+24,x + sta: STK_ADDR+$C000,y +:x12 ldal RTable+22,x + sta STK_ADDR+$B000,y +:x11 ldal RTable+20,x + sta STK_ADDR+$A000,y +:x10 ldal RTable+18,x + sta STK_ADDR+$9000,y +:x09 ldal RTable+16,x + sta: STK_ADDR+$8000,y +:x08 ldal RTable+14,x + 
sta STK_ADDR+$7000,y +:x07 ldal RTable+12,x + sta STK_ADDR+$6000,y +:x06 ldal RTable+10,x + sta STK_ADDR+$5000,y +:x05 ldal RTable+08,x + sta: STK_ADDR+$4000,y +:x04 ldal RTable+06,x + sta STK_ADDR+$3000,y +:x03 ldal RTable+04,x + sta STK_ADDR+$2000,y +:x02 ldal RTable+02,x + sta STK_ADDR+$1000,y +:x01 ldal RTable+00,x + sta: STK_ADDR+$0000,y +:none rts + + + + + + +