diff --git a/src/blitter/Horz.s b/src/blitter/Horz.s index eb7aa5d..496956f 100644 --- a/src/blitter/Horz.s +++ b/src/blitter/Horz.s @@ -33,8 +33,8 @@ _RestoreBG0Opcodes lda LastPatchOffset ; If zero, there are no saved opcodes sta :exit_offset -:loop ldx :virt_line_x2 +:loop ldal BTableLow,x ; Get the address of the first code field line tay @@ -47,8 +47,7 @@ _RestoreBG0Opcodes txa ; lda :virt_line_x2 and #$001E eor #$FFFF - inc - clc + sec adc #32 min :lines_left_x2 sta :draw_count_x2 ; Do half of this many lines @@ -60,12 +59,13 @@ _RestoreBG0Opcodes clc adc :exit_offset ; Add some offsets to get the base address in the code field line - jsr RestoreOpcode + jsr (RestoreOpcode,x) lda :virt_line_x2 ; advance to the virtual line after the segment we just clc ; filled in adc :draw_count_x2 sta :virt_line_x2 + tax lda :lines_left_x2 ; subtract the number of lines we just completed sec @@ -113,16 +113,16 @@ _ApplyBG0XPosPre _ApplyBG0XPos -:virt_line equ tmp1 -:lines_left equ tmp2 -:draw_count equ tmp3 +:stk_save equ tmp0 +:virt_line_x2 equ tmp1 +:lines_left_x2 equ tmp2 +:draw_count_x2 equ tmp3 :exit_offset equ tmp4 :entry_offset equ tmp5 :exit_bra equ tmp6 :exit_address equ tmp7 :base_address equ tmp8 -:draw_count_x2 equ tmp9 -:opcode equ tmp0 +:opcode equ tmp9 :odd_entry_offset equ tmp10 ; If there are saved opcodes that have not been restored, do not run this routine @@ -133,10 +133,12 @@ _ApplyBG0XPos ; This code is fairly succinct. See the corresponding code in Vert.s for more detailed comments. :ok lda StartYMod208 ; This is the base line of the virtual screen - sta :virt_line ; Keep track of it + asl + sta :virt_line_x2 ; Keep track of it lda ScreenHeight - sta :lines_left + asl + sta :lines_left_x2 ; Calculate the exit and entry offsets into the code fields. This is a bit tricky, because odd-aligned ; rendering causes the left and right edges to move in a staggered fashion. @@ -280,32 +282,30 @@ _ApplyBG0XPos ; 3. Writes the JMP entry point to enter the code field phb ; Save the existing bank + tsc + sta :stk_save + :loop - lda :virt_line - asl ; This will clear the carry bit - tax + ldx :virt_line_x2 ldal BTableLow,x ; Get the address of the first code field line tay ; Save it to use as the base address + + clc adc :exit_offset ; Add some offsets to get the base address in the code field line sta :exit_address sty :base_address - sep #$20 ldal BTableHigh,x pha - plb ; This is the bank that will receive the updates - rep #$20 + plb - lda :virt_line - and #$000F + txa + and #$001E eor #$FFFF - inc - clc - adc #16 - min :lines_left + sec + adc #32 + min :lines_left_x2 - sta :draw_count ; Do this many lines - asl sta :draw_count_x2 ; First step is to set the BRA instruction to exit the code field at the proper location. There @@ -317,64 +317,63 @@ _ApplyBG0XPos ; screen ; y is already set to :base_address tax ; :draw_count_x2 - lda :exit_address ; Save from this location - jsr SaveOpcode + clc ; advance to the virtual line after the segment we just + adc :virt_line_x2 ; filled in + sta :virt_line_x2 + lda :exit_address ; Save from this location + jsr (SaveOpcode,x) ; X = :exit_address on return + + txy ; ldy :exit_address -- starting at this address ldx :draw_count_x2 ; Do this many lines lda :exit_bra ; Copy this value into all of the lines - ldy :exit_address ; starting at this address - jsr SetConst + jsr (SetConst,x) ; All registers are preserved ; Next, patch in the CODE_ENTRY value, which is the low byte of a JMP instruction. This is an ; 8-bit operation and, since the PEA code is bank aligned, we use the entry_offset value directly sep #$20 - ldx :draw_count_x2 +; ldx :draw_count_x2 lda :entry_offset ldy :base_address - jsr SetCodeEntry + jsr (SetCodeEntry,x) ; All registers are preserved ; Now, patch in the opcode - ldx :draw_count_x2 +; ldx :draw_count_x2 lda :opcode - ldy :base_address ; Y-register is preserved, this can be removed - jsr SetCodeEntryOpcode + jsr (SetCodeEntryOpcode,x) ; All registers are preserved ; If this is an odd entry, also set the odd_entry low byte and save the operand high byte lda :odd_entry_offset beq :not_odd - ldx :draw_count_x2 - ldy :base_address ; Y-register is preserved, this can be removed - jsr SetOddCodeEntry +; NOTE: SetOddCodeEntry and SaveHighOperand can probably be combined to eliminate call/return overhead - ldx :draw_count_x2 - ldy :base_address ; Y-register is preserved, this can be removed - pei :exit_address - jmp :SaveHighOperand ; Only used once, so "inline" it +; ldx :draw_count_x2 + jsr (SetOddCodeEntry,x) ; All registers are preserved + +; ldx :draw_count_x2 + jmp (:SaveHighOperand,x) ; Only used once, so "inline" it :save_high_op_rtn :not_odd - rep #$20 + rep #$21 ; clear the carry ; Do the end of the loop -- update the virtual line counter and reduce the number ; of lines left to render - lda :virt_line ; advance to the virtual line after the segment we just - clc ; filled in - adc :draw_count - sta :virt_line - - lda :lines_left ; subtract the number of lines we just completed + lda :lines_left_x2 ; subtract the number of lines we just completed sec - sbc :draw_count - sta :lines_left + sbc :draw_count_x2 + sta :lines_left_x2 jne :loop + lda :stk_save + tcs plb rts @@ -387,45 +386,43 @@ _ApplyBG0XPos ; Y = starting line * $1000 ; A = code field location * $1000 :SaveHighOperand - jmp (:tbl,x) - -:tbl da :bottom + da :bottom da :do01,:do02,:do03,:do04 da :do05,:do06,:do07,:do08 da :do09,:do10,:do11,:do12 da :do13,:do14,:do15,:do16 -:do15 plx +:do15 ldx :exit_address ; accumulator is in 8-bit mode, so can't use TAX bra :x15 -:do14 plx +:do14 ldx :exit_address bra :x14 -:do13 plx +:do13 ldx :exit_address bra :x13 -:do12 plx +:do12 ldx :exit_address bra :x12 -:do11 plx +:do11 ldx :exit_address bra :x11 -:do10 plx +:do10 ldx :exit_address bra :x10 -:do09 plx +:do09 ldx :exit_address bra :x09 -:do08 plx +:do08 ldx :exit_address bra :x08 -:do07 plx +:do07 ldx :exit_address bra :x07 -:do06 plx +:do06 ldx :exit_address bra :x06 -:do05 plx +:do05 ldx :exit_address bra :x05 -:do04 plx +:do04 ldx :exit_address bra :x04 -:do03 plx +:do03 ldx :exit_address bra :x03 -:do02 plx +:do02 ldx :exit_address bra :x02 -:do01 plx +:do01 ldx :exit_address bra :x01 -:do16 plx +:do16 ldx :exit_address :x16 lda $F002,x sta OPCODE_HIGH_SAVE+$F000,y :x15 lda $E002,x @@ -469,9 +466,7 @@ _ApplyBG0XPos ; Y = starting line * $1000 ; A = code field location * $1000 SaveOpcode - jmp (:tbl,x) - -:tbl da :bottom + da :bottom da :do01,:do02,:do03,:do04 da :do05,:do06,:do07,:do08 da :do09,:do10,:do11,:do12 @@ -550,9 +545,7 @@ SaveOpcode ; Y = starting line * $1000 ; A = code field location * $1000 RestoreOpcode - jmp (:tbl,x) - -:tbl da :bottom + da :bottom da :do01,:do02,:do03,:do04 da :do05,:do06,:do07,:do08 da :do09,:do10,:do11,:do12 @@ -631,8 +624,7 @@ RestoreOpcode ; Y = starting line * $1000 ; A = address low byte SetCodeEntry - jmp (:tbl,x) -:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09 + da :bottom-00,:bottom-03,:bottom-06,:bottom-09 da :bottom-12,:bottom-15,:bottom-18,:bottom-21 da :bottom-24,:bottom-27,:bottom-30,:bottom-33 da :bottom-36,:bottom-39,:bottom-42,:bottom-45 @@ -663,8 +655,7 @@ SetCodeEntry ; Y = starting line * $1000 ; A = address low byte SetOddCodeEntry - jmp (:tbl,x) -:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09 + da :bottom-00,:bottom-03,:bottom-06,:bottom-09 da :bottom-12,:bottom-15,:bottom-18,:bottom-21 da :bottom-24,:bottom-27,:bottom-30,:bottom-33 da :bottom-36,:bottom-39,:bottom-42,:bottom-45 @@ -695,8 +686,7 @@ SetOddCodeEntry ; Y = starting line * $1000 ; A = opcode value SetCodeEntryOpcode - jmp (:tbl,x) -:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09 + da :bottom-00,:bottom-03,:bottom-06,:bottom-09 da :bottom-12,:bottom-15,:bottom-18,:bottom-21 da :bottom-24,:bottom-27,:bottom-30,:bottom-33 da :bottom-36,:bottom-39,:bottom-42,:bottom-45 diff --git a/src/blitter/TemplateUtils.s b/src/blitter/TemplateUtils.s index 1f11770..0dae3d5 100644 --- a/src/blitter/TemplateUtils.s +++ b/src/blitter/TemplateUtils.s @@ -87,9 +87,8 @@ Counter equ tmp3 ; A = value ; ; Set M to 0 or 1 -SetConst ; Need a blank line here, otherwise the :tbl local variable resolveds backwards - jmp (:tbl,x) -:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09 +SetConst + da :bottom-00,:bottom-03,:bottom-06,:bottom-09 da :bottom-12,:bottom-15,:bottom-18,:bottom-21 da :bottom-24,:bottom-27,:bottom-30,:bottom-33 da :bottom-36,:bottom-39,:bottom-42,:bottom-45 diff --git a/src/blitter/Vert.s b/src/blitter/Vert.s index 042cce3..eafe61f 100644 --- a/src/blitter/Vert.s +++ b/src/blitter/Vert.s @@ -7,29 +7,36 @@ ; lines in the correct order _ApplyBG0YPos -:rtbl_idx equ tmp0 -:virt_line equ tmp1 -:lines_left equ tmp2 -:draw_count equ tmp3 +:rtbl_idx_x2 equ tmp0 +:virt_line_x2 equ tmp1 +:lines_left_x2 equ tmp2 +:draw_count_x2 equ tmp3 +:stk_save equ tmp4 ; First task is to fill in the STK_ADDR values by copying them from the RTable array. We ; copy from RTable[i] into BlitField[StartY+i]. As with all of this code, the difficult part ; is decomposing the update across banks - stz :rtbl_idx ; Start copying from the first entry in the table + stz :rtbl_idx_x2 ; Start copying from the first entry in the table lda StartY ; This is the base line of the virtual screen jsr Mod208 sta StartYMod208 - sta :virt_line ; Keep track of it + asl + sta :virt_line_x2 ; Keep track of it + + phb ; Save the current bank + tsc ; we intentionally leak one byte of stack in each loop + sta :stk_save ; iteration, so save the stack to repair at the end ; copy a range of address from the table into the destination bank. If we restrict ourselves to ; rectangular playfields, this can be optimized to just subtracting a constant value. See the ; Templates::SetScreenAddrs subroutine. lda ScreenHeight - sta :lines_left + asl + sta :lines_left_x2 ; This is the verbose part -- figure out how many lines to draw. We don't want to artificially limit ; the height of the visible screen (for example, doing an animated wipe while scrolling), so the screen @@ -38,62 +45,47 @@ _ApplyBG0YPos ; For larger values, we want to break things up on 16-line boundaries based on the virt_line value. So, ; ; draw_count = min(lines_left, (16 - (virt_line % 16)) -; -; Note that almost everything in this loop can be done with 8-bit operations sincc the values are -; all under 200. The one exception is the virt_line value which could exceed 256. This will be -; a later optimization and might save around 10 cycles per iteration, or up to ~120 cycles per frame -; and ~2,500 per secord. This is ~1% of our total CPU budget and is *just* enough cycles to be -; interesting.... Another 8 cycles could be removed by doing all calculatinos pre-multiplied by 2 -; to avoid several 'asl' instructions - phb + :loop - lda :virt_line - asl - tax + ldx :virt_line_x2 ldal BTableLow,x ; Get the address of the first code field line tay - sep #$20 - ldal BTableHigh,x + ldal BTableHigh,x ; Target bank in low byte, current bank in high pha - plb ; This is the bank that will receive the updates - rep #$20 - lda :virt_line - and #$000F + txa + and #$001E eor #$FFFF - inc - clc - adc #16 - min :lines_left + sec + adc #32 + min :lines_left_x2 - sta :draw_count ; Do this many lines - asl + sta :draw_count_x2 ; Do this many lines tax - lda :rtbl_idx ; Read from this location in the RTable - asl + clc ; pre-advance virt_line_2 because we have the value + adc :virt_line_x2 + sta :virt_line_x2 - jsr CopyRTableToStkAddr + plb + jsr (:CopyRTableToStkAddr,x) ; X = rtbl_idx_x2 on return - lda :virt_line ; advance to the virtual line after the segment we just - clc ; filled in - adc :draw_count - sta :virt_line + txa ; carry flag is unchanged + adc :draw_count_x2 ; advance the index into the RTable + sta :rtbl_idx_x2 - lda :rtbl_idx ; advance the index into the RTable - adc :draw_count - sta :rtbl_idx - lda :lines_left ; subtract the number of lines we just completed + lda :lines_left_x2 ; subtract the number of lines we just completed sec - sbc :draw_count - sta :lines_left + sbc :draw_count_x2 + sta :lines_left_x2 jne :loop - plb -:out + lda :stk_save + tcs + plb rts ; Unrolled copy routine to move RTable intries into STK_ADDR position. @@ -101,44 +93,43 @@ _ApplyBG0YPos ; A = intect into the RTable array (x2) ; Y = starting line * $1000 ; X = number of lines (x2) -CopyRTableToStkAddr - jmp (:tbl,x) -:tbl da :none +:CopyRTableToStkAddr + da :none da :do01,:do02,:do03,:do04 da :do05,:do06,:do07,:do08 da :do09,:do10,:do11,:do12 da :do13,:do14,:do15,:do16 -:do15 tax +:do15 ldx :rtbl_idx_x2 bra :x15 -:do14 tax +:do14 ldx :rtbl_idx_x2 bra :x14 -:do13 tax +:do13 ldx :rtbl_idx_x2 bra :x13 -:do12 tax +:do12 ldx :rtbl_idx_x2 bra :x12 -:do11 tax +:do11 ldx :rtbl_idx_x2 bra :x11 -:do10 tax +:do10 ldx :rtbl_idx_x2 bra :x10 -:do09 tax +:do09 ldx :rtbl_idx_x2 bra :x09 -:do08 tax +:do08 ldx :rtbl_idx_x2 bra :x08 -:do07 tax +:do07 ldx :rtbl_idx_x2 bra :x07 -:do06 tax +:do06 ldx :rtbl_idx_x2 bra :x06 -:do05 tax +:do05 ldx :rtbl_idx_x2 bra :x05 -:do04 tax +:do04 ldx :rtbl_idx_x2 bra :x04 -:do03 tax +:do03 ldx :rtbl_idx_x2 bra :x03 -:do02 tax +:do02 ldx :rtbl_idx_x2 bra :x02 -:do01 tax +:do01 ldx :rtbl_idx_x2 bra :x01 -:do16 tax +:do16 ldx :rtbl_idx_x2 ldal RTable+30,x sta STK_ADDR+$F000,y :x15 ldal RTable+28,x