diff --git a/macros/APP.MACS.S b/macros/APP.MACS.S index e36196f..5c12199 100644 --- a/macros/APP.MACS.S +++ b/macros/APP.MACS.S @@ -30,6 +30,24 @@ _Div16 mac lsr <<< +_R0W0 mac ; Read Bank 0 / Write Bank 0 + ldal STATE_REG + and #$FFCF + stal STATE_REG + <<< + +_R0W1 mac ; Read Bank 0 / Write Bank 1 + ldal STATE_REG + ora #$0010 + stal STATE_REG + <<< + +_R1W1 mac ; Read Bank 1 / Write Bank 1 + ldal STATE_REG + ora #$0030 + stal STATE_REG + <<< + **************************************** * Basic Error Macro * **************************************** @@ -46,3 +64,9 @@ NoErr eom + + + + + + diff --git a/src/App.Init.s b/src/App.Init.s index 3ecbba3..a602a7a 100644 --- a/src/App.Init.s +++ b/src/App.Init.s @@ -16,38 +16,38 @@ mx %00 -MemInit PushLong #0 ; space for result - PushLong #$008000 ; size (32k) +MemInit PushLong #0 ; space for result + PushLong #$008000 ; size (32k) PushWord UserId - PushWord #%11000000_00010111 ; Fixed location + PushWord #%11000000_00010111 ; Fixed location PushLong #$002000 - _NewHandle ; returns LONG Handle on stack - plx ; base address of the new handle - pla ; high address 00XX of the new handle (bank) + _NewHandle ; returns LONG Handle on stack + plx ; base address of the new handle + pla ; high address 00XX of the new handle (bank) _Deref stx Buff00 sta Buff00+2 - PushLong #0 ; space for result - PushLong #$008000 ; size (32k) + PushLong #0 ; space for result + PushLong #$008000 ; size (32k) PushWord UserId - PushWord #%11000000_00010111 ; Fixed location + PushWord #%11000000_00010111 ; Fixed location PushLong #$012000 - _NewHandle ; returns LONG Handle on stack - plx ; base address of the new handle - pla ; high address 00XX of the new handle (bank) + _NewHandle ; returns LONG Handle on stack + plx ; base address of the new handle + pla ; high address 00XX of the new handle (bank) _Deref stx Buff01 sta Buff01+2 - PushLong #0 ; space for result - PushLong #$000A00 
; size (10 pages) PushWord UserId - PushWord #%11000000_00010101 ; Page-aligned, fixed bank + PushWord #%11000000_00010101 ; Page-aligned, fixed bank PushLong #$000000 - _NewHandle ; returns LONG Handle on stack - plx ; base address of the new handle - pla ; high address 00XX of the new handle (bank) + _NewHandle ; returns LONG Handle on stack + plx ; base address of the new handle + pla ; high address 00XX of the new handle (bank) _Deref stx ZeroPage sta ZeroPage+2 @@ -57,9 +57,28 @@ MemInit PushLong #0 ; space for result lup 13 jsr AllocOneBank2 sta BlitBuff+]step+2 - sta BlitBuffMid+]step+2 stz BlitBuff+]step - stz BlitBuffMid+]step +]step equ ]step+4 + --^ + + ldx #0 + ldy #0 + lda BlitBuff+2,y ; Copy the high word first +]step equ 0 + lup 16 + sta BTableHigh+]step+2,x ; 16 lines per bank + sta BTableHigh+]step+2+{208*2},x ; 16 lines per bank +]step equ ]step+4 + --^ + lda BlitBuff,y + sta BTableLow,x + sta BTableLow+{208*2},x + clc +]step equ 0 + lup 15 + adc #$1000 + sta BTableLow+]step,x + sta BTableLow+]step+{208*2},x ]step equ ]step+4 --^ @@ -69,14 +88,6 @@ Buff00 ds 4 Buff01 ds 4 ZeroPage ds 4 -; Array of addressed for the banks that hold the blitter. This is actually a double-length -; array, which is a pattern that is used a lot in GTE. Whenever we have a situation where -; we need to wrap around an array, we can to this be doubling the array length and using an -; unrolled loop that starts in the middle instead of doing some kind of "mod N" or loop -; splitting. -BlitBuff ds 4*13 -BlitBuffMid ds 4*13 - ; Bank allocator (for one full, fixed bank of memory. 
Can be immediately deferenced) AllocOneBank PushLong #0 @@ -84,12 +95,12 @@ AllocOneBank PushLong #0 PushWord UserId PushWord #%11000000_00011100 PushLong #0 - _NewHandle ; returns LONG Handle on stack - plx ; base address of the new handle - pla ; high address 00XX of the new handle (bank) - xba ; swap accumulator bytes to XX00 - sta :bank+2 ; store as bank for next op (overwrite $XX00) -:bank ldal $000001,X ; recover the bank address in A=XX/00 + _NewHandle ; returns LONG Handle on stack + plx ; base address of the new handle + pla ; high address 00XX of the new handle (bank) + xba ; swap accumulator bytes to XX00 + sta :bank+2 ; store as bank for next op (overwrite $XX00) +:bank ldal $000001,X ; recover the bank address in A=XX/00 rts ; Variation that return pointer in the X/A registers (X = low, A = high) @@ -99,8 +110,8 @@ AllocOneBank2 PushLong #0 PushWord #%11000000_00011100 PushLong #0 _NewHandle - plx ; base address of the new handle - pla ; high address 00XX of the new handle (bank) + plx ; base address of the new handle + pla ; high address 00XX of the new handle (bank) _Deref rts @@ -136,6 +147,18 @@ ShutDown rts + + + + + + + + + + + + diff --git a/src/App.Main.s b/src/App.Main.s index cfc5f44..546272d 100644 --- a/src/App.Main.s +++ b/src/App.Main.s @@ -1,8 +1,16 @@ -; Test program for graphics stufff... +; Test driver to exercise graphics routines. ; -; Allow dynamic resizing to benchmark against different games +; The general organization of the code is +; +; 1. The blitter/ folder contains all of the low-level graphics primitives +; 2. The blitter/DirectPage.s file defines all of the DP locations +; 3. Subroutines are written to try and be stateless, but, if local +; storage is needed, it is taken from the stack and uses stack-relative +; addressing. 
- rel +; Allow dynamic resizing to benchmark against different games + REL + DSK MAINSEG use Util.Macs.s use Locator.Macs.s @@ -17,7 +25,7 @@ SHADOW_REG equ $E0C035 STATE_REG equ $E0C068 NEW_VIDEO_REG equ $E0C029 -BORDER_REG equ $E0C034 ; 0-3 = border 4-7 Text color +BORDER_REG equ $E0C034 ; 0-3 = border, 4-7 Text color VBL_VERT_REG equ $E0C02E VBL_HORZ_REG equ $E0C02F @@ -28,6 +36,9 @@ VBL_STATE_REG equ $E0C019 SHR_SCREEN equ $E12000 SHR_SCB equ $E19D00 +; External references +tiledata ext + ; Typical init phk @@ -46,7 +57,10 @@ SHR_SCB equ $E19D00 _MTStartUp -; Install interrupt handlers +; Install interrupt handlers. We use the VBL interrupt to keep animations +; moving at a consistent rate, regardless of the rendered frame rate. The +; one-second timer is generally just used for counters and as a handy +; frames-per-second trigger. PushLong #0 pea $0015 ; Get the existing 1-second interrupt handler and save @@ -72,9 +86,6 @@ SHR_SCB equ $E19D00 ldx #6 ; Gameboy Advance size jsr SetScreenMode - lda #0 ; Set the virtual Y-position - jsr SetYPos - ; Load a picture and copy it into Bank $E1. Then turn on the screen. jsr AllocOneBank ; Alloc 64KB for Load/Unpack @@ -105,7 +116,7 @@ EvtLoop bne :5 jsr DoHUP -:5 cmp #'1' +:5 cmp #'1' ; User selects a new screen size bcc :6 cmp #'9'+1 bcs :6 @@ -116,6 +127,26 @@ EvtLoop :6 bra EvtLoop +; Exit code +Exit + pea $0007 ; disable 1-second interrupts + _IntSource + + PushLong #VBLTASK ; Remove our heartbeat task + _DelHeartBeat + + pea $0015 + PushLong OldOneSecVec ; Reset the interrupt vector + _SetVector + + PushWord UserId ; Deallocate all of our memory + _DisposeAll + + _QuitGS qtRec + + bcs Fatal +Fatal brk $00 + ; Allow the user to dynamically select one of the pre-configured screen sizes ; ; 1. Full Screen : 40 x 25 320 x 200 (32,000 bytes (100.0%)) @@ -129,6 +160,7 @@ EvtLoop ; 9. 
Game Boy Color : 20 x 18 160 x 144 (11,520 bytes ( 36.0%)) ; ; X=mode number + ]ScreenModeWidth dw 320,272,256,256,280,256,240,288,160 ]ScreenModeHeight dw 200,192,200,176,160,160,160,128,144 @@ -194,45 +226,51 @@ DoHUP DoFrame ; Render some tiles -:bank equ 0 -:column equ 2 -:tile equ 4 +:bank equ 1 +:column equ 3 +:tile equ 5 + + + pea $0000 ; Allocate local variable space + pea $0000 + pea $0000 - stz :bank - stz :tile :bankloop - ldx :bank + lda :bank,s + tax ldal BlitBuff+1,x ; set the data bank to the code field pha plb plb - stz :column + lda #0 + sta :column,s :tileloop - ldx :column + lda :column,s + tax ldal Col2CodeOffset,x tay iny - lda :tile + lda :tile,s jsr CopyTile - lda :tile + lda :tile,s inc and #$000F - sta :tile + sta :tile,s - lda :column + lda :column,s clc adc #4 - sta :column + sta :column,s cmp #4*40 bcc :tileloop - lda :bank + lda :bank,s clc adc #4 - sta :bank + sta :bank,s cmp #4*13 bcc :bankloop @@ -251,7 +289,18 @@ DoFrame pha ; push twice because we will use it later rep #$20 - ldx #80*2 ; This is the word to exit from +; Set the Y-Position within the virtual buffer + + lda #0 ; Set the virtual Y-position + jsr SetYPos + +; Just load the screen width here. This is not semantically right; we actually are taking the number +; of tiles in the width of the playfield, multiplying by two to get the number of words and then +; multiplying by two again to get an index offset. It just happens that TILES * 4 = BYTES. 
+; +; TODO: Once we start scrolling, this will be ScreenWidth + BG0_X + + ldx ScreenWidth ; This is the word to exit from ldy Col2CodeOffset,x ; Get the offset sep #$20 ; 8-bit acc @@ -265,15 +314,17 @@ DoFrame lda #OPCODE_SAVE jsr SaveOpcode - ldx #80*2 ; X-register is overwritten by SaveOpcode + ldx ScreenWidth ; X-register is overwritten by SaveOpcode ldal CodeFieldEvenBRA,x ; Get the value to place there ldx #16*2 jsr SetConst -; lda #{$2000+159+15*160} ; Set the stack address to the right edge of the screen -; ldy #0 -; ldx #16*2 -; jsr SetScreenAddrs + +; Fill in the screen address of each line. This routine must be called whenever the +; lda #{$2000+159+15*160} ; Set the stack address to the right edge of the screen +; ldy #0 +; ldx #16*2 +; jsr SetScreenAddrs sep #$20 ; only need to do an 8-bit store lda #$06 ; This is the entry address to start drawing @@ -285,26 +336,11 @@ DoFrame ldy #$7000 ; Set the return after line 200 (Bank 13, line 8) jsr SetReturn - sei ; disable interrupts + jsr BltDispatch ; Execute the blit - ldal STATE_REG - ora #$0010 ; Read Bank 0 / Write Bank 1 - stal STATE_REG - - tsc ; save the stack pointer - stal stk_save+1 - -blt_entry jml $000006 ; Jump into the blitter code $XX/YY06 - -blt_return ldal STATE_REG ; Read Bank 0 / Write Bank 0 - and #$FFCF - stal STATE_REG -stk_save lda #0000 ; load the stack - tcs - cli ; re-enable interrupts plb ; set the bank back to the code field - ldx #80*2 ; This is the word to exit from + ldx ScreenWidth ; This is the word to exit from ldal Col2CodeOffset,x ; Get the offset tay ldx #16*2 @@ -313,6 +349,10 @@ stk_save lda #0000 ; load the stack phk ; restore data bank plb + + pla ; restore the stack + pla + pla rts DoLoadPic @@ -332,27 +372,6 @@ DoLoadPic bpl :copySHR rts -Exit - pea $0007 ; disable 1-second interrupts - _IntSource - - PushLong #VBLTASK ; Remove our heartbeat task - _DelHeartBeat - - pea $0015 - PushLong OldOneSecVec ; Reset the interrupt vector - _SetVector - - PushWord UserId ; 
Deallocate all of our memory - _DisposeAll - - _QuitGS qtRec - - bcs Fatal -Fatal brk $00 - -Hello str '000000' ; str adds leading length byte - **************************************** * Fatal Error Handler * **************************************** @@ -621,20 +640,12 @@ qtRec adrl $0000 put App.Init.s put App.Msg.s put font.s - put blitter/Template.s + put blitter/Blitter.s + put blitter/PEISlammer.s put blitter/Tables.s - - - - - - - - - - - - + put blitter/Template.s + put blitter/Tiles.s + put blitter/Vert.s diff --git a/src/App.Msg.s b/src/App.Msg.s index 047a9ec..9f364a9 100644 --- a/src/App.Msg.s +++ b/src/App.Msg.s @@ -42,8 +42,6 @@ Addr2ToString xba ; A=Value ; X=Screen offset -WordBuff dfb 4 - ds 4 DrawWord phx ; Save register value ldy #WordBuff+1 jsr WordToString @@ -53,27 +51,31 @@ DrawWord phx ; Save register value jsr DrawString rts -; Rendout out the bank addresses of all the blitter fields -:count = tmp0 -:ptr = tmp1 -:addr = tmp3 -DumpBanks stz :addr - lda #13 - sta :count - lda #BlitBuff - sta :ptr - lda #^BlitBuff - sta :ptr+2 +; Render out the bank addresses of all the blitter fields +DumpBanks + +:addr = 1 +:count = 3 +:ptr = 5 + + pea #^BlitBuff ; pointer to address table + pea #BlitBuff + pea #13 ; count = 13 + pea $0000 ; addr = 0 + + tsc + phd ; save the direct page + tcd ; set the direct page to the stack frame so the locals are DP offsets 1..8 :loop lda [:ptr] tax ldy #2 lda [:ptr],y - ldy #Hello+1 + ldy #Addr3Buff+1 jsr Addr3ToString - lda #Hello + lda #Addr3Buff ldx :addr ldy #$7777 jsr DrawString @@ -83,17 +85,36 @@ DumpBanks stz :addr adc #160*8 sta :addr - inc :ptr - inc :ptr - inc :ptr - inc :ptr + lda #4 + adc :ptr + sta :ptr dec :count - lda :count bne :loop + pld ; restore the direct page + tsc ; fetch the stack pointer so the 8 bytes of locals can be released + clc + adc #8 + tcs ; restore the stack pointer rts +WordBuff str '0000' +Addr3Buff str '000000' ; str adds leading length byte + + + + + + + + + + + + + + diff --git a/src/App.Tile.s b/src/App.Tile.s new file mode 100644 index 0000000..a26c143 --- /dev/null +++ b/src/App.Tile.s @@ -0,0 
+1,8 @@ + REL + DSK TILESEG +tiledata ENT + ds 65536 + + + + diff --git a/src/App.s b/src/App.s index 64be8bb..3b6f10d 100644 --- a/src/App.s +++ b/src/App.s @@ -1,13 +1,38 @@ ; IIgs Game Engine -; - DSK GTETestApp + TYP $B3 ; S16 file + DSK GTETestApp XPL ; Segment #1 -- Main execution block ASM App.Main.s - SNA Main +; SNA Main + +; Segment #2 -- 64KB Tile Memory + + ASM App.Tile.s + + + + + + + + + + + + + + + + + + + + + diff --git a/src/Render.s b/src/Render.s new file mode 100644 index 0000000..e15bb2a --- /dev/null +++ b/src/Render.s @@ -0,0 +1,57 @@ +; Renders a frame of animation +; +; The rendering engine is built around the idea of compositing all of the moving components +; on to the Bank 01 graphics buffer and then revealing everything in a single, vertical pass. +; +; If there was just a scrolling screen with no sprites, the screen would just get rendered +; in a single pass, but it gets more complicated with sprites and various effects. +; +; Here is the high-level pipeline: +; +; 1. Identify row ranges with effects. These effects can be sprites or user-defined overlays +; 2. Turn shadowing off +; 3. Render the background for each effect row range (in any order) +; 4. Render the sprites (in any order) +; 5. Turn shadowing on +; 6. Render the background for each non-effect row, a pei slam for sprite rows, and +; the user-defined overlays (in sorted order) +; +; As a concrete example, consider: +; +; Rows 0 - 9 have a user-defined floating overlay for a score board +; Rows 10 - 100 are background only +; Rows 101 - 120 have one or more sprites +; Rows 121 - 140 are background only +; Rows 141 - 159 have a user-defined solid overlay for an animated platform +; +; A floating overlay means that some background data may show through. A solid overlay means that +; the user-defined data covers the entire scan line. 
+; +; The renderer would proceed as: +; +; - shadow off +; - render_background(0, 10) +; - render_background(101, 121) +; - render_sprites() +; - shadow_on +; - render_user_overlay_1() +; - render_background(10, 101) +; - pei_slam(101, 121) +; - render_background(121, 141) +; - render_user_overlay_2() +; +; Generally speaking, a PEI Slam is faster than trying to do any sort of dirty-rectangle update by +; tracking sprite bounding boxes. But, if an application would benefit from skipping some background +; drawing on sprite rows, that can be handled by using the low level routines to control the left/right +; edges of the rendered play field. + + +Render + + jsr ShadowOff + + jsr ShadowOn + rts + + + diff --git a/src/blitter/Blitter.s b/src/blitter/Blitter.s new file mode 100644 index 0000000..d0f3fd7 --- /dev/null +++ b/src/blitter/Blitter.s @@ -0,0 +1,122 @@ +; This is the method that is most useful from the high-level code. We want the +; freedom to blit a range of lines. This subroutine can assume that all of the +; data in the code fields is set up properly. 
+; +; X = first line (inclusive), valid range of 0 to 199 +; Y = last line (inclusive), valid range >X up to 199 +; +; The lines are based on the appearance of lines in the play field, so blitting lines 0 through +; 19 will draw the first 20 lines on the play field, regardless of where the playfield is physically +; on the SHR screen or the current value of StartY +BltRange + clc ; start the additions below with the carry clear + + tya ; Get the address of the line that we want to return from + adc StartY ; and create a pointer to it + asl + tay + lda BTableLow,y + sta exit_ptr + lda BTableHigh,y + sta exit_ptr+2 + + txa ; get the first line (0 - 199) + adc StartY ; add in the virtual offset (0, 207) -- max value of 406 + asl + tax ; this is the offset into the blitter table + + sep #$20 ; 8-bit Acc + lda BTableHigh,x ; patch in the bank + sta blt_entry+3 + + lda BTableLow+1,x ; patch in the page + sta blt_entry+2 + +; The way we patch the exit code is subtle, but very fast. The CODE_EXIT offset points to +; a JMP/JML instruction that transitions to the next line after all of the code has been +; executed. Since every code field line is bank-aligned, we know that the low-byte of the +; operand is always $00. +; +; The trick we use is to patch the low byte to force the code to jump to a special return +; function (jml blt_return) in the *next* code field line. When it's time to restore the +; code, we can unconditionally store a $00 value to set things back to normal. +; +; This is the ideal situation -- patch/restore in a single 8-bit lda #imm / sta instruction +; pair with no need to preserve the data + + ldy #CODE_EXIT+1 ; this is a JMP or JML instruction that points to the next line. 
+ lda #FULL_RETURN ; this is the offset of the return code + sta [exit_ptr],y ; patch out the low byte of the JMP/JML + rep #$20 + +; Now we need to set up the Bank, Stack Pointer and Direct Page registers for calling into +; the code field + + pei BG1DataBank-1 ; Set the data bank for BG1 data + plb + plb + + phd ; Save the application direct page + lda BlitterDP ; Set the direct page to the blitter data + tcd + + sei ; disable interrupts + _R0W1 + tsc ; save the stack pointer + stal stk_save+1 ; by patching it into the operand of the lda at stk_save + +blt_entry jml $000000 ; Jump into the blitter code $XX/YYZZ + +blt_return _R0W0 +stk_save lda #0000 ; load the stack + tcs + cli ; re-enable interrupts + pld ; restore the direct page + + sep #$20 + ldy #CODE_EXIT+1 + lda #00 ; store $00 back to undo the exit patch made above + sta [exit_ptr],y + rep #$20 + + rts + +; This subroutine is used to set up the BltDispatch code based on the current state of +; the machine and/or the state of the engine. The tasks it performs are +; +; 1. Set the blt_entry low byte based on the graphics engine configuration +BltSetup + sep #$20 ; Only need 8-bits for this + lda EngineMode + bit #$01 ; Are both background layers enabled? + beq :oneLyr + lda #entry_2-base + bra :twoLyr +:oneLyr lda #entry_3-base +:twoLyr sta blt_entry+1 ; set the low byte of the JML + rep #$20 + rts + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/blitter/DirectPage.s b/src/blitter/DirectPage.s index 6a6f300..d636032 100644 --- a/src/blitter/DirectPage.s +++ b/src/blitter/DirectPage.s @@ -9,6 +9,12 @@ ScreenTileHeight equ 12 ; Height of the playfield in 8x8 blocks ScreenTileWidth equ 14 ; Width of the playfield in 8x8 blocks StartY equ 16 ; Which code buffer line displays first on screen. 
Range = 0 to 207 +EngineMode equ 18 ; Defines the mode/capabilities that are enabled + ; bit 0: 0 = Single Background, 1 = Parallax +DirtyBits equ 20 ; Identify values that have changed between frames + +BG1DataBank equ 22 ; Data bank that holds BG1 layer data +BlitterDP equ 23 ; Direct page address that holds the blitter data bstk equ 224 ; 16-byte stack to push bank addresses @@ -21,6 +27,9 @@ tmp5 equ 250 tmp6 equ 252 tmp7 equ 254 +DIRTY_BIT_BG0_X equ $0001 +DIRTY_BIT_BG0_Y equ $0002 + diff --git a/src/blitter/Tables.s b/src/blitter/Tables.s index 4760377..6b88a8a 100644 --- a/src/blitter/Tables.s +++ b/src/blitter/Tables.s @@ -1,17 +1,17 @@ ; Collection of data tables ; -; Tile2CodeOffset +; Col2CodeOffset ; -; Takes a tile number (0 - 40) and returns the offset into the blitter code +; Takes a column number (0 - 81) and returns the offset into the blitter code ; template. ; ; This is used for rendering tile data into the code field. For example, is we assume that -; we are filling in the operans for a bunch of PEA values, we could do this +; we are filling in the operands for a bunch of PEA values, we could do this ; -; ldy tileNumber*2 +; ldy tileColumn*2 ; lda #DATA -; ldx Tile2CodeOffset,y +; ldx Col2CodeOffset,y ; sta $0001,x ; ; This table is necessary, because due to the data being draw via stack instructions, the @@ -207,3 +207,17 @@ ScreenAddr lup 200 ; playfield is less than 200 lines tall, then any values after 2 * PLAYFIELD_HEIGHT are undefine. RTable ds 400 +; Array of addresses for the banks that hold the blitter. +BlitBuff ds 4*13 + +; The blitter table (BTable) is a double-length table that holds the full 4-byte address of each +; line of the blit fields. 
We decompose arrays of pointers into separate high and low words so +; that everything can use the same indexing offsets +BTableHigh ds 208*2*2 +BTableLow ds 208*2*2 + + + + + + diff --git a/src/blitter/Template.s b/src/blitter/Template.s index 2735ebf..717bcec 100644 --- a/src/blitter/Template.s +++ b/src/blitter/Template.s @@ -1,43 +1,44 @@ ; Template and equates for GTE blitter - mx %00 + mx %00 -DP_ADDR equ entry_1-base+1 -BG1_ADDR equ entry_2-base+1 -STK_ADDR equ entry_3-base+1 +DP_ADDR equ entry_1-base+1 ; offset to patch in the direct page for dynamic tiles +BG1_ADDR equ entry_2-base+1 ; offset to patch in the Y-reg for BG1 (dp),y addressing +STK_ADDR equ entry_3-base+1 ; offset to patch in the stack (SHR) right edge address -CODE_ENTRY equ entry_jmp-base+1 ; low byte of the page-aligned jump address -CODE_TOP equ loop-base -CODE_LEN equ top-base -CODE_EXIT equ even_exit-base -OPCODE_SAVE equ odd_exit-base+1 ; spot to save the code field opcode when patching exit BRA +CODE_ENTRY equ entry_jmp-base+1 ; low byte of the page-aligned jump address +CODE_TOP equ loop-base +CODE_LEN equ top-base +CODE_EXIT equ even_exit-base +OPCODE_SAVE equ odd_exit-base+1 ; spot to save the code field opcode when patching exit BRA +FULL_RETURN equ full_return-base ; offset that returns from the blitter -LINES_PER_BANK equ 16 +LINES_PER_BANK equ 16 ; Locations that need the page offset added -PagePatches da {long_0-base+2} - da {long_1-base+2} - da {long_2-base+2} - da {long_3-base+2} - da {long_4-base+2} - da {long_5-base+2} - da {long_6-base+2} - da {odd_entry-base+2} - da {loop_exit_1-base+2} - da {loop_exit_2-base+2} - da {loop_back-base+2} - da {loop_exit_3-base+2} - da {even_exit-base+2} -PagePatchNum equ *-PagePatches +PagePatches da {long_0-base+2} + da {long_1-base+2} + da {long_2-base+2} + da {long_3-base+2} + da {long_4-base+2} + da {long_5-base+2} + da {long_6-base+2} + da {odd_entry-base+2} + da {loop_exit_1-base+2} + da {loop_exit_2-base+2} + da {loop_back-base+2} + 
da {loop_exit_3-base+2} + da {even_exit-base+2} +PagePatchNum equ *-PagePatches -BankPatches da {long_0-base+3} - da {long_1-base+3} - da {long_2-base+3} - da {long_3-base+3} - da {long_4-base+3} - da {long_5-base+3} - da {long_6-base+3} -BankPatchNum equ *-BankPatches +BankPatches da {long_0-base+3} + da {long_1-base+3} + da {long_2-base+3} + da {long_3-base+3} + da {long_4-base+3} + da {long_5-base+3} + da {long_6-base+3} +BankPatchNum equ *-BankPatches ; Set the physical location of the virtual screen on the physical screen. The ; screen size must by a multiple of 8 @@ -55,95 +56,88 @@ BankPatchNum equ *-BankPatches ; usually only be executed once during app initialization. It doesn't get called ; with any significant frequency. -SetScreenRect sty ScreenHeight ; Save the screen height and width - stx ScreenWidth +SetScreenRect sty ScreenHeight ; Save the screen height and width + stx ScreenWidth - tax ; Temp save of the accumulator - and #$00FF - sta ScreenY0 - clc - adc ScreenHeight - sta ScreenY1 + tax ; Temp save of the accumulator + and #$00FF + sta ScreenY0 + clc + adc ScreenHeight + sta ScreenY1 - txa ; Restore the accumulator - xba - and #$00FF - sta ScreenX0 - clc - adc ScreenWidth - sta ScreenX1 + txa ; Restore the accumulator + xba + and #$00FF + sta ScreenX0 + clc + adc ScreenWidth + sta ScreenX1 - lda ScreenHeight ; Divide the height in scanlines by 8 to get the number tiles - lsr - lsr - lsr - sta ScreenTileHeight + lda ScreenHeight ; Divide the height in scanlines by 8 to get the number tiles + lsr + lsr + lsr + sta ScreenTileHeight - lda ScreenWidth ; Divide width in bytes by 4 to get the number of tiles - lsr - lsr - sta ScreenTileWidth + lda ScreenWidth ; Divide width in bytes by 4 to get the number of tiles + lsr + lsr + sta ScreenTileWidth - lda ScreenY0 ; Calculate the address of the first byte - asl ; of the right side of the playfield - tax - lda ScreenAddr,x - clc - adc ScreenX1 - dec - pha ; Save for second loop + lda ScreenY0 ; Calculate 
the address of the first byte + asl ; of the right side of the playfield + tax + lda ScreenAddr,x ; This is the address for the left edge of the physical screen + clc + adc ScreenX1 + dec + pha ; Save for second loop - ldx #0 - ldy ScreenHeight - clc -:loop1 sta RTable,x - adc #160 - inx - inx - dey - bne :loop1 - - pla ; Reset the address and continue filling in the - ldy ScreenHeight ; second half of the table -:loop2 sta RTable,x - adc #160 - inx - inx - dey - bne :loop2 - - rts + ldx #0 + ldy ScreenHeight + jsr :loop + pla ; Reset the address and continue filling in the + ldy ScreenHeight ; second half of the table +:loop clc + sta RTable,x + adc #160 + inx + inx + dey + bne :loop + rts ; Clear the SHR screen and then infill the defined field -FillScreen lda #0 - jsr ClearToColor +FillScreen lda #0 + jsr ClearToColor - ldy ScreenY0 + ldy ScreenY0 ]yloop - tya - asl a - tax - lda ScreenAddr,x - clc - adc ScreenX0 - tax - phy + tya + asl a + tax + lda ScreenAddr,x + clc + adc ScreenX0 + tax + phy - lda ScreenWidth - lsr - tay - lda #$FFFF -]xloop stal $E10000,x - inx - inx - dey - bne ]xloop + lda ScreenWidth + lsr + tay + lda #$FFFF +]xloop stal $E10000,x + inx + inx + dey + bne ]xloop - ply - iny - cpy ScreenY1 - bcc ]yloop - rts + ply + iny + cpy ScreenY1 + bcc ]yloop + rts ; Set the starting line of the virtual buffer that will be displayed on the first physical line ; of the playfield. @@ -153,6 +147,9 @@ FillScreen lda #0 ; There are a few things that need to happen with the Y-position of the virtual buffer is changed: ; ; 1. The address of the stack in the code fields needs to be changed +; 2. The entry point into the code field needs to be set +; 3. The (old) return code needs to be removed +; 4. 
The new return code needs to be inserted after the last line ; ; If there is a second background, then the Y-register value in the code field needs to ; change as well, but that is deferred until later because we don't want to duplicate work @@ -192,129 +189,137 @@ FillScreen lda #0 ; do_action(curr_bank, 0, line_count) ; } -start_mod_16 equ tmp0 -lines_left equ tmp1 -tblptr equ tmp2 -stksave equ tmp7 +; Helper function to return the address of a specific blitter code field line +; +; Input: A = line number [0, 207] +; Output: A = low word, X = high word +GetBlitLineAddress + pha ; save the value -SetYPos sta StartY ; Save the position + and #$FFF0 ; Divide by 16 to get the bank number of this line and + lsr ; then multiply by 4 to get the offset. So just divide by 4. + lsr + tax + lda BlitBuff+2,x ; This is the high word of the bank address + tax -; First action is to calculate the number of code banks that we will be updating and push all of the -; bank bytes onto the stack in order so that we can use a single 'plb' instruction to set the target -; for updating the screen address of each blitter line. + pla ; Pop the value and multiply the lower 4 bits by 4096 to get + and #$000F ; the line offset within the bank + xba + asl + asl + asl + asl ; This is the page of the line + rts - lsr ; divide by 4. This is really StartY / 16 but we - lsr ; need to multiple by 4 to index into the array of - and #$003C ; code bank addresses. 
- tay -; Quick stack save because we re-point the stack into some direct page space to aboid having to -; mix 8 and 16 bit modes for bank anipulation +lines_left ds 2 +start_mod_16 ds 2 +tblptr ds 2 +stksave ds 2 +SetYPos sta StartY ; Save the position - tsc - sta stksave + lda ScreenHeight + sta lines_left - lda ScreenHeight - sta lines_left + lda StartY ; Now figure out exactly how many banks we cross by + and #$000F ; calculating ((StartY % 16) + ScreenHeight) / 16 + sta start_mod_16 + clc + adc ScreenHeight + and #$00F0 ; Just keep the relevant nibble + lsr + lsr + lsr + tax ; Keep the value pre-multiplied by 2 - lda StartY ; Now figure out exactly how many banks we cross by - and #$000F ; calculating ((StartY % 16) + ScreenHeight) / 16 - sta start_mod_16 - clc - adc ScreenHeight - and #$00F0 ; Just keep the relevant nibble - lsr - lsr - lsr - tax ; Keep the value pre-multiplied by 2 + ldy #0 - ldy #0 - - jsr PushBanks ; Push the bank bytes on the stack - brl :out + jsr PushBanks ; Push the bank bytes on the stack + brl :out ; Start of the main body of the function. We need to get a pointer to the correct offset of ; the RTable to copy screen addresses into the code fields - lda ScreenY0 - asl - clc - adc #RTable - sta tblptr + lda ScreenY0 + asl + clc + adc #RTable + sta tblptr ; Check to see where we start. If we are aligned with a code bank, then skip to the -; fast inner loop. Otherwise to one iteration to get things lined up +; fast inner loop. Otherwise do one iteration to get things lined up -:prologue lda start_mod_16 - beq :body +:prologue lda start_mod_16 + beq :body - _Mul4096 ; Save the offset into the code bank of the - tay ; first line. + _Mul4096 ; Save the offset into the code bank of the + tay ; first line. - lda #16 ; Now figure out how many lines to execute. 
Usually - sec ; this will just be the lines to the end of the code - sbc start_mod_16 ; bank, but if the total screen height is smaller than - cmp ScreenHeight ; the number of lines in the code bank, we need to clamp - bcc :min_1 ; the maximum value - lda ScreenHeight -:min_1 sta tmp4 ; save for updating the counters + lda #16 ; Now figure out how many lines to execute. Usually + sec ; this will just be the lines to the end of the code + sbc start_mod_16 ; bank, but if the total screen height is smaller than + cmp ScreenHeight ; the number of lines in the code bank, we need to clamp + bcc :min_1 ; the maximum value + lda ScreenHeight +:min_1 sta tmp4 ; save for updating the counters - asl - tax ; do this many lines - lda tblptr ; starting at this address + asl + tax ; do this many lines + lda tblptr ; starting at this address - plb ; Set the code field bank - jsr CopyFromArray2 ; Copy the right screen edge addresses + plb ; Set the code field bank + jsr CopyFromArray2 ; Copy the right screen edge addresses - lda lines_left - sec - sbc tmp4 - sta lines_left + lda lines_left + sec + sbc tmp4 + sta lines_left - lda tblptr - clc - adc tmp4 - adc tmp4 - sta tblptr + lda tblptr + clc + adc tmp4 + adc tmp4 + sta tblptr ; While the number of lines left to render is 16 or greater, loop -:body lda lines_left - cmp #16 - bcc :epilogue +:body lda lines_left + cmp #16 + bcc :epilogue - ldy #0 - ldx tblptr -:body0 plb ; Set the code field bank - jsr CopyFromArray2Top ; to bypass the need to set the X register + ldy #0 + ldx tblptr +:body0 plb ; Set the code field bank + jsr CopyFromArray2Top ; to bypass the need to set the X register - txa - clc - adc #32 - tax + txa + clc + adc #32 + tax - lda lines_left - sec - sbc #16 - sta lines_left + lda lines_left + sec + sbc #16 + sta lines_left - cmp #16 ; Repeat the test here to we can skip some - bcs :body0 ; redundant setup and spill the X register - stx tblptr ; back into tblptr when done + cmp #16 ; Repeat the test here to we can 
skip some + bcs :body0 ; redundant setup and spill the X register + stx tblptr ; back into tblptr when done -:epilogue lda lines_left - beq :out +:epilogue lda lines_left + beq :out - asl ; Y is still zero - tax - lda tblptr - plb ; Set the code field bank - jsr CopyFromArray2 ; to bypass the need to set the X register + asl ; Y is still zero + tax + lda tblptr + plb ; Set the code field bank + jsr CopyFromArray2 ; to bypass the need to set the X register -:out lda stksave ; put the stack back - tcs - phk ; Need to restore the current bank - plb - rts +:out lda stksave ; put the stack back + tcs + phk ; Need to restore the current bank + plb + rts ; Special subroutine to divide the accumulator by 208 and return remainder in the Accumulator ; @@ -324,101 +329,64 @@ SetYPos sta StartY ; Save the position ; https://www.drdobbs.com/parallel/optimizing-integer-division-by-a-constan/184408499 ; https://embeddedgurus.com/stack-overflow/2009/06/division-of-integers-by-constants/ -Mod208 cmp #%1101000000000000 - bcc *+5 - sbc #%1101000000000000 +Mod208 cmp #%1101000000000000 + bcc *+5 + sbc #%1101000000000000 - cmp #%0110100000000000 - bcc *+5 - sbc #%0110100000000000 + cmp #%0110100000000000 + bcc *+5 + sbc #%0110100000000000 - cmp #%0011010000000000 - bcc *+5 - sbc #%0011010000000000 + cmp #%0011010000000000 + bcc *+5 + sbc #%0011010000000000 - cmp #%0001101000000000 - bcc *+5 - sbc #%0001101000000000 + cmp #%0001101000000000 + bcc *+5 + sbc #%0001101000000000 - cmp #%0000110100000000 - bcc *+5 - sbc #%0000110100000000 + cmp #%0000110100000000 + bcc *+5 + sbc #%0000110100000000 - cmp #%0000011010000000 - bcc *+5 - sbc #%0000011010000000 + cmp #%0000011010000000 + bcc *+5 + sbc #%0000011010000000 - cmp #%0000001101000000 - bcc *+5 - sbc #%0000001101000000 + cmp #%0000001101000000 + bcc *+5 + sbc #%0000001101000000 - cmp #%0000000110100000 - bcc *+5 - sbc #%0000000110100000 + cmp #%0000000110100000 + bcc *+5 + sbc #%0000000110100000 - cmp #%0000000011010000 - bcc *+5 - sbc 
#%0000000011010000 - rts + cmp #%0000000011010000 + bcc *+5 + sbc #%0000000011010000 + rts ; BankYSetup ; ; This is the set of function that have to be done to set up all of the code banks ; for execution when the Y-Origin of the virtual screen changes. The tasks are: -; -; - -; Copy tile data into code field. Their are specialized copy routines -; -; CopyTileConst -- the first 16 tile numbers are reserved and can be used -; to draw a solid tile block -CopyTile cmp #$0010 - bcs :invalid - asl - tax - ldal TilePatterns,x - bra CopyTileConst -:invalid rts - -TilePatterns dw $0000,$1111,$2222,$3333 - dw $4444,$5555,$6666,$7777 - dw $8888,$9999,$AAAA,$BBBB - dw $CCCC,$DDDD,$EEEE,$FFFF - -CopyTileConst sta: $0000,y - sta: $0003,y - sta $1000,y - sta $1003,y - sta $2000,y - sta $2003,y - sta $3000,y - sta $3003,y - sta $4000,y - sta $4003,y - sta $5000,y - sta $5003,y - sta $6000,y - sta $6003,y - sta $7000,y - sta $7003,y - rts ; Patch out the final JMP to jump to the long JML return code ; ; Y = starting line * $1000 -SetReturn lda #$0280 ; BRA *+4 - sta CODE_EXIT,y - rts +SetReturn lda #$0280 ; BRA *+4 + sta CODE_EXIT,y + rts -ResetReturn lda #$004C ; JMP $XX00 - sta CODE_EXIT,y - rts +ResetReturn lda #$004C ; JMP $XX00 + sta CODE_EXIT,y + rts ; Fill in the even_exit JMP instruction to jump to the next line (all but last line) -SetNextLine lda #$F000+{entry_3-base} - ldy #CODE_EXIT+1 - ldx #15*2 - jmp SetAbsAddrs +SetNextLine lda #$F000+{entry_3-base} + ldy #CODE_EXIT+1 + ldx #15*2 + jmp SetAbsAddrs ; Copy a series of bank bytes onto the direct page, which we will later point the stack ; at, and are use to iterate among the different code banks. 
@@ -426,42 +394,42 @@ SetNextLine lda #$F000+{entry_3-base} ; Y = starting index * 4 ; X = number of bank -PushBanks sep #$20 - jmp (:tbl,x) -:tbl da :bottom-05,:bottom-10,:bottom-15,:bottom-20 - da :bottom-25,:bottom-30,:bottom-35,:bottom-40 - da :bottom-45,:bottom-50,:bottom-55,:bottom-60 - da :bottom-65 -:top lda: BlitBuff+48,y ; These are all 8-bit loads and stores - sta bstk+13 - lda: BlitBuff+44,y - sta bstk+12 - lda: BlitBuff+42,y - sta bstk+11 - lda: BlitBuff+38,y - sta bstk+10 - lda: BlitBuff+34,y - sta bstk+9 - lda: BlitBuff+30,y - sta bstk+8 - lda: BlitBuff+26,y - sta bstk+7 - lda: BlitBuff+22,y - sta bstk+6 - lda: BlitBuff+18,y - sta bstk+5 - lda: BlitBuff+14,y - sta bstk+4 - lda: BlitBuff+10,y - sta bstk+3 - lda: BlitBuff+6,y - sta bstk+2 - lda: BlitBuff+2,y - sta bstk+1 - lda: BlitBuff,y - sta bstk -:bottom rep #$20 - rts +PushBanks sep #$20 + jmp (:tbl,x) +:tbl da :bottom-05,:bottom-10,:bottom-15,:bottom-20 + da :bottom-25,:bottom-30,:bottom-35,:bottom-40 + da :bottom-45,:bottom-50,:bottom-55,:bottom-60 + da :bottom-65 +:top lda: BlitBuff+48,y ; These are all 8-bit loads and stores + sta bstk+13 + lda: BlitBuff+44,y + sta bstk+12 + lda: BlitBuff+42,y + sta bstk+11 + lda: BlitBuff+38,y + sta bstk+10 + lda: BlitBuff+34,y + sta bstk+9 + lda: BlitBuff+30,y + sta bstk+8 + lda: BlitBuff+26,y + sta bstk+7 + lda: BlitBuff+22,y + sta bstk+6 + lda: BlitBuff+18,y + sta bstk+5 + lda: BlitBuff+14,y + sta bstk+4 + lda: BlitBuff+10,y + sta bstk+3 + lda: BlitBuff+6,y + sta bstk+2 + lda: BlitBuff+2,y + sta bstk+1 + lda: BlitBuff,y + sta bstk +:bottom rep #$20 + rts ; Patch an 8-bit or 16-bit valueS into the bank. 
These are a set up unrolled loops to ; quickly patch in a constanct value, or a value from an array into a given set of @@ -478,30 +446,30 @@ PushBanks sep #$20 ; A = value ; ; Set M to 0 or 1 -SetConst ; Need a blnk line here, otherwise the :tbl local variable resolveds backwards - jmp (:tbl,x) -:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09 - da :bottom-12,:bottom-15,:bottom-18,:bottom-21 - da :bottom-24,:bottom-27,:bottom-30,:bottom-33 - da :bottom-36,:bottom-39,:bottom-42,:bottom-45 - da :bottom-48 -:top sta $F000,y - sta $E000,y - sta $D000,y - sta $C000,y - sta $B000,y - sta $A000,y - sta $9000,y - sta $8000,y - sta $7000,y - sta $6000,y - sta $5000,y - sta $4000,y - sta $3000,y - sta $2000,y - sta $1000,y - sta: $0000,y -:bottom rts +SetConst ; Need a blnk line here, otherwise the :tbl local variable resolveds backwards + jmp (:tbl,x) +:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09 + da :bottom-12,:bottom-15,:bottom-18,:bottom-21 + da :bottom-24,:bottom-27,:bottom-30,:bottom-33 + da :bottom-36,:bottom-39,:bottom-42,:bottom-45 + da :bottom-48 +:top sta $F000,y + sta $E000,y + sta $D000,y + sta $C000,y + sta $B000,y + sta $A000,y + sta $9000,y + sta $8000,y + sta $7000,y + sta $6000,y + sta $5000,y + sta $4000,y + sta $3000,y + sta $2000,y + sta $1000,y + sta: $0000,y +:bottom rts ; SaveOpcode ; @@ -511,51 +479,51 @@ SetConst ; Need a blnk line here, o ; X = number of lines * 2, 0 to 32 ; Y = starting line * $1000 ; A = store location * $1000 -SaveOpcode pha ; save the accumulator - ldal :tbl,x - dec - plx ; put the accumulator into X - pha ; push the address into the stack - rts ; and jump +SaveOpcode pha ; save the accumulator + ldal :tbl,x + dec + plx ; put the accumulator into X + pha ; push the address into the stack + rts ; and jump -:tbl da :bottom-00,:bottom-06,:bottom-12,:bottom-18 - da :bottom-24,:bottom-30,:bottom-36,:bottom-42 - da :bottom-48,:bottom-54,:bottom-60,:bottom-66 - da :bottom-72,:bottom-78,:bottom-84,:bottom-90 - da :bottom-96 
-:top lda $F000,y - sta $F000,x - lda $E000,y - sta $E000,x - lda $D000,y - sta $D000,x - lda $C000,y - sta $C000,x - lda $B000,y - sta $B000,x - lda $A000,y - sta $A000,x - lda $9000,y - sta $9000,x - lda $8000,y - sta $8000,x - lda $7000,y - sta $7000,x - lda $6000,y - sta $6000,x - lda $5000,y - sta $5000,x - lda $4000,y - sta $4000,x - lda $3000,y - sta $3000,x - lda $2000,y - sta $2000,x - lda $1000,y - sta $1000,x - lda: $0000,y - sta: $0000,x -:bottom rts +:tbl da :bottom-00,:bottom-06,:bottom-12,:bottom-18 + da :bottom-24,:bottom-30,:bottom-36,:bottom-42 + da :bottom-48,:bottom-54,:bottom-60,:bottom-66 + da :bottom-72,:bottom-78,:bottom-84,:bottom-90 + da :bottom-96 +:top lda $F000,y + sta $F000,x + lda $E000,y + sta $E000,x + lda $D000,y + sta $D000,x + lda $C000,y + sta $C000,x + lda $B000,y + sta $B000,x + lda $A000,y + sta $A000,x + lda $9000,y + sta $9000,x + lda $8000,y + sta $8000,x + lda $7000,y + sta $7000,x + lda $6000,y + sta $6000,x + lda $5000,y + sta $5000,x + lda $4000,y + sta $4000,x + lda $3000,y + sta $3000,x + lda $2000,y + sta $2000,x + lda $1000,y + sta $1000,x + lda: $0000,y + sta: $0000,x +:bottom rts ; RestoreOpcode ; @@ -565,52 +533,52 @@ SaveOpcode pha ; save the accumulator ; X = number of lines * 2, 0 to 32 ; Y = starting line * $1000 ; A = store location * $1000 -RestoreOpcode pha ; save the accumulator - ldal :tbl,x - dec - plx ; put the accumulator into X - pha ; push the address into the stack - rts ; and jump +RestoreOpcode pha ; save the accumulator + ldal :tbl,x + dec + plx ; put the accumulator into X + pha ; push the address into the stack + rts ; and jump -:tbl da :bottom-00,:bottom-06,:bottom-12,:bottom-18 - da :bottom-24,:bottom-30,:bottom-36,:bottom-42 - da :bottom-48,:bottom-54,:bottom-60,:bottom-66 - da :bottom-72,:bottom-78,:bottom-84,:bottom-90 - da :bottom-96 +:tbl da :bottom-00,:bottom-06,:bottom-12,:bottom-18 + da :bottom-24,:bottom-30,:bottom-36,:bottom-42 + da :bottom-48,:bottom-54,:bottom-60,:bottom-66 + da 
:bottom-72,:bottom-78,:bottom-84,:bottom-90 + da :bottom-96 -:top lda $F000,x - sta $F000,y - lda $E000,x - sta $E000,y - lda $D000,x - sta $D000,y - lda $C000,x - sta $C000,y - lda $B000,x - sta $B000,y - lda $A000,x - sta $A000,y - lda $9000,x - sta $9000,y - lda $8000,x - sta $8000,y - lda $7000,x - sta $7000,y - lda $6000,x - sta $6000,y - lda $5000,x - sta $5000,y - lda $4000,x - sta $4000,y - lda $3000,x - sta $3000,y - lda $2000,x - sta $2000,y - lda $1000,x - sta $1000,y - lda: $0000,x - sta: $0000,y -:bottom rts +:top lda $F000,x + sta $F000,y + lda $E000,x + sta $E000,y + lda $D000,x + sta $D000,y + lda $C000,x + sta $C000,y + lda $B000,x + sta $B000,y + lda $A000,x + sta $A000,y + lda $9000,x + sta $9000,y + lda $8000,x + sta $8000,y + lda $7000,x + sta $7000,y + lda $6000,x + sta $6000,y + lda $5000,x + sta $5000,y + lda $4000,x + sta $4000,y + lda $3000,x + sta $3000,y + lda $2000,x + sta $2000,y + lda $1000,x + sta $1000,y + lda: $0000,x + sta: $0000,y +:bottom rts ; CopyFromArray ; @@ -619,52 +587,52 @@ RestoreOpcode pha ; save the accumulator ; X = number of lines * 2, 0 to 32 ; Y = starting line * $1000 ; A = array address -CopyFromArray2 pha ; save the accumulator - ldal :tbl,x - dec - plx ; put the accumulator into X - pha ; push the address into the stack - rts ; and jump +CopyFromArray2 pha ; save the accumulator + ldal :tbl,x + dec + plx ; put the accumulator into X + pha ; push the address into the stack + rts ; and jump -:tbl da bottomCFA2-00,bottomCFA2-06,bottomCFA2-12,bottomCFA2-18 - da bottomCFA2-24,bottomCFA2-30,bottomCFA2-36,bottomCFA2-42 - da bottomCFA2-48,bottomCFA2-54,bottomCFA2-60,bottomCFA2-66 - da bottomCFA2-72,bottomCFA2-78,bottomCFA2-84,bottomCFA2-90 - da bottomCFA2-96 +:tbl da bottomCFA2-00,bottomCFA2-06,bottomCFA2-12,bottomCFA2-18 + da bottomCFA2-24,bottomCFA2-30,bottomCFA2-36,bottomCFA2-42 + da bottomCFA2-48,bottomCFA2-54,bottomCFA2-60,bottomCFA2-66 + da bottomCFA2-72,bottomCFA2-78,bottomCFA2-84,bottomCFA2-90 + da 
bottomCFA2-96 -CopyFromArray2Top lda: $001E,x - sta $F000,y - lda: $001C,x - sta $E000,y - lda: $001A,x - sta $D000,y - lda: $0018,x - sta $C000,y - lda: $0016,x - sta $B000,y - lda: $0014,x - sta $A000,y - lda: $0012,x - sta $9000,y - lda: $0010,x - sta $8000,y - lda: $000E,x - sta $7000,y - lda: $000C,x - sta $6000,y - lda: $000A,x - sta $5000,y - lda: $0008,x - sta $4000,y - lda: $0006,x - sta $3000,y - lda: $0004,x - sta $2000,y - lda: $0002,x - sta $1000,y - lda: $0000,x - sta: $0000,y -bottomCFA2 rts +CopyFromArray2Top lda: $001E,x + sta $F000,y + lda: $001C,x + sta $E000,y + lda: $001A,x + sta $D000,y + lda: $0018,x + sta $C000,y + lda: $0016,x + sta $B000,y + lda: $0014,x + sta $A000,y + lda: $0012,x + sta $9000,y + lda: $0010,x + sta $8000,y + lda: $000E,x + sta $7000,y + lda: $000C,x + sta $6000,y + lda: $000A,x + sta $5000,y + lda: $0008,x + sta $4000,y + lda: $0006,x + sta $3000,y + lda: $0004,x + sta $2000,y + lda: $0002,x + sta $1000,y + lda: $0000,x + sta: $0000,y +bottomCFA2 rts ; SetScreenAddrs ; @@ -673,46 +641,46 @@ bottomCFA2 rts ; X = number of lines ; ; Automatically decrements address by 160 bytes each line -SetScreenAddrs sec - jmp (:tbl,x) -:tbl da bottomSSA-00,bottomSSA-03,bottomSSA-09,bottomSSA-15 - da bottomSSA-21,bottomSSA-27,bottomSSA-33,bottomSSA-39 - da bottomSSA-45,bottomSSA-51,bottomSSA-57,bottomSSA-63 - da bottomSSA-69,bottomSSA-75,bottomSSA-81,bottomSSA-87 - da bottomSSA-93 +SetScreenAddrs sec + jmp (:tbl,x) +:tbl da bottomSSA-00,bottomSSA-03,bottomSSA-09,bottomSSA-15 + da bottomSSA-21,bottomSSA-27,bottomSSA-33,bottomSSA-39 + da bottomSSA-45,bottomSSA-51,bottomSSA-57,bottomSSA-63 + da bottomSSA-69,bottomSSA-75,bottomSSA-81,bottomSSA-87 + da bottomSSA-93 -SetScreenAddrsTop sta STK_ADDR+$F000,y - sbc #160 - sta STK_ADDR+$E000,y - sbc #160 - sta STK_ADDR+$D000,y - sbc #160 - sta STK_ADDR+$C000,y - sbc #160 - sta STK_ADDR+$B000,y - sbc #160 - sta STK_ADDR+$A000,y - sbc #160 - sta STK_ADDR+$9000,y - sbc #160 - sta STK_ADDR+$8000,y - 
sbc #160 - sta STK_ADDR+$7000,y - sbc #160 - sta STK_ADDR+$6000,y - sbc #160 - sta STK_ADDR+$5000,y - sbc #160 - sta STK_ADDR+$4000,y - sbc #160 - sta STK_ADDR+$3000,y - sbc #160 - sta STK_ADDR+$2000,y - sbc #160 - sta STK_ADDR+$1000,y - sbc #160 - sta: STK_ADDR+$0000,y -bottomSSA rts +SetScreenAddrsTop sta STK_ADDR+$F000,y + sbc #160 + sta STK_ADDR+$E000,y + sbc #160 + sta STK_ADDR+$D000,y + sbc #160 + sta STK_ADDR+$C000,y + sbc #160 + sta STK_ADDR+$B000,y + sbc #160 + sta STK_ADDR+$A000,y + sbc #160 + sta STK_ADDR+$9000,y + sbc #160 + sta STK_ADDR+$8000,y + sbc #160 + sta STK_ADDR+$7000,y + sbc #160 + sta STK_ADDR+$6000,y + sbc #160 + sta STK_ADDR+$5000,y + sbc #160 + sta STK_ADDR+$4000,y + sbc #160 + sta STK_ADDR+$3000,y + sbc #160 + sta STK_ADDR+$2000,y + sbc #160 + sta STK_ADDR+$1000,y + sbc #160 + sta: STK_ADDR+$0000,y +bottomSSA rts ; SetAbsAddrs ; @@ -721,269 +689,276 @@ bottomSSA rts ; X = number of lines ; ; Stores a value and decrements by $1000 for each line -SetAbsAddrs sec - jmp (:tbl,x) -:tbl da :bottom-00,:bottom-03,:bottom-09,:bottom-15 - da :bottom-21,:bottom-27,:bottom-33,:bottom-39 - da :bottom-45,:bottom-51,:bottom-57,:bottom-63 - da :bottom-69,:bottom-75,:bottom-81,:bottom-87 - da :bottom-93 -:top sta $F000,y - sbc #$1000 - sta $E000,y - sbc #$1000 - sta $D000,y - sbc #$1000 - sta $C000,y - sbc #$1000 - sta $B000,y - sbc #$1000 - sta $A000,y - sbc #$1000 - sta $9000,y - sbc #$1000 - sta $8000,y - sbc #$1000 - sta $7000,y - sbc #$1000 - sta $6000,y - sbc #$1000 - sta $5000,y - sbc #$1000 - sta $4000,y - sbc #$1000 - sta $3000,y - sbc #$1000 - sta $2000,y - sbc #$1000 - sta $1000,y - sbc #$1000 - sta: $0000,y -:bottom rts +SetAbsAddrs sec + jmp (:tbl,x) +:tbl da :bottom-00,:bottom-03,:bottom-09,:bottom-15 + da :bottom-21,:bottom-27,:bottom-33,:bottom-39 + da :bottom-45,:bottom-51,:bottom-57,:bottom-63 + da :bottom-69,:bottom-75,:bottom-81,:bottom-87 + da :bottom-93 +:top sta $F000,y + sbc #$1000 + sta $E000,y + sbc #$1000 + sta $D000,y + sbc 
#$1000 + sta $C000,y + sbc #$1000 + sta $B000,y + sbc #$1000 + sta $A000,y + sbc #$1000 + sta $9000,y + sbc #$1000 + sta $8000,y + sbc #$1000 + sta $7000,y + sbc #$1000 + sta $6000,y + sbc #$1000 + sta $5000,y + sbc #$1000 + sta $4000,y + sbc #$1000 + sta $3000,y + sbc #$1000 + sta $2000,y + sbc #$1000 + sta $1000,y + sbc #$1000 + sta: $0000,y +:bottom rts ; Full up a full bank with blitter templates. Currently we can fit 16 lines per bank, so need ; a total of 13 banks to hold the 208 lines to full-screen support ; ; A = high word of bank table ; Y = index * 4 of the bank to initialize -bankArray equ tmp0 -target equ tmp2 -nextBank equ tmp4 +bankArray equ tmp0 +target equ tmp2 +nextBank equ tmp4 BuildBank - stx bankArray - sta bankArray+2 + stx bankArray + sta bankArray+2 - stz target - iny - iny - lda [bankArray],y - sta target+2 + stz target + iny + iny + lda [bankArray],y + sta target+2 - iny ; move to the next item - iny - iny ; middle byte - cpy #4*13 ; if greater than the array length, wrap back to zero - bcc :ok - ldy #1 -:ok lda [bankArray],y ; Get the middle and high bytes of the address - sta nextBank + iny ; move to the next item + iny + iny ; middle byte + cpy #4*13 ; if greater than the array length, wrap back to zero + bcc :ok + ldy #1 +:ok lda [bankArray],y ; Get the middle and high bytes of the address + sta nextBank :next - jsr BuildLine2 - lda target - clc - adc #$1000 - sta target - bcc :next + jsr BuildLine2 + lda target + clc + adc #$1000 + sta target + bcc :next - phb - pei target+1 - plb - plb + phb + pei target+1 + plb + plb - lda #$F000+{entry_3-base} ; Set the address from each line to the next - ldy #CODE_EXIT+1 - ldx #15*2 - jsr SetAbsAddrs + lda #$F000+{entry_3-base} ; Set the address from each line to the next + ldy #CODE_EXIT+1 + ldx #15*2 + jsr SetAbsAddrs - ldy #$F000+CODE_EXIT ; Patch the last line with a JML to go to the next bank - lda #{$005C+{entry_3-base}*256} - sta [target],y - ldy #$F000+CODE_EXIT+2 - lda nextBank - sta 
[target],y + ldy #$F000+CODE_EXIT ; Patch the last line with a JML to go to the next bank + lda #{$005C+{entry_3-base}*256} + sta [target],y + ldy #$F000+CODE_EXIT+2 + lda nextBank + sta [target],y - plb - rts + plb + rts -; this is a relocation subroutine, it is responsible for copying the template to a +; This is the relocation subroutine, it is responsible for copying the template to a ; memory location and patching up the necessary instructions. ; ; X = low word of address (must be a multiple of $1000) ; A = high word of address (bank) BuildLine - stx target - sta target+2 + stx target + sta target+2 BuildLine2 - lda #CODE_LEN ; round up to an even number of bytes - inc - and #$FFFE - beq :nocopy - dec - dec - tay -:loop lda base,y - sta [target],y + lda #CODE_LEN ; round up to an even number of bytes + inc + and #$FFFE + beq :nocopy + dec + dec + tay +:loop lda base,y + sta [target],y - dey - dey - bpl :loop + dey + dey + bpl :loop -:nocopy lda #0 ; copy is complete, now patch up the addresses - sep #$20 +:nocopy lda #0 ; copy is complete, now patch up the addresses + sep #$20 - ldx #0 - lda target+2 ; patch in the bank for the absolute long addressing mode -:dobank ldy BankPatches,x - sta [target],y - inx - inx - cpx #BankPatchNum - bcc :dobank + ldx #0 + lda target+2 ; patch in the bank for the absolute long addressing mode +:dobank ldy BankPatches,x + sta [target],y + inx + inx + cpx #BankPatchNum + bcc :dobank - ldx #0 -:dopage ldy PagePatches,x ; patch the page addresses by adding the page offset to each - lda [target],y - clc - adc target+1 - sta [target],y - inx - inx - cpx #PagePatchNum - bcc :dopage + ldx #0 +:dopage ldy PagePatches,x ; patch the page addresses by adding the page offset to each + lda [target],y + clc + adc target+1 + sta [target],y + inx + inx + cpx #PagePatchNum + bcc :dopage :out - rep #$20 - rts + rep #$20 + rts -; start of the template code +; Start of the template code. 
This code is replicated 16 times per bank and spans +; 13 banks for a total of 208 lines, which is what is required to render 26 tiles +; to cover the full screen vertical scrolling. +; +; The 'base' location is always assumed to be on a 4kb ($1000) boundary base -entry_1 ldx #0000 -entry_2 ldy #0000 -entry_3 lda #0000 - tcs +entry_1 ldx #0000 ; Used for LDA 00,x addressing +entry_2 ldy #0000 ; Used for LDA (00),y addressing +entry_3 lda #0000 ; Sets screen address (right edge) + tcs long_0 -entry_jmp jmp $0100 - dfb $00 ; if the screen is odd-aligned, then the opcode is set to -; ; $AF to convert to a LDA long instruction. This puts the -; ; first two bytes of the instruction field in the accumulator -; ; and falls through to the next instruction. -; -; ; We structure the line so that the entry point only needs to -; ; update the low-byte of the address, the means it takes only -; ; an amortized 4-cycles per line to set the entry pointbra +entry_jmp jmp $0100 + dfb $00 ; if the screen is odd-aligned, then the opcode is set to + ; $AF to convert to a LDA long instruction. This puts the + ; first two bytes of the instruction field in the accumulator + ; and falls through to the next instruction. -right_odd bit #$000B ; Check the bottom nibble to quickly identify a PEA instruction - beq r_is_pea ; This costs 6 cycles in the fast-path + ; We structure the line so that the entry point only needs to + ; update the low-byte of the address, the means it takes only + ; an amortized 4-cycles per line to set the entry pointbra - bit #$0040 ; Check bit 6 to distinguish between JMP and all of the LDA variants - bne r_is_jmp +right_odd bit #$000B ; Check the bottom nibble to quickly identify a PEA instruction + beq r_is_pea ; This costs 6 cycles in the fast-path -long_1 stal *+4-base - dfb $00,$00 ; this here to avoid needing a BRA instruction back. So the fast-path -; ; gets a 1-cycle penalty, but we save 3 cycles here. 
+ bit #$0040 ; Check bit 6 to distinguish between JMP and all of the LDA variants + bne r_is_jmp -r_is_pea xba ; fast code for PEA - sep #$30 - pha - rep #$30 -odd_entry jmp $0100 ; unconditionally jump into the "next" instruction in the -; ; code field. This is OK, even if the entry point was the -; ; last instruction, because there is a JMP at the end of -; ; the code field, so the code will simply jump to that -; ; instruction directly. -; ; -; ; As with the original entry point, because all of the -; ; code field is page-aligned, only the low byte needs to -; ; be updated when the scroll position changes +long_1 stal *+4-base + dfb $00,$00 ; this here to avoid needing a BRA instruction back. So the fast-path + ; gets a 1-cycle penalty, but we save 3 cycles here. -r_is_jmp sep #$41 ; Set the C and V flags which tells a snippet to push only the low byte -long_2 ldal entry_jmp+1-base -long_3 stal *+5-base - dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 16 cycles, is there a better way?) +r_is_pea xba ; fast code for PEA + sep #$30 + pha + rep #$30 +odd_entry jmp $0100 ; unconditionally jump into the "next" instruction in the + ; code field. This is OK, even if the entry point was the + ; last instruction, because there is a JMP at the end of + ; the code field, so the code will simply jump to that + ; instruction directly. + ; + ; As with the original entry point, because all of the + ; code field is page-aligned, only the low byte needs to + ; be updated when the scroll position changes + +r_is_jmp sep #$41 ; Set the C and V flags which tells a snippet to push only the low byte +long_2 ldal entry_jmp+1-base +long_3 stal *+5-base + dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 16 cycles, is there a better way?) + +; Special exit code that is less than 256 bytes from the start of the template +full_return jml blt_return ; Full exit ; This is the spot that needs to be page-aligned. 
In addition to simplifying the entry address ; and only needing to update a byte instad of a word, because the code breaks out of the ; code field with a BRA instruction, we keep everything within a page to avoid the 1-cycle ; page-crossing penalty of the branch. - ds 204 -loop_exit_1 jmp odd_exit-base ; +0 Alternate exit point depending on whether the left edge is -loop_exit_2 jmp even_exit-base ; +3 odd-aligned -loop lup 82 ; +6 Set up 82 PEA instructions, which is 328 pixels and consumes 246 bytes - pea $0000 ; This is 41 8x8 tiles in width. Need to have N+1 tiles for screen overlap - --^ -loop_back jmp loop-base ; +252 Ensure execution continues to loop around -loop_exit_3 jmp even_exit-base ; +255 + ds 200 +loop_exit_1 jmp odd_exit-base ; +0 Alternate exit point depending on whether the left edge is +loop_exit_2 jmp even_exit-base ; +3 odd-aligned -odd_exit lda #0000 ; This operand field is *always* used to hold the original 2 bytes of the code field -; ; that are replaced by the needed BRA instruction to exit the code field. When the -; ; left edge is odd-aligned, we are able to immediately load the value and perform -; ; similar logic to the right_odd code path above +loop lup 82 ; +6 Set up 82 PEA instructions, which is 328 pixels and consumes 246 bytes + pea $0000 ; This is 41 8x8 tiles in width. Need to have N+1 tiles for screen overlap + --^ +loop_back jmp loop-base ; +252 Ensure execution continues to loop around +loop_exit_3 jmp even_exit-base ; +255 -left_odd bit #$000B - beq l_is_pea +odd_exit lda #0000 ; This operand field is *always* used to hold the original 2 bytes of the code field + ; that are replaced by the needed BRA instruction to exit the code field. 
When the + ; left edge is odd-aligned, we are able to immediately load the value and perform + ; similar logic to the right_odd code path above - bit #$0040 - bne l_is_jmp +left_odd bit #$000B + beq l_is_pea -long_4 stal *+4-base - dfb $00,$00 -l_is_pea xba - sep #$30 - pha - rep #$30 - bra even_exit -l_is_jmp sep #$01 ; Set the C flag (V is always cleared at this point) which tells a snippet to push only the high byte -long_5 ldal entry_jmp+1-base -long_6 stal *+5-base - dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 13 cycles, is there a better way?) + bit #$0040 + bne l_is_jmp + +long_4 stal *+4-base + dfb $00,$00 +l_is_pea xba + sep #$30 + pha + rep #$30 + bra even_exit +l_is_jmp sep #$01 ; Set the C flag (V is always cleared at this point) which tells a snippet to push only the high byte +long_5 ldal entry_jmp+1-base +long_6 stal *+5-base + dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 13 cycles, is there a better way?) ; JMP opcode = $4C, JML opcode = $5C -even_exit jmp $1000 ; Jump to the next line. - ds 1 ; space so that the last line in a bank can be patched into a JML -full_return jml blt_return ; Full exit +even_exit jmp $1000 ; Jump to the next line. + ds 1 ; space so that the last line in a bank can be patched into a JML ; Special epilogue: skip a number of bytes and jump back into the code field. This is useful for ; large, floating panels in the attract mode of a game, or to overlay solid -; dialog. +; dialog while still animating the play field -epilogue_1 tsc - sec - sbc #0 - tcs - jmp $0000 ; This jumps back into the code field -:out jmp $0000 ; This jumps to the next epilogue chain element - ds 1 +epilogue_1 tsc + sec + sbc #0 + tcs + jmp $0000 ; This jumps back into the code field +:out jmp $0000 ; This jumps to the next epilogue chain element + ds 1 ; Special epilogue: re-enable interrupts. 
Used every 8 or 16 lines to allow music to continue playing -epilogue_2 ldal stk_save ; restore the stack - tcs - sep #$20 ; 8-bit mode - ldal STATE_REG ; Read Bank 0 / Write Bank 0 - and #$CF - stal STATE_REG - cli - nop ; Give a couple of cycles - sei - ldal STATE_REG - ora #$10 ; Read Bank 0 / Write Bank 1 - stal STATE_REG - rep #$20 - jmp $0000 - ds 1 +epilogue_2 ldal stk_save ; restore the stack + tcs + sep #$20 ; 8-bit mode + ldal STATE_REG ; Read Bank 0 / Write Bank 0 + and #$CF + stal STATE_REG + cli + nop ; Give a couple of cycles + sei + ldal STATE_REG + ora #$10 ; Read Bank 0 / Write Bank 1 + stal STATE_REG + rep #$20 + jmp $0000 + ds 1 ; These are the special code snippets -- there is a 1:1 relationship between each snippet space ; and a 3-byte entry in the code field. Thus, each snippet has a hard-coded JMP to return to @@ -1037,3 +1012,8 @@ top + + + + + diff --git a/src/blitter/Tiles.s b/src/blitter/Tiles.s new file mode 100644 index 0000000..2364486 --- /dev/null +++ b/src/blitter/Tiles.s @@ -0,0 +1,94 @@ +; Collection of functions that deal with tiles. Primarily rendering tile data into +; the code fields. +; +; Tile data can be done fairly often, so these routines are performance-sensitive. +; +; CopyTileConst -- the first 16 tile numbers are reserved and can be used +; to draw a solid tile block +; CopyTileLinear -- copies the tile data from the tile bank in linear order, e.g. 
+; 32 consecutive bytes are copied + + +; CopyTile +; +; Copy a tile into one of the code banks: IDs 0-15 fill a solid pattern word, IDs 16-1023 copy 32 bytes from tiledata +; +; B = bank of the code field +; A = Tile ID (0 - 1023) +; Y = Base Address in the code field + +CopyTile cmp #$0010 + bcc :FillWord + cmp #$0400 + bcc :CopyTileMem ; NOTE(review): :CopyTileMem is declared after the global label CopyTileConst -- confirm the local label resolves from here + rts ; Tile number is too large + +:TilePatterns dw $0000,$1111,$2222,$3333 + dw $4444,$5555,$6666,$7777 + dw $8888,$9999,$AAAA,$BBBB + dw $CCCC,$DDDD,$EEEE,$FFFF + +:FillWord asl ; X = Tile ID * 2 (word index into :TilePatterns) + tax + ldal :TilePatterns,x + +CopyTileConst sta: $0000,y + sta: $0003,y + sta $1000,y + sta $1003,y + sta $2000,y + sta $2003,y + sta $3000,y + sta $3003,y + sta $4000,y + sta $4003,y + sta $5000,y + sta $5003,y + sta $6000,y + sta $6003,y + sta $7000,y + sta $7003,y + rts + +:CopyTileMem asl ; X = Tile ID * 32 (byte offset into tiledata) + asl + asl + asl + asl + tax + +CopyTileLinear ldal tiledata+0,x + sta: $0000,y + ldal tiledata+2,x + sta: $0003,y + ldal tiledata+4,x + sta $1000,y + ldal tiledata+6,x + sta $1003,y + ldal tiledata+8,x + sta $2000,y + ldal tiledata+10,x + sta $2003,y + ldal tiledata+12,x + sta $3000,y + ldal tiledata+14,x + sta $3003,y + ldal tiledata+16,x + sta $4000,y + ldal tiledata+18,x + sta $4003,y + ldal tiledata+20,x + sta $5000,y + ldal tiledata+22,x + sta $5003,y + ldal tiledata+24,x + sta $6000,y + ldal tiledata+26,x + sta $6003,y + ldal tiledata+28,x + sta $7000,y + ldal tiledata+30,x + sta $7003,y + rts + + diff --git a/src/blitter/Vert.s b/src/blitter/Vert.s new file mode 100644 index 0000000..d54c773 --- /dev/null +++ b/src/blitter/Vert.s @@ -0,0 +1,25 @@ +; Subroutines that deal with the vertical scrolling and rendering. The primary function +; of these routines is to adjust tables and patch in new values into the code field +; when the virtual Y-position of the play field changes. + + +; SetBG0YPos +; +; Set the virtual position of the primary background layer. 
In addition to +; updating the direct page state locations, this routine sets DIRTY_BIT_BG0_Y in DirtyBits when the position changes +SetBG0YPos + cmp StartY ; A = new Y position; nothing to do if it is unchanged + beq :nochange + sta StartY ; Save the position + lda #DIRTY_BIT_BG0_Y ; Mark that it has changed + tsb DirtyBits +:nochange + rts + +; Based on the current value of StartY in the direct page, set up the dispatch +; information so that the BltDispatch driver will render the correct code field +; lines in the correct order +_ApplyBG0YPos + + +