diff --git a/test/App.Main.s b/test/App.Main.s index 9c0b34d..6833676 100644 --- a/test/App.Main.s +++ b/test/App.Main.s @@ -12,8 +12,9 @@ mx %00 SHADOW_REG equ $E0C035 +STATE_REG equ $E0C068 NEW_VIDEO_REG equ $E0C029 -BORDER_REG equ $E0C034 ; 0-3 = border 4-7 Text color +BORDER_REG equ $E0C034 ; 0-3 = border 4-7 Text color VBL_VERT_REG equ $E0C02E VBL_HORZ_REG equ $E0C02F @@ -21,6 +22,9 @@ KBD_REG equ $E0C000 KBD_STROBE_REG equ $E0C010 VBL_STATE_REG equ $E0C019 +SHR_SCREEN equ $E12000 +SHR_SCB equ $E19D00 + ; Typical init phk @@ -28,48 +32,46 @@ VBL_STATE_REG equ $E0C019 ; Tool startup - _TLStartUp ; normal tool initialization + _TLStartUp ; normal tool initialization pha _MMStartUp - _Err ; should never happen + _Err ; should never happen pla - sta MasterId ; our master handle references the memory allocated to us - ora #$0100 ; set auxID = $01 (valid values $01-0f) - sta UserId ; any memory we request must use our own id + sta MasterId ; our master handle references the memory allocated to us + ora #$0100 ; set auxID = $01 (valid values $01-0f) + sta UserId ; any memory we request must use our own id _MTStartUp ; Install interrupt handlers PushLong #0 - pea $0015 ; Get the existing 1-second interrupt handler and save + pea $0015 ; Get the existing 1-second interrupt handler and save _GetVector PullLong OldOneSecVec - pea $0015 ; Set the new handler and enable interrupts + pea $0015 ; Set the new handler and enable interrupts PushLong #OneSecHandler _SetVector pea $0006 _IntSource - PushLong #VBLTASK ; Also register a Heart Beat Task + PushLong #VBLTASK ; Also register a Heart Beat Task _SetHeartBeat ; Start up the graphics engine... jsr MemInit + jsr GrafInit - lda BlitBuff+2 ; Fill in this bank + lda BlitBuff+2 ; Fill in this bank jsr BuildBank ; Load a picture and copy it into Bank $E1. Then turn on the screen. 
- jsr AllocOneBank ; Alloc 64KB for Load/Unpack - sta BankLoad ; Store "Bank Pointer" - - jsr GrafOn - + jsr AllocOneBank ; Alloc 64KB for Load/Unpack + sta BankLoad ; Store "Bank Pointer" EvtLoop jsr WaitForKey cmp #'q' @@ -79,8 +81,72 @@ EvtLoop bne :2 brl DoLoadPic :2 cmp #'m' - beq DoMessage - bra EvtLoop + bne :3 + brl DoMessage +:3 cmp #'f' ; render a 'f'rame + bne :4 + brl DoFrame +:4 bra EvtLoop + +; Set up the code field and render it +DoFrame + +; This sets up the environment for calling the blitter. The blitter code takes care of moving from +; line to line and should be set up ahead of time with appropriate epilogues for lines to periodically +; enable interrupts and other stuff. In short, we call into the code once and, when it returns, all of +; the lines set up to render will be finished. + + tsc ; save the stack pointer + sta stk_save+1 ; save a cycle by storing while bank is set + + ldx #80*2 ; This is the word to exit from + ldy Tile2CodeOffset,x ; Get the offset + + lda BlitBuff+1 ; set the data bank to the code field + sta blt_entry+2 ; Patch into the long jump + pha + plb + plb + + ldal CodeFieldEvenBRA,x ; Get the value to place there + ldx #16*2 + jsr SetConst + + jsr SetNextLine ; Link the lines together + + lda #{$2000+159+15*160} ; Set the stack address to the right edge of the screen + ldy #0 + ldx #16*2 + jsr SetScreenAddrs + + sep #$20 ; only need to do an 8-bit store + lda #$06 ; This is the entry address to start drawing + ldy #CODE_ENTRY ; don't actually need to set these again + ldx #16*2 + jsr SetConst + rep #$30 + + ldy #$F000 + jsr SetReturn + + sei ; disable interrupts + + ldal STATE_REG + ora #$0010 ; Read Bank 0 / Write Bank 1 + stal STATE_REG + +blt_entry jml $000006 ; Jump into the blitter code $XX/YY06 + +blt_return ldal STATE_REG ; Read Bank 0 / Write Bank 0 + and #$FFCF + stal STATE_REG +stk_save lda #0000 ; load the saved stack pointer (operand patched via stk_save+1 above) + tcs + cli ; re-enable interrupts + + phk ; restore data bank + plb + jmp EvtLoop HexToChar dfb
'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F' DoMessage @@ -144,33 +210,33 @@ DoMessage DoLoadPic lda BankLoad - ldx #ImageName ; Load+Unpack Boot Picture - jsr LoadPicture ; X=Name, A=Bank to use for loading + ldx #ImageName ; Load+Unpack Boot Picture + jsr LoadPicture ; X=Name, A=Bank to use for loading - lda BankLoad ; get address of loaded/uncompressed picture + lda BankLoad ; get address of loaded/uncompressed picture clc - adc #$0080 ; skip header? - sta :copySHR+2 ; and store that over the 'ldal' address below - ldx #$7FFE ; copy all image data -:copySHR ldal $000000,x ; load from BankLoad we allocated - stal $E12000,x ; store to SHR screen + adc #$0080 ; skip header? + sta :copySHR+2 ; and store that over the 'ldal' address below + ldx #$7FFE ; copy all image data +:copySHR ldal $000000,x ; load from BankLoad we allocated + stal $E12000,x ; store to SHR screen dex dex bpl :copySHR jmp EvtLoop Exit - pea $0007 ; disable 1-second interrupts + pea $0007 ; disable 1-second interrupts _IntSource - PushLong #VBLTASK ; Remove our heartbeat task + PushLong #VBLTASK ; Remove our heartbeat task _DelHeartBeat pea $0015 - PushLong OldOneSecVec ; Reset the interrupt vector + PushLong OldOneSecVec ; Reset the interrupt vector _SetVector - PushWord UserId ; Deallocate all of our memory + PushWord UserId ; Deallocate all of our memory _DisposeAll _QuitGS qtRec @@ -208,7 +274,7 @@ OneSecHandler mx %11 sep #$20 ldal $E0C032 - and #%10111111 ;clear IRQ source + and #%10111111 ;clear IRQ source stal $E0C032 pla @@ -226,12 +292,10 @@ VBLTASK hex 00000000 ; Graphic screen initialization -GrafInit ldx #$7FFE - lda #0000 -:loop stal $E12000,x - dex - dex - bne :loop +GrafInit lda #$8888 + jsr ClearToColor + jsr GrafOn + jsr ShadowOn rts ; Return the current border color ($0 - $F) in the accumulator @@ -243,14 +307,30 @@ GetBorderColor lda #0000 rts ; Set the border color to the accumulator value. 
-SetBorderColor sep #$20 ; ACC = $X_Y, REG = $W_Z - eorl BORDER_REG ; ACC = $(X^Y)_(Y^Z) - and #$0F ; ACC = $0_(Y^Z) - eorl BORDER_REG ; ACC = $W_(Y^Z^Z) = $W_Y +SetBorderColor sep #$20 ; ACC = $X_Y, REG = $W_Z + eorl BORDER_REG ; ACC = $(X^Y)_(Y^Z) + and #$0F ; ACC = $0_(Y^Z) + eorl BORDER_REG ; ACC = $W_(Y^Z^Z) = $W_Y stal BORDER_REG rep #$20 rts +; Clear to SHR screen to a specific color +ClearToColor ldx #$7D00 ;start at top of pixel data! ($2000-9D00) +:clearloop dex + dex + stal SHR_SCREEN,x ;screen location + bne :clearloop ;loop until we've worked our way down to 0 + rts + +; Initialize the SCB +SetSCBs ldx #$0100 ;set all $100 scbs to A +:scbloop dex + dex + stal SHR_SCB,x + bne :scbloop + rts + ; Turn SHR screen On/Off GrafOn sep #$20 lda #$81 @@ -283,21 +363,21 @@ GetVBL sep #$20 ldal VBL_HORZ_REG asl ldal VBL_VERT_REG - rol ; put V5 into carry bit, if needed. See TN #39 for details. + rol ; put V5 into carry bit, if needed. See TN #39 for details. rep #$20 and #$00FF rts WaitForVBL sep #$20 -:wait1 ldal VBL_STATE_REG ; If we are already in VBL, then wait +:wait1 ldal VBL_STATE_REG ; If we are already in VBL, then wait bmi :wait1 :wait2 ldal VBL_STATE_REG - bpl :wait2 ; spin until transition into VBL + bpl :wait2 ; spin until transition into VBL rep #$20 rts WaitForKey sep #$20 - stal KBD_STROBE_REG ; clear the strobe + stal KBD_STROBE_REG ; clear the strobe :WFK ldal KBD_REG bpl :WFK rep #$20 @@ -312,42 +392,42 @@ ClearKeyboardStrobe sep #$20 ; Graphics helpers LoadPicture - jsr LoadFile ; X=Nom Image, A=Banc de chargement XX/00 + jsr LoadFile ; X=Nom Image, A=Banc de chargement XX/00 bcc :loadOK rts :loadOK - jsr UnpackPicture ; A=Packed Size + jsr UnpackPicture ; A=Packed Size rts -UnpackPicture sta UP_PackedSize ; Size of Packed Data - lda #$8000 ; Size of output Data Buffer +UnpackPicture sta UP_PackedSize ; Size of Packed Data + lda #$8000 ; Size of output Data Buffer sta UP_UnPackedSize - lda BankLoad ; Banc de chargement / Decompression - sta 
UP_Packed+1 ; Packed Data + lda BankLoad ; Banc de chargement / Decompression + sta UP_Packed+1 ; Packed Data clc adc #$0080 - stz UP_UnPacked ; On remet a zero car modifie par l'appel + stz UP_UnPacked ; On remet a zero car modifie par l'appel stz UP_UnPacked+2 - sta UP_UnPacked+1 ; Unpacked Data buffer + sta UP_UnPacked+1 ; Unpacked Data buffer - PushWord #0 ; Space for Result : Number of bytes unpacked - PushLong UP_Packed ; Pointer to buffer containing the packed data - PushWord UP_PackedSize ; Size of the Packed Data - PushLong #UP_UnPacked ; Pointer to Pointer to unpacked buffer - PushLong #UP_UnPackedSize ; Pointer to a Word containing size of unpacked data + PushWord #0 ; Space for Result : Number of bytes unpacked + PushLong UP_Packed ; Pointer to buffer containing the packed data + PushWord UP_PackedSize ; Size of the Packed Data + PushLong #UP_UnPacked ; Pointer to Pointer to unpacked buffer + PushLong #UP_UnPackedSize ; Pointer to a Word containing size of unpacked data _UnPackBytes - pla ; Number of byte unpacked + pla ; Number of byte unpacked rts -UP_Packed hex 00000000 ; Address of Packed Data -UP_PackedSize hex 0000 ; Size of Packed Data -UP_UnPacked hex 00000000 ; Address of Unpacked Data Buffer (modified) -UP_UnPackedSize hex 0000 ; Size of Unpacked Data Buffer (modified) +UP_Packed hex 00000000 ; Address of Packed Data +UP_PackedSize hex 0000 ; Size of Packed Data +UP_UnPacked hex 00000000 ; Address of Unpacked Data Buffer (modified) +UP_UnPackedSize hex 0000 ; Size of Unpacked Data Buffer (modified) ; Basic I/O function to load files -LoadFile stx openRec+4 ; X=File, A=Bank/Page XX/00 +LoadFile stx openRec+4 ; X=File, A=Bank/Page XX/00 sta readRec+5 :openFile _OpenGS openRec @@ -367,7 +447,7 @@ LoadFile stx openRec+4 ; X=File, A=Bank/Page XX/00 :closeFile _CloseGS closeRec clc - lda eofRec+4 ; File Size + lda eofRec+4 ; File Size rts :openReadErr jsr :closeFile @@ -398,22 +478,22 @@ MasterId ds 2 UserId ds 2 BankLoad hex 0000 -openRec dw 2 ; 
pCount - ds 2 ; refNum - adrl ImageName ; pathname +openRec dw 2 ; pCount + ds 2 ; refNum + adrl ImageName ; pathname -eofRec dw 2 ; pCount - ds 2 ; refNum - ds 4 ; eof +eofRec dw 2 ; pCount + ds 2 ; refNum + ds 4 ; eof -readRec dw 4 ; pCount - ds 2 ; refNum - ds 4 ; dataBuffer - ds 4 ; requestCount - ds 4 ; transferCount +readRec dw 4 ; pCount + ds 2 ; refNum + ds 4 ; dataBuffer + ds 4 ; requestCount + ds 4 ; transferCount -closeRec dw 1 ; pCount - ds 2 ; refNum +closeRec dw 1 ; pCount + ds 2 ; refNum qtRec adrl $0000 da $00 @@ -423,22 +503,3 @@ qtRec adrl $0000 put blitter/Template.s put blitter/Tables.s - lda #BG1_ADDR - - - - - - - - - - - - - - - - - - diff --git a/test/blitter/Tables.s b/test/blitter/Tables.s index 5e38d9f..8f5024d 100644 --- a/test/blitter/Tables.s +++ b/test/blitter/Tables.s @@ -17,10 +17,180 @@ ; This table is necessary, because due to the data being draw via stack instructions, the ; tile order is reversed. -PER_TILE_SIZE equ 6 -]step equ 0 -Tile2CodeOffset lup 41 - dw CODE_TOP+{]step*PER_TILE_SIZE} -]step equ ]step+1 - --^ +PER_TILE_SIZE equ 3 +]step equ 0 +Tile2CodeOffset lup 82 + dw CODE_TOP+{]step*PER_TILE_SIZE} +]step equ ]step+1 + --^ +; Table of BRA instructions that are used to exit the code field. Separate tables for +; even and odd aligned cases. +; +; The even exit point is closest to the code field. 
The odd exit point is 3 bytes further +CodeFieldEvenBRA + bra *-3 ; 0 + bra *-6 ; 1 + bra *-9 ; 2 + bra *-12 ; 3 + bra *-15 ; 4 + bra *-18 ; 5 + bra *-21 ; 6 + bra *-24 ; 7 + bra *-27 ; 8 + bra *-30 ; 9 + bra *-33 ; 10 + bra *-36 ; 11 + bra *-39 ; 12 + bra *-42 ; 13 + bra *-45 ; 14 + bra *-48 ; 15 + bra *-51 ; 16 + bra *-54 ; 17 + bra *-57 ; 18 + bra *-60 ; 19 + bra *-63 ; 20 + bra *-66 ; 21 + bra *-69 ; 22 + bra *-72 ; 23 + bra *-75 ; 24 + bra *-78 ; 25 + bra *-81 ; 26 + bra *-84 ; 27 + bra *-87 ; 28 + bra *-90 ; 29 + bra *-93 ; 30 + bra *-96 ; 31 + bra *-99 ; 32 + bra *-102 ; 33 + bra *-105 ; 34 + bra *-108 ; 35 + bra *-111 ; 36 + bra *-114 ; 37 + bra *-117 ; 38 + bra *-120 ; 39 + bra *-123 ; 40 + bra *+126 ; 41 + bra *+123 ; 42 + bra *+120 ; 43 + bra *+117 ; 44 + bra *+114 ; 45 + bra *+111 ; 46 + bra *+108 ; 47 + bra *+105 ; 48 + bra *+102 ; 49 + bra *+99 ; 50 + bra *+96 ; 51 + bra *+93 ; 52 + bra *+90 ; 53 + bra *+87 ; 54 + bra *+84 ; 55 + bra *+81 ; 56 + bra *+78 ; 57 + bra *+75 ; 58 + bra *+72 ; 59 + bra *+69 ; 60 + bra *+66 ; 61 + bra *+63 ; 62 + bra *+60 ; 63 + bra *+57 ; 64 + bra *+54 ; 65 + bra *+51 ; 66 + bra *+48 ; 67 + bra *+45 ; 68 + bra *+42 ; 69 + bra *+39 ; 70 + bra *+36 ; 71 + bra *+33 ; 72 + bra *+30 ; 73 + bra *+27 ; 74 + bra *+24 ; 75 + bra *+21 ; 76 + bra *+18 ; 77 + bra *+15 ; 78 + bra *+12 ; 79 + bra *+9 ; 80 + bra *+6 ; 81 -- need to skip over the JMP loop that passed control back +CodeFieldOddBRA + bra *-6 ; 0 -- branch back 6 to skip the JMP even path + bra *-9 ; 1 + bra *-12 ; 2 + bra *-15 ; 3 + bra *-18 ; 4 + bra *-21 ; 5 + bra *-24 ; 6 + bra *-27 ; 7 + bra *-30 ; 8 + bra *-33 ; 9 + bra *-36 ; 10 + bra *-39 ; 11 + bra *-42 ; 12 + bra *-45 ; 13 + bra *-48 ; 14 + bra *-51 ; 15 + bra *-54 ; 16 + bra *-57 ; 17 + bra *-60 ; 18 + bra *-63 ; 19 + bra *-66 ; 20 + bra *-69 ; 21 + bra *-72 ; 22 + bra *-75 ; 23 + bra *-78 ; 24 + bra *-81 ; 25 + bra *-84 ; 26 + bra *-87 ; 27 + bra *-90 ; 28 + bra *-93 ; 29 + bra *-96 ; 30 + bra *-99 ; 31 + bra 
*-102 ; 32 + bra *-105 ; 33 + bra *-108 ; 34 + bra *-111 ; 35 + bra *-114 ; 36 + bra *-117 ; 37 + bra *-120 ; 38 + bra *-123 ; 39 + bra *-126 ; 40 + bra *+129 ; 41 + bra *+126 ; 42 + bra *+123 ; 43 + bra *+120 ; 44 + bra *+117 ; 45 + bra *+114 ; 46 + bra *+111 ; 47 + bra *+108 ; 48 + bra *+105 ; 49 + bra *+102 ; 50 + bra *+99 ; 51 + bra *+96 ; 52 + bra *+93 ; 53 + bra *+90 ; 54 + bra *+87 ; 55 + bra *+84 ; 56 + bra *+81 ; 57 + bra *+78 ; 58 + bra *+75 ; 59 + bra *+72 ; 60 + bra *+69 ; 61 + bra *+66 ; 62 + bra *+63 ; 63 + bra *+60 ; 64 + bra *+57 ; 65 + bra *+54 ; 66 + bra *+51 ; 67 + bra *+48 ; 68 + bra *+45 ; 69 + bra *+42 ; 70 + bra *+39 ; 71 + bra *+36 ; 72 + bra *+33 ; 73 + bra *+30 ; 74 + bra *+27 ; 75 + bra *+24 ; 76 + bra *+21 ; 77 + bra *+18 ; 78 + bra *+15 ; 79 + bra *+12 ; 80 + bra *+9 ; 81 -- need to skip over two JMP instructions diff --git a/test/blitter/Template.s b/test/blitter/Template.s index 5cb3713..88c64e6 100644 --- a/test/blitter/Template.s +++ b/test/blitter/Template.s @@ -1,51 +1,206 @@ ; Template and equates for GTE blitter - mx %00 + mx %00 -DP_ADDR equ entry_1-base+1 -BG1_ADDR equ entry_2-base+1 -STK_ADDR equ entry_3-base+1 +DP_ADDR equ entry_1-base+1 +BG1_ADDR equ entry_2-base+1 +STK_ADDR equ entry_3-base+1 -CODE_TOP equ loop-base -CODE_LEN equ top-base +CODE_ENTRY equ entry_jmp-base+1 ; low byte of the page-aligned jump address +CODE_TOP equ loop-base +CODE_LEN equ top-base +CODE_EXIT equ even_exit-base ; Locations that need the page offset added -PagePatches da {long_0-base+2} - da {long_1-base+2} - da {long_2-base+2} - da {long_3-base+2} - da {long_4-base+2} - da {long_5-base+2} - da {long_6-base+2} - da {odd_entry-base+2} - da {loop_exit_1-base+2} - da {loop_exit_2-base+2} - da {loop_back-base+2} - da {loop_exit_3-base+2} -PagePatchNum equ *-PagePatches +PagePatches da {long_0-base+2} + da {long_1-base+2} + da {long_2-base+2} + da {long_3-base+2} + da {long_4-base+2} + da {long_5-base+2} + da {long_6-base+2} + da {odd_entry-base+2} +
da {loop_exit_1-base+2} + da {loop_exit_2-base+2} + da {loop_back-base+2} + da {loop_exit_3-base+2} + da {even_exit-base+2} +PagePatchNum equ *-PagePatches -BankPatches da {long_0-base+3} - da {long_1-base+3} - da {long_2-base+3} - da {long_3-base+3} - da {long_4-base+3} - da {long_5-base+3} - da {long_6-base+3} -BankPatchNum equ *-BankPatches +BankPatches da {long_0-base+3} + da {long_1-base+3} + da {long_2-base+3} + da {long_3-base+3} + da {long_4-base+3} + da {long_5-base+3} + da {long_6-base+3} +BankPatchNum equ *-BankPatches -target equ 0 +; Patch out the final JMP to jump to the long JML return code +; +; Y = starting line * $1000 +SetReturn lda #$0280 ; BRA *+4 + sta CODE_EXIT,y + rts + +ResetReturn lda #$004C ; JMP $XX00 + sta CODE_EXIT,y + rts + +; Fill in the even_exit JMP instruction to jump to the next line (all but last line) +SetNextLine lda #$F000+{entry_3-base} + ldy #CODE_EXIT+1 + ldx #15*2 + jmp SetAbsAddrs + +; Patch an 8-bit or 16-bit value into the bank. These are a set of unrolled loops to +; quickly patch in a constant value, or a value from an array into a given set of +; templates. +; +; Because we have structured everything as parallel code blocks, most updates to the blitter +; reduce to storing a constant value and have an amortized cost of just a single store. +; +; The utility of these routines is that they also handle setting just a range of lines.
+; +; X = number of lines * 2, 0 to 32 +; Y = starting line * $1000 +; A = value +; +; Set M to 0 or 1 +SetConst jmp (:tbl,x) +:tbl da :bottom-00,:bottom-03,:bottom-06,:bottom-09 + da :bottom-12,:bottom-15,:bottom-18,:bottom-21 + da :bottom-24,:bottom-27,:bottom-30,:bottom-33 + da :bottom-36,:bottom-39,:bottom-42,:bottom-45 + da :bottom-48 +:top sta $F000,y + sta $E000,y + sta $D000,y + sta $C000,y + sta $B000,y + sta $A000,y + sta $9000,y + sta $8000,y + sta $7000,y + sta $6000,y + sta $5000,y + sta $4000,y + sta $3000,y + sta $2000,y + sta $1000,y + sta $0000,y +:bottom rts + +; SetScreenAddrs +; +; A = initial screen location (largest) +; Y = starting line * $1000 +; X = number of lines * 2, 0 to 32 (index into the 2-byte jump table) +; +; Automatically decrements address by 160 bytes each line +SetScreenAddrs sec + jmp (:tbl,x) +:tbl da :bottom-00,:bottom-03,:bottom-09,:bottom-15 + da :bottom-21,:bottom-27,:bottom-33,:bottom-39 + da :bottom-45,:bottom-51,:bottom-57,:bottom-63 + da :bottom-69,:bottom-75,:bottom-81,:bottom-87 + da :bottom-93 +:top sta STK_ADDR+$F000,y + sbc #160 + sta STK_ADDR+$E000,y + sbc #160 + sta STK_ADDR+$D000,y + sbc #160 + sta STK_ADDR+$C000,y + sbc #160 + sta STK_ADDR+$B000,y + sbc #160 + sta STK_ADDR+$A000,y + sbc #160 + sta STK_ADDR+$9000,y + sbc #160 + sta STK_ADDR+$8000,y + sbc #160 + sta STK_ADDR+$7000,y + sbc #160 + sta STK_ADDR+$6000,y + sbc #160 + sta STK_ADDR+$5000,y + sbc #160 + sta STK_ADDR+$4000,y + sbc #160 + sta STK_ADDR+$3000,y + sbc #160 + sta STK_ADDR+$2000,y + sbc #160 + sta STK_ADDR+$1000,y + sbc #160 + sta STK_ADDR+$0000,y +:bottom rts + +; SetAbsAddrs +; +; A = absolute address (largest) +; Y = offset +; X = number of lines * 2, 0 to 32 (index into the 2-byte jump table) +; +; Stores a value and decrements by $1000 for each line +SetAbsAddrs sec + jmp (:tbl,x) +:tbl da :bottom-00,:bottom-03,:bottom-09,:bottom-15 + da :bottom-21,:bottom-27,:bottom-33,:bottom-39 + da :bottom-45,:bottom-51,:bottom-57,:bottom-63 + da :bottom-69,:bottom-75,:bottom-81,:bottom-87 + da :bottom-93 +:top sta $F000,y + sbc #$1000 + sta
$E000,y + sbc #$1000 + sta $D000,y + sbc #$1000 + sta $C000,y + sbc #$1000 + sta $B000,y + sbc #$1000 + sta $A000,y + sbc #$1000 + sta $9000,y + sbc #$1000 + sta $8000,y + sbc #$1000 + sta $7000,y + sbc #$1000 + sta $6000,y + sbc #$1000 + sta $5000,y + sbc #$1000 + sta $4000,y + sbc #$1000 + sta $3000,y + sbc #$1000 + sta $2000,y + sbc #$1000 + sta $1000,y + sbc #$1000 + sta $0000,y +:bottom rts + +; Fill up a full bank with blitter templates. Currently we can fit 16 lines per bank, so need +; a total of 13 banks to hold the 208 lines for full-screen support + +target equ 0 BuildBank - stz target - sta target+2 + stz target + sta target+2 :next - jsr BuildLine2 - lda target - clc - adc #$1000 - sta target - bcc :next + jsr BuildLine2 + lda target + clc + adc #$1000 + sta target + bcc :next - rts + rts ; this is a relocation subroutine, it is responsible for copying the template to a ; memory location and patching up the necessary instructions. @@ -53,61 +208,61 @@ BuildBank ; X = low word of address (must be a multiple of $1000) ; A = high word of address (bank) BuildLine - stx target - sta target+2 + stx target + sta target+2 BuildLine2 - lda #CODE_LEN ; round up to an even number of bytes - inc - and #$FFFE - beq :nocopy - dec - dec - tay -:loop lda base,y - sta [target],y + lda #CODE_LEN ; round up to an even number of bytes + inc + and #$FFFE + beq :nocopy + dec + dec + tay +:loop lda base,y + sta [target],y - dey - dey - bpl :loop + dey + dey + bpl :loop -:nocopy lda #0 ; copy is complete, now patch up the addresses - sep #$20 +:nocopy lda #0 ; copy is complete, now patch up the addresses + sep #$20 - ldx #0 - lda target+2 ; patch in the bank for the absolute long addressing mode -:dobank ldy BankPatches,x - sta [target],y - inx - inx - cpx #BankPatchNum - bcc :dobank + ldx #0 + lda target+2 ; patch in the bank for the absolute long addressing mode +:dobank ldy BankPatches,x + sta [target],y + inx + inx + cpx #BankPatchNum + bcc :dobank - ldx #0 -:dopage ldy
PagePatches,x ; patch the page addresses by adding the page offset to each - lda [target],y - clc - adc target+1 - sta [target],y - inx - inx - cpx #PagePatchNum - bcc :dopage + ldx #0 +:dopage ldy PagePatches,x ; patch the page addresses by adding the page offset to each + lda [target],y + clc + adc target+1 + sta [target],y + inx + inx + cpx #PagePatchNum + bcc :dopage :out - rep #$20 - rts + rep #$20 + rts ; start of the template code base -entry_1 ldx #0000 -entry_2 ldy #0000 -entry_3 lda #0000 - tcs +entry_1 ldx #0000 +entry_2 ldy #0000 +entry_3 lda #0000 + tcs long_0 -entry_jmp jmp $0100 - dfb $00 ; if the screen is odd-aligned, then the opcode is set to +entry_jmp jmp $0100 + dfb $00 ; if the screen is odd-aligned, then the opcode is set to ; ; $AF to convert to a LDA long instruction. This puts the ; ; first two bytes of the instruction field in the accumulator ; ; and falls through to the next instruction. @@ -116,21 +271,21 @@ entry_jmp jmp $0100 ; ; update the low-byte of the address, the means it takes only ; ; an amortized 4-cycles per line to set the entry pointbra -right_odd bit #$000B ; Check the bottom nibble to quickly identify a PEA instruction - beq r_is_pea ; This costs 6 cycles in the fast-path +right_odd bit #$000B ; Check the bottom nibble to quickly identify a PEA instruction + beq r_is_pea ; This costs 6 cycles in the fast-path - bit #$0040 ; Check bit 6 to distinguish between JMP and all of the LDA variants - bne r_is_jmp + bit #$0040 ; Check bit 6 to distinguish between JMP and all of the LDA variants + bne r_is_jmp -long_1 stal *+4-base - dfb $00,$00 ; this here to avoid needing a BRA instruction back. So the fast-path +long_1 stal *+4-base + dfb $00,$00 ; this here to avoid needing a BRA instruction back. So the fast-path ; ; gets a 1-cycle penalty, but we save 3 cycles here. 
-r_is_pea xba ; fast code for PEA - sep #$30 - pha - rep #$30 -odd_entry jmp $0100 ; unconditionally jump into the "next" instruction in the +r_is_pea xba ; fast code for PEA + sep #$30 + pha + rep #$30 +odd_entry jmp $0100 ; unconditionally jump into the "next" instruction in the ; ; code field. This is OK, even if the entry point was the ; ; last instruction, because there is a JMP at the end of ; ; the code field, so the code will simply jump to that @@ -140,54 +295,79 @@ odd_entry jmp $0100 ; unconditionally jump into the "next" ins ; ; code field is page-aligned, only the low byte needs to ; ; be updated when the scroll position changes -r_is_jmp sep #$41 ; Set the C and V flags which tells a snippet to push only the low byte -long_2 ldal entry_jmp+1-base -long_3 stal *+5-base - dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 16 cycles, is there a better way?) +r_is_jmp sep #$41 ; Set the C and V flags which tells a snippet to push only the low byte +long_2 ldal entry_jmp+1-base +long_3 stal *+5-base + dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 16 cycles, is there a better way?) ; This is the spot that needs to be page-aligned. In addition to simplifying the entry address ; and only needing to update a byte instad of a word, because the code breaks out of the ; code field with a BRA instruction, we keep everything within a page to avoid the 1-cycle ; page-crossing penalty of the branch. - ds 204 -loop_exit_1 jmp odd_exit-base ; +0 Alternate exit point depending on whether the left edge is -loop_exit_2 jmp even_exit-base ; +3 odd-aligned + ds 204 +loop_exit_1 jmp odd_exit-base ; +0 Alternate exit point depending on whether the left edge is +loop_exit_2 jmp even_exit-base ; +3 odd-aligned -loop lup 82 ; +6 Set up 82 PEA instructions, which is 328 pixels and consumes 246 bytes - pea $0000 ; This is 41 8x8 tiles in width. 
Need to have N+1 tiles for screen overlap - --^ -loop_back jmp loop-base ; +252 Ensure execution continues to loop around -loop_exit_3 jmp even_exit-base ; +255 +loop lup 82 ; +6 Set up 82 PEA instructions, which is 328 pixels and consumes 246 bytes + pea $0000 ; This is 41 8x8 tiles in width. Need to have N+1 tiles for screen overlap + --^ +loop_back jmp loop-base ; +252 Ensure execution continues to loop around +loop_exit_3 jmp even_exit-base ; +255 -odd_exit lda #0000 ; This operand field is *always* used to hold the original 2 bytes of the code field +odd_exit lda #0000 ; This operand field is *always* used to hold the original 2 bytes of the code field ; ; that are replaced by the needed BRA instruction to exit the code field. When the ; ; left edge is odd-aligned, we are able to immediately load the value and perform ; ; similar logic to the right_odd code path above -left_odd bit #$000B - beq l_is_pea +left_odd bit #$000B + beq l_is_pea - bit #$0040 - bne l_is_jmp + bit #$0040 + bne l_is_jmp -long_4 stal *+4-base - dfb $00,$00 -l_is_pea xba - sep #$30 - pha - rep #$30 - bra even_exit -l_is_jmp sep #$01 ; Set the C flag (V is always cleared at this point) which tells a snippet to push only the high byte -long_5 ldal entry_jmp+1-base -long_6 stal *+5-base - dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 13 cycles, is there a better way?) +long_4 stal *+4-base + dfb $00,$00 +l_is_pea xba + sep #$30 + pha + rep #$30 + bra even_exit +l_is_jmp sep #$01 ; Set the C flag (V is always cleared at this point) which tells a snippet to push only the high byte +long_5 ldal entry_jmp+1-base +long_6 stal *+5-base + dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 13 cycles, is there a better way?) -even_exit jmp $1000 ; Jump to the next line. We set up the blitter to do 8 or 16 lines at a time -; ; before restoring the machine state and re-enabling interrupts. 
This makes -; ; the blitter interrupt friendly to allow things like music player to continue -; ; to function. -; -; ; When it's time to exit, the next_entry address points to an alternate exit point +; JMP opcode = $4C, JML opcode = $5C +even_exit jmp $1000 ; Jump to the next line. + ds 1 ; space so that the last line in a bank can be patched into a JML +full_return jml blt_return ; Full exit + +; Special epilogue: skip a number of bytes and jump back into the code field. This is useful for +; large, floating panels in the attract mode of a game, or to overlay solid +; dialog. + +epilogue_1 tsc + sec + sbc #0 + tcs + jmp $0000 ; This jumps back into the code field +:out jmp $0000 ; This jumps to the next epilogue chain element + ds 1 + +; Special epilogue: re-enable interrupts. Used every 8 or 16 lines to allow music to continue playing +epilogue_2 ldal STATE_REG ; Read Bank 0 / Write Bank 0 + and #$FFCF + stal STATE_REG + ldal stk_save+1 ; restore the stack: saved SP is the operand of the LDA at stk_save, not its opcode + tcs + cli + nop ; Give a couple of cycles + sei + ldal STATE_REG + ora #$0010 ; Read Bank 0 / Write Bank 1 + stal STATE_REG + jmp $0000 + ds 1 ; These are the special code snippets -- there is a 1:1 relationship between each snippet space ; and a 3-byte entry in the code field. Thus, each snippet has a hard-coded JMP to return to @@ -273,6 +453,9 @@ top + + +