diff --git a/macros/APP.MACS.S b/macros/APP.MACS.S new file mode 100644 index 0000000..8a98133 --- /dev/null +++ b/macros/APP.MACS.S @@ -0,0 +1,32 @@ +; +; Dereference a handle that is on the top of the stack +; +_Deref MAC + phb ; save caller's data bank register + pha ; push high word of handle on stack + plb ; sets B to the bank byte of the pointer + lda |$0002,x ; load the high word of the master pointer + pha ; and save it on the stack + lda |$0000,x ; load the low word of the master pointer + tax ; and return it in X + pla ; restore the high word in A + plb ; pull the handle's high word high byte off the + ; stack + plb ; restore the caller's data bank register + <<< + + +**************************************** +* Basic Error Macro * +**************************************** +_Err mac + bcc NoErr + do ]0 ; (DO if true) + jsr PgmDeath ; this is conditionally compiled if + str ]1 ; we pass in an error statement + else ; (ELSE) + jmp PgmDeath0 ; we just call the simpler error handler + fin ; (FIN) +NoErr eom + + diff --git a/package.json b/package.json index 0cb033b..22534a4 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "description": "A game engine for the Applie IIgs written in 65816 assembly language", "main": "index.js", "config": { - "merlin32": "C:\\Programs\\IIgsXDev\\bin\\Merlin32.exe", + "merlin32": "C:\\Programs\\IIgsXDev\\bin\\Merlin32-1.1.9.exe", "cadius": "C:\\Programs\\IIgsXDev\\bin\\Cadius.exe", "gsport": "C:\\Programs\\gsport\\gsport_0.31\\GSPort.exe", "macros": "C:\\Programs\\BrutalDeluxe\\Merlin32\\Library" diff --git a/src/GTE.Inline.s b/src/GTE.Inline.s new file mode 100644 index 0000000..52ff8ec --- /dev/null +++ b/src/GTE.Inline.s @@ -0,0 +1,132 @@ +; Template and utility function for a single line of the GTE blitter. See the other GTE.Line.s file +; for details on the general structure of this template. +; +; This is a variant that places the snippets inline within the code field. 
We give up +; the speed of three-byte code sequences, but eliminate the double JMP and simplify the +; handling of odd-alignment. +; +; This mode is best when the scenes are always complicated. +; +; Odd: + MX %00 +entry_1 ldx #0000 ; patch with the address of the direct page tiles. Fixed. +entry_2 ldy #0000 ; patch with the address of the line in the second layer. Set when BG1 scroll position changes. +entry_3 lda #0000 ; patch with the address of the right edge of the line. Set when origin position changes. + tcs + +entry_jmp jmp $2000 ; always jump into the same location. For odd line, the end +; ; of the snippet will be patched to handle the right-edge case. + +right_odd sep #$20 ; enter here from the code field + pha + rep #$20 + jmp $2000 ; jump back into the code field + + + jmp odd_exit + jmp even_exit + +; Code field, each block is N bytes + +loop + lda #1234 ; PEA $0000 becomes LDA #0000 / BRA / PHA + bra l0 + + jmp exit ; 'normal' exit point + jmp left_odd ; handler for pushing a single byte to the left edge + jmp right_odd ; handler for pushing a single byte to the right edge +l0 pha ; always end with a PHA, this is the patch point + + lda (00),y ; + and #MASK + ora #data + bra l1 + + jmp exit ; 'normal' exit point + jmp left_odd ; handler for pushing a single byte to the left edge + jmp right_odd ; handler for pushing a single byte to the right edge +l0 pha ; always end with a PHA, this is the patch point + + + + ... + jmp loop + + + jmp even_exit + + +left_odd sep #$20 + xba + pha + rep #$20 + +exit jmp $0000 ; Jump to the next line. We set up the blitter to do 8 or 16 lines at a time +; ; before restoring the machine state and re-enabling interrupts. This makes +; ; the blitter interrupt friendly to allow things like music player to continue +; ; to function. 
+; +; ; When it's time to exit, the next_entry address points to an alternate exit point + +; These are the special code snippets -- there is a 1:1 relationship between each snippet space +; and a 3-byte entry in the code field. Thus, each snippet has a hard-coded JMP to return to +; the next code field location +; +; The snippet is required to handle the odd-alignment in-line; there is no facility for +; patching or intercepting these values due to their complexity. The only requirements +; are: +; +; 1. Carry Clear -> 16-bit write and return to the next code field operand +; 2. Carry Set +; a. Overflow set -> Low 8-bit write and return to the next code field operand +; b. Overflow clear -> High 8-bit write and exit the line +; c. Always clear the Carry flags. It's actually OK to leave the overflow bit in +; its passed state, because having the carry bit clear prevent evaluation of +; the V bit. +; +; Snippet Samples: +; +; Standard Two-level Mix (27 bytes) +; +; Optimal = 18 cycles (LDA/AND/ORA/PHA) +; 16-bit write = 23 cycles +; 8-bit low = 35 cycles +; 8-bit high = 36 cycles +; +; start lda (00),y +; and #MASK +; ora #DATA ; 14 cycles to load the data +; bcs 8_bit +; pha +; out jmp next ; Fast-path completes in 9 additional cycles + +; 8_bit sep #$30 ; Switch to 8 bit mode +; bvs r_edge ; Need to switch if doing the left edge +; xba +; r_edge pha ; push the value +; rep #$31 ; put back into 16-bit mode and clear the carry bit, as required +; bvs out ; jmp out and continue if this is the right edge +; jmp even_exit ; exit the line otherwise +; ; +; ; The slow paths have 21 and 22 cycles for the right and left +; ; odd-aligned cases respectively. 
+ +snippets ds 32*82 + + + + + + + + + + + + + + + + + + diff --git a/src/GTE.Line.s b/src/GTE.Line.s index ae9e3ca..9c0eb45 100644 --- a/src/GTE.Line.s +++ b/src/GTE.Line.s @@ -221,3 +221,4 @@ snippets ds 32*82 + diff --git a/test/App.Init.s b/test/App.Init.s index c4651b8..bc93fb8 100644 --- a/test/App.Init.s +++ b/test/App.Init.s @@ -14,49 +14,93 @@ ; - 1 page for pointer to the second background ; - 8 pages for the dynamic tiles - mx %00 + mx %00 -MemInit PushLong #0 ; space for result - PushLong #$008000 ; size (32k) - PushWord UserId - PushWord #%11000000_00010111 ; Fixed location - PushLong #$002000 - _NewHandle ; returns LONG Handle on stack - plx ; base address of the new handle - pla ; high address 00XX of the new handle (bank) - _Deref - sta Buff00+2 - stx Buff00 +MemInit PushLong #0 ; space for result + PushLong #$008000 ; size (32k) + PushWord UserId + PushWord #%11000000_00010111 ; Fixed location + PushLong #$002000 + _NewHandle ; returns LONG Handle on stack + plx ; base address of the new handle + pla ; high address 00XX of the new handle (bank) + _Deref + stx Buff00 + sta Buff00+2 - PushLong #0 ; space for result - PushLong #$008000 ; size (32k) - PushWord UserId - PushWord #%11000000_00010111 ; Fixed location - PushLong #$012000 - _NewHandle ; returns LONG Handle on stack - plx ; base address of the new handle - pla ; high address 00XX of the new handle (bank) - _Deref - sta Buff01+2 - stx Buff01 + PushLong #0 ; space for result + PushLong #$008000 ; size (32k) + PushWord UserId + PushWord #%11000000_00010111 ; Fixed location + PushLong #$012000 + _NewHandle ; returns LONG Handle on stack + plx ; base address of the new handle + pla ; high address 00XX of the new handle (bank) + _Deref + stx Buff01 + sta Buff01+2 + + PushLong #0 ; space for result + PushLong #$000A00 ; size (10 pages) + PushWord UserId + PushWord #%11000000_00010101 ; Page-aligned, fixed bank + PushLong #$000000 + _NewHandle ; returns LONG Handle on stack + plx ; base address of 
the new handle + pla ; high address 00XX of the new handle (bank) + _Deref + stx ZeroPage + sta ZeroPage+2 + + PushLong #0 + PushLong #$10000 + PushWord UserId + PushWord #%11000000_00011100 + PushLong #0 + _NewHandle + plx ; base address of the new handle + pla ; high address 00XX of the new handle (bank) + _Deref + stx BlitBuff + sta BlitBuff+2 + + rts + +Buff00 ds 4 +Buff01 ds 4 +ZeroPage ds 4 +BlitBuff ds 4 + +; Bank allocator (for one full, fixed bank of memory. Can be immediately deferenced) + +AllocOneBank PushLong #0 + PushLong #$10000 + PushWord UserId + PushWord #%11000000_00011100 + PushLong #0 + _NewHandle ; returns LONG Handle on stack + plx ; base address of the new handle + pla ; high address 00XX of the new handle (bank) + xba ; swap accumulator bytes to XX00 + sta :bank+2 ; store as bank for next op (overwrite $XX00) +:bank ldal $000001,X ; recover the bank address in A=XX/00 + rts + +; Set up the interrupts +; +; oldOneVect = GetVector( oneSecHnd ); +; SetVector( oneSecHnd, (Pointer) ONEHANDLER ); +; IntSource( oSecEnable ); +; SetHeartBeat( VBLTASK ); +IntInit rts + + +; IntSource( oSecDisable ); /* disable one second interrupts */ +; SetVector( oneSecHnd, oldOneVect ); /* reset to the old handler */ +ShutDown rts - PushLong #0 ; space for result - PushLong #$000A00 ; size (10 pages) - PushWord UserId - PushWord #%11000000_00010101 ; Page-aligned, fixed bank - PushLong #$000000 - _NewHandle ; returns LONG Handle on stack - plx ; base address of the new handle - pla ; high address 00XX of the new handle (bank) - _Deref - sta ZeroPage+2 - stx ZeroPage - rts -Buff00 ds 4 -Buff01 ds 4 -ZeroPage ds 4 diff --git a/test/App.Main.s b/test/App.Main.s index 04d2bcc..9c0b34d 100644 --- a/test/App.Main.s +++ b/test/App.Main.s @@ -1,263 +1,429 @@ ; Test program for graphics stufff... 
- rel + rel - use Util.Macs.s - use Locator.Macs.s - use Mem.Macs.s - use Misc.Macs.s - put ..\macros\App.Macs.s - put ..\macros\EDS.GSOS.MACS.s + use Util.Macs.s + use Locator.Macs.s + use Mem.Macs.s + use Misc.Macs.s + put ..\macros\App.Macs.s + put ..\macros\EDS.GSOS.MACS.s - mx %00 + mx %00 + +SHADOW_REG equ $E0C035 +NEW_VIDEO_REG equ $E0C029 +BORDER_REG equ $E0C034 ; 0-3 = border 4-7 Text color +VBL_VERT_REG equ $E0C02E +VBL_HORZ_REG equ $E0C02F + +KBD_REG equ $E0C000 +KBD_STROBE_REG equ $E0C010 +VBL_STATE_REG equ $E0C019 ; Typical init - phk - plb + phk + plb ; Tool startup - _TLStartUp ; normal tool initialization - pha - _MMStartUp - _Err ; should never happen - pla - sta MasterId ; our master handle references the memory allocated to us - ora #$0100 ; set auxID = $01 (valid values $01-0f) - sta UserId ; any memory we request must use our own id + _TLStartUp ; normal tool initialization + pha + _MMStartUp + _Err ; should never happen + pla + sta MasterId ; our master handle references the memory allocated to us + ora #$0100 ; set auxID = $01 (valid values $01-0f) + sta UserId ; any memory we request must use our own id + + _MTStartUp + +; Install interrupt handlers + + PushLong #0 + pea $0015 ; Get the existing 1-second interrupt handler and save + _GetVector + PullLong OldOneSecVec + + pea $0015 ; Set the new handler and enable interrupts + PushLong #OneSecHandler + _SetVector + + pea $0006 + _IntSource + + PushLong #VBLTASK ; Also register a Heart Beat Task + _SetHeartBeat ; Start up the graphics engine... - jsr MemInit + jsr MemInit + + lda BlitBuff+2 ; Fill in this bank + jsr BuildBank ; Load a picture and copy it into Bank $E1. Then turn on the screen. 
- jsr AllocOneBank ; Alloc 64KB for Load/Unpack - sta BankLoad ; Store "Bank Pointer" + jsr AllocOneBank ; Alloc 64KB for Load/Unpack + sta BankLoad ; Store "Bank Pointer" - ldx #ImageName ; Load+Unpack Boot Picture - jsr LoadPicture ; X=Name, A=Bank to use for loading + jsr GrafOn - lda BankLoad ; get address of loaded/uncompressed picture - clc - adc #$0080 ; skip header? - sta :copySHR+2 ; and store that over the 'ldal' address below - ldx #$7FFE ; copy all image data -:copySHR ldal $000000,x ; load from BankLoad we allocated - stal $E12000,x ; store to SHR screen - dex - dex - bpl :copySHR +EvtLoop + jsr WaitForKey + cmp #'q' + bne :1 + brl Exit +:1 cmp #'l' + bne :2 + brl DoLoadPic +:2 cmp #'m' + beq DoMessage + bra EvtLoop - jsr GrafOn - jsr WaitForKey +HexToChar dfb '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F' +DoMessage + sep #$20 + ldx #0 + lda BlitBuff+2 + and #$F0 + lsr + lsr + lsr + lsr + tax + lda HexToChar,x + sta Hello+1 -; Deallocate all of our memory - PushWord UserId - _DisposeAll + lda BlitBuff+2 + and #$0F + tax + lda HexToChar,x + sta Hello+2 -Exit _QuitGS qtRec + lda BlitBuff+1 + and #$F0 + lsr + lsr + lsr + lsr + tax + lda HexToChar,x + sta Hello+4 - bcs Fatal -Fatal brk $00 + lda BlitBuff+1 + and #$0F + tax + lda HexToChar,x + sta Hello+5 -WaitForKey sep #$30 -:WFK ldal $00C000 - bpl :WFK - stal $00C010 - rep #$30 - rts + lda BlitBuff + and #$F0 + lsr + lsr + lsr + lsr + tax + lda HexToChar,x + sta Hello+6 + + lda BlitBuff + and #$0F + tax + lda HexToChar,x + sta Hello+7 + + rep #$20 + + lda #Hello + ldx #{60*160+30} + ldy #$7777 + jsr DrawString + jmp EvtLoop + +DoLoadPic + lda BankLoad + ldx #ImageName ; Load+Unpack Boot Picture + jsr LoadPicture ; X=Name, A=Bank to use for loading + + lda BankLoad ; get address of loaded/uncompressed picture + clc + adc #$0080 ; skip header? 
+ sta :copySHR+2 ; and store that over the 'ldal' address below + ldx #$7FFE ; copy all image data +:copySHR ldal $000000,x ; load from BankLoad we allocated + stal $E12000,x ; store to SHR screen + dex + dex + bpl :copySHR + jmp EvtLoop + +Exit + pea $0007 ; disable 1-second interrupts + _IntSource + + PushLong #VBLTASK ; Remove our heartbeat task + _DelHeartBeat + + pea $0015 + PushLong OldOneSecVec ; Reset the interrupt vector + _SetVector + + PushWord UserId ; Deallocate all of our memory + _DisposeAll + + _QuitGS qtRec + + bcs Fatal +Fatal brk $00 + +Hello str '00/0000' **************************************** * Fatal Error Handler * **************************************** -PgmDeath tax - pla - inc - phx - phk - pha - bra ContDeath -PgmDeath0 pha - pea $0000 - pea $0000 -ContDeath ldx #$1503 - jsl $E10000 +PgmDeath tax + pla + inc + phx + phk + pha + bra ContDeath +PgmDeath0 pha + pea $0000 + pea $0000 +ContDeath ldx #$1503 + jsl $E10000 + +; Interrupt handlers. We install a heartbeat (1/60th second) and a 1-second timer +OneSecHandler mx %11 + phb + pha + phk + plb + + rep #$20 + inc OneSecondCounter + sep #$20 + + ldal $E0C032 + and #%10111111 ;clear IRQ source + stal $E0C032 + + pla + plb + clc + rtl + mx %00 +OneSecondCounter dw 0 +OldOneSecVec ds 4 + +VBLTASK hex 00000000 + dw 0 + hex 5AA5 + ; Graphic screen initialization -GrafInit ldx #$7FFE - lda #0000 -:loop stal $E12000,x - dex - dex - bne :loop - rts +GrafInit ldx #$7FFE ; X = offset of the last word of the $8000-byte area at $E12000 + lda #0000 +:loop stal $E12000,x ; zero one word + dex + dex + bpl :loop ; run through offset 0 inclusive; bne exited before the word at $E12000 was cleared + rts +; Return the current border color ($0 - $F) in the accumulator +GetBorderColor lda #0000 + sep #$20 + ldal BORDER_REG + and #$0F + rep #$20 + rts -GrafOn sep #$30 - lda #$81 - stal $00C029 - rep #$30 - rts +; Set the border color to the accumulator value. 
+SetBorderColor sep #$20 ; ACC = $X_Y, REG = $W_Z + eorl BORDER_REG ; ACC = $(X^W)_(Y^Z) + and #$0F ; ACC = $0_(Y^Z) + eorl BORDER_REG ; ACC = $W_(Y^Z^Z) = $W_Y + stal BORDER_REG + rep #$20 + rts -; Bank allocator (for one full, fixed bank of memory. Can be immediately deferenced) +; Turn SHR screen On/Off +GrafOn sep #$20 + lda #$81 + stal NEW_VIDEO_REG + rep #$20 + rts -AllocOneBank PushLong #0 - PushLong #$10000 - PushWord UserId - PushWord #%11000000_00011100 - PushLong #0 - _NewHandle ; returns LONG Handle on stack - plx ; base address of the new handle - pla ; high address 00XX of the new handle (bank) - xba ; swap accumulator bytes to XX00 - sta :bank+2 ; store as bank for next op (overwrite $XX00) -:bank ldal $000001,X ; recover the bank address in A=XX/00 - rts +GrafOff sep #$20 + lda #$01 + stal NEW_VIDEO_REG + rep #$20 + rts + +; Enable/Disable Shadowing. +ShadowOn sep #$20 + ldal SHADOW_REG + and #$F7 + stal SHADOW_REG + rep #$20 + rts + +ShadowOff sep #$20 + ldal SHADOW_REG + ora #$08 + stal SHADOW_REG + rep #$20 + rts + +GetVBL sep #$20 + ldal VBL_HORZ_REG + asl + ldal VBL_VERT_REG + rol ; put V5 into carry bit, if needed. See TN #39 for details. 
+ rep #$20 + and #$00FF + rts + +WaitForVBL sep #$20 +:wait1 ldal VBL_STATE_REG ; If we are already in VBL, then wait + bmi :wait1 +:wait2 ldal VBL_STATE_REG + bpl :wait2 ; spin until transition into VBL + rep #$20 + rts + +WaitForKey sep #$20 + stal KBD_STROBE_REG ; clear the strobe +:WFK ldal KBD_REG + bpl :WFK + rep #$20 + and #$007F + rts + +ClearKeyboardStrobe sep #$20 + stal KBD_STROBE_REG + rep #$20 + rts ; Graphics helpers -LoadPicture jsr LoadFile ; X=Nom Image, A=Banc de chargement XX/00 - bcc :loadOK - brl Exit -:loadOK jsr UnpackPicture ; A=Packed Size - rts +LoadPicture + jsr LoadFile ; X=Nom Image, A=Banc de chargement XX/00 + bcc :loadOK + rts +:loadOK + jsr UnpackPicture ; A=Packed Size + rts -UnpackPicture sta UP_PackedSize ; Size of Packed Data - lda #$8000 ; Size of output Data Buffer - sta UP_UnPackedSize - lda BankLoad ; Banc de chargement / Decompression - sta UP_Packed+1 ; Packed Data - clc - adc #$0080 - stz UP_UnPacked ; On remet a zero car modifie par l'appel - stz UP_UnPacked+2 - sta UP_UnPacked+1 ; Unpacked Data buffer +UnpackPicture sta UP_PackedSize ; Size of Packed Data + lda #$8000 ; Size of output Data Buffer + sta UP_UnPackedSize + lda BankLoad ; Banc de chargement / Decompression + sta UP_Packed+1 ; Packed Data + clc + adc #$0080 + stz UP_UnPacked ; On remet a zero car modifie par l'appel + stz UP_UnPacked+2 + sta UP_UnPacked+1 ; Unpacked Data buffer - PushWord #0 ; Space for Result : Number of bytes unpacked - PushLong UP_Packed ; Pointer to buffer containing the packed data - PushWord UP_PackedSize ; Size of the Packed Data - PushLong #UP_UnPacked ; Pointer to Pointer to unpacked buffer - PushLong #UP_UnPackedSize ; Pointer to a Word containing size of unpacked data - _UnPackBytes - pla ; Number of byte unpacked - rts + PushWord #0 ; Space for Result : Number of bytes unpacked + PushLong UP_Packed ; Pointer to buffer containing the packed data + PushWord UP_PackedSize ; Size of the Packed Data + PushLong #UP_UnPacked ; Pointer 
to Pointer to unpacked buffer + PushLong #UP_UnPackedSize ; Pointer to a Word containing size of unpacked data + _UnPackBytes + pla ; Number of byte unpacked + rts -UP_Packed hex 00000000 ; Address of Packed Data -UP_PackedSize hex 0000 ; Size of Packed Data -UP_UnPacked hex 00000000 ; Address of Unpacked Data Buffer (modified) -UP_UnPackedSize hex 0000 ; Size of Unpacked Data Buffer (modified) +UP_Packed hex 00000000 ; Address of Packed Data +UP_PackedSize hex 0000 ; Size of Packed Data +UP_UnPacked hex 00000000 ; Address of Unpacked Data Buffer (modified) +UP_UnPackedSize hex 0000 ; Size of Unpacked Data Buffer (modified) ; Basic I/O function to load files -LoadFile stx openRec+4 ; X=File, A=Bank/Page XX/00 - sta readRec+5 +LoadFile stx openRec+4 ; X=File, A=Bank/Page XX/00 + sta readRec+5 -:openFile _OpenGS openRec - bcs :openReadErr - lda openRec+2 - sta eofRec+2 - sta readRec+2 +:openFile _OpenGS openRec + bcs :openReadErr + lda openRec+2 + sta eofRec+2 + sta readRec+2 - _GetEOFGS eofRec - lda eofRec+4 - sta readRec+8 - lda eofRec+6 - sta readRec+10 + _GetEOFGS eofRec + lda eofRec+4 + sta readRec+8 + lda eofRec+6 + sta readRec+10 - _ReadGS readRec - bcs :openReadErr + _ReadGS readRec + bcs :openReadErr -:closeFile _CloseGS closeRec - clc - lda eofRec+4 ; File Size - rts +:closeFile _CloseGS closeRec + clc + lda eofRec+4 ; File Size + rts -:openReadErr jsr :closeFile - nop - nop +:openReadErr jsr :closeFile + nop + nop - PushWord #0 - PushLong #msgLine1 - PushLong #msgLine2 - PushLong #msgLine3 - PushLong #msgLine4 - _TLTextMountVolume - pla - cmp #1 - bne :loadFileErr - brl :openFile -:loadFileErr sec - rts + PushWord #0 + PushLong #msgLine1 + PushLong #msgLine2 + PushLong #msgLine3 + PushLong #msgLine4 + _TLTextMountVolume + pla + cmp #1 + bne :loadFileErr + brl :openFile +:loadFileErr sec + rts -msgLine1 str 'Unable to load File' -msgLine2 str 'Press a key :' -msgLine3 str ' -> Return to Try Again' -msgLine4 str ' -> Esc to Quit' +msgLine1 str 'Unable to 
load File' +msgLine2 str 'Press a key :' +msgLine3 str ' -> Return to Try Again' +msgLine4 str ' -> Esc to Quit' ; Data storage -ImageName strl '1/test.pic' -MasterId ds 2 -UserId ds 2 -BankLoad hex 0000 - -openRec dw 2 ; pCount - ds 2 ; refNum - adrl ImageName ; pathname - -eofRec dw 2 ; pCount - ds 2 ; refNum - ds 4 ; eof - -readRec dw 4 ; pCount - ds 2 ; refNum - ds 4 ; dataBuffer - ds 4 ; requestCount - ds 4 ; transferCount - -closeRec dw 1 ; pCount - ds 2 ; refNum - -qtRec adrl $0000 - da $00 - - put App.Init.s - - - - - - - - - - - - - - - - - - - - - - - - - - - +ImageName strl '1/test.pic' +MasterId ds 2 +UserId ds 2 +BankLoad hex 0000 +openRec dw 2 ; pCount + ds 2 ; refNum + adrl ImageName ; pathname +eofRec dw 2 ; pCount + ds 2 ; refNum + ds 4 ; eof +readRec dw 4 ; pCount + ds 2 ; refNum + ds 4 ; dataBuffer + ds 4 ; requestCount + ds 4 ; transferCount +closeRec dw 1 ; pCount + ds 2 ; refNum +qtRec adrl $0000 + da $00 + put App.Init.s + put font.s + put blitter/Template.s + put blitter/Tables.s + + lda #BG1_ADDR diff --git a/test/blitter/Tables.s b/test/blitter/Tables.s new file mode 100644 index 0000000..5e38d9f --- /dev/null +++ b/test/blitter/Tables.s @@ -0,0 +1,26 @@ +; Collection of data tables +; + +; Tile2CodeOffset +; +; Takes a tile number (0 - 40) and returns the offset into the blitter code +; template. +; +; This is used for rendering tile data into the code field. For example, is we assume that +; we are filling in the operans for a bunch of PEA values, we could do this +; +; ldy tileNumber*2 +; lda #DATA +; ldx Tile2CodeOffset,y +; sta $0001,x +; +; This table is necessary, because due to the data being draw via stack instructions, the +; tile order is reversed. 
+ +PER_TILE_SIZE equ 6 +]step equ 0 +Tile2CodeOffset lup 41 + dw CODE_TOP+{]step*PER_TILE_SIZE} +]step equ ]step+1 + --^ + diff --git a/test/blitter/Template.s b/test/blitter/Template.s new file mode 100644 index 0000000..5cb3713 --- /dev/null +++ b/test/blitter/Template.s @@ -0,0 +1,279 @@ +; Template and equates for GTE blitter + + mx %00 + +DP_ADDR equ entry_1-base+1 +BG1_ADDR equ entry_2-base+1 +STK_ADDR equ entry_3-base+1 + +CODE_TOP equ loop-base +CODE_LEN equ top-base + +; Locations that need the page offset added +PagePatches da {long_0-base+2} + da {long_1-base+2} + da {long_2-base+2} + da {long_3-base+2} + da {long_4-base+2} + da {long_5-base+2} + da {long_6-base+2} + da {odd_entry-base+2} + da {loop_exit_1-base+2} + da {loop_exit_2-base+2} + da {loop_back-base+2} + da {loop_exit_3-base+2} +PagePatchNum equ *-PagePatches + +BankPatches da {long_0-base+3} + da {long_1-base+3} + da {long_2-base+3} + da {long_3-base+3} + da {long_4-base+3} + da {long_5-base+3} + da {long_6-base+3} +BankPatchNum equ *-BankPatches + +target equ 0 +BuildBank + stz target + sta target+2 +:next + jsr BuildLine2 + lda target + clc + adc #$1000 + sta target + bcc :next + + rts + +; this is a relocation subroutine, it is responsible for copying the template to a +; memory location and patching up the necessary instructions. 
+; +; X = low word of address (must be a multiple of $1000) +; A = high word of address (bank) +BuildLine + stx target + sta target+2 + +BuildLine2 + lda #CODE_LEN ; round up to an even number of bytes + inc + and #$FFFE + beq :nocopy + dec + dec + tay ; Y = offset of the last word to copy +:loop lda base,y ; copy the template one word at a time, from the end down to offset 0 + sta [target],y + + dey + dey + bpl :loop + +:nocopy lda #0 ; copy is complete, now patch up the addresses + sep #$20 ; the patches below are byte-sized, so use an 8-bit accumulator + + ldx #0 + lda target+2 ; patch in the bank for the absolute long addressing mode +:dobank ldy BankPatches,x + sta [target],y + inx + inx + cpx #BankPatchNum + bcc :dobank + + ldx #0 +:dopage ldy PagePatches,x ; patch the page addresses by adding the page offset to each + lda [target],y + clc + adc target+1 + sta [target],y + inx + inx + cpx #PagePatchNum + bcc :dopage + +:out + rep #$20 + rts + +; start of the template code +base +entry_1 ldx #0000 +entry_2 ldy #0000 +entry_3 lda #0000 + tcs + +long_0 +entry_jmp jmp $0100 + dfb $00 ; if the screen is odd-aligned, then the opcode is set to +; ; $AF to convert to a LDA long instruction. This puts the +; ; first two bytes of the instruction field in the accumulator +; ; and falls through to the next instruction. +; +; ; We structure the line so that the entry point only needs to +; ; update the low-byte of the address, this means it takes only +; ; an amortized 4-cycles per line to set the entry point + +right_odd bit #$000B ; Check the bottom nibble to quickly identify a PEA instruction + beq r_is_pea ; This costs 6 cycles in the fast-path + + bit #$0040 ; Check bit 6 to distinguish between JMP and all of the LDA variants + bne r_is_jmp + +long_1 stal *+4-base + dfb $00,$00 ; this is here to avoid needing a BRA instruction back. So the fast-path +; ; gets a 1-cycle penalty, but we save 3 cycles here. + +r_is_pea xba ; fast code for PEA + sep #$30 + pha + rep #$30 +odd_entry jmp $0100 ; unconditionally jump into the "next" instruction in the +; ; code field. 
This is OK, even if the entry point was the +; ; last instruction, because there is a JMP at the end of +; ; the code field, so the code will simply jump to that +; ; instruction directly. +; ; +; ; As with the original entry point, because all of the +; ; code field is page-aligned, only the low byte needs to +; ; be updated when the scroll position changes + +r_is_jmp sep #$41 ; Set the C and V flags which tells a snippet to push only the low byte +long_2 ldal entry_jmp+1-base +long_3 stal *+5-base + dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 16 cycles, is there a better way?) + +; This is the spot that needs to be page-aligned. In addition to simplifying the entry address +; and only needing to update a byte instead of a word, because the code breaks out of the +; code field with a BRA instruction, we keep everything within a page to avoid the 1-cycle +; page-crossing penalty of the branch. + ds 204 +loop_exit_1 jmp odd_exit-base ; +0 Alternate exit point depending on whether the left edge is +loop_exit_2 jmp even_exit-base ; +3 odd-aligned + +loop lup 82 ; +6 Set up 82 PEA instructions, which is 328 pixels and consumes 246 bytes + pea $0000 ; This is 41 8x8 tiles in width. Need to have N+1 tiles for screen overlap + --^ +loop_back jmp loop-base ; +252 Ensure execution continues to loop around +loop_exit_3 jmp even_exit-base ; +255 + +odd_exit lda #0000 ; This operand field is *always* used to hold the original 2 bytes of the code field +; ; that are replaced by the needed BRA instruction to exit the code field. 
When the +; ; left edge is odd-aligned, we are able to immediately load the value and perform +; ; similar logic to the right_odd code path above + +left_odd bit #$000B + beq l_is_pea + + bit #$0040 + bne l_is_jmp + +long_4 stal *+4-base + dfb $00,$00 +l_is_pea xba + sep #$30 + pha + rep #$30 + bra even_exit +l_is_jmp sep #$01 ; Set the C flag (V is always cleared at this point) which tells a snippet to push only the high byte +long_5 ldal entry_jmp+1-base +long_6 stal *+5-base + dfb $4C,$00,$00 ; Jump back to address in entry_jmp (this takes 13 cycles, is there a better way?) + +even_exit jmp $1000 ; Jump to the next line. We set up the blitter to do 8 or 16 lines at a time +; ; before restoring the machine state and re-enabling interrupts. This makes +; ; the blitter interrupt friendly to allow things like music player to continue +; ; to function. +; +; ; When it's time to exit, the next_entry address points to an alternate exit point + +; These are the special code snippets -- there is a 1:1 relationship between each snippet space +; and a 3-byte entry in the code field. Thus, each snippet has a hard-coded JMP to return to +; the next code field location +; +; The snippet is required to handle the odd-alignment in-line; there is no facility for +; patching or intercepting these values due to their complexity. The only requirements +; are: +; +; 1. Carry Clear -> 16-bit write and return to the next code field operand +; 2. Carry Set +; a. Overflow set -> Low 8-bit write and return to the next code field operand +; b. Overflow clear -> High 8-bit write and exit the line +; c. Always clear the Carry flags. It's actually OK to leave the overflow bit in +; its passed state, because having the carry bit clear prevent evaluation of +; the V bit. 
+; +; Snippet Samples: +; +; Standard Two-level Mix (27 bytes) +; +; Optimal = 18 cycles (LDA/AND/ORA/PHA) +; 16-bit write = 23 cycles +; 8-bit low = 35 cycles +; 8-bit high = 36 cycles +; +; start lda (00),y +; and #MASK +; ora #DATA ; 14 cycles to load the data +; bcs 8_bit +; pha +; out jmp next ; Fast-path completes in 9 additional cycles + +; 8_bit sep #$30 ; Switch to 8 bit mode +; bvs r_edge ; Need to switch if doing the left edge +; xba +; r_edge pha ; push the value +; rep #$31 ; put back into 16-bit mode and clear the carry bit, as required +; bvs out ; jmp out and continue if this is the right edge +; jmp even_exit ; exit the line otherwise +; ; +; ; The slow paths have 21 and 22 cycles for the right and left +; ; odd-aligned cases respectively. + +; snippets ds 32*82 +top + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/font.s b/test/font.s new file mode 100644 index 0000000..de27ebb --- /dev/null +++ b/test/font.s @@ -0,0 +1,645 @@ +**************************************** +* FONT ENGINE (v3?) * +* * +* Dagen Brock * +* 2013-07-20 * +**************************************** +* A= ptr to string preceded by length * +* X= screen location * +**************************************** +; each char: +; draw char at loc +; update loc +; see if length hit - no? back to draw char + rel + mx %00 +]F_Length ds 2 ;length of string (only one byte currently used) +]F_CharIdx ds 2 ;index of current character +]F_CurrentPos ds 2 ;current top left char position +]F_StrPtr equ $00 ;pointer to string (including length byte) / DP +]F_StrClr equ $02 + +DrawString + sta ]F_StrPtr ;store at dp 0 ($00) for indirect loads + stx ]F_CurrentPos + sty ]F_StrClr + stz ]F_CharIdx + lda (]F_StrPtr) + and #$00ff ;strip off first char (len is only one byte) + sta ]F_Length ;get our length byte + +NextChar lda ]F_CharIdx + cmp ]F_Length + bne :notDone + rts ;DONE! 
Return to caller
+
+:notDone     inc   ]F_CharIdx
+             ldy   ]F_CharIdx
+             lda   ($00),y            ;get next char!
+             and   #$00FF             ;mask high byte
+             sec
+             sbc   #' '               ;our table starts with space ' '
+             asl                      ;*2
+             tay
+             ldx   ]F_CurrentPos
+             jsr   :drawChar
+             inc   ]F_CurrentPos      ;compare to addition time (?)
+             inc   ]F_CurrentPos
+             inc   ]F_CurrentPos
+             inc   ]F_CurrentPos      ;update screen pos (2 words=8 pixels)
+             bra   NextChar
+
+;------------------------------------------------------------------------------
+; :drawChar - copy one glyph from FontData to the buffer at $E12000.
+;
+; In:    X = byte offset of the glyph's top-left corner, relative to $E12000
+;        Y = byte offset into FontTable; the caller above passes (char - ' ') * 2
+;        ]F_StrClr = mask ANDed with every glyph word before it is stored
+; Out:   12 words stored: 2 consecutive words on each of 6 rows, with
+;        successive rows 160 bytes apart
+; Uses:  A and Y are overwritten; X is not modified
+;
+; NOTE(review): the loads/stores are treated as 16-bit here; this assumes the
+;        routine is entered with a 16-bit accumulator (the caller's
+;        `and #$00FF` suggests so) - confirm.
+; NOTE(review): FontTable,y and FontData,y are not long-addressed, so they
+;        presumably rely on the data bank register selecting this code's
+;        bank - confirm against the caller.
+; NOTE(review): $E12000 with a 160-byte row stride looks like the IIgs
+;        Super Hi-Res pixel buffer - confirm.
+;------------------------------------------------------------------------------
+;x = TopLeft screen pos
+;y = char table offset
+:drawChar    lda   FontTable,y        ;get real address of char data
+             sec
+             sbc   #FontData          ;pivot offset - now a is offset of fontdata
+             tay                      ;so we'll index with that
+; From here on Y = (glyph address - FontData), so FontData+n,y addresses
+; byte n of the selected glyph.
+
+             lda   FontData,y         ; row 0, word 0
+             and   ]F_StrClr
+             stal  $E12000,x
+
+             lda   FontData+2,y       ; row 0, word 1
+             and   ]F_StrClr
+             stal  $E12000+2,x
+
+             lda   FontData+4,y       ; row 1, word 0
+             and   ]F_StrClr
+             stal  $E12000+160,x
+
+             lda   FontData+6,y       ; row 1, word 1
+             and   ]F_StrClr
+             stal  $E12000+160+2,x
+
+             lda   FontData+8,y       ; row 2, word 0
+             and   ]F_StrClr
+             stal  {$E12000+160*2},x
+
+             lda   FontData+10,y      ; row 2, word 1
+             and   ]F_StrClr
+             stal  {$E12000+160*2+2},x
+
+             lda   FontData+12,y      ; row 3, word 0
+             and   ]F_StrClr
+             stal  {$E12000+160*3},x
+
+             lda   FontData+14,y      ; row 3, word 1
+             and   ]F_StrClr
+             stal  {$E12000+160*3+2},x
+
+             lda   FontData+16,y      ; row 4, word 0
+             and   ]F_StrClr
+             stal  {$E12000+160*4},x
+
+             lda   FontData+18,y      ; row 4, word 1
+             and   ]F_StrClr
+             stal  {$E12000+160*4+2},x
+
+             lda   FontData+20,y      ; row 5, word 0
+             and   ]F_StrClr
+             stal  {$E12000+160*5},x
+
+             lda   FontData+22,y      ; row 5, word 1
+             and   ]F_StrClr
+             stal  {$E12000+160*5+2},x
+             rts
+
+;------------------------------------------------------------------------------
+; FontTable - one 16-bit glyph address per character, 64 entries.
+; Entry order follows ASCII from space ($20) through underscore ($5F);
+; :drawChar indexes it with Y = (char - ' ') * 2.
+; NOTE(review): there are no entries past '_'. A character code of $60 or
+;        above (e.g. lowercase) would index beyond this table into FontData;
+;        no range check is visible in the part of the caller shown above.
+;------------------------------------------------------------------------------
+FontTable    dw    s_Space            ; $20 ' '
+             dw    s_Exclaim          ; $21 !
+             dw    s_Quote            ; $22 "
+             dw    s_Number           ; $23 #
+             dw    s_Dollar           ; $24 $
+             dw    s_Percent          ; $25 %
+             dw    s_Amper            ; $26 &
+             dw    s_Single           ; $27 ' (apostrophe)
+             dw    s_OpenParen        ; $28 (
+             dw    s_CloseParen       ; $29 )
+             dw    s_Asterix          ; $2A *
+             dw    s_Plus             ; $2B +
+             dw    s_Comma            ; $2C ,
+             dw    s_Minus            ; $2D -
+             dw    s_Period           ; $2E .
+             dw    s_Slash            ; $2F /
+             dw    s_N0               ; $30 0
+             dw    s_N1               ; $31 1
+             dw    s_N2               ; $32 2
+             dw    s_N3               ; $33 3
+             dw    s_N4               ; $34 4
+             dw    s_N5               ; $35 5
+             dw    s_N6               ; $36 6
+             dw    s_N7               ; $37 7
+             dw    s_N8               ; $38 8
+             dw    s_N9               ; $39 9
+             dw    s_Colon            ; $3A :
+             dw    s_Semi             ; $3B (semicolon)
+             dw    s_LAngle           ; $3C <
+             dw    s_Equal            ; $3D =
+             dw    s_RAngle           ; $3E >
+             dw    s_Question         ; $3F ?
+             dw    s_At               ; $40 @
+             dw    s_A                ; $41 A
+             dw    s_B                ; $42 B
+             dw    s_C                ; $43 C
+             dw    s_D                ; $44 D
+             dw    s_E                ; $45 E
+             dw    s_F                ; $46 F
+             dw    s_G                ; $47 G
+             dw    s_H                ; $48 H
+             dw    s_I                ; $49 I
+             dw    s_J                ; $4A J
+             dw    s_K                ; $4B K
+             dw    s_L                ; $4C L
+             dw    s_M                ; $4D M
+             dw    s_N                ; $4E N
+             dw    s_O                ; $4F O
+             dw    s_P                ; $50 P
+             dw    s_Q                ; $51 Q
+             dw    s_R                ; $52 R
+             dw    s_S                ; $53 S
+             dw    s_T                ; $54 T
+             dw    s_U                ; $55 U
+             dw    s_V                ; $56 V
+             dw    s_W                ; $57 W
+             dw    s_X                ; $58 X
+             dw    s_Y                ; $59 Y
+             dw    s_Z                ; $5A Z
+             dw    s_LBracket         ; $5B [
+             dw    s_BackSlash        ; $5C \
+             dw    s_RBracket         ; $5D ]
+             dw    s_Carot            ; $5E ^
+             dw    s_UnderLine        ; $5F _
+
+;------------------------------------------------------------------------------
+; FontData - glyph bitmaps, 24 bytes per glyph: 6 rows of 4 bytes.
+; :drawChar reads each glyph as 12 words and stores 2 words per row.
+; Per the caller's comment "2 words=8 pixels", each hex digit is one pixel:
+; F where the glyph is drawn, 0 elsewhere. Every word is ANDed with
+; ]F_StrClr, so F digits pass the mask value through and 0 digits yield 0.
+;------------------------------------------------------------------------------
+FontData     =     *                  ; base address that :drawChar subtracts from each FontTable entry
+s_Space      hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+
+s_Exclaim    hex   000FF000
+             hex   000FF000
+             hex   000FF000
+             hex   000FF000
+             hex   00000000
+             hex   000FF000
+
+s_Quote      hex   0FF00FF0
+             hex   00F000F0
+             hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+
+s_Number     hex   00000000
+             hex   00F00F00
+             hex   0FFFFFF0
+             hex   00F00F00
+             hex   0FFFFFF0
+             hex   00F00F00
+
+s_Dollar     hex   000F0F00
+             hex   00FFFFF0
+             hex   0F0F0F00
+             hex   00FFFF00
+             hex   000F0FF0
+             hex   0FFFFF00
+
+s_Percent    hex   0FF000F0
+             hex   00000F00
+             hex   0000F000
+             hex   000F0000
+             hex   00F00000
+             hex   0F000FF0
+
+s_Amper      hex   000FF000
+             hex   00F00F00
+             hex   0F00F000
+             hex   00F000F0
+             hex   0F0FFF00
+             hex   00F0F000
+
+s_Single     hex   000FF000
+             hex   0000F000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+
+s_OpenParen  hex   000FF000
+             hex   00FF0000
+             hex   0FF00000
+             hex   0FF00000
+             hex   00FF0000
+             hex   000FF000
+
+s_CloseParen hex   000FF000
+             hex   0000FF00
+             hex   00000FF0
+             hex   00000FF0
+             hex   0000FF00
+             hex   000FF000
+
+
+s_Asterix    hex   00000000
+             hex   00F0F0F0
+             hex   000FFF00
+             hex   00FFFFF0
+             hex   000FFF00
+             hex   00F0F0F0
+
+s_Plus       hex   000F0000
+             hex   000F0000
+             hex   0FFFFF00
+             hex   000F0000
+             hex   000F0000
+             hex   00000000
+
+s_Comma      hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   0000FF00
+             hex   0000F000
+
+s_Minus      hex   00000000
+             hex   00000000
+             hex   0FFFFF00
+             hex   00000000
+             hex   00000000
+             hex   00000000
+
+
+s_Period     hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   0000FF00
+             hex   0000FF00
+
+s_Slash      hex   000000F0
+             hex   00000F00
+             hex   0000F000
+             hex   000F0000
+             hex   00F00000
+             hex   0F000000
+
+s_N0         hex   00FFFF00
+             hex   0F000FF0
+             hex   0F00F0F0
+             hex   0F0F00F0
+             hex   0FF000F0
+             hex   00FFFF00
+
+s_N1         hex   000F0000
+             hex   00FF0000
+             hex   000F0000
+             hex   000F0000
+             hex   000F0000
+             hex   00FFF000
+
+s_N2         hex   00FFFF00
+             hex   0F0000F0
+             hex   00000F00
+             hex   000FF000
+             hex   00F00000
+             hex   0FFFFFF0
+
+s_N3         hex   00FFFF00
+             hex   000000F0
+             hex   000FFF00
+             hex   000000F0
+             hex   000000F0
+             hex   00FFFF00
+
+s_N4         hex   0000FF00
+             hex   000F0F00
+             hex   00F00F00
+             hex   0FFFFFF0
+             hex   00000F00
+             hex   00000F00
+
+s_N5         hex   0FFFFFF0
+             hex   0F000000
+             hex   0FFFFF00
+             hex   000000F0
+             hex   0F0000F0
+             hex   00FFFF00
+
+s_N6         hex   000FFF00
+             hex   00F00000
+             hex   0F000000
+             hex   0FFFFF00
+             hex   0F0000F0
+             hex   00FFFFF0
+
+s_N7         hex   0FFFFFF0
+             hex   000000F0
+             hex   00000F00
+             hex   0000F000
+             hex   000F0000
+             hex   000F0000
+
+s_N8         hex   00FFFF00
+             hex   0F0000F0
+             hex   00FFFF00
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   00FFFF00
+
+s_N9         hex   00FFFF00
+             hex   0F0000F0
+             hex   00FFFF00
+             hex   0000F000
+             hex   000F0000
+             hex   00F00000
+
+s_Colon      hex   000FF000
+             hex   000FF000
+             hex   00000000
+             hex   000FF000
+             hex   000FF000
+             hex   00000000
+
+s_Semi       hex   00000000
+             hex   000FF000
+             hex   000FF000
+             hex   00000000
+             hex   000FF000
+             hex   000F0000
+
+s_LAngle     hex   0000F000
+             hex   000F0000
+             hex   00F00000
+             hex   000F0000
+             hex   0000F000
+             hex   00000000
+
+s_Equal      hex   00000000
+             hex   00000000
+             hex   0FFFFF00
+             hex   00000000
+             hex   0FFFFF00
+             hex   00000000
+
+s_RAngle     hex   0000F000
+             hex   00000F00
+             hex   000000F0
+             hex   00000F00
+             hex   0000F000
+             hex   00000000
+
+s_Question   hex   00FFF000
+             hex   0F000F00
+             hex   00000F00
+             hex   000FF000
+             hex   00000000
+             hex   000FF000
+
+s_At         hex   00FFFF00
+             hex   0F0000F0
+             hex   0F00F0F0
+             hex   0FFFF0F0
+             hex   000000F0
+             hex   0FFFFF00
+
+s_A          hex   000FF000
+             hex   00F00F00
+             hex   0F0000F0
+             hex   0FFFFFF0
+             hex   0F0000F0
+             hex   0F0000F0
+
+s_B          hex   0FFFFF00
+             hex   0F0000F0
+             hex   0FFFFF00
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0FFFFF00
+
+s_C          hex   00FFFFF0
+             hex   0F000000
+             hex   0F000000
+             hex   0F000000
+             hex   0F000000
+             hex   00FFFFF0
+
+s_D          hex   0FFFFF00
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0FFFFF00
+
+s_E          hex   0FFFFFF0
+             hex   0F000000
+             hex   0FFFF000
+             hex   0F000000
+             hex   0F000000
+             hex   0FFFFFF0
+
+s_F          hex   0FFFFFF0
+             hex   0F000000
+             hex   0FFFF000
+             hex   0F000000
+             hex   0F000000
+             hex   0F000000
+
+s_G          hex   00FFFFF0
+             hex   0F000000
+             hex   0F000000
+             hex   0F00FFF0
+             hex   0F0000F0
+             hex   00FFFF00
+
+s_H          hex   0F0000F0
+             hex   0F0000F0
+             hex   0FFFFFF0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+
+s_I          hex   0FFFFF00
+             hex   000F0000
+             hex   000F0000
+             hex   000F0000
+             hex   000F0000
+             hex   0FFFFF00
+
+s_J          hex   000000F0
+             hex   000000F0
+             hex   000000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   00FFFF00
+
+s_K          hex   0F000F00
+             hex   0F00F000
+             hex   0FFF0000
+             hex   0F00F000
+             hex   0F000F00
+             hex   0F000F00
+
+s_L          hex   0F000000
+             hex   0F000000
+             hex   0F000000
+             hex   0F000000
+             hex   0F000000
+             hex   0FFFFFF0
+
+s_M          hex   0F0000F0
+             hex   0FF00FF0
+             hex   0F0FF0F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+
+s_N          hex   0F0000F0
+             hex   0FF000F0
+             hex   0F0F00F0
+             hex   0F00F0F0
+             hex   0F000FF0
+             hex   0F0000F0
+
+s_O          hex   00FFFF00
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   00FFFF00
+
+s_P          hex   0FFFFF00
+             hex   0F0000F0
+             hex   0FFFFF00
+             hex   0F000000
+             hex   0F000000
+             hex   0F000000
+
+s_Q          hex   00FFFF00
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F00F0F0
+             hex   0F000FF0
+             hex   00FFFFF0
+
+s_R          hex   0FFFFF00
+             hex   0F0000F0
+             hex   0FFFFF00
+             hex   0F000F00
+             hex   0F0000F0
+             hex   0F0000F0
+
+s_S          hex   00FFFFF0
+             hex   0F000000
+             hex   00FFFF00
+             hex   000000F0
+             hex   000000F0
+             hex   0FFFFF00
+
+s_T          hex   0FFFFF00
+             hex   000F0000
+             hex   000F0000
+             hex   000F0000
+             hex   000F0000
+             hex   000F0000
+
+s_U          hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   00FFFF00
+
+s_V          hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   00F00F00
+             hex   000FF000
+
+s_W          hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0000F0
+             hex   0F0FF0F0
+             hex   0FF00FF0
+             hex   0F0000F0
+
+s_X          hex   0F0000F0
+             hex   00F00F00
+             hex   000FF000
+             hex   000FF000
+             hex   00F00F00
+             hex   0F0000F0
+
+s_Y          hex   F00000F0
+             hex   0F000F00
+             hex   00F0F000
+             hex   000F0000
+             hex   000F0000
+             hex   000F0000
+
+s_Z          hex   0FFFFFF0
+             hex   00000F00
+             hex   0000F000
+             hex   000F0000
+             hex   00F00000
+             hex   0FFFFFF0
+
+s_LBracket   hex   000FFF00
+             hex   000F0000
+             hex   000F0000
+             hex   000F0000
+             hex   000F0000
+             hex   000FFF00
+
+s_BackSlash  hex   0F000000
+             hex   00F00000
+             hex   000F0000
+             hex   0000F000
+             hex   00000F00
+             hex   000000F0
+
+s_RBracket   hex   00FFF000
+             hex   0000F000
+             hex   0000F000
+             hex   0000F000
+             hex   0000F000
+             hex   00FFF000
+
+s_Carot      hex   0000F000
+             hex   000F0F00
+             hex   00F000F0
+             hex   00000000
+             hex   00000000
+             hex   00000000
+
+s_UnderLine  hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   FFFFFFF0
+
+; s_Template is not referenced by FontTable; presumably an all-zero glyph
+; kept as a starting point for adding new characters - confirm.
+s_Template   hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+             hex   00000000
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+