mirror of
https://github.com/elliotnunn/mac-rom.git
synced 2025-01-01 11:29:27 +00:00
0ba83392d4
Resource forks are included only for .rsrc files. These are DeRezzed into their data fork. 'ckid' resources, from the Projector VCS, are not included. The Tools directory, containing mostly junk, is also excluded.
1340 lines
46 KiB
Plaintext
1340 lines
46 KiB
Plaintext
;
|
||
; File: ScriptMgrUtilText.a (formerly SMgrUtilText.a)
|
||
;
|
||
; Contains: Script Mgr text utilities
|
||
;
|
||
; Written by: MED Mark Davis
|
||
; DRS daan Strebe
|
||
; LDC Lee Collins
|
||
; PKE Peter Edberg
|
||
;
|
||
; Copyright: © 1987-1992 by Apple Computer, Inc., all rights reserved.
|
||
;
|
||
; This file is used in these builds: Mac32 System CubeE
|
||
;
|
||
; Change History (most recent first):
|
||
;
|
||
; <11> 11/17/92 PKE Add include of new IntlResourcesPriv.a for IntlTokenize equates.
|
||
; <10> 6/17/92 HA #1029756,<PKE>: Made sure that we stuff a dummy address in the
|
||
; ram based vector table for StyledLineBreak after removing it
|
||
; from ptch 4 and adding it to ptch 27.
|
||
; <9> 4/30/92 FM Remove obsolete conditionals:shrinkIntlTokenize, smgrUsesStdExit
|
||
; <8> 9/20/91 PKE Remove CubeE conditional, since the new plan (according to
|
||
; Darin) is that everything for Cube-E goes in without
|
||
; conditionals. While I'm at it, get rid of some ">=$604",
|
||
; ">=$605", and ">=$700" conditionals.
|
||
; <7> 9/15/91 PKE smb, #1011961: For Bruges/Cube-E, roll in from 6.1 - Fix
|
||
; trashing of d3 by GetFormatOrder.
|
||
;
|
||
; <6> 9/14/90 BG Removed <4>. 040s are now behaving more reliably.
|
||
; <5> 7/23/90 PKE Changed IntlTokenize (under control of shrinkIntlTokenize
|
||
; symbol) to use BSRs instead of macros with inline code. On
|
||
; Plus/SE/II, this saves 620 bytes but makes IntlTokenize 13%
|
||
; slower, so we may reverse this.
|
||
; <4> 7/17/90 BG Added EclipseNOPs for flakey 040s.
|
||
; <3> 4/10/90 PKE Deleted conditionalized definitions of forROM, SysVers,
|
||
; Scripts604, and TestScriptManager. Used smgrUseStdExit,
|
||
; smgrSysVers, and smgrROMVers symbols instead of buildLevel.
|
||
; <2> 1/4/90 PKE Updated conditionals to put SS-6.0.4 changes in 6.0.5 as well as
|
||
; 7.0. Updated header to BBS format.
|
||
; <1> 12/18/89 CCH Adding for the first time into BBS. Changed include 'inc.sum.a'
|
||
; to load 'StandardEqu.d'. Updated file name references.
|
||
;
|
||
; (BBS versions above, EASE versions below:)
|
||
; <2.2> 11/27/89 PKE Use bug fix in <1.8> for Scripts604 build too.
|
||
; <2.1> 8/26/89 PKE Cleaned up some conditionals, moved a comment.
|
||
; <2.0> 8/22/89 SES Removed references to nFiles.
|
||
; <1.9> 7/12/89 PKE NEEDED FOR AURORA: In StyledLineBreak, replace 'mulu #4' with
|
||
; 'lsl.w #2' for efficiency.
|
||
; <1.8> 6/27/89 PKE NEEDED FOR AURORA - Fix GetFormatOrder bug discovered by Sue:
|
||
; If firstFormat=lastFormat, it did a move.l (instead of .w) of
|
||
; firstFormat into the first word of the ordering array. This
|
||
; move was redundant anyway, so I just skipped it.
|
||
; <1.7> 6/23/89 PKE Skip definition of buildLevel (done in ScriptPriv.a).
|
||
; <1.6> 6/9/89 PKE Change StyledLineBreak to use input offset value as per
|
||
; documentation: if 0, returned offset >= 1 and char breaks
|
||
; possible; otherwise, returned offset >= 0 and char breaks not
|
||
; possible.
|
||
; <1.5> 5/30/89 PKE Fixed bug in StyledLineBreak for 2-byte characters.
|
||
; <1.4> 5/24/89 EMT Renamed alpha symbol to not conflict with development stage.
|
||
; <1.3> 2/21/89 PKE Replaced with RomProj version, which already had system and Rom
|
||
; sources merged.
|
||
; (EASE ROMproj history below:)
|
||
; <1.6> 2/21/89 PKE Fix up LOADs: always use include 'StandardEqu.d'.
|
||
; 2/21/89 pke Fix up includes: always use include 'inc.sum.a'.
|
||
; <1.5> 2/14/89 PKE Fixed bugs in LineBreak/FindCarriage that broke new TextEdit
|
||
; <1.4> 2/14/89 PKE Updated to use MPW 3.0 final ScriptEqu.a equate names
|
||
; <1.3> 2/3/89 PKE Merged with current system sources (CCH's 01/16/1989 merge of
|
||
; 6.0.3 and 7.0 sources, done in RES.sys:smgr)
|
||
; 2/2/89 pke Merged system sources and ROM sources
|
||
; <1.2> 11/14/88 PKE Synchronize EASE and Projector
|
||
; <1.1> 11/11/88 CCH Fixed Header.
|
||
; <1.0> 11/9/88 CCH Adding to EASE.
|
||
; (old EASE ROMproj history below:)
|
||
; <1.7> 10/27/88 LDC Mods by Carl Hewitt and Brian McGhie in the process of putting
|
||
; Script Manager in ROM
|
||
; (EASE SYSproj history below:)
|
||
; <1.2> 2/14/89 PKE Updated to use MPW 3.0 final ScriptEqu.a equates
|
||
; <1.1> 1/16/89 CCH Merged 6.0.3 final sources into 7.0.
|
||
; <1.0> 11/16/88 CCH Added to EASE.
|
||
; (pre-EASE history; recent changes here went into EASE ROMproj, not SYSproj:)
|
||
; 9/16/88 ldc fixed StyledLineBreak to get correct itlb values
|
||
; 6/16/88 ldc added StdUnlink
|
||
; 6/14/88 ldc added check for valid token value [0..tokenNil)
|
||
; 6/13/88 ldc removed delimMap for move to ROM
|
||
; changes above here are for ROM or buildLevel >= 2
|
||
; 2/10/88 med Changed GetFormatOrder pascal interface
|
||
; 2/10/88 med LineRight parameter is high byte from pascal
|
||
; 2/9/88 med Change pascal interface
|
||
; 1/7/88 med itlResource loaded if purged; minor changes in Tokenizer
|
||
; 1/4/88 med itlResource in Tokenizer is now handle
|
||
; 11/20/87 med Changed CheckSelector
|
||
; 11/16/87 med Minor cleanup of IntlTokenize
|
||
; 10/28/87 med Added IntlTokenize Routine
|
||
; 10/26/87 med Added StyledLineBreak algorithm, FormatOrder
|
||
;___________________________________________________________________________________________________
|
||
; To Do:
|
||
;
|
||
;___________________________________________________________________________________________________
|
||
|
||
debugging equ 0
|
||
|
||
BLANKS ON
|
||
STRING ASIS
|
||
|
||
load 'StandardEqu.d'
|
||
include 'ScriptPriv.a'
|
||
include 'IntlResourcesPriv.a' ; <11>
|
||
|
||
|
||
export FormatOrder,IntlTokenize ; smSelStyledLineBreak
|
||
|
||
;____________________________________________________________
|
||
;
|
||
; StyledLineBreak Algorithm
|
||
;
|
||
; by Mark E. Davis
|
||
;
|
||
; Modification History
|
||
;
|
||
; 7/25/87 med Finished first version of line-break
|
||
; 7/28/87 med Pretty final version for AIS, needs truncation fixing
|
||
; 7/29/87 med Added check for CR
|
||
; 7/31/87 med CR cannot be part of white-space
|
||
; 7/31/87 med Add check for old SISs with bad word-break table
|
||
;<8/2/87med> Use direct calls on script routines
|
||
;<8/2/87med> Moved CheckFont
|
||
;<8/4/87med> Use blockflags instead of sisReverse
|
||
;____________________________________________________________
|
||
|
||
;____________________________________________________________
|
||
; Routine StyledLineBreak (
|
||
; textPtr: Ptr;
|
||
; textLen: Longint; {must be < 32K ;<2/14/89 pke>}
|
||
; textStart: Longint;
|
||
; textEnd: Longint;
|
||
; flags: Longint;
|
||
; var textWidth: Fixed; {on exit, set if too long}
|
||
; var textOffset: Longint;
|
||
; ): LineCode;
|
||
;____________________________________________________________
|
||
|
||
IF 0 THEN
|
||
|
||
LineBreakFrame record {oldA6},decrement
|
||
resultSize equ 2
|
||
result ds.w 1
|
||
argSize equ *-8
|
||
textPtr ds.l 1
|
||
textLen ds.l 1
|
||
textStart ds.l 1
|
||
textEnd ds.l 1
|
||
flags ds.l 1
|
||
textWidth ds.l 1
|
||
textOffset ds.l 1
|
||
selector ds.l 1
|
||
return ds.l 1
|
||
oldA6 ds.l 1
|
||
offsets ds.l 3
|
||
leftSide ds.b 1
|
||
savedReverse ds.b 1
|
||
lineDirection ds.w 1
|
||
oldBlockFlags ds.w 1 ; <8/4/87med>
|
||
localFrame equ *
|
||
endR
|
||
|
||
FixMinusOne equ $FFFF0000
|
||
|
||
LineBreakRegs reg a2-a3/d4-d6
|
||
;____________________________________________________________
|
||
|
||
IF 0 THEN
|
||
; StyledLineBreak Equates
|
||
|
||
smcTypeMask equ $000F
|
||
smcReserved equ $00F0
|
||
smcClassMask equ $0F00
|
||
smcReserved12 equ $1000
|
||
smcRightMask equ $2000
|
||
smcUpperMask equ $4000
|
||
smcDoubleMask equ $8000
|
||
|
||
; CharType character types.
|
||
|
||
smCharPunct equ 0
|
||
smCharAscii equ 1
|
||
smCharEuro equ 7
|
||
|
||
; CharType punctuation types.
|
||
|
||
smPunctNormal equ $0000
|
||
smPunctNumber equ $0100
|
||
smPunctSymbol equ $0200
|
||
smPunctBlank equ $0300 ; NBSP is PunctNormal
|
||
ENDIF
|
||
;____________________________________________________________
|
||
|
||
StyledLineBreak proc
|
||
|
||
import FindCarriage
|
||
import StdUnlink
|
||
|
||
; setup
|
||
with LineBreakFrame,smgrRecord,scriptRecord, ItlbRecord
|
||
CheckSelector
|
||
link a6,#localFrame
|
||
movem.l LineBreakRegs,-(sp)
|
||
|
||
; common args
|
||
move.l textWidth(a6),a3 ; get @width
|
||
move.l textPtr(a6),d5 ; pointer
|
||
add.l textStart(a6),d5 ; pointer+start
|
||
|
||
; save environment and reset
|
||
;!!! generalize for left-right
|
||
move.w teSysJust,lineDirection(a6) ; save just
|
||
move.w #0,teSysJust ; set to LR
|
||
|
||
; get current script
|
||
move.w #0,-(sp) ; allocate _FontScript return
|
||
_FontScript ; get current script
|
||
move.w (sp)+,d0 ; current script
|
||
|
||
; get core globals and test: inline instead of trap, for speed
|
||
|
||
lsl.w #2,d0 ; make script code a longword offset <1.9>
|
||
GetSmgrCore a2 ; smgr core globals
|
||
move.l smgrRecord.smgrEntry(a2,d0),a2 ; core location
|
||
|
||
;<9/16/88ldc> changed following two lines to access correct itlb values <06/09/89 pke><3>
|
||
move.b ScriptRecord.scriptBundle.itlbFlags(a2),oldBlockFlags(a6) ; <9/16/88ldc>
|
||
bclr.b #smsfReverse,ScriptRecord.scriptBundle.itlbFlags(a2) ; set left-right <8/4/87med> ; <9/16/88ldc>
|
||
|
||
; quick check on length
|
||
move.l textEnd(a6),d4 ; get length
|
||
sub.l textStart(a6),d4 ; segment length
|
||
ble @TooLong ; if zero, bail
|
||
tst.l (a3) ; no width (degenerate)
|
||
; changed this so we handle blanks at start of 0-width line. <06/09/89 pke><3>
|
||
bmi @FixNoChars ; fix neg width
|
||
|
||
; get offset
|
||
;!!! fix long length later
|
||
move.w #0,-(sp) ; return space
|
||
move.l d5,-(sp) ; pointer
|
||
move.w d4,-(sp) ; length (mod)
|
||
move.w #0,-(sp) ; slop (truncate)
|
||
move.w (a3),-(sp) ; width (truncate)
|
||
pea leftSide(a6) ; @leftSide
|
||
_Pixel2Char ; use trap
|
||
move.l #0,d6 ; longize
|
||
move.w (sp)+,d6 ; get offset
|
||
|
||
; if width is zero, fix leftSide for FindWord etc. (change for 604) <06/09/89 pke><3>
|
||
tst.l (a3) ; zero width?
|
||
bne.s @doneFixLeftSide ;
|
||
st leftSide(a6) ; if so, fix leftSide
|
||
@doneFixLeftSide
|
||
|
||
; Check for CR <7/29/87med>
|
||
|
||
move.l d5,a0 ; text ptr
|
||
move.w d6,d0 ; copy of non-offsetted length
|
||
bsr FindCarriage ; a0,d0.w: return d0.b=t/f, length in d1.w
|
||
add.w textStart+2(a6),d1 ; need offset from textPtr <added 2/14/89 pke>
|
||
tst.b d0 ; found?
|
||
bne @ReturnWord ; yes, return
|
||
|
||
; check for position being too long
|
||
|
||
move.l d6,d0 ; copy un-offsetted length
|
||
add.l textStart(a6),d6 ; get char offset
|
||
|
||
; if offset > length, return width of text
|
||
|
||
cmp.l d4,d0 ; position at end?
|
||
blt.s @GotCharPos ; no, skip
|
||
tst.b leftSide(a6) ; leftSide?
|
||
bne.s @TooLong ; yes, too long
|
||
@GotCharPos
|
||
|
||
; get word around char offset
|
||
|
||
move.l textPtr(a6),-(sp) ; ptr
|
||
move.w textLen+2(a6),-(sp) ; length (mod)
|
||
move.w d6,-(sp) ; offset (truncate)
|
||
move.b leftSide(a6),-(sp) ; leftSide
|
||
move.l MinusOne,-(sp) ; line break table
|
||
pea offsets(a6) ; offset table
|
||
_FindWord ; use trap
|
||
|
||
; test character to see if it is white-space
|
||
; Mark suggests checking to see if VisibleLength would work better hereÉ <06/09/89 pke>
|
||
|
||
move.w #0,-(sp) ; allocate return
|
||
move.l textPtr(a6),-(sp) ; ptr
|
||
move.w offsets(a6),-(sp) ; offset
|
||
_CharType ; use trap
|
||
move.w (sp)+,d0 ; get type
|
||
move.l #0,d1 ; longize
|
||
move.w offsets(a6),d1 ; assume second offset ***
|
||
and.w #smcTypeMask+smcClassMask,d0 ; type and class
|
||
cmp.w #smCharPunct+smPunctBlank,d0 ; blank?
|
||
bne.s @ReturnOffset ; no, continue
|
||
btst.b #smsfReverse,oldBlockFlags(a6) ; test reverse block flag <8/4/87med>
|
||
sne d0 ; 0/-1 left/right <8/4/87med>
|
||
cmp.b LineDirection+1(a6),d0 ; line = char? (bottom byte of line) <8/4/87med>
|
||
bne.s @ReturnOffset ; yes, skip to end
|
||
|
||
; here, word is whitespace and direction is ok (script direction = line direction),
|
||
; so return second offset.
|
||
|
||
; add check for CR for old SISs with bad word-break table. <7/31/87med>
|
||
; All new SISs should be fixed, so we should be able to skip the FindCarriage
|
||
; stuff here and just set d1 to offsets+2(a6). <06/09/89 pke>
|
||
|
||
move.l textPtr(a6),a0 ; get ptr
|
||
|
||
moveq #0,d6 ; make long <2/14/89 pke><3>
|
||
move.w offsets(a6),d6 ; get first offset
|
||
add.l d6,a0 ; now at first offset <2/14/89 pke><3>
|
||
|
||
move.w offsets+2(a6),d0 ; take second offset
|
||
sub.w d6,d0 ; now length
|
||
bsr FindCarriage ; find in d1
|
||
add.w d6,d1 ; d1 either after cr or at end
|
||
bra.s @ReturnWord ; return
|
||
|
||
@TooLong
|
||
; return width
|
||
move.w #0,-(sp) ; return space
|
||
move.l d5,-(sp) ; pointer
|
||
move.w #0,-(sp) ; firstByte
|
||
move.w d4,-(sp) ; length
|
||
_TextWidth
|
||
move.w (sp)+,d0 ; pop width
|
||
swap d0 ; swap for long
|
||
clr.w d0 ; longize
|
||
; move.l (a5),a0 ; graf globals
|
||
; move.l fontAdj(a0),d0 ; get fixed width
|
||
sub.l d0,(a3) ; final width
|
||
|
||
move.l textEnd(a6),d6 ; set return
|
||
move.l #smBreakOverflow,d0 ; set type
|
||
bra.s @Done ; return
|
||
|
||
@ReturnOffset
|
||
tst.l d1 ; break at start?
|
||
bne.s @ReturnWord ; yes, return char
|
||
|
||
move.l textOffset(a6),a0 ; get @textOffset <06/09/89 pke><3>
|
||
tst.l (a0) ; was offset 0 at entry? <06/09/89 pke><3>
|
||
beq.s @ReturnWord ; if so, go return zero offset <06/09/89 pke><3>
|
||
|
||
; have to break word, so backup until length is less.
|
||
|
||
; The most we have to backup is one char, and we only have to do that if <06/09/89 pke><3>
|
||
; leftSide is false <06/09/89 pke>. We also need to catch the case of d6=0.
|
||
|
||
tst.b leftSide(a6) ; leftSide?
|
||
bne.s @checkNullOffset ; if T, skip decrement
|
||
sub.l #1,d6 ; decrement
|
||
clr.w -(sp) ; allocate return
|
||
move.l d5,-(sp) ; pass text ptr
|
||
move.l d6,d0 ; cur position
|
||
sub.l textStart(a6),d0 ; start of real text
|
||
move.w d0,-(sp) ; count (mod!!!)
|
||
_CharByte
|
||
tst.w (sp)+ ; return 0=single,-1=first,1=last
|
||
ble.s @checkNullOffset ; bail if -1 or 0, ok
|
||
sub.l #1,d6 ; fix for 2-byte chars
|
||
@checkNullOffset:
|
||
tst.l d6 ; are we still >0?
|
||
bgt.s @ReturnChar ; if so, go return a char
|
||
bra.s @FixNoChars1 ; don't redo test for orig offset 0
|
||
|
||
@FixNoChars
|
||
move.l textOffset(a6),a0 ; get @textOffset <06/09/89 pke><4>
|
||
tst.l (a0) ; was offset 0 at entry? <06/09/89 pke><4>
|
||
bne.s @FixNoChars1 ; if not, return 1 char <06/09/89 pke><4>
|
||
moveq #0,d1 ; if so, set zero offset <06/09/89 pke><4>
|
||
bra.s @ReturnWord ; and go return word break <06/09/89 pke><4>
|
||
@FixNoChars1
|
||
|
||
; string is now zero length, make it at least 1 char
|
||
|
||
move.l #1,d6 ; include first char
|
||
|
||
clr.w -(sp) ; allocate return <05/30/89 pke><3>
|
||
|
||
move.l d5,-(sp) ; pass text ptr
|
||
move.w #1,-(sp) ; offset
|
||
_CharByte
|
||
|
||
tst.w (sp)+ ; return 0=single,-1=first,1=last <05/30/89 pke><3>
|
||
|
||
ble.s @ReturnChar ; bail if -1 or 0, ok
|
||
add.l #1,d6 ; fix for 2-byte chars
|
||
|
||
@ReturnChar
|
||
move.l #smBreakChar,d0 ; so, word too long.
|
||
bra.s @Done ; exit
|
||
|
||
@ReturnWord
|
||
moveq #0,d6 ; make long <2/14/89 pke><3>
|
||
move.w d1,d6 ; set to word bound <2/14/89 pke><3>
|
||
|
||
move.l #smBreakWord,d0 ; break at word
|
||
|
||
@Done
|
||
move.l textOffset(a6),a0 ; get @textOffset
|
||
move.l d6,(a0) ; set to char position
|
||
move.b d0,result(a6) ; set result type
|
||
|
||
; restore environment
|
||
; <9/16/88ldc> fixed following line
|
||
|
||
move.b oldBlockFlags(a6),ScriptRecord.scriptBundle.itlbFlags(a2) ; <9/16/88ldc> use for 6.0.4 too <06/09/89 pke><3>
|
||
|
||
move.w lineDirection(a6),teSysJust ; restore just
|
||
|
||
; cleanup
|
||
movem.l (sp)+,LineBreakRegs
|
||
CheckA6
|
||
|
||
move #argSize, d0 ; for StdUnlink <6/16/88ldc>
|
||
bra StdUnlink ; standard exit <6/16/88ldc>
|
||
endWith
|
||
endProc
|
||
|
||
ENDIF
|
||
|
||
|
||
;____________________________________________________________
|
||
; PROCEDURE FormatOrder (
|
||
; ordering : FormatOrderPtr;
|
||
; firstFormat : Integer;
|
||
; lastFormat : Integer;
|
||
; lineRight : Boolean;
|
||
; FUNCTION DirProc (theFormat : Integer; dirParam : Ptr) : Boolean;
|
||
; dirParam : Ptr
|
||
; );
|
||
;____________________________________________________________
|
||
|
||
FormatOrderFrame record {oldA6},decrement
|
||
resultSize equ 0
|
||
argSize equ *-8
|
||
ordering ds.l 1
|
||
firstFormat ds.w 1
|
||
lastFormat ds.w 1
|
||
filler ds.b 1
|
||
lineRight ds.b 1 ; high byte from pascal <2/10/88med>
|
||
DirProc ds.l 1
|
||
;;filler2 ds.l 1 ; why does pascal want this? Changed pascal interface <2/9/88med>
|
||
dirParam ds.l 1
|
||
selector ds.l 1
|
||
return ds.l 1
|
||
oldA6 ds.l 1
|
||
|
||
localFrame equ *
|
||
endR
|
||
|
||
FormatOrderRegs reg a2-a3/d3-d6 ; <7>
|
||
;____________________________________________________________
|
||
|
||
FormatOrder proc
|
||
|
||
import StdUnlink
|
||
|
||
with FormatOrderFrame
|
||
CheckSelector
|
||
|
||
link a6,#localFrame
|
||
movem.l FormatOrderRegs,-(sp)
|
||
|
||
; consistency check
|
||
|
||
move.l ordering(a6),a2 ; get ordering
|
||
move.w firstFormat(a6),d3 ; get start
|
||
move.w lastFormat(a6),d4 ; get end
|
||
cmp.w d3,d4 ; same?
|
||
|
||
ble.s @Exit ; lastFormat ² firstFormat, leave array alone. <1.8><2.2><3>
|
||
|
||
@FillArray
|
||
; {fill the array with initial values, going in lineDirection order}
|
||
|
||
move.l DirProc(a6),a3 ; get dirProc
|
||
move.l #2,d5 ; set pointer increment
|
||
move.b lineRight(a6),d6 ; right-left?
|
||
beq.s @InvertLoop ; no, continue
|
||
move.w d4,d0 ; get last element
|
||
sub.w d3,d0 ; last-first
|
||
add.w d0,d0 ; for word array
|
||
add.w d0,a2 ; last element
|
||
neg.w d5 ; invert pointer increment
|
||
|
||
@InvertLoop
|
||
clr.b -(sp) ; allocate return
|
||
move.w d3,-(sp) ; pass i
|
||
move.l dirParam(a6),-(sp) ; pass dirParam
|
||
jsr (a3) ; call it
|
||
move.w d3,d1 ; set temp
|
||
cmp.b (sp)+,d6 ; get result: same as lineRight?
|
||
beq.s @1 ; yes, skip
|
||
not.w d1 ; invert
|
||
@1
|
||
move.w d1,(a2) ; set value
|
||
add.w d5,a2 ; increment array pointer
|
||
add.w #1,d3 ; i := i+1
|
||
cmp.w d3,d4 ; same?
|
||
bge.s @InvertLoop ; no, keep looping
|
||
|
||
; {walk through the array, reversing the odd direction clusters (inverted)}
|
||
@WalkArray
|
||
sub.w firstFormat(a6),d4 ; now # elements + 1
|
||
move.l #-1,d5 ; mark as no first backwards element
|
||
clr.w d3 ; i := 0
|
||
move.l ordering(a6),a2 ; get ordering
|
||
move.l a2,a3 ; for increment
|
||
@RevLoop
|
||
tst.w (a3)+ ; get value
|
||
bge.s @BadOrder ; correct order
|
||
tst.w d5 ; have first backwards element?
|
||
bge.s @RevContinue ; yes, continue
|
||
move.w d3,d5 ; mark first backwards element
|
||
bra.s @RevContinue ; continue
|
||
@BadOrder
|
||
tst.w d5 ; have first backwards element?
|
||
blt.s @RevContinue ; no, continue
|
||
bsr.s ReverseCluster ; ReverseCluster(j, i - 1)
|
||
move.l #-1,d5 ; mark no first backwards element.
|
||
@RevContinue
|
||
add.w #1,d3 ; inc
|
||
cmp.w d3,d4 ; at end?
|
||
bge.s @RevLoop ; no, continue
|
||
|
||
tst.w d5 ; got one?
|
||
blt.s @Exit ; no, continue
|
||
bsr.s ReverseCluster ; ReverseCluster(j, limit);
|
||
|
||
@Exit
|
||
; cleanup
|
||
movem.l (sp)+,FormatOrderRegs
|
||
CheckA6
|
||
|
||
move #argSize, d0 ; for StdUnlink <6/16/88ldc>
|
||
bra StdUnlink ; standard exit <6/16/88ldc>
|
||
|
||
|
||
;____________________________________________________________
|
||
; PROCEDURE ReverseCluster (first, last : Integer);
|
||
; e.g. d5=3, d3 = 6 should reverse 3,4,5
|
||
;____________________________________________________________
|
||
ReverseCluster
|
||
|
||
move.w d3,d2 ; get last + 1
|
||
sub.w d5,d2 ; diff = first - last
|
||
sub.w #1,d2 ; sum -1
|
||
asr.w #1,d2 ; middle = (sum-1)/2 (6,3 =>
|
||
move.w d5,d0 ; get j
|
||
add.w d0,d0 ; double for word address
|
||
lea 0(a2,d0.w),a0 ; end address
|
||
move.w d3,d0 ; get i
|
||
add.w d0,d0 ; double for word address
|
||
lea 0(a2,d0.w),a1 ; end address (after end of element)
|
||
; e.g. for 3,4 items, dbra = 1 (two iterations);
|
||
|
||
@RevClusLoop ; reverse outer elements
|
||
move.w (a0),d0 ; temp = ordering^[i]
|
||
not.w d0 ; restore it
|
||
move.w -(a1),d1 ; temp2 = ordering^[j]
|
||
not.w d1 ; restore it
|
||
move.w d0,(a1) ; mix
|
||
move.w d1,(a0)+ ; ditto
|
||
dbra d2,@RevClusLoop ; til done
|
||
|
||
; exit
|
||
rts
|
||
endProc
|
||
;____________________________________________________________
|
||
; Routine IntlTokenize
|
||
;____________________________________________________________
|
||
|
||
;The tokenizer's job is to lexically analyze a block of characters
|
||
;(irrespective of script) and return a block of standardized tokens. The
|
||
;character block is interpreted as an expression from any normal computer
|
||
;language.
|
||
;The tokenizer lexes the following atoms:
|
||
; Unknown symbol (a character outside the tokenizer's vocabulary)
|
||
; White space (consecutive white space characters generate one token)
|
||
; Alphabetic (sequence of textual characters (and, optionally, digits))
|
||
; Numeric (sequence of digits)
|
||
; Alternate numeric (digits from foreign scripts)
|
||
; Real numeric (digits, decimal, [digits])
|
||
; Newline
|
||
; Comment (delimiters chosen by application)
|
||
; Literal (with delimiters chosen by the application)
|
||
; Escape character (disable special meaning of following character)
|
||
; Symbols (non-alphanumeric special symbols)
|
||
; End of Parse (source string ran out)
|
||
;--------------------------------------------------------------------------
|
||
;The following symbols are unique tokens:
|
||
; ( ) [ ] { } Ç È + - * Ö ± / \ > <
|
||
; = <= ² >= ³ == := <> != ! ~ , . Ò Ó Ô
|
||
; Õ " ' ; % ^ _ & @ | ? ¹ Ã · º µ ¸
|
||
; ° : # $
|
||
;--------------------------------------------------------------------------
|
||
;The tokenizer is a function:
|
||
; FUNCTION IntlTokenize( tokenParam: tokenBlock ): tokenResult;
|
||
;where
|
||
; tokenResult = (tokenOK, tokenOverflow, stringOverflow, badDelim, unknown);
|
||
; tokenBlock = ^paramBlock;
|
||
; paramBlock = RECORD
|
||
; source: Ptr; (*pointer to stream of characters*)
|
||
; sourceLength: LongInt; (*length of source stream*)
|
||
; tokenList: Ptr; (*pointer to array of tokens*)
|
||
; tokenLength: LongInt; (*maximum length of TokenList*)
|
||
; tokenCount: LongInt; (*number of tokens generated by tokenizer*)
|
||
; stringList: Ptr; (*pointer to stream of identifiers*)
|
||
; stringLength: LongInt; (*length of string list*)
|
||
; stringCount: LongInt; (*number of bytes currently used*)
|
||
; doString: Boolean; (*make strings & put into StringLIst*)
|
||
; doAppend: Boolean; (*append to TokenList rather than replace*)
|
||
; doAlphanumeric: Boolean;(*identifiers may include numeric*)
|
||
; leftDelims, rightDelims: ARRAY[0..1] OF tokenType;
|
||
; leftComment, rightComment: ARRAY[0..3] OF tokenType;
|
||
; escapeCode: tokenType;
|
||
; decimalCode: tokenType; (*what symbol to use for decimal mark*)
|
||
; END;
|
||
; TokenRec = RECORD
|
||
; theToken: tokenType;
|
||
; position: Ptr; (*pointer into original Source*)
|
||
; length: LongInt; (*length of text in original source*)
|
||
; stringPosition: Ptr; (*Pascal/C string copy of identifier*)
|
||
; END;
|
||
; tokenType : Integer; (*defined below*)
|
||
; (* The only value in tokenBlock directly modified by the tokenizer is
|
||
; the tokenCount field. The tokenizer modifies the memory pointed to
|
||
; by tokenList and stringList. stringList is a sequence of null-
|
||
; terminated, even-boundaried Pascal strings that is generated if
|
||
; doString is true but ignored otherwise; likewise, the
|
||
; stringPosition fields of the TokenRec records are ignored if doString
|
||
; is false. *)
|
||
; (* The Left and right delimiters are encoded as tokenType; two may be
|
||
; specified. If only one is needed, the second must be 127.
|
||
; These delimit literal strings. The position of a delimiter in the
|
||
; rightDelims array must match the position of its counterpart in
|
||
; the leftDelims array. As an example, the pairs ÒÓ and ÔÕ are [47,49]
|
||
; and [48,50] respectively, where the codes are given below. *)
|
||
; (* The leftComment and rightComment are also encoded as tokenType, but
|
||
; unlike delimiters, two tokens must be specified per comment symbol
|
||
; (postions 0 and 1 in the array are one comment symbol).
|
||
; If a single token only is needed then the second must be -1. One
|
||
; or two comment symbols are available; if only one is used then the
|
||
; second comment's tokens must both be 127. The pairs (* *) are
|
||
; [16, 26, 127, 127], [26, 17, 127, 127] *)
|
||
; (* The escape character is significant only when followed by a right
|
||
; delimiter within a literal. It nullifies the special meaning of the
|
||
; delimiter. If there is no escape character, use 127 *)
|
||
; (* The decimal symbol is has special meaning only when flanked or
|
||
; preceded by numeric or AltNumeric tokens. If RealNumber parsing is to
|
||
; be disabled, decimalCode should be 127 *)
|
||
;--------------------------------------------------------------------------
|
||
;Ordinal values of tokenType are:
|
||
;-1 EndParse
|
||
;0 Unknown 1 WhiteSpace 2 LeftLitDelim 3 RightLitDelim
|
||
;4 Alpha 5 Numeric 6 NewLine 7 LeftCommentDelim
|
||
;8 RightCommentDelim 9 literal 10 Escape
|
||
;11 Alternate numeric 12 RealNumber 13 AltRealNumber
|
||
;16 ( 17 ) 18 [ 19 ] 20 { 21 } 22 Ç 23 È 24 +
|
||
;25 - 26 * 27 Ö 28 ± 29 / 30 \ 31 < 32 > 33 =
|
||
;34 <= 35 ² 36 >= 37 ³ 38 == 39 := 40 41 <> 42 !=
|
||
;43 ! 44 ~ 45 , 46 . 47 Ò 48 Ó 49 Ô 50 Õ 51 "
|
||
;52 ' 53 ; 54 % 55 ^ 56 _ 57 & 58 @ 59 | 60 ?
|
||
;61 ¹ 62 Ã 63 · 64 º 65 µ 66 ¸ 67 ° 68 : 69 #
|
||
;70 $
|
||
;71 non-breaking space
|
||
;72 fraction
|
||
;73 intlCurrency
|
||
;74 leftSingGuillemet
|
||
;75 rightSingGuillemet
|
||
;76 perThousand
|
||
;77 ellipsis
|
||
;78 centerDot
|
||
;127 <nil>
|
||
;--------------------------------------------------------------------------
|
||
;--------------------------------------------------------------------------
|
||
;---------------------------------------------------------------------------
|
||
; IMPLEMENTATION NOTES
|
||
;
|
||
; This header file should be included in any version of tokenizer that is
|
||
;created. In the local version's file, this header file should be "included"
|
||
;as the first significant text. The local version must then implement a
|
||
;BaseSym macro after the include. The characteristics of the macro are:
|
||
; a) the first line is movea.l srcA,lastA
|
||
; b) the number of bytes in the character must be subtracted from srcL
|
||
; c) the symbol class of the character must be placed in the low word
|
||
; of csym. The upper word must remain undisturbed.
|
||
; d) after BaseSym, srcA must point to the byte following the last byte
|
||
; of the character just retrieved.
|
||
; e) if the available bytes run out before the character does, BaseSym
|
||
; must clean up its own portion of the stack (if any), place the
|
||
; constant #unexpectedEndER into temp, and branch to cleanUp.
|
||
; f) no registers except temp and the low word of csym may be modified.
|
||
;
|
||
; Following the BaseSym macro, the tokenBase.a file must be included. This
|
||
;file contains all state transition information.
|
||
; Finally, StringCopy must be implemented by the local version. String-
|
||
;copy's characteristics are:
|
||
; a) it is a subroutine.
|
||
; b) it may freely use any registers it pleases.
|
||
; c) it creates even-boundaried, null-terminated Pascal strings corres-
|
||
; ponding to those that each token's record points to.
|
||
; d) assuming that the return address from StringCopy is at the top of the
|
||
; stack, then the next word down is the current error condition. The
|
||
; following longword is the address of the effective token list, which
|
||
; is different from the pBlok tokenList address when doAppend is true.
|
||
; In other words, StringCopy must use the stack value rather than the
|
||
; parameter block value to determine the start of the token list.
|
||
; e) it must transliterate altNum and altReal tokens into ascii real
|
||
; strings and put the transliterated numbers, rather than the original
|
||
; numbers, into the Pascal strings. The same goes for roman (and only
|
||
; roman) letters. Additionally, the decimal character should be changed
|
||
; to a canonical '.'. All other text and symbols are to be copied
|
||
; verbatim.
|
||
; f) if doAppend is true, then it must compute the effective address of
|
||
; stringList by adding stringCount to it.
|
||
; g) it must compute the total amount of space used by strings by sub-
|
||
; tracting stringList(pBlok) from the next legal string address
|
||
; following the last null character of the last Pascal string, and
|
||
; place that computed value into stringCount(pBlok).
|
||
; h) it must detect overflow of the string space. If an overflow occurs,
|
||
; it must place #stringOverflow in the error condition space on the stack
|
||
; and return immediately.
|
||
; i) it must reverse the order of digits for the zany countries that use
|
||
; reversed digit sequences -- Arabic and Hebrew. In real numbers, the
|
||
; decimal point should be considered part of the string of digits and
|
||
; reversed right along with them.
|
||
; The local version contains its own mapping tables to decide the class
|
||
;of a character.
|
||
;---------------------------------------------------------------------------
|
||
;---------------------------------------------------------------------------
|
||
|
||
TokenFrame record {oldA6},decrement
|
||
resultSize equ 2
|
||
result ds.w 1
|
||
argSize equ *-8
|
||
paramSpace ds.l 1
|
||
selector ds.l 1
|
||
return ds.l 1
|
||
oldA6 ds.l 1
|
||
localFrame EQU *
|
||
ENDR
|
||
|
||
;---
|
||
; put these here for reference; defined in IntlResourcesPriv.a
|
||
IF 0 THEN
|
||
srcA EQU a0
|
||
srcL EQU d0
|
||
toks EQU a1
|
||
tokL EQU d1
|
||
holdA EQU a2
|
||
cSym EQU d2
|
||
lastA EQU a3
|
||
ld EQU d3
|
||
map EQU a4
|
||
lc1 EQU d4
|
||
pBlok EQU a5
|
||
lc2 EQU d5
|
||
temp EQU d6
|
||
dTab EQU a6
|
||
temp1 EQU d7
|
||
|
||
rd EQU ld
|
||
escRD EQU lc1
|
||
rc EQU ld
|
||
ENDIF
|
||
;---
|
||
|
||
EXPORT IntlTokenize
|
||
|
||
|
||
IntlTokenize func
|
||
WITH TokenRec,tokenBlock,TokenFrame,TokenResults,itl4Rec
|
||
CheckSelector
|
||
|
||
MACRO
|
||
BaseSym
|
||
bsr BaseSymRoutine ; <5>
|
||
ENDM
|
||
|
||
MACRO ;get before swap
|
||
GetSym
|
||
bsr GetSymRoutine ; <5>
|
||
ENDM
|
||
|
||
MACRO ;swap before get
|
||
NextSym
|
||
bsr NextSymRoutine ; <5>
|
||
ENDM
|
||
|
||
MACRO
|
||
DoToken &TokenRec
|
||
move.w &TokenRec,temp ; <5>
|
||
bsr DoTokenRoutine ; <5>
|
||
ENDM
|
||
|
||
MACRO
|
||
makeSym &symNum
|
||
move.w &symNum,-(sp) ; <5>
|
||
bra makeSymRoutine ; <5>
|
||
ENDM
|
||
|
||
|
||
MACRO
|
||
setDel ® ;set the bit position in the delimMap
|
||
|
||
;<6/14/88ldc> move byte, not word <3>
|
||
clr.w temp
|
||
move.b ®,temp ;compute delimiter map position
|
||
|
||
;<6/14/88ldc> skip -1, -2 <3>
|
||
bmi.s @999 ;skip if temp < 0
|
||
asr.w #3,temp ;offset address
|
||
bset ®,(dTab,temp) ;set that puppy
|
||
@999
|
||
ENDM
|
||
|
||
entry
|
||
link a6,#0
|
||
movem.l d2-d7/a2-a6,-(sp) ;just save it all
|
||
movea.l TokenFrame.paramSpace(a6),pBlok ;parameter block address
|
||
movea.l itlResource(pBlok),Map ;get the itl resource block <1/7/88med>
|
||
move.l (Map),temp
|
||
bne.s @27
|
||
move.l Map,-(a7)
|
||
_LoadResource
|
||
move.l (Map),temp
|
||
@27
|
||
move.l temp,Map
|
||
movea.l source(pBlok),srcA
|
||
move.l sourceLength(pBlok),srcL
|
||
movea.l tokenList(pBlok),toks
|
||
tst.b doAppend(pBlok) ;is this an append or replace operation?
|
||
beq.s @1
|
||
move.l tokenCount(pBlok),temp ;compute end of list
|
||
mulu #tokenRecSize,temp ;that's how much is in there now
|
||
adda.l temp, toks ;viola
|
||
@1
|
||
move.l toks,-(sp) ;we need to save this for StringCopy
|
||
move.l tokenLength(pBlok),tokL
|
||
move.l leftDelims(pBlok),ld
|
||
move.l leftComment(pBlok),lc1
|
||
move.l leftComment+4(pBlok),lc2
|
||
move.l MapOffset(Map),temp ;itl character map table
|
||
lea (Map,temp),Map
|
||
lea cleanUp,dTab
|
||
move.l dTab,mapEmergOffset(Map) ;emergency exit address for itl extendFetch
|
||
|
||
GetSMgrCore dTab ; <4/27/88ldc><3>
|
||
adda #smgrRecord.delimMap, dTab ; <4/27/88ldc><3>
|
||
|
||
lea -2(dTab),holdA ; holdA free for now!!! <1/7/88med>
|
||
clr.w (holdA)+ ;clear delimiter map positions
|
||
clr.l (holdA)+
|
||
clr.l (holdA)+
|
||
clr.l (holdA)+
|
||
|
||
clr.l (holdA)+ ; need to add one more word <6/14/88ldc><3>
|
||
|
||
movea.l srcA,holdA
|
||
|
||
; start wild and wacky macro calls (expression borrowed from JT).
|
||
|
||
setDel ld ;set up delimiter map
|
||
swap ld
|
||
setDel ld
|
||
swap ld
|
||
setDel lc1
|
||
setDel lc2
|
||
clr.l cSym ;clear 2nd byte of each char attr.
|
||
GetSym ;start 2-symbol pipe
|
||
bra.s main ;hop over to start
|
||
|
||
cont
|
||
DoToken cSym
|
||
main
|
||
movea.l lastA,holdA ;retrieve address of beginning of token
|
||
GetSym ;get NextSym
|
||
move.w cSym,temp ;compute delimMap address
|
||
asr.w #3,temp ;byte offset
|
||
btst cSym,(dtab,temp.w) ;is it a possible delimiter?
|
||
|
||
bne delimRun ; <05/30/89 pke><3>
|
||
|
||
notDelim ;just as fast on the averageÉ
|
||
cmpi.w #4,cSym ;Éas a jump table
|
||
beq alphabetic
|
||
cmpi.w #1,cSym
|
||
beq.s whiteSpace
|
||
cmpi.w #16,cSym
|
||
bge TokSymbol
|
||
cmpi.w #5,cSym
|
||
beq.s TokNumeric
|
||
cmpi.w #6,cSym ;check newline
|
||
beq.s cont
|
||
cmp.w #11,cSym ;alternate numeral
|
||
beq altnum
|
||
tst.w cSym ;check unknown symbol
|
||
beq.s cont
|
||
bmi endparse
|
||
bra unknownER ;shouldn't be able to get here
|
||
|
||
;-------------------------------------------------------------------------------
|
||
whitespace
|
||
swap cSym ;need to check the nextSym
|
||
@1
|
||
cmpi.w #1,cSym ;check for recurring whitespace
|
||
bne.s @3 ;no more whitespace
|
||
NextSym ;get NextSym
|
||
bra.s @1
|
||
@3
|
||
swap cSym ;get cSym back into sync
|
||
bra cont
|
||
|
||
;-------------------------------------------------------------------------------
|
||
TokNumeric
|
||
swap cSym ;check NextSym for numeric
|
||
moveq.l #5,temp1 ;numeric code
|
||
@nloop
|
||
cmp.w temp1,cSym
|
||
bne.s @done ;not numeric; bail out
|
||
NextSym
|
||
bra.s @nloop ;cycle again
|
||
@done
|
||
cmp.w decimalCode(pBlok),cSym ;could be real number
|
||
beq realNumber
|
||
swap cSym ;repair cSym order
|
||
bra cont
|
||
|
||
;-------------------------------------------------------------------------------
|
||
alphabetic
|
||
swap cSym ;check NextSym
|
||
moveq.l #4,temp1 ;alphabetic code
|
||
tst.b doAlphanumeric(pBlok) ;are numerics legal?
|
||
beq.s @aloopA ;no
|
||
@aloopN
|
||
cmp.w temp1,cSym ;is it alpha?
|
||
beq.s @1 ;no, but it might be numeric
|
||
cmpi.w #5,cSym ;is it numeric?
|
||
beq.s @1 ;no, but could be alternate numeric
|
||
cmpi.w #11,cSym
|
||
bne.s @done
|
||
@1 NextSym
|
||
bra.s @aloopN ;and re-enter loop
|
||
@aloopA
|
||
cmp.w temp1,cSym ;is it alpha?
|
||
bne.s @done ;nope so we're done
|
||
NextSym
|
||
bra.s @aloopA
|
||
@done
|
||
swap cSym ;repair cSym order
|
||
DoToken #4 ;create alpha token
|
||
bra main
|
||
|
||
;-------------------------------------------------------------------------------
|
||
leftLit1
|
||
move.l rightDelims(pBlok),rd ;get matching delimiter
|
||
swap rd ;match was in upper word
|
||
bra.s litMain
|
||
leftLit
|
||
move.l rightDelims(pBlok),rd ;get matching delimiter
|
||
litMain
|
||
move.w escapeCode(pBlok),escRD ;need escape code
|
||
DoToken #2 ;create leftDelim token
|
||
movea.l lastA,holdA ;save beginning of literal string
|
||
swap cSym ;look ahead
|
||
@litLoop
|
||
cmp.w cSym,rd ;see if we found an end delimiter
|
||
beq @endlit ;sure 'nuff
|
||
cmp.w cSym,escRD ;see if it's an escape
|
||
bne.s @1 ;nope, continue
|
||
DoToken #9 ;portion of literal before escape
|
||
movea.l lastA,holdA ;beginning of escape code
|
||
BaseSym
|
||
bmi badDelER ;don't stop now
|
||
DoToken #10 ;escape code
|
||
movea.l lastA,holdA ;beginning of symbol following escape
|
||
NextSym
|
||
swap cSym
|
||
DoToken cSym ;symbol following escape
|
||
movea.l lastA,holdA ;beginning of rest of literal
|
||
swap cSym
|
||
bra.s @litloop ;continue
|
||
@1 tst.w cSym ;make sure we've not exhausted the source
|
||
bmi badDelER ;hanging delimiter
|
||
NextSym ;get NextSym
|
||
bra @litLoop ;continue
|
||
@endLit
|
||
DoToken #9 ;create literal token
|
||
move.l leftDelims(pBlok),rd ;repair delim and comment regs
|
||
move.l leftComment(pBlok),escRD ;escRD is lc1
|
||
move.l lastA,holdA ;need address of closing delimiter
|
||
GetSym ;start filling pipe
|
||
DoToken #3 ;create right delimiter token
|
||
bra main
|
||
|
||
;-------------------------------------------------------------------------------
|
||
|
||
leftCom2A
|
||
move.l rightComment+4(pBlok),rc ;get matching delimiter
|
||
move.w #1,dTab ;this reg becomes the nest counter
|
||
bra.s comMain2
|
||
leftCom2
|
||
move.l lc1,lc2 ;lc2 becomes info for nested comment
|
||
move.w #1,dTab ;this reg becomes the nest counter
|
||
move.l rightComment(pBlok),rc
|
||
comMain2
|
||
GetSym ;NextSym
|
||
DoToken #7 ;create left comment symbol
|
||
move.l lastA,holdA ;beginning of literal string
|
||
tst.l rc ;see if right delim is 1 or 2 symbols
|
||
bmi.s endCom1 ;branch accordingly
|
||
bra endCom2
|
||
|
||
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||
leftCom1A
|
||
move.l rightComment+4(pBlok),rc ;get matching delimiter
|
||
bra.s comMain1
|
||
leftCom1
|
||
move.l rightComment(pBlok),rc
|
||
comMain1
|
||
move.w #1,dTab ;for compatibility reasons (nesting)
|
||
DoToken #7 ;create left comment symbol
|
||
move.l lastA,holdA ;beginning of literal string
|
||
tst.l rc ;see if right delim is 1 or 2 symbols
|
||
bmi.s endCom1 ;branch accordingly
|
||
bra.s endCom2
|
||
|
||
|
||
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||
endCom1
|
||
swap cSym ;look at the nextSym
|
||
@cloop
|
||
cmp.w cSym,rc ;check for end
|
||
beq.s @end
|
||
NextSym ;go for the next symbol
|
||
bpl.s @cloop ;may have run out before delimiter
|
||
bra badDelER
|
||
@end DoToken #9 ;create the literal token
|
||
movea.l lastA,holdA ;start of end delimiter
|
||
NextSym ;get one beyond end delimiter
|
||
swap cSym ;fix the order
|
||
move.l leftDelims(pBlok),rc ;repair register
|
||
DoToken #8 ;create end delimiter
|
||
move.l leftComment+4(pBlok),lc2
|
||
|
||
GetSMgrCore dTab ; <4/27/88ldc><3>
|
||
adda #smgrRecord.delimMap, dTab ; <4/27/88ldc><3>
|
||
|
||
bra main
|
||
|
||
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||
endCom2
|
||
@cloop
|
||
move.l lastA,temp1 ;effectively a 2-symbol look-behind
|
||
GetSym ;try the next puppy
|
||
cmp.l cSym,lc2 ;check for nested comment
|
||
beq.s @nest
|
||
@nogo
|
||
cmp.l cSym,rc ;check for end delimiter sequence
|
||
beq.s @end
|
||
tst.w cSym ;may have run out of symbols
|
||
bpl.s @cloop ;still okay
|
||
bra badDelER
|
||
@nest
|
||
tst.b doNest(pBlok) ;how's the weather?
|
||
beq.s @nogo
|
||
move.l temp1,lastA ;do literal token
|
||
DoToken #9
|
||
move.l temp1,holdA ;start of nested left comment
|
||
addq.w #1,dTab ;nest counter
|
||
bra comMain2
|
||
@end
|
||
move.l temp1,lastA ;fix up the pipeline
|
||
DoToken #9 ;create literal
|
||
move.l temp1,holdA ;start address of right delimiter
|
||
GetSym ;start up normal piping again
|
||
DoToken #8 ;create right delimiter
|
||
movea.l lastA,holdA ;start of whatever follows
|
||
subq.w #1,dTab ;see how the nesting isÉ
|
||
cmpa.w #0,dTab
|
||
bne endCom2 ;Éand continue if necessary
|
||
move.l leftDelims(pBlok),rc ;repair register
|
||
move.l leftComment+4(pBlok),lc2
|
||
|
||
GetSMgrCore dTab ; <4/27/88ldc><3>
|
||
adda #smgrRecord.delimMap, dTab ; <4/27/88ldc><3>
|
||
|
||
bra main
|
||
|
||
;-------------------------------------------------------------------------------
|
||
altnum
|
||
swap cSym ;check NextSym for numeric
|
||
moveq.l #11,temp1 ;numeric code
|
||
@nloop
|
||
cmp.w temp1,cSym
|
||
bne.s @done ;not numeric; bail out
|
||
NextSym
|
||
bra.s @nloop ;cycle again
|
||
@done
|
||
cmp.w decimalCode(pBlok),cSym ;could be real number
|
||
beq.s realAlt
|
||
swap cSym ;repair cSym order
|
||
bra cont
|
||
|
||
;-------------------------------------------------------------------------------
|
||
realNumber ;entry from altNum or numeric
|
||
NextSym
|
||
moveq.l #5,temp1
|
||
@nloop
|
||
cmp.w temp1,cSym
|
||
bne.s @done
|
||
NextSym
|
||
bra.s @nloop
|
||
@done
|
||
swap cSym
|
||
move.w #12,cSym ;code for real number
|
||
bra cont
|
||
;-------------------------------------------------------------------------------
|
||
realAlt ;entry from altNum or numeric
|
||
NextSym
|
||
moveq.l #11,temp1
|
||
@nloop
|
||
cmp.w temp1,cSym
|
||
bne.s @done
|
||
NextSym
|
||
bra.s @nloop
|
||
@done
|
||
swap cSym
|
||
move.w #13,cSym ;code for real number
|
||
bra cont
|
||
;-------------------------------------------------------------------------------
|
||
TokSymbol
|
||
swap cSym ;we'll be checking NextSym
|
||
cmpi.w #33,cSym ;is NextSym "="?
|
||
bne @cont
|
||
|
||
swap cSym
|
||
cmpi.w #31,cSym ;check for <=
|
||
bne.s @c1
|
||
makeSym #34
|
||
@c1
|
||
cmpi.w #32,cSym ;check for >=
|
||
bne.s @c2
|
||
makeSym #36
|
||
@c2
|
||
cmpi.w #68,cSym ;check for :=
|
||
bne.s @c3
|
||
makeSym #39
|
||
@c3
|
||
cmpi.w #33,cSym ;check for ==
|
||
bne.s @c4
|
||
makeSym #38
|
||
@c4
|
||
cmpi.w #43,cSym ;check for !=
|
||
bne.s @cont1 ;if that's not it then it ain't nothin'
|
||
makeSym #42
|
||
@cont
|
||
swap cSym ;re-adjust it
|
||
cmpi.l #$001F0020,cSym ;is it "<>"?
|
||
bne.s @cont1
|
||
makeSym #41
|
||
@cont1
|
||
move.l lastA,temp1 ;was not a composite symbol
|
||
sub.l holdA,temp1 ;compute size
|
||
bra cont ;get back into the thick of things
|
||
|
||
;-------------------------------------------------------------------------------
|
||
delimRun
|
||
cmp.w cSym,ld ;check for left delimiter
|
||
beq leftLit
|
||
swap ld ;check for other delimiter
|
||
cmp.w cSym,ld
|
||
beq leftLit1
|
||
swap ld
|
||
tst.l lc1 ;is comment sequence two symbols?
|
||
bpl.s @1
|
||
cmp.w cSym,lc1 ;check for left comment sequence
|
||
bne.s @2
|
||
bra leftCom1
|
||
@1 cmp.l cSym,lc1
|
||
beq leftCom2
|
||
@2 tst.l lc2 ;is other comment sequence two symbols?
|
||
bpl.s @3
|
||
cmp.w cSym,lc2 ;check for other left comment
|
||
beq leftCom1A
|
||
bra notDelim
|
||
@3 cmp.l cSym,lc2
|
||
beq leftCom2A
|
||
bra notDelim
|
||
|
||
;-------------------------------------------------------------------------------
|
||
;itl4 call interfacing
|
||
;
|
||
extendFetch
|
||
tst.l srcL ;did we come because of a double-byter?
|
||
bpl.s @1 ;yes.
|
||
move.w #-1,cSym ;we need a full word negative for dTab's benefit
|
||
rts ;we came because source ran out; return home
|
||
@1
|
||
move.l mapExtendOffset(Map),temp ;offset of the extended fetch routine
|
||
jmp (Map,temp.l) ;finish the fetch
|
||
|
||
StringCopy
|
||
movea.l itlResource(pBlok),dTab ;get the itl resource block
|
||
move.l (dTab),dTab ;dereference! <1/7/88med>
|
||
move.l strOffset(dTab),temp ;offset of the stringCopy
|
||
jmp (dTab,temp.l) ;finish the fetch
|
||
|
||
;-------------------------------------------------------------------------------
|
||
;Tail End
|
||
;-------------------------------------------------------------------------------
|
||
endParse
|
||
moveq.l #0,temp ;status if nothing went wrong
|
||
cleanUp
|
||
move.w temp, -(sp) ;save status
|
||
move.l toks,temp ;find out how many tokens we created
|
||
sub.l tokenList(pBlok),temp
|
||
divs #tokenRecSize,temp ;divide by size of TokenRec record
|
||
move.l temp,tokenCount(pBlok) ;save the value (instead of whales)É
|
||
;Éif something went wrong then upper word is non-zero
|
||
tst.b doString(pBlok) ;do we need to create string list?
|
||
beq.s @end1
|
||
bsr.s StringCopy
|
||
@end1
|
||
move.w (sp)+,d0 ;status report
|
||
addq.l #4,sp ;yank off the token start address
|
||
movem.l (sp)+,d2-d7/a2-a6 ;restore it
|
||
CheckA6 ; check stack
|
||
unlk a6
|
||
movea.l (sp)+,a0 ;return address
|
||
add.l #argSize,sp ;dump original parameters
|
||
move.b d0,(sp) ;status report
|
||
jmp (a0) ;go home
|
||
|
||
;-------------------------------------------------------------------------------
|
||
;IntlTokenize error branches
|
||
;-------------------------------------------------------------------------------
|
||
|
||
tokenOVER
|
||
move.w #tokenOverflow,temp
|
||
bra.s cleanUp
|
||
|
||
badDelER
|
||
move.w #badDelim,temp
|
||
bra.s cleanUp
|
||
|
||
unknownER
|
||
move.w #crash,temp
|
||
bra.s cleanUp
|
||
|
||
;-------------------------------------------------------------------------------
|
||
;Common routines for code savings <5>
|
||
;-------------------------------------------------------------------------------
|
||
|
||
NextSymRoutine ; <5>
|
||
swap cSym ; <5>
|
||
BaseSymRoutine ; <5>
|
||
movea.l srcA,lastA ;save start of symbol
|
||
move.b (srcA)+,cSym ;start 2-mappedChar pipe
|
||
subq.l #1,srcL ;pay the dues
|
||
bmi.s @999
|
||
move.b 0(Map,cSym.w),cSym ;map the character
|
||
bpl.s @998
|
||
@999
|
||
bsr extendFetch ;we've either run out of input or it's 2-byte,
|
||
@998 ; and srcL tells which
|
||
rts ; <5>
|
||
|
||
GetSymRoutine ; <5>
|
||
movea.l srcA,lastA ;save start of symbol
|
||
move.b (srcA)+,cSym ;start 2-mappedChar pipe
|
||
subq.l #1,srcL ;pay the dues
|
||
bmi.s @999
|
||
move.b 0(Map,cSym.w),cSym ;map the character
|
||
bpl.s @998
|
||
@999
|
||
bsr extendFetch ;we've either run out of input or it's 2-byte,
|
||
@998 ; and srcL tells which
|
||
swap cSym ; <5>
|
||
rts ; <5>
|
||
|
||
DoTokenRoutine
|
||
; has parameter in temp
|
||
;move first subtraction below <5>
|
||
subq.l #1,tokL ;decrement available size of token space
|
||
bmi.s toTokenOVER ;ran out of room <5>
|
||
move.w temp,(toks)+ ;token <5>
|
||
move.l holdA,(toks)+ ;adr in srcA of token start
|
||
move.l lastA,temp ;find length of token <5>
|
||
sub.l holdA,temp ; <5>
|
||
move.l temp,(toks)+ ;size of token string
|
||
clr.l (toks)+ ;space for string ptr
|
||
rts ; <5>
|
||
toTokenOVER
|
||
addq #4,sp ; strip DoTokenRoutine return addressÉ <5>
|
||
; Éor makeSymRoutine parameter.
|
||
bra tokenOVER ; go to err handling routine <5>
|
||
|
||
makeSymRoutine
|
||
; has DoToken parameter on the stack.
|
||
;GetSym part
|
||
movea.l srcA,lastA ;save start of symbol
|
||
move.b (srcA)+,cSym ;start 2-mappedChar pipe
|
||
subq.l #1,srcL ;pay the dues
|
||
bmi.s @999
|
||
move.b 0(Map,cSym.w),cSym ;map the character
|
||
bpl.s @998
|
||
@999
|
||
bsr extendFetch ;we've either run out of input or it's 2-byte,
|
||
@998 ; and srcL tells which
|
||
swap cSym ; <5>
|
||
;DoToken part
|
||
subq.l #1,tokL ;decrement available size of token space
|
||
bmi.s toTokenOVER ;ran out of room <5>
|
||
move.w (sp)+,(toks)+ ;token <5>
|
||
move.l holdA,(toks)+ ;adr in srcA of token start
|
||
move.l lastA,temp ;find length of token <5>
|
||
sub.l holdA,temp ; <5>
|
||
move.l temp,(toks)+ ;size of token string
|
||
clr.l (toks)+ ;space for string ptr
|
||
;don't return, just branch
|
||
bra main
|
||
|
||
ENDWITH
|
||
ENDF
|
||
END
|
||
|