mirror of
https://github.com/elliotnunn/sys7.1-doc-wip.git
synced 2024-11-19 06:30:59 +00:00
1340 lines
46 KiB
Plaintext
1340 lines
46 KiB
Plaintext
;
|
||
; File: ScriptMgrUtilText.a (formerly SMgrUtilText.a)
|
||
;
|
||
; Contains: Script Mgr text utilities
|
||
;
|
||
; Written by: MED Mark Davis
|
||
; DRS daan Strebe
|
||
; LDC Lee Collins
|
||
; PKE Peter Edberg
|
||
;
|
||
; Copyright: © 1987-1992 by Apple Computer, Inc., all rights reserved.
|
||
;
|
||
; This file is used in these builds: Mac32 System CubeE
|
||
;
|
||
; Change History (most recent first):
|
||
;
|
||
; <11> 11/17/92 PKE Add include of new IntlResourcesPriv.a for IntlTokenize equates.
|
||
; <10> 6/17/92 HA #1029756,<PKE>: Made sure that we stuff a dummy address in the
|
||
; ram based vector table for StyledLineBreak after removing it
|
||
; from ptch 4 and adding it to ptch 27.
|
||
; <9> 4/30/92 FM Remove obsolete conditionals:shrinkIntlTokenize, smgrUsesStdExit
|
||
; <8> 9/20/91 PKE Remove CubeE conditional, since the new plan (according to
|
||
; Darin) is that everything for Cube-E goes in without
|
||
; conditionals. While I'm at it, get rid of some ">=$604",
|
||
; ">=$605", and ">=$700" conditionals.
|
||
; <7> 9/15/91 PKE smb, #1011961: For Bruges/Cube-E, roll in from 6.1 - Fix
|
||
; trashing of d3 by GetFormatOrder.
|
||
;
|
||
; <6> 9/14/90 BG Removed <4>. 040s are now behaving more reliably.
|
||
; <5> 7/23/90 PKE Changed IntlTokenize (under control of shrinkIntlTokenize
|
||
; symbol) to use BSRs instead of macros with inline code. On
|
||
; Plus/SE/II, this saves 620 bytes but makes IntlTokenize 13%
|
||
; slower, so we may reverse this.
|
||
; <4> 7/17/90 BG Added EclipseNOPs for flakey 040s.
|
||
; <3> 4/10/90 PKE Deleted conditionalized definitions of forROM, SysVers,
|
||
; Scripts604, and TestScriptManager. Used smgrUseStdExit,
|
||
; smgrSysVers, and smgrROMVers symbols instead of buildLevel.
|
||
; <2> 1/4/90 PKE Updated conditionals to put SS-6.0.4 changes in 6.0.5 as well as
|
||
; 7.0. Updated header to BBS format.
|
||
; <1> 12/18/89 CCH Adding for the first time into BBS. Changed include 'inc.sum.a'
|
||
; to load 'StandardEqu.d'. Updated file name references.
|
||
;
|
||
; (BBS versions above, EASE versions below:)
|
||
; <2.2> 11/27/89 PKE Use bug fix in <1.8> for Scripts604 build too.
|
||
; <2.1> 8/26/89 PKE Cleaned up some conditionals, moved a comment.
|
||
; <2.0> 8/22/89 SES Removed references to nFiles.
|
||
; <1.9> 7/12/89 PKE NEEDED FOR AURORA: In StyledLineBreak, replace 'mulu #4' with
|
||
; 'lsl.w #2' for efficiency.
|
||
; <1.8> 6/27/89 PKE NEEDED FOR AURORA - Fix GetFormatOrder bug discovered by Sue:
|
||
; If firstFormat=lastFormat, it did a move.l (instead of .w) of
|
||
; firstFormat into the first word of the ordering array. This
|
||
; move was redundant anyway, so I just skipped it.
|
||
; <1.7> 6/23/89 PKE Skip definition of buildLevel (done in ScriptPriv.a).
|
||
; <1.6> 6/9/89 PKE Change StyledLineBreak to use input offset value as per
|
||
; documentation: if 0, returned offset >= 1 and char breaks
|
||
; possible; otherwise, returned offset >= 0 and char breaks not
|
||
; possible.
|
||
; <1.5> 5/30/89 PKE Fixed bug in StyledLineBreak for 2-byte characters.
|
||
; <1.4> 5/24/89 EMT Renamed alpha symbol to not conflict with development stage.
|
||
; <1.3> 2/21/89 PKE Replaced with RomProj version, which already had system and Rom
|
||
; sources merged.
|
||
; (EASE ROMproj history below:)
|
||
; <1.6> 2/21/89 PKE Fix up LOADs: always use include 'StandardEqu.d'.
|
||
; 2/21/89 pke Fix up includes: always use include 'inc.sum.a'.
|
||
; <1.5> 2/14/89 PKE Fixed bugs in LineBreak/FindCarriage that broke new TextEdit
|
||
; <1.4> 2/14/89 PKE Updated to use MPW 3.0 final ScriptEqu.a equate names
|
||
; <1.3> 2/3/89 PKE Merged with current system sources (CCH's 01/16/1989 merge of
|
||
; 6.0.3 and 7.0 sources, done in RES.sys:smgr)
|
||
; 2/2/89 pke Merged system sources and ROM sources
|
||
; <1.2> 11/14/88 PKE Synchronize EASE and Projector
|
||
; <1.1> 11/11/88 CCH Fixed Header.
|
||
; <1.0> 11/9/88 CCH Adding to EASE.
|
||
; (old EASE ROMproj history below:)
|
||
; <1.7> 10/27/88 LDC Mods by Carl Hewitt and Brian McGhie in the process of putting
|
||
; Script Manager in ROM
|
||
; (EASE SYSproj history below:)
|
||
; <1.2> 2/14/89 PKE Updated to use MPW 3.0 final ScriptEqu.a equates
|
||
; <1.1> 1/16/89 CCH Merged 6.0.3 final sources into 7.0.
|
||
; <1.0> 11/16/88 CCH Added to EASE.
|
||
; (pre-EASE history; recent changes here went into EASE ROMproj, not SYSproj:)
|
||
; 9/16/88 ldc fixed StyledLineBreak to get correct itlb values
|
||
; 6/16/88 ldc added StdUnlink
|
||
; 6/14/88 ldc added check for valid token value [0..tokenNil)
|
||
; 6/13/88 ldc removed delimMap for move to ROM
|
||
; changes above here are for ROM or buildLevel >= 2
|
||
; 2/10/88 med Changed GetFormatOrder pascal interface
|
||
; 2/10/88 med LineRight parameter is high byte from pascal
|
||
; 2/9/88 med Change pascal interface
|
||
; 1/7/88 med itlResource loaded if purged; minor changes in Tokenizer
|
||
; 1/4/88 med itlResource in Tokenizer is now handle
|
||
; 11/20/87 med Changed CheckSelector
|
||
; 11/16/87 med Minor cleanup of IntlTokenize
|
||
; 10/28/87 med Added IntlTokenize Routine
|
||
; 10/26/87 med Added StyledLineBreak algorithm, FormatOrder
|
||
;___________________________________________________________________________________________________
|
||
; To Do:
|
||
;
|
||
;___________________________________________________________________________________________________
|
||
|
||
debugging equ 0
|
||
|
||
BLANKS ON
|
||
STRING ASIS
|
||
|
||
load 'StandardEqu.d'
|
||
include 'ScriptPriv.a'
|
||
include 'IntlResourcesPriv.a' ; <11>
|
||
|
||
|
||
export FormatOrder,IntlTokenize ; smSelStyledLineBreak
|
||
|
||
;____________________________________________________________
|
||
;
|
||
; StyledLineBreak Algorithm
|
||
;
|
||
; by Mark E. Davis
|
||
;
|
||
; Modification History
|
||
;
|
||
; 7/25/87 med Finished first version of line-break
|
||
; 7/28/87 med Pretty final version for AIS, needs truncation fixing
|
||
; 7/29/87 med Added check for CR
|
||
; 7/31/87 med CR cannot be part of white-space
|
||
; 7/31/87 med Add check for old SISs with bad word-break table
|
||
;<8/2/87med> Use direct calls on script routines
|
||
;<8/2/87med> Moved CheckFont
|
||
;<8/4/87med> Use blockflags instead of sisReverse
|
||
;____________________________________________________________
|
||
|
||
;____________________________________________________________
|
||
; Routine StyledLineBreak (
|
||
; textPtr: Ptr;
|
||
; textLen: Longint; {must be < 32K ;<2/14/89 pke>}
|
||
; textStart: Longint;
|
||
; textEnd: Longint;
|
||
; flags: Longint;
|
||
; var textWidth: Fixed; {on exit, set if too long}
|
||
; var textOffset: Longint;
|
||
; ): LineCode;
|
||
;____________________________________________________________
|
||
|
||
IF 0 THEN
|
||
|
||
LineBreakFrame record {oldA6},decrement
|
||
resultSize equ 2
|
||
result ds.w 1
|
||
argSize equ *-8
|
||
textPtr ds.l 1
|
||
textLen ds.l 1
|
||
textStart ds.l 1
|
||
textEnd ds.l 1
|
||
flags ds.l 1
|
||
textWidth ds.l 1
|
||
textOffset ds.l 1
|
||
selector ds.l 1
|
||
return ds.l 1
|
||
oldA6 ds.l 1
|
||
offsets ds.l 3
|
||
leftSide ds.b 1
|
||
savedReverse ds.b 1
|
||
lineDirection ds.w 1
|
||
oldBlockFlags ds.w 1 ; <8/4/87med>
|
||
localFrame equ *
|
||
endR
|
||
|
||
FixMinusOne equ $FFFF0000
|
||
|
||
LineBreakRegs reg a2-a3/d4-d6
|
||
;____________________________________________________________
|
||
|
||
IF 0 THEN
|
||
; StyledLineBreak Equates
|
||
|
||
smcTypeMask equ $000F
|
||
smcReserved equ $00F0
|
||
smcClassMask equ $0F00
|
||
smcReserved12 equ $1000
|
||
smcRightMask equ $2000
|
||
smcUpperMask equ $4000
|
||
smcDoubleMask equ $8000
|
||
|
||
; CharType character types.
|
||
|
||
smCharPunct equ 0
|
||
smCharAscii equ 1
|
||
smCharEuro equ 7
|
||
|
||
; CharType punctuation types.
|
||
|
||
smPunctNormal equ $0000
|
||
smPunctNumber equ $0100
|
||
smPunctSymbol equ $0200
|
||
smPunctBlank equ $0300 ; NBSP is PunctNormal
|
||
ENDIF
|
||
;____________________________________________________________
|
||
|
||
StyledLineBreak proc
|
||
|
||
import FindCarriage
|
||
import StdUnlink
|
||
|
||
; setup
|
||
with LineBreakFrame,smgrRecord,scriptRecord, ItlbRecord
|
||
CheckSelector
|
||
link a6,#localFrame
|
||
movem.l LineBreakRegs,-(sp)
|
||
|
||
; common args
|
||
move.l textWidth(a6),a3 ; get @width
|
||
move.l textPtr(a6),d5 ; pointer
|
||
add.l textStart(a6),d5 ; pointer+start
|
||
|
||
; save environment and reset
|
||
;!!! generalize for left-right
|
||
move.w teSysJust,lineDirection(a6) ; save just
|
||
move.w #0,teSysJust ; set to LR
|
||
|
||
; get current script
|
||
move.w #0,-(sp) ; allocate _FontScript return
|
||
_FontScript ; get current script
|
||
move.w (sp)+,d0 ; current script
|
||
|
||
; get core globals and test: inline instead of trap, for speed
|
||
|
||
lsl.w #2,d0 ; make script code a longword offset <1.9>
|
||
GetSmgrCore a2 ; smgr core globals
|
||
move.l smgrRecord.smgrEntry(a2,d0),a2 ; core location
|
||
|
||
;<9/16/88ldc> changed following two lines to access correct itlb values <06/09/89 pke><3>
|
||
move.b ScriptRecord.scriptBundle.itlbFlags(a2),oldBlockFlags(a6) ; <9/16/88ldc>
|
||
bclr.b #smsfReverse,ScriptRecord.scriptBundle.itlbFlags(a2) ; set left-right <8/4/87med> ; <9/16/88ldc>
|
||
|
||
; quick check on length
|
||
move.l textEnd(a6),d4 ; get length
|
||
sub.l textStart(a6),d4 ; segment length
|
||
ble @TooLong ; if zero, bail
|
||
tst.l (a3) ; no width (degenerate)
|
||
; changed this so we handle blanks at start of 0-width line. <06/09/89 pke><3>
|
||
bmi @FixNoChars ; fix neg width
|
||
|
||
; get offset
|
||
;!!! fix long length later
|
||
move.w #0,-(sp) ; return space
|
||
move.l d5,-(sp) ; pointer
|
||
move.w d4,-(sp) ; length (mod)
|
||
move.w #0,-(sp) ; slop (truncate)
|
||
move.w (a3),-(sp) ; width (truncate)
|
||
pea leftSide(a6) ; @leftSide
|
||
_Pixel2Char ; use trap
|
||
move.l #0,d6 ; longize
|
||
move.w (sp)+,d6 ; get offset
|
||
|
||
; if width is zero, fix leftSide for FindWord etc. (change for 604) <06/09/89 pke><3>
|
||
tst.l (a3) ; zero width?
|
||
bne.s @doneFixLeftSide ;
|
||
st leftSide(a6) ; if so, fix leftSide
|
||
@doneFixLeftSide
|
||
|
||
; Check for CR <7/29/87med>
|
||
|
||
move.l d5,a0 ; text ptr
|
||
move.w d6,d0 ; copy of non-offsetted length
|
||
bsr FindCarriage ; a0,d0.w: return d0.b=t/f, length in d1.w
|
||
add.w textStart+2(a6),d1 ; need offset from textPtr <added 2/14/89 pke>
|
||
tst.b d0 ; found?
|
||
bne @ReturnWord ; yes, return
|
||
|
||
; check for position being too long
|
||
|
||
move.l d6,d0 ; copy un-offsetted length
|
||
add.l textStart(a6),d6 ; get char offset
|
||
|
||
; if offset > length, return width of text
|
||
|
||
cmp.l d4,d0 ; position at end?
|
||
blt.s @GotCharPos ; no, skip
|
||
tst.b leftSide(a6) ; leftSide?
|
||
bne.s @TooLong ; yes, too long
|
||
@GotCharPos
|
||
|
||
; get word around char offset
|
||
|
||
move.l textPtr(a6),-(sp) ; ptr
|
||
move.w textLen+2(a6),-(sp) ; length (mod)
|
||
move.w d6,-(sp) ; offset (truncate)
|
||
move.b leftSide(a6),-(sp) ; leftSide
|
||
move.l MinusOne,-(sp) ; line break table
|
||
pea offsets(a6) ; offset table
|
||
_FindWord ; use trap
|
||
|
||
; test character to see if it is white-space
|
||
; Mark suggests checking to see if VisibleLength would work better here… <06/09/89 pke>
|
||
|
||
move.w #0,-(sp) ; allocate return
|
||
move.l textPtr(a6),-(sp) ; ptr
|
||
move.w offsets(a6),-(sp) ; offset
|
||
_CharType ; use trap
|
||
move.w (sp)+,d0 ; get type
|
||
move.l #0,d1 ; longize
|
||
move.w offsets(a6),d1 ; assume second offset ***
|
||
and.w #smcTypeMask+smcClassMask,d0 ; type and class
|
||
cmp.w #smCharPunct+smPunctBlank,d0 ; blank?
|
||
bne.s @ReturnOffset ; no, continue
|
||
btst.b #smsfReverse,oldBlockFlags(a6) ; test reverse block flag <8/4/87med>
|
||
sne d0 ; 0/-1 left/right <8/4/87med>
|
||
cmp.b LineDirection+1(a6),d0 ; line = char? (bottom byte of line) <8/4/87med>
|
||
bne.s @ReturnOffset ; yes, skip to end
|
||
|
||
; here, word is whitespace and direction is ok (script direction = line direction),
|
||
; so return second offset.
|
||
|
||
; add check for CR for old SISs with bad word-break table. <7/31/87med>
|
||
; All new SISs should be fixed, so we should be able to skip the FindCarriage
|
||
; stuff here and just set d1 to offsets+2(a6). <06/09/89 pke>
|
||
|
||
move.l textPtr(a6),a0 ; get ptr
|
||
|
||
moveq #0,d6 ; make long <2/14/89 pke><3>
|
||
move.w offsets(a6),d6 ; get first offset
|
||
add.l d6,a0 ; now at first offset <2/14/89 pke><3>
|
||
|
||
move.w offsets+2(a6),d0 ; take second offset
|
||
sub.w d6,d0 ; now length
|
||
bsr FindCarriage ; find in d1
|
||
add.w d6,d1 ; d1 either after cr or at end
|
||
bra.s @ReturnWord ; return
|
||
|
||
@TooLong
|
||
; return width
|
||
move.w #0,-(sp) ; return space
|
||
move.l d5,-(sp) ; pointer
|
||
move.w #0,-(sp) ; firstByte
|
||
move.w d4,-(sp) ; length
|
||
_TextWidth
|
||
move.w (sp)+,d0 ; pop width
|
||
swap d0 ; swap for long
|
||
clr.w d0 ; longize
|
||
; move.l (a5),a0 ; graf globals
|
||
; move.l fontAdj(a0),d0 ; get fixed width
|
||
sub.l d0,(a3) ; final width
|
||
|
||
move.l textEnd(a6),d6 ; set return
|
||
move.l #smBreakOverflow,d0 ; set type
|
||
bra.s @Done ; return
|
||
|
||
@ReturnOffset
|
||
tst.l d1 ; break at start?
|
||
bne.s @ReturnWord ; yes, return char
|
||
|
||
move.l textOffset(a6),a0 ; get @textOffset <06/09/89 pke><3>
|
||
tst.l (a0) ; was offset 0 at entry? <06/09/89 pke><3>
|
||
beq.s @ReturnWord ; if so, go return zero offset <06/09/89 pke><3>
|
||
|
||
; have to break word, so backup until length is less.
|
||
|
||
; The most we have to backup is one char, and we only have to do that if <06/09/89 pke><3>
|
||
; leftSide is false <06/09/89 pke>. We also need to catch the case of d6=0.
|
||
|
||
tst.b leftSide(a6) ; leftSide?
|
||
bne.s @checkNullOffset ; if T, skip decrement
|
||
sub.l #1,d6 ; decrement
|
||
clr.w -(sp) ; allocate return
|
||
move.l d5,-(sp) ; pass text ptr
|
||
move.l d6,d0 ; cur position
|
||
sub.l textStart(a6),d0 ; start of real text
|
||
move.w d0,-(sp) ; count (mod!!!)
|
||
_CharByte
|
||
tst.w (sp)+ ; return 0=single,-1=first,1=last
|
||
ble.s @checkNullOffset ; bail if -1 or 0, ok
|
||
sub.l #1,d6 ; fix for 2-byte chars
|
||
@checkNullOffset:
|
||
tst.l d6 ; are we still >0?
|
||
bgt.s @ReturnChar ; if so, go return a char
|
||
bra.s @FixNoChars1 ; don't redo test for orig offset 0
|
||
|
||
@FixNoChars
|
||
move.l textOffset(a6),a0 ; get @textOffset <06/09/89 pke><4>
|
||
tst.l (a0) ; was offset 0 at entry? <06/09/89 pke><4>
|
||
bne.s @FixNoChars1 ; if not, return 1 char <06/09/89 pke><4>
|
||
moveq #0,d1 ; if so, set zero offset <06/09/89 pke><4>
|
||
bra.s @ReturnWord ; and go return word break <06/09/89 pke><4>
|
||
@FixNoChars1
|
||
|
||
; string is now zero length, make it at least 1 char
|
||
|
||
move.l #1,d6 ; include first char
|
||
|
||
clr.w -(sp) ; allocate return <05/30/89 pke><3>
|
||
|
||
move.l d5,-(sp) ; pass text ptr
|
||
move.w #1,-(sp) ; offset
|
||
_CharByte
|
||
|
||
tst.w (sp)+ ; return 0=single,-1=first,1=last <05/30/89 pke><3>
|
||
|
||
ble.s @ReturnChar ; bail if -1 or 0, ok
|
||
add.l #1,d6 ; fix for 2-byte chars
|
||
|
||
@ReturnChar
|
||
move.l #smBreakChar,d0 ; so, word too long.
|
||
bra.s @Done ; exit
|
||
|
||
@ReturnWord
|
||
moveq #0,d6 ; make long <2/14/89 pke><3>
|
||
move.w d1,d6 ; set to word bound <2/14/89 pke><3>
|
||
|
||
move.l #smBreakWord,d0 ; break at word
|
||
|
||
@Done
|
||
move.l textOffset(a6),a0 ; get @textOffset
|
||
move.l d6,(a0) ; set to char position
|
||
move.b d0,result(a6) ; set result type
|
||
|
||
; restore environment
|
||
; <9/16/88ldc> fixed following line
|
||
|
||
move.b oldBlockFlags(a6),ScriptRecord.scriptBundle.itlbFlags(a2) ; <9/16/88ldc> use for 6.0.4 too <06/09/89 pke><3>
|
||
|
||
move.w lineDirection(a6),teSysJust ; restore just
|
||
|
||
; cleanup
|
||
movem.l (sp)+,LineBreakRegs
|
||
CheckA6
|
||
|
||
move #argSize, d0 ; for StdUnlink <6/16/88ldc>
|
||
bra StdUnlink ; standard exit <6/16/88ldc>
|
||
endWith
|
||
endProc
|
||
|
||
ENDIF
|
||
|
||
|
||
;____________________________________________________________
|
||
; PROCEDURE FormatOrder (
|
||
; ordering : FormatOrderPtr;
|
||
; firstFormat : Integer;
|
||
; lastFormat : Integer;
|
||
; lineRight : Boolean;
|
||
; FUNCTION DirProc (theFormat : Integer; dirParam : Ptr) : Boolean;
|
||
; dirParam : Ptr
|
||
; );
|
||
;____________________________________________________________
|
||
|
||
FormatOrderFrame record {oldA6},decrement
|
||
resultSize equ 0
|
||
argSize equ *-8
|
||
ordering ds.l 1
|
||
firstFormat ds.w 1
|
||
lastFormat ds.w 1
|
||
filler ds.b 1
|
||
lineRight ds.b 1 ; high byte from pascal <2/10/88med>
|
||
DirProc ds.l 1
|
||
;;filler2 ds.l 1 ; why does pascal want this? Changed pascal interface <2/9/88med>
|
||
dirParam ds.l 1
|
||
selector ds.l 1
|
||
return ds.l 1
|
||
oldA6 ds.l 1
|
||
|
||
localFrame equ *
|
||
endR
|
||
|
||
FormatOrderRegs reg a2-a3/d3-d6 ; <7>
|
||
;____________________________________________________________
|
||
|
||
FormatOrder proc
|
||
|
||
import StdUnlink
|
||
|
||
with FormatOrderFrame
|
||
CheckSelector
|
||
|
||
link a6,#localFrame
|
||
movem.l FormatOrderRegs,-(sp)
|
||
|
||
; consistency check
|
||
|
||
move.l ordering(a6),a2 ; get ordering
|
||
move.w firstFormat(a6),d3 ; get start
|
||
move.w lastFormat(a6),d4 ; get end
|
||
cmp.w d3,d4 ; same?
|
||
|
||
ble.s @Exit ; lastFormat ≤ firstFormat, leave array alone. <1.8><2.2><3>
|
||
|
||
@FillArray
|
||
; {fill the array with initial values, going in lineDirection order}
|
||
|
||
move.l DirProc(a6),a3 ; get dirProc
|
||
move.l #2,d5 ; set pointer increment
|
||
move.b lineRight(a6),d6 ; right-left?
|
||
beq.s @InvertLoop ; no, continue
|
||
move.w d4,d0 ; get last element
|
||
sub.w d3,d0 ; last-first
|
||
add.w d0,d0 ; for word array
|
||
add.w d0,a2 ; last element
|
||
neg.w d5 ; invert pointer increment
|
||
|
||
@InvertLoop
|
||
clr.b -(sp) ; allocate return
|
||
move.w d3,-(sp) ; pass i
|
||
move.l dirParam(a6),-(sp) ; pass dirParam
|
||
jsr (a3) ; call it
|
||
move.w d3,d1 ; set temp
|
||
cmp.b (sp)+,d6 ; get result: same as lineRight?
|
||
beq.s @1 ; yes, skip
|
||
not.w d1 ; invert
|
||
@1
|
||
move.w d1,(a2) ; set value
|
||
add.w d5,a2 ; increment array pointer
|
||
add.w #1,d3 ; i := i+1
|
||
cmp.w d3,d4 ; same?
|
||
bge.s @InvertLoop ; no, keep looping
|
||
|
||
; {walk through the array, reversing the odd direction clusters (inverted)}
|
||
@WalkArray
|
||
sub.w firstFormat(a6),d4 ; now # elements + 1
|
||
move.l #-1,d5 ; mark as no first backwards element
|
||
clr.w d3 ; i := 0
|
||
move.l ordering(a6),a2 ; get ordering
|
||
move.l a2,a3 ; for increment
|
||
@RevLoop
|
||
tst.w (a3)+ ; get value
|
||
bge.s @BadOrder ; correct order
|
||
tst.w d5 ; have first backwards element?
|
||
bge.s @RevContinue ; yes, continue
|
||
move.w d3,d5 ; mark first backwards element
|
||
bra.s @RevContinue ; continue
|
||
@BadOrder
|
||
tst.w d5 ; have first backwards element?
|
||
blt.s @RevContinue ; no, continue
|
||
bsr.s ReverseCluster ; ReverseCluster(j, i - 1)
|
||
move.l #-1,d5 ; mark no first backwards element.
|
||
@RevContinue
|
||
add.w #1,d3 ; inc
|
||
cmp.w d3,d4 ; at end?
|
||
bge.s @RevLoop ; no, continue
|
||
|
||
tst.w d5 ; got one?
|
||
blt.s @Exit ; no, continue
|
||
bsr.s ReverseCluster ; ReverseCluster(j, limit);
|
||
|
||
@Exit
|
||
; cleanup
|
||
movem.l (sp)+,FormatOrderRegs
|
||
CheckA6
|
||
|
||
move #argSize, d0 ; for StdUnlink <6/16/88ldc>
|
||
bra StdUnlink ; standard exit <6/16/88ldc>
|
||
|
||
|
||
;____________________________________________________________
|
||
; PROCEDURE ReverseCluster (first, last : Integer);
|
||
; e.g. d5=3, d3 = 6 should reverse 3,4,5
|
||
;____________________________________________________________
|
||
ReverseCluster
|
||
|
||
move.w d3,d2 ; get last + 1
|
||
sub.w d5,d2 ; diff = first - last
|
||
sub.w #1,d2 ; sum -1
|
||
asr.w #1,d2 ; middle = (sum-1)/2 (6,3 =>
|
||
move.w d5,d0 ; get j
|
||
add.w d0,d0 ; double for word address
|
||
lea 0(a2,d0.w),a0 ; end address
|
||
move.w d3,d0 ; get i
|
||
add.w d0,d0 ; double for word address
|
||
lea 0(a2,d0.w),a1 ; end address (after end of element)
|
||
; e.g. for 3,4 items, dbra = 1 (two iterations);
|
||
|
||
@RevClusLoop ; reverse outer elements
|
||
move.w (a0),d0 ; temp = ordering^[i]
|
||
not.w d0 ; restore it
|
||
move.w -(a1),d1 ; temp2 = ordering^[j]
|
||
not.w d1 ; restore it
|
||
move.w d0,(a1) ; mix
|
||
move.w d1,(a0)+ ; ditto
|
||
dbra d2,@RevClusLoop ; til done
|
||
|
||
; exit
|
||
rts
|
||
endProc
|
||
;____________________________________________________________
|
||
; Routine IntlTokenize
|
||
;____________________________________________________________
|
||
|
||
;The tokenizer's job is to lexically analyze a block of characters
|
||
;(irrespective of script) and return a block of standardized tokens. The
|
||
;character block is interpreted as an expression from any normal computer
|
||
;language.
|
||
;The tokenizer lexes the following atoms:
|
||
; Unknown symbol (a character outside the tokenizer's vocabulary)
|
||
; White space (consecutive white space characters generate one token)
|
||
; Alphabetic (sequence of textual characters (and, optionally, digits))
|
||
; Numeric (sequence of digits)
|
||
; Alternate numeric (digits from foreign scripts)
|
||
; Real numeric (digits, decimal, [digits])
|
||
; Newline
|
||
; Comment (delimiters chosen by application)
|
||
; Literal (with delimiters chosen by the application)
|
||
; Escape character (disable special meaning of following character)
|
||
; Symbols (non-alphanumeric special symbols)
|
||
; End of Parse (source string ran out)
|
||
;--------------------------------------------------------------------------
|
||
;The following symbols are unique tokens:
|
||
; ( ) [ ] { } « » + - * ÷ ± / \ > <
|
||
; = <= ≤ >= ≥ == := ≠ <> != ! ~ , . “ ” ‘
|
||
; ’ " ' ; % ^ _ & @ | ? π √ ∑ ∫ µ ∏
|
||
; ∞ : # $
|
||
;--------------------------------------------------------------------------
|
||
;The tokenizer is a function:
|
||
; FUNCTION IntlTokenize( tokenParam: tokenBlock ): tokenResult;
|
||
;where
|
||
; tokenResult = (tokenOK, tokenOverflow, stringOverflow, badDelim, unknown);
|
||
; tokenBlock = ^paramBlock;
|
||
; paramBlock = RECORD
|
||
; source: Ptr; (*pointer to stream of characters*)
|
||
; sourceLength: LongInt; (*length of source stream*)
|
||
; tokenList: Ptr; (*pointer to array of tokens*)
|
||
; tokenLength: LongInt; (*maximum length of TokenList*)
|
||
; tokenCount: LongInt; (*number of tokens generated by tokenizer*)
|
||
; stringList: Ptr; (*pointer to stream of identifiers*)
|
||
; stringLength: LongInt; (*length of string list*)
|
||
; stringCount: LongInt; (*number of bytes currently used*)
|
||
; doString: Boolean; (*make strings & put into StringLIst*)
|
||
; doAppend: Boolean; (*append to TokenList rather than replace*)
|
||
; doAlphanumeric: Boolean;(*identifiers may include numeric*)
|
||
; leftDelims, rightDelims: ARRAY[0..1] OF tokenType;
|
||
; leftComment, rightComment: ARRAY[0..3] OF tokenType;
|
||
; escapeCode: tokenType;
|
||
; decimalCode: tokenType; (*what symbol to use for decimal mark*)
|
||
; END;
|
||
; TokenRec = RECORD
|
||
; theToken: tokenType;
|
||
; position: Ptr; (*pointer into original Source*)
|
||
; length: LongInt; (*length of text in original source*)
|
||
; stringPosition: Ptr; (*Pascal/C string copy of identifier*)
|
||
; END;
|
||
; tokenType : Integer; (*defined below*)
|
||
; (* The only value in tokenBlock directly modified by the tokenizer is
|
||
; the tokenCount field. The tokenizer modifies the memory pointed to
|
||
; by tokenList and stringList. stringList is a sequence of null-
|
||
; terminated, even-boundaried Pascal strings that is generated if
|
||
; doString is true but ignored otherwise; likewise, the
|
||
; stringPosition fields of the TokenRec records are ignored if doString
|
||
; is false. *)
|
||
; (* The Left and right delimiters are encoded as tokenType; two may be
|
||
; specified. If only one is needed, the second must be 127.
|
||
; These delimit literal strings. The position of a delimiter in the
|
||
; rightDelims array must match the position of its counterpart in
|
||
; the leftDelims array. As an example, the pairs “” and ‘’ are [47,49]
|
||
; and [48,50] respectively, where the codes are given below. *)
|
||
; (* The leftComment and rightComment are also encoded as tokenType, but
|
||
; unlike delimiters, two tokens must be specified per comment symbol
|
||
; (postions 0 and 1 in the array are one comment symbol).
|
||
; If a single token only is needed then the second must be -1. One
|
||
; or two comment symbols are available; if only one is used then the
|
||
; second comment's tokens must both be 127. The pairs (* *) are
|
||
; [16, 26, 127, 127], [26, 17, 127, 127] *)
|
||
; (* The escape character is significant only when followed by a right
|
||
; delimiter within a literal. It nullifies the special meaning of the
|
||
; delimiter. If there is no escape character, use 127 *)
|
||
; (* The decimal symbol is has special meaning only when flanked or
|
||
; preceded by numeric or AltNumeric tokens. If RealNumber parsing is to
|
||
; be disabled, decimalCode should be 127 *)
|
||
;--------------------------------------------------------------------------
|
||
;Ordinal values of tokenType are:
|
||
;-1 EndParse
|
||
;0 Unknown 1 WhiteSpace 2 LeftLitDelim 3 RightLitDelim
|
||
;4 Alpha 5 Numeric 6 NewLine 7 LeftCommentDelim
|
||
;8 RightCommentDelim 9 literal 10 Escape
|
||
;11 Alternate numeric 12 RealNumber 13 AltRealNumber
|
||
;16 ( 17 ) 18 [ 19 ] 20 { 21 } 22 « 23 » 24 +
|
||
;25 - 26 * 27 ÷ 28 ± 29 / 30 \ 31 < 32 > 33 =
|
||
;34 <= 35 ≤ 36 >= 37 ≥ 38 == 39 := 40 ≠ 41 <> 42 !=
|
||
;43 ! 44 ~ 45 , 46 . 47 “ 48 ” 49 ‘ 50 ’ 51 "
|
||
;52 ' 53 ; 54 % 55 ^ 56 _ 57 & 58 @ 59 | 60 ?
|
||
;61 π 62 √ 63 ∑ 64 ∫ 65 µ 66 ∏ 67 ∞ 68 : 69 #
|
||
;70 $
|
||
;71 non-breaking space
|
||
;72 fraction
|
||
;73 intlCurrency
|
||
;74 leftSingGuillemet
|
||
;75 rightSingGuillemet
|
||
;76 perThousand
|
||
;77 ellipsis
|
||
;78 centerDot
|
||
;127 <nil>
|
||
;--------------------------------------------------------------------------
|
||
;--------------------------------------------------------------------------
|
||
;---------------------------------------------------------------------------
|
||
; IMPLEMENTATION NOTES
|
||
;
|
||
; This header file should be included in any version of tokenizer that is
|
||
;created. In the local version's file, this header file should be "included"
|
||
;as the first significant text. The local version must then implement a
|
||
;BaseSym macro after the include. The characteristics of the macro are:
|
||
; a) the first line is movea.l srcA,lastA
|
||
; b) the number of bytes in the character must be subtracted from srcL
|
||
; c) the symbol class of the character must be placed in the low word
|
||
; of csym. The upper word must remain undisturbed.
|
||
; d) after BaseSym, srcA must point to the byte following the last byte
|
||
; of the character just retrieved.
|
||
; e) if the available bytes run out before the character does, BaseSym
|
||
; must clean up its own portion of the stack (if any), place the
|
||
; constant #unexpectedEndER into temp, and branch to cleanUp.
|
||
; f) no registers except temp and the low word of csym may be modified.
|
||
;
|
||
; Following the BaseSym macro, the tokenBase.a file must be included. This
|
||
;file contains all state transition information.
|
||
; Finally, StringCopy must be implemented by the local version. String-
|
||
;copy's characteristics are:
|
||
; a) it is a subroutine.
|
||
; b) it may freely use any registers it pleases.
|
||
; c) it creates even-boundaried, null-terminated Pascal strings corres-
|
||
; ponding to those that each token's record points to.
|
||
; d) assuming that the return address from StringCopy is at the top of the
|
||
; stack, then the next word down is the current error condition. The
|
||
; following longword is the address of the effective token list, which
|
||
; is different from the pBlok tokenList address when doAppend is true.
|
||
; In other words, StringCopy must use the stack value rather than the
|
||
; parameter block value to determine the start of the token list.
|
||
; e) it must transliterate altNum and altReal tokens into ascii real
|
||
; strings and put the transliterated numbers, rather than the original
|
||
; numbers, into the Pascal strings. The same goes for roman (and only
|
||
; roman) letters. Additionally, the decimal character should be changed
|
||
; to a canonical '.'. All other text and symbols are to be copied
|
||
; verbatim.
|
||
; f) if doAppend is true, then it must compute the effective address of
|
||
; stringList by adding stringCount to it.
|
||
; g) it must compute the total amount of space used by strings by sub-
|
||
; tracting stringList(pBlok) from the next legal string address
|
||
; following the last null character of the last Pascal string, and
|
||
; place that computed value into stringCount(pBlok).
|
||
; h) it must detect overflow of the string space. If an overflow occurs,
|
||
; it must place #stringOverflow in the error condition space on the stack
|
||
; and return immediately.
|
||
; i) it must reverse the order of digits for the zany countries that use
|
||
; reversed digit sequences -- Arabic and Hebrew. In real numbers, the
|
||
; decimal point should be considered part of the string of digits and
|
||
; reversed right along with them.
|
||
; The local version contains its own mapping tables to decide the class
|
||
;of a character.
|
||
;---------------------------------------------------------------------------
|
||
;---------------------------------------------------------------------------
|
||
|
||
TokenFrame record {oldA6},decrement
|
||
resultSize equ 2
|
||
result ds.w 1
|
||
argSize equ *-8
|
||
paramSpace ds.l 1
|
||
selector ds.l 1
|
||
return ds.l 1
|
||
oldA6 ds.l 1
|
||
localFrame EQU *
|
||
ENDR
|
||
|
||
;---
|
||
; put these here for reference; defined in IntlResourcesPriv.a
|
||
IF 0 THEN
|
||
srcA EQU a0
|
||
srcL EQU d0
|
||
toks EQU a1
|
||
tokL EQU d1
|
||
holdA EQU a2
|
||
cSym EQU d2
|
||
lastA EQU a3
|
||
ld EQU d3
|
||
map EQU a4
|
||
lc1 EQU d4
|
||
pBlok EQU a5
|
||
lc2 EQU d5
|
||
temp EQU d6
|
||
dTab EQU a6
|
||
temp1 EQU d7
|
||
|
||
rd EQU ld
|
||
escRD EQU lc1
|
||
rc EQU ld
|
||
ENDIF
|
||
;---
|
||
|
||
EXPORT IntlTokenize
|
||
|
||
|
||
IntlTokenize func
|
||
WITH TokenRec,tokenBlock,TokenFrame,TokenResults,itl4Rec
|
||
CheckSelector
|
||
|
||
MACRO
|
||
BaseSym
|
||
bsr BaseSymRoutine ; <5>
|
||
ENDM
|
||
|
||
MACRO ;get before swap
|
||
GetSym
|
||
bsr GetSymRoutine ; <5>
|
||
ENDM
|
||
|
||
MACRO ;swap before get
|
||
NextSym
|
||
bsr NextSymRoutine ; <5>
|
||
ENDM
|
||
|
||
MACRO
|
||
DoToken &TokenRec
|
||
move.w &TokenRec,temp ; <5>
|
||
bsr DoTokenRoutine ; <5>
|
||
ENDM
|
||
|
||
MACRO
|
||
makeSym &symNum
|
||
move.w &symNum,-(sp) ; <5>
|
||
bra makeSymRoutine ; <5>
|
||
ENDM
|
||
|
||
|
||
MACRO
|
||
setDel ® ;set the bit position in the delimMap
|
||
|
||
;<6/14/88ldc> move byte, not word <3>
|
||
clr.w temp
|
||
move.b ®,temp ;compute delimiter map position
|
||
|
||
;<6/14/88ldc> skip -1, -2 <3>
|
||
bmi.s @999 ;skip if temp < 0
|
||
asr.w #3,temp ;offset address
|
||
bset ®,(dTab,temp) ;set that puppy
|
||
@999
|
||
ENDM
|
||
|
||
entry
|
||
link a6,#0
|
||
movem.l d2-d7/a2-a6,-(sp) ;just save it all
|
||
movea.l TokenFrame.paramSpace(a6),pBlok ;parameter block address
|
||
movea.l itlResource(pBlok),Map ;get the itl resource block <1/7/88med>
|
||
move.l (Map),temp
|
||
bne.s @27
|
||
move.l Map,-(a7)
|
||
_LoadResource
|
||
move.l (Map),temp
|
||
@27
|
||
move.l temp,Map
|
||
movea.l source(pBlok),srcA
|
||
move.l sourceLength(pBlok),srcL
|
||
movea.l tokenList(pBlok),toks
|
||
tst.b doAppend(pBlok) ;is this an append or replace operation?
|
||
beq.s @1
|
||
move.l tokenCount(pBlok),temp ;compute end of list
|
||
mulu #tokenRecSize,temp ;that's how much is in there now
|
||
adda.l temp, toks ;viola
|
||
@1
|
||
move.l toks,-(sp) ;we need to save this for StringCopy
|
||
move.l tokenLength(pBlok),tokL
|
||
move.l leftDelims(pBlok),ld
|
||
move.l leftComment(pBlok),lc1
|
||
move.l leftComment+4(pBlok),lc2
|
||
move.l MapOffset(Map),temp ;itl character map table
|
||
lea (Map,temp),Map
|
||
lea cleanUp,dTab
|
||
move.l dTab,mapEmergOffset(Map) ;emergency exit address for itl extendFetch
|
||
|
||
GetSMgrCore dTab ; <4/27/88ldc><3>
|
||
adda #smgrRecord.delimMap, dTab ; <4/27/88ldc><3>
|
||
|
||
lea -2(dTab),holdA ; holdA free for now!!! <1/7/88med>
|
||
clr.w (holdA)+ ;clear delimiter map positions
|
||
clr.l (holdA)+
|
||
clr.l (holdA)+
|
||
clr.l (holdA)+
|
||
|
||
clr.l (holdA)+ ; need to add one more word <6/14/88ldc><3>
|
||
|
||
movea.l srcA,holdA
|
||
|
||
; start wild and wacky macro calls (expression borrowed from JT).
|
||
|
||
setDel ld ;set up delimiter map
|
||
swap ld
|
||
setDel ld
|
||
swap ld
|
||
setDel lc1
|
||
setDel lc2
|
||
clr.l cSym ;clear 2nd byte of each char attr.
|
||
GetSym ;start 2-symbol pipe
|
||
bra.s main ;hop over to start
|
||
|
||
cont
|
||
DoToken cSym
|
||
main
|
||
movea.l lastA,holdA ;retrieve address of beginning of token
|
||
GetSym ;get NextSym
|
||
move.w cSym,temp ;compute delimMap address
|
||
asr.w #3,temp ;byte offset
|
||
btst cSym,(dtab,temp.w) ;is it a possible delimiter?
|
||
|
||
bne delimRun ; <05/30/89 pke><3>
|
||
|
||
notDelim ;just as fast on the average…
|
||
cmpi.w #4,cSym ;…as a jump table
|
||
beq alphabetic
|
||
cmpi.w #1,cSym
|
||
beq.s whiteSpace
|
||
cmpi.w #16,cSym
|
||
bge TokSymbol
|
||
cmpi.w #5,cSym
|
||
beq.s TokNumeric
|
||
cmpi.w #6,cSym ;check newline
|
||
beq.s cont
|
||
cmp.w #11,cSym ;alternate numeral
|
||
beq altnum
|
||
tst.w cSym ;check unknown symbol
|
||
beq.s cont
|
||
bmi endparse
|
||
bra unknownER ;shouldn't be able to get here
|
||
|
||
;-------------------------------------------------------------------------------
|
||
whitespace
|
||
swap cSym ;need to check the nextSym
|
||
@1
|
||
cmpi.w #1,cSym ;check for recurring whitespace
|
||
bne.s @3 ;no more whitespace
|
||
NextSym ;get NextSym
|
||
bra.s @1
|
||
@3
|
||
swap cSym ;get cSym back into sync
|
||
bra cont
|
||
|
||
;-------------------------------------------------------------------------------
|
||
TokNumeric
|
||
swap cSym ;check NextSym for numeric
|
||
moveq.l #5,temp1 ;numeric code
|
||
@nloop
|
||
cmp.w temp1,cSym
|
||
bne.s @done ;not numeric; bail out
|
||
NextSym
|
||
bra.s @nloop ;cycle again
|
||
@done
|
||
cmp.w decimalCode(pBlok),cSym ;could be real number
|
||
beq realNumber
|
||
swap cSym ;repair cSym order
|
||
bra cont
|
||
|
||
;-------------------------------------------------------------------------------
|
||
alphabetic
|
||
swap cSym ;check NextSym
|
||
moveq.l #4,temp1 ;alphabetic code
|
||
tst.b doAlphanumeric(pBlok) ;are numerics legal?
|
||
beq.s @aloopA ;no
|
||
@aloopN
|
||
cmp.w temp1,cSym ;is it alpha?
|
||
beq.s @1 ;no, but it might be numeric
|
||
cmpi.w #5,cSym ;is it numeric?
|
||
beq.s @1 ;no, but could be alternate numeric
|
||
cmpi.w #11,cSym
|
||
bne.s @done
|
||
@1 NextSym
|
||
bra.s @aloopN ;and re-enter loop
|
||
@aloopA
|
||
cmp.w temp1,cSym ;is it alpha?
|
||
bne.s @done ;nope so we're done
|
||
NextSym
|
||
bra.s @aloopA
|
||
@done
|
||
swap cSym ;repair cSym order
|
||
DoToken #4 ;create alpha token
|
||
bra main
|
||
|
||
;-------------------------------------------------------------------------------
|
||
leftLit1
|
||
move.l rightDelims(pBlok),rd ;get matching delimiter
|
||
swap rd ;match was in upper word
|
||
bra.s litMain
|
||
leftLit
|
||
move.l rightDelims(pBlok),rd ;get matching delimiter
|
||
litMain
|
||
move.w escapeCode(pBlok),escRD ;need escape code
|
||
DoToken #2 ;create leftDelim token
|
||
movea.l lastA,holdA ;save beginning of literal string
|
||
swap cSym ;look ahead
|
||
@litLoop
|
||
cmp.w cSym,rd ;see if we found an end delimiter
|
||
beq @endlit ;sure 'nuff
|
||
cmp.w cSym,escRD ;see if it's an escape
|
||
bne.s @1 ;nope, continue
|
||
DoToken #9 ;portion of literal before escape
|
||
movea.l lastA,holdA ;beginning of escape code
|
||
BaseSym
|
||
bmi badDelER ;don't stop now
|
||
DoToken #10 ;escape code
|
||
movea.l lastA,holdA ;beginning of symbol following escape
|
||
NextSym
|
||
swap cSym
|
||
DoToken cSym ;symbol following escape
|
||
movea.l lastA,holdA ;beginning of rest of literal
|
||
swap cSym
|
||
bra.s @litloop ;continue
|
||
@1 tst.w cSym ;make sure we've not exhausted the source
|
||
bmi badDelER ;hanging delimiter
|
||
NextSym ;get NextSym
|
||
bra @litLoop ;continue
|
||
@endLit
|
||
DoToken #9 ;create literal token
|
||
move.l leftDelims(pBlok),rd ;repair delim and comment regs
|
||
move.l leftComment(pBlok),escRD ;escRD is lc1
|
||
move.l lastA,holdA ;need address of closing delimiter
|
||
GetSym ;start filling pipe
|
||
DoToken #3 ;create right delimiter token
|
||
bra main
|
||
|
||
;-------------------------------------------------------------------------------
|
||
|
||
leftCom2A
|
||
move.l rightComment+4(pBlok),rc ;get matching delimiter
|
||
move.w #1,dTab ;this reg becomes the nest counter
|
||
bra.s comMain2
|
||
leftCom2
|
||
move.l lc1,lc2 ;lc2 becomes info for nested comment
|
||
move.w #1,dTab ;this reg becomes the nest counter
|
||
move.l rightComment(pBlok),rc
|
||
comMain2
|
||
GetSym ;NextSym
|
||
DoToken #7 ;create left comment symbol
|
||
move.l lastA,holdA ;beginning of literal string
|
||
tst.l rc ;see if right delim is 1 or 2 symbols
|
||
bmi.s endCom1 ;branch accordingly
|
||
bra endCom2
|
||
|
||
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||
leftCom1A
|
||
move.l rightComment+4(pBlok),rc ;get matching delimiter
|
||
bra.s comMain1
|
||
leftCom1
|
||
move.l rightComment(pBlok),rc
|
||
comMain1
|
||
move.w #1,dTab ;for compatibility reasons (nesting)
|
||
DoToken #7 ;create left comment symbol
|
||
move.l lastA,holdA ;beginning of literal string
|
||
tst.l rc ;see if right delim is 1 or 2 symbols
|
||
bmi.s endCom1 ;branch accordingly
|
||
bra.s endCom2
|
||
|
||
|
||
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||
endCom1
|
||
swap cSym ;look at the nextSym
|
||
@cloop
|
||
cmp.w cSym,rc ;check for end
|
||
beq.s @end
|
||
NextSym ;go for the next symbol
|
||
bpl.s @cloop ;may have run out before delimiter
|
||
bra badDelER
|
||
@end DoToken #9 ;create the literal token
|
||
movea.l lastA,holdA ;start of end delimiter
|
||
NextSym ;get one beyond end delimiter
|
||
swap cSym ;fix the order
|
||
move.l leftDelims(pBlok),rc ;repair register
|
||
DoToken #8 ;create end delimiter
|
||
move.l leftComment+4(pBlok),lc2
|
||
|
||
GetSMgrCore dTab ; <4/27/88ldc><3>
|
||
adda #smgrRecord.delimMap, dTab ; <4/27/88ldc><3>
|
||
|
||
bra main
|
||
|
||
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||
endCom2
|
||
@cloop
|
||
move.l lastA,temp1 ;effectively a 2-symbol look-behind
|
||
GetSym ;try the next puppy
|
||
cmp.l cSym,lc2 ;check for nested comment
|
||
beq.s @nest
|
||
@nogo
|
||
cmp.l cSym,rc ;check for end delimiter sequence
|
||
beq.s @end
|
||
tst.w cSym ;may have run out of symbols
|
||
bpl.s @cloop ;still okay
|
||
bra badDelER
|
||
@nest
|
||
tst.b doNest(pBlok) ;how's the weather?
|
||
beq.s @nogo
|
||
move.l temp1,lastA ;do literal token
|
||
DoToken #9
|
||
move.l temp1,holdA ;start of nested left comment
|
||
addq.w #1,dTab ;nest counter
|
||
bra comMain2
|
||
@end
|
||
move.l temp1,lastA ;fix up the pipeline
|
||
DoToken #9 ;create literal
|
||
move.l temp1,holdA ;start address of right delimiter
|
||
GetSym ;start up normal piping again
|
||
DoToken #8 ;create right delimiter
|
||
movea.l lastA,holdA ;start of whatever follows
|
||
subq.w #1,dTab ;see how the nesting is…
|
||
cmpa.w #0,dTab
|
||
bne endCom2 ;…and continue if necessary
|
||
move.l leftDelims(pBlok),rc ;repair register
|
||
move.l leftComment+4(pBlok),lc2
|
||
|
||
GetSMgrCore dTab ; <4/27/88ldc><3>
|
||
adda #smgrRecord.delimMap, dTab ; <4/27/88ldc><3>
|
||
|
||
bra main
|
||
|
||
;-------------------------------------------------------------------------------
|
||
altnum
|
||
swap cSym ;check NextSym for numeric
|
||
moveq.l #11,temp1 ;numeric code
|
||
@nloop
|
||
cmp.w temp1,cSym
|
||
bne.s @done ;not numeric; bail out
|
||
NextSym
|
||
bra.s @nloop ;cycle again
|
||
@done
|
||
cmp.w decimalCode(pBlok),cSym ;could be real number
|
||
beq.s realAlt
|
||
swap cSym ;repair cSym order
|
||
bra cont
|
||
|
||
;-------------------------------------------------------------------------------
|
||
realNumber ;entry from altNum or numeric
|
||
NextSym
|
||
moveq.l #5,temp1
|
||
@nloop
|
||
cmp.w temp1,cSym
|
||
bne.s @done
|
||
NextSym
|
||
bra.s @nloop
|
||
@done
|
||
swap cSym
|
||
move.w #12,cSym ;code for real number
|
||
bra cont
|
||
;-------------------------------------------------------------------------------
|
||
realAlt ;entry from altNum or numeric
|
||
NextSym
|
||
moveq.l #11,temp1
|
||
@nloop
|
||
cmp.w temp1,cSym
|
||
bne.s @done
|
||
NextSym
|
||
bra.s @nloop
|
||
@done
|
||
swap cSym
|
||
move.w #13,cSym ;code for real number
|
||
bra cont
|
||
;-------------------------------------------------------------------------------
|
||
TokSymbol
|
||
swap cSym ;we'll be checking NextSym
|
||
cmpi.w #33,cSym ;is NextSym "="?
|
||
bne @cont
|
||
|
||
swap cSym
|
||
cmpi.w #31,cSym ;check for <=
|
||
bne.s @c1
|
||
makeSym #34
|
||
@c1
|
||
cmpi.w #32,cSym ;check for >=
|
||
bne.s @c2
|
||
makeSym #36
|
||
@c2
|
||
cmpi.w #68,cSym ;check for :=
|
||
bne.s @c3
|
||
makeSym #39
|
||
@c3
|
||
cmpi.w #33,cSym ;check for ==
|
||
bne.s @c4
|
||
makeSym #38
|
||
@c4
|
||
cmpi.w #43,cSym ;check for !=
|
||
bne.s @cont1 ;if that's not it then it ain't nothin'
|
||
makeSym #42
|
||
@cont
|
||
swap cSym ;re-adjust it
|
||
cmpi.l #$001F0020,cSym ;is it "<>"?
|
||
bne.s @cont1
|
||
makeSym #41
|
||
@cont1
|
||
move.l lastA,temp1 ;was not a composite symbol
|
||
sub.l holdA,temp1 ;compute size
|
||
bra cont ;get back into the thick of things
|
||
|
||
;-------------------------------------------------------------------------------
|
||
delimRun
|
||
cmp.w cSym,ld ;check for left delimiter
|
||
beq leftLit
|
||
swap ld ;check for other delimiter
|
||
cmp.w cSym,ld
|
||
beq leftLit1
|
||
swap ld
|
||
tst.l lc1 ;is comment sequence two symbols?
|
||
bpl.s @1
|
||
cmp.w cSym,lc1 ;check for left comment sequence
|
||
bne.s @2
|
||
bra leftCom1
|
||
@1 cmp.l cSym,lc1
|
||
beq leftCom2
|
||
@2 tst.l lc2 ;is other comment sequence two symbols?
|
||
bpl.s @3
|
||
cmp.w cSym,lc2 ;check for other left comment
|
||
beq leftCom1A
|
||
bra notDelim
|
||
@3 cmp.l cSym,lc2
|
||
beq leftCom2A
|
||
bra notDelim
|
||
|
||
;-------------------------------------------------------------------------------
|
||
;itl4 call interfacing
|
||
;
|
||
extendFetch
|
||
tst.l srcL ;did we come because of a double-byter?
|
||
bpl.s @1 ;yes.
|
||
move.w #-1,cSym ;we need a full word negative for dTab's benefit
|
||
rts ;we came because source ran out; return home
|
||
@1
|
||
move.l mapExtendOffset(Map),temp ;offset of the extended fetch routine
|
||
jmp (Map,temp.l) ;finish the fetch
|
||
|
||
StringCopy
|
||
movea.l itlResource(pBlok),dTab ;get the itl resource block
|
||
move.l (dTab),dTab ;dereference! <1/7/88med>
|
||
move.l strOffset(dTab),temp ;offset of the stringCopy
|
||
jmp (dTab,temp.l) ;finish the fetch
|
||
|
||
;-------------------------------------------------------------------------------
|
||
;Tail End
|
||
;-------------------------------------------------------------------------------
|
||
endParse
|
||
moveq.l #0,temp ;status if nothing went wrong
|
||
cleanUp
|
||
move.w temp, -(sp) ;save status
|
||
move.l toks,temp ;find out how many tokens we created
|
||
sub.l tokenList(pBlok),temp
|
||
divs #tokenRecSize,temp ;divide by size of TokenRec record
|
||
move.l temp,tokenCount(pBlok) ;save the value (instead of whales)…
|
||
;…if something went wrong then upper word is non-zero
|
||
tst.b doString(pBlok) ;do we need to create string list?
|
||
beq.s @end1
|
||
bsr.s StringCopy
|
||
@end1
|
||
move.w (sp)+,d0 ;status report
|
||
addq.l #4,sp ;yank off the token start address
|
||
movem.l (sp)+,d2-d7/a2-a6 ;restore it
|
||
CheckA6 ; check stack
|
||
unlk a6
|
||
movea.l (sp)+,a0 ;return address
|
||
add.l #argSize,sp ;dump original parameters
|
||
move.b d0,(sp) ;status report
|
||
jmp (a0) ;go home
|
||
|
||
;-------------------------------------------------------------------------------
|
||
;IntlTokenize error branches
|
||
;-------------------------------------------------------------------------------
|
||
|
||
tokenOVER
|
||
move.w #tokenOverflow,temp
|
||
bra.s cleanUp
|
||
|
||
badDelER
|
||
move.w #badDelim,temp
|
||
bra.s cleanUp
|
||
|
||
unknownER
|
||
move.w #crash,temp
|
||
bra.s cleanUp
|
||
|
||
;-------------------------------------------------------------------------------
|
||
;Common routines for code savings <5>
|
||
;-------------------------------------------------------------------------------
|
||
|
||
NextSymRoutine ; <5>
|
||
swap cSym ; <5>
|
||
BaseSymRoutine ; <5>
|
||
movea.l srcA,lastA ;save start of symbol
|
||
move.b (srcA)+,cSym ;start 2-mappedChar pipe
|
||
subq.l #1,srcL ;pay the dues
|
||
bmi.s @999
|
||
move.b 0(Map,cSym.w),cSym ;map the character
|
||
bpl.s @998
|
||
@999
|
||
bsr extendFetch ;we've either run out of input or it's 2-byte,
|
||
@998 ; and srcL tells which
|
||
rts ; <5>
|
||
|
||
GetSymRoutine ; <5>
|
||
movea.l srcA,lastA ;save start of symbol
|
||
move.b (srcA)+,cSym ;start 2-mappedChar pipe
|
||
subq.l #1,srcL ;pay the dues
|
||
bmi.s @999
|
||
move.b 0(Map,cSym.w),cSym ;map the character
|
||
bpl.s @998
|
||
@999
|
||
bsr extendFetch ;we've either run out of input or it's 2-byte,
|
||
@998 ; and srcL tells which
|
||
swap cSym ; <5>
|
||
rts ; <5>
|
||
|
||
DoTokenRoutine
|
||
; has parameter in temp
|
||
;move first subtraction below <5>
|
||
subq.l #1,tokL ;decrement available size of token space
|
||
bmi.s toTokenOVER ;ran out of room <5>
|
||
move.w temp,(toks)+ ;token <5>
|
||
move.l holdA,(toks)+ ;adr in srcA of token start
|
||
move.l lastA,temp ;find length of token <5>
|
||
sub.l holdA,temp ; <5>
|
||
move.l temp,(toks)+ ;size of token string
|
||
clr.l (toks)+ ;space for string ptr
|
||
rts ; <5>
|
||
toTokenOVER
|
||
addq #4,sp ; strip DoTokenRoutine return address… <5>
|
||
; …or makeSymRoutine parameter.
|
||
bra tokenOVER ; go to err handling routine <5>
|
||
|
||
makeSymRoutine
|
||
; has DoToken parameter on the stack.
|
||
;GetSym part
|
||
movea.l srcA,lastA ;save start of symbol
|
||
move.b (srcA)+,cSym ;start 2-mappedChar pipe
|
||
subq.l #1,srcL ;pay the dues
|
||
bmi.s @999
|
||
move.b 0(Map,cSym.w),cSym ;map the character
|
||
bpl.s @998
|
||
@999
|
||
bsr extendFetch ;we've either run out of input or it's 2-byte,
|
||
@998 ; and srcL tells which
|
||
swap cSym ; <5>
|
||
;DoToken part
|
||
subq.l #1,tokL ;decrement available size of token space
|
||
bmi.s toTokenOVER ;ran out of room <5>
|
||
move.w (sp)+,(toks)+ ;token <5>
|
||
move.l holdA,(toks)+ ;adr in srcA of token start
|
||
move.l lastA,temp ;find length of token <5>
|
||
sub.l holdA,temp ; <5>
|
||
move.l temp,(toks)+ ;size of token string
|
||
clr.l (toks)+ ;space for string ptr
|
||
;don't return, just branch
|
||
bra main
|
||
|
||
ENDWITH
|
||
ENDF
|
||
END
|
||
|