;
;	File:		ScriptMgrUtilText.a (formerly SMgrUtilText.a)
;
;	Contains:	Script Mgr text utilities
;
;	Written by:	MED	Mark Davis
;				DRS	daan Strebe
;				LDC	Lee Collins
;				PKE	Peter Edberg
;
;	Copyright:	© 1987-1992 by Apple Computer, Inc., all rights reserved.
;
;   This file is used in these builds:   Mac32 System CubeE
;
;	Change History (most recent first):
;
;		<11>	11/17/92	PKE		Add include of new IntlResourcesPriv.a for IntlTokenize equates.
;		<10>	 6/17/92	HA		#1029756,<PKE>: Made sure that we stuff a dummy address in the
;									ram based vector table for StyledLineBreak after removing it
;									from ptch 4 and adding it to ptch 27.
;		 <9>	 4/30/92	FM		Remove obsolete conditionals:shrinkIntlTokenize, smgrUsesStdExit
;		 <8>	 9/20/91	PKE		Remove CubeE conditional, since the new plan (according to
;									Darin) is that everything for Cube-E goes in without
;									conditionals. While I'm at it, get rid of some ">=$604",
;									">=$605", and ">=$700" conditionals.
;		 <7>	 9/15/91	PKE		smb, #1011961: For Bruges/Cube-E, roll in from 6.1 - Fix
;									trashing of d3 by GetFormatOrder.
;
;		 <6>	 9/14/90	BG		Removed <4>. 040s are now behaving more reliably.
;		 <5>	 7/23/90	PKE		Changed IntlTokenize (under control of shrinkIntlTokenize
;									symbol) to use BSRs instead of macros with inline code. On
;									Plus/SE/II, this saves 620 bytes but makes IntlTokenize 13%
;									slower, so we may reverse this.
;		 <4>	 7/17/90	BG		Added EclipseNOPs for flakey 040s.
;		 <3>	 4/10/90	PKE		Deleted conditionalized definitions of forROM, SysVers,
;									Scripts604, and TestScriptManager. Used smgrUseStdExit,
;									smgrSysVers, and smgrROMVers symbols instead of buildLevel.
;		 <2>	  1/4/90	PKE		Updated conditionals to put SS-6.0.4 changes in 6.0.5 as well as
;									7.0. Updated header to BBS format.
;		 <1>	12/18/89	CCH		Adding for the first time into BBS. Changed include 'inc.sum.a'
;									to load 'StandardEqu.d'. Updated file name references.
;
;	(BBS versions above, EASE versions below:)
;	   <2.2>	11/27/89	PKE		Use bug fix in <1.8> for Scripts604 build too.
;	   <2.1>	 8/26/89	PKE		Cleaned up some conditionals, moved a comment.
;	   <2.0>	 8/22/89	SES		Removed references to nFiles.
;	   <1.9>	 7/12/89	PKE		NEEDED FOR AURORA: In StyledLineBreak, replace 'mulu #4' with
;									'lsl.w #2' for efficiency.
;	   <1.8>	 6/27/89	PKE		NEEDED FOR AURORA - Fix GetFormatOrder bug discovered by Sue:
;									If firstFormat=lastFormat, it did a move.l (instead of .w) of
;									firstFormat into the first word of the ordering array. This
;									move was redundant anyway, so I just skipped it.
;	   <1.7>	 6/23/89	PKE		Skip definition of buildLevel (done in ScriptPriv.a).
;	   <1.6>	  6/9/89	PKE		Change StyledLineBreak to use input offset value as per
;									documentation: if 0, returned offset >= 1 and char breaks
;									possible; otherwise, returned offset >= 0 and char breaks not
;									possible.
;	   <1.5>	 5/30/89	PKE		Fixed bug in StyledLineBreak for 2-byte characters.
;	   <1.4>	 5/24/89	EMT		Renamed alpha symbol to not conflict with development stage.
;	   <1.3>	 2/21/89	PKE		Replaced with RomProj version, which already had system and Rom
;									sources merged.
;	(EASE ROMproj history below:)
;	   <1.6>	 2/21/89	PKE		Fix up LOADs: always use include 'StandardEqu.d'.
;				 2/21/89	pke		Fix up includes: always use include 'inc.sum.a'.
;	   <1.5>	 2/14/89	PKE		Fixed bugs in LineBreak/FindCarriage that broke new TextEdit
;	   <1.4>	 2/14/89	PKE		Updated to use MPW 3.0 final ScriptEqu.a equate names
;	   <1.3>	  2/3/89	PKE		Merged with current system sources (CCH's 01/16/1989 merge of
;									6.0.3 and 7.0 sources, done in RES.sys:smgr)
;				  2/2/89	pke		Merged system sources and ROM sources
;	   <1.2>	11/14/88	PKE		Synchronize EASE and Projector
;	   <1.1>	11/11/88	CCH		Fixed Header.
;	   <1.0>	 11/9/88	CCH		Adding to EASE.
;	(old EASE ROMproj history below:)
;	   <1.7>	10/27/88	LDC		Mods by Carl Hewitt and Brian McGhie in the process of putting
;									Script Manager in ROM
;	(EASE SYSproj history below:)
;	   <1.2>	 2/14/89	PKE		Updated to use MPW 3.0 final ScriptEqu.a equates
;	   <1.1>	 1/16/89	CCH		Merged 6.0.3 final sources into 7.0.
;	   <1.0>	11/16/88	CCH		Added to EASE.
;	(pre-EASE history; recent changes here went into EASE ROMproj, not SYSproj:)
;				 9/16/88	ldc		fixed StyledLineBreak to get correct itlb values
;				 6/16/88	ldc		added StdUnlink
;				 6/14/88	ldc		added check for valid token value [0..tokenNil)
;				 6/13/88	ldc		removed delimMap for move to ROM
;			changes above here are for ROM or buildLevel >= 2
;				 2/10/88	med		Changed GetFormatOrder pascal interface
;				 2/10/88	med		LineRight parameter is high byte from pascal
;				  2/9/88	med		Change pascal interface
;				  1/7/88	med		itlResource loaded if purged; minor changes in Tokenizer
;				  1/4/88	med		itlResource in Tokenizer is now handle
;				11/20/87	med		Changed CheckSelector
;				11/16/87	med		Minor cleanup of IntlTokenize
;				10/28/87	med		Added IntlTokenize Routine
;				10/26/87	med		Added StyledLineBreak algorithm, FormatOrder
;___________________________________________________________________________________________________
;	To Do:
;
;___________________________________________________________________________________________________

debugging	equ	0

		BLANKS	ON
		STRING	ASIS

		load	'StandardEqu.d'
		include	'ScriptPriv.a'
		include	'IntlResourcesPriv.a'			;			<11>


		export	FormatOrder,IntlTokenize		; smSelStyledLineBreak
	
;____________________________________________________________
;
; StyledLineBreak Algorithm
;
; by Mark E. Davis
;
; Modification History
;  
; 7/25/87 med	Finished first version of line-break
; 7/28/87 med	Pretty final version for AIS, needs truncation fixing
; 7/29/87 med	Added check for CR
; 7/31/87 med	CR cannot be part of white-space
; 7/31/87 med	Add check for old SISs with bad word-break table
;<8/2/87med>	Use direct calls on script routines
;<8/2/87med>	Moved CheckFont
;<8/4/87med>	Use blockflags instead of sisReverse
;____________________________________________________________

;____________________________________________________________
; Routine	StyledLineBreak (
;				textPtr:		Ptr;
;				textLen: 		Longint;	{must be < 32K ;<2/14/89 pke>}
;				textStart:		Longint;
;				textEnd:		Longint;
;				flags:			Longint;
;				var textWidth:	Fixed;		{on exit, set if too long}
;				var textOffset:	Longint;
;			):					LineCode;
;____________________________________________________________

	IF 0 THEN

LineBreakFrame	record	{oldA6},decrement
resultSize		equ	2
result			ds.w	1
argSize			equ		*-8
textPtr			ds.l	1
textLen			ds.l	1
textStart		ds.l	1
textEnd			ds.l	1
flags			ds.l	1
textWidth		ds.l	1
textOffset		ds.l	1
selector		ds.l	1
return			ds.l	1
oldA6			ds.l	1
offsets			ds.l	3
leftSide		ds.b	1
savedReverse	ds.b	1
lineDirection	ds.w	1
oldBlockFlags	ds.w	1			; <8/4/87med>
localFrame		equ	*
				endR

FixMinusOne	equ	$FFFF0000

LineBreakRegs	reg	a2-a3/d4-d6
;____________________________________________________________

	IF 0 THEN
; StyledLineBreak Equates

smcTypeMask		equ	$000F
smcReserved		equ	$00F0
smcClassMask	equ	$0F00
smcReserved12	equ	$1000
smcRightMask	equ	$2000
smcUpperMask	equ	$4000
smcDoubleMask	equ	$8000

; CharType character types.

smCharPunct		equ	0
smCharAscii		equ	1
smCharEuro		equ	7

; CharType punctuation types.

smPunctNormal	equ	$0000
smPunctNumber	equ	$0100
smPunctSymbol	equ	$0200
smPunctBlank	equ	$0300				; NBSP is PunctNormal
	ENDIF
;____________________________________________________________

StyledLineBreak	proc

		import	FindCarriage
		import	StdUnlink

; setup
		with	LineBreakFrame,smgrRecord,scriptRecord, ItlbRecord
		CheckSelector
		link	a6,#localFrame
		movem.l	LineBreakRegs,-(sp)

; common args
		move.l	textWidth(a6),a3			; get @width
		move.l	textPtr(a6),d5				; pointer
		add.l	textStart(a6),d5			; pointer+start

; save environment and reset
;!!! generalize for left-right
		move.w	teSysJust,lineDirection(a6) ; save just
		move.w	#0,teSysJust				; set to LR

; get current script
		move.w	#0,-(sp)					; allocate _FontScript return
		_FontScript							; get current script
		move.w	(sp)+,d0					; current script
 
; get core globals and test: inline instead of trap, for speed
 
		lsl.w	#2,d0						; make script code a longword offset		<1.9>
		GetSmgrCore	a2							; smgr core globals
		move.l	smgrRecord.smgrEntry(a2,d0),a2	; core location
		
;<9/16/88ldc>	changed following two lines to access correct itlb values		<06/09/89 pke><3>
 		move.b	ScriptRecord.scriptBundle.itlbFlags(a2),oldBlockFlags(a6)	; <9/16/88ldc>
		bclr.b	#smsfReverse,ScriptRecord.scriptBundle.itlbFlags(a2)	; set left-right <8/4/87med>		; <9/16/88ldc>

; quick check on length
		move.l	textEnd(a6),d4				; get length
		sub.l	textStart(a6),d4			; segment length
		ble		@TooLong					; if zero, bail
		tst.l	(a3)						; no width (degenerate)
; changed this so we handle blanks at start of 0-width line.					<06/09/89 pke><3>
		bmi		@FixNoChars					; fix neg width

; get offset
;!!! fix long length later
		move.w	#0,-(sp)					; return space
		move.l	d5,-(sp)					; pointer
		move.w	d4,-(sp)					; length (mod)
		move.w	#0,-(sp)					; slop (truncate)
		move.w	(a3),-(sp)					; width (truncate)
		pea		leftSide(a6)				; @leftSide
 		_Pixel2Char							; use trap
		move.l	#0,d6						; longize
		move.w	(sp)+,d6					; get offset

; if width is zero, fix leftSide for FindWord etc. (change for 604)				<06/09/89 pke><3>
		tst.l	(a3)						; zero width?
		bne.s	@doneFixLeftSide			;
		st		leftSide(a6)				; if so, fix leftSide
@doneFixLeftSide

; Check for CR <7/29/87med>

		move.l	d5,a0						; text ptr
		move.w	d6,d0						; copy of non-offsetted length
		bsr		FindCarriage				; a0,d0.w: return d0.b=t/f, length in d1.w
		add.w	textStart+2(a6),d1			; need offset from textPtr			<added 2/14/89 pke>
		tst.b	d0							; found?
		bne		@ReturnWord					; yes, return

; check for position being too long

		move.l	d6,d0						; copy un-offsetted length
		add.l	textStart(a6),d6			; get char offset

; if offset > length, return width of text

		cmp.l	d4,d0						; position at end?
		blt.s	@GotCharPos					; no, skip
		tst.b	leftSide(a6)				; leftSide?
		bne.s	@TooLong					; yes, too long
@GotCharPos

; get word around char offset

		move.l	textPtr(a6),-(sp)			; ptr
		move.w	textLen+2(a6),-(sp)			; length (mod)
		move.w	d6,-(sp)					; offset (truncate)
		move.b	leftSide(a6),-(sp)			; leftSide
		move.l	MinusOne,-(sp)				; line break table
		pea		offsets(a6)					; offset table
 		_FindWord							; use trap

; test character to see if it is white-space
; Mark suggests checking to see if VisibleLength would work better here… <06/09/89 pke>

		move.w	#0,-(sp)					; allocate return
		move.l	textPtr(a6),-(sp)			; ptr
		move.w	offsets(a6),-(sp)			; offset
 		_CharType							; use trap
		move.w	(sp)+,d0					; get type
		move.l	#0,d1						; longize
		move.w	offsets(a6),d1				; assume second offset ***
		and.w	#smcTypeMask+smcClassMask,d0	; type and class
		cmp.w	#smCharPunct+smPunctBlank,d0	; blank?
		bne.s	@ReturnOffset					; no, continue
		btst.b	#smsfReverse,oldBlockFlags(a6)	; test reverse block flag			<8/4/87med>
		sne		d0								; 0/-1 left/right					<8/4/87med>
		cmp.b	LineDirection+1(a6),d0		; line = char? (bottom byte of line)	<8/4/87med>
		bne.s	@ReturnOffset				; yes, skip to end

; here, word is whitespace and direction is ok (script direction = line direction),
; so return second offset.

; add check for CR for old SISs with bad word-break table.						<7/31/87med>
; All new SISs should be fixed, so we should be able to skip the FindCarriage
; stuff here and just set d1 to offsets+2(a6).									<06/09/89 pke>

		move.l	textPtr(a6),a0				; get ptr

		moveq	#0,d6						; make long							<2/14/89 pke><3>
		move.w	offsets(a6),d6				; get first offset
		add.l	d6,a0						; now at first offset				<2/14/89 pke><3>

		move.w	offsets+2(a6),d0			; take second offset
		sub.w	d6,d0						; now length
		bsr		FindCarriage				; find in d1
		add.w	d6,d1						; d1 either after cr or at end
		bra.s	@ReturnWord					; return

@TooLong
; return width
		move.w	#0,-(sp)					; return space
		move.l	d5,-(sp)					; pointer
		move.w	#0,-(sp)					; firstByte
		move.w	d4,-(sp)					; length
		_TextWidth
		move.w	(sp)+,d0					; pop width
		swap	d0							; swap for long
		clr.w	d0							; longize
;		move.l	(a5),a0						; graf globals
;		move.l	fontAdj(a0),d0				; get fixed width
		sub.l	d0,(a3)						; final width
		
		move.l	textEnd(a6),d6				; set return
		move.l	#smBreakOverflow,d0			; set type
		bra.s	@Done						; return
		
@ReturnOffset
		tst.l	d1							; break at start?
		bne.s	@ReturnWord					; yes, return char

		move.l	textOffset(a6),a0			; get @textOffset					<06/09/89 pke><3>
		tst.l	(a0)						; was offset 0 at entry?			<06/09/89 pke><3>
		beq.s	@ReturnWord					; if so, go return zero offset		<06/09/89 pke><3>

; have to break word, so backup until length is less.

; The most we have to backup is one char, and we only have to do that if		<06/09/89 pke><3>
; leftSide is false <06/09/89 pke>. We also need to catch the case of d6=0.

		tst.b	leftSide(a6)				; leftSide?
		bne.s	@checkNullOffset			; if T, skip decrement
		sub.l	#1,d6						; decrement
		clr.w	-(sp)						; allocate return
		move.l	d5,-(sp)					; pass text ptr
		move.l	d6,d0						; cur position
		sub.l	textStart(a6),d0			; start of real text
		move.w	d0,-(sp)					; count (mod!!!)
		_CharByte
		tst.w	(sp)+						; return 0=single,-1=first,1=last
		ble.s	@checkNullOffset			; bail if -1 or 0, ok
		sub.l	#1,d6						; fix for 2-byte chars
@checkNullOffset:
		tst.l	d6							; are we still >0?
		bgt.s	@ReturnChar					; if so, go return a char
		bra.s	@FixNoChars1				; don't redo test for orig offset 0

@FixNoChars	
		move.l	textOffset(a6),a0			; get @textOffset				<06/09/89 pke><4>
		tst.l	(a0)						; was offset 0 at entry?		<06/09/89 pke><4>
		bne.s	@FixNoChars1				; if not, return 1 char			<06/09/89 pke><4>
		moveq	#0,d1						; if so, set zero offset		<06/09/89 pke><4>
		bra.s	@ReturnWord					; and go return word break		<06/09/89 pke><4>
@FixNoChars1	

; string is now zero length, make it at least 1 char

		move.l	#1,d6						; include first char

		clr.w	-(sp)						; allocate return				<05/30/89 pke><3>

		move.l	d5,-(sp)					; pass text ptr
		move.w	#1,-(sp)					; offset
		_CharByte

		tst.w	(sp)+						; return 0=single,-1=first,1=last	<05/30/89 pke><3>

		ble.s	@ReturnChar					; bail if -1 or 0, ok
		add.l	#1,d6						; fix for 2-byte chars

@ReturnChar
		move.l	#smBreakChar,d0				; so, word too long.
		bra.s	@Done						; exit

@ReturnWord
		moveq	#0,d6						; make long						<2/14/89 pke><3>
		move.w	d1,d6						; set to word bound				<2/14/89 pke><3>

		move.l	#smBreakWord,d0				; break at word

@Done
		move.l	textOffset(a6),a0			; get @textOffset
		move.l	d6,(a0)						; set to char position
		move.b	d0,result(a6)				; set result type
		
; restore environment
; <9/16/88ldc> fixed following line

 		move.b	oldBlockFlags(a6),ScriptRecord.scriptBundle.itlbFlags(a2)	;	<9/16/88ldc>	use for 6.0.4 too <06/09/89 pke><3>

 		move.w	lineDirection(a6),teSysJust	; restore just

; cleanup
		movem.l	(sp)+,LineBreakRegs
		CheckA6
				
		move	#argSize, d0				; for StdUnlink <6/16/88ldc>
		bra		StdUnlink					; standard exit <6/16/88ldc>
		endWith
		endProc
		
		ENDIF


;____________________________________________________________
;	PROCEDURE FormatOrder (
;			ordering :		FormatOrderPtr;
;			firstFormat :	Integer;
;			lastFormat :	Integer;
;			lineRight :		Boolean;
;			FUNCTION DirProc (theFormat : Integer; dirParam : Ptr) : Boolean;
;			dirParam :		Ptr
;		);
;____________________________________________________________

FormatOrderFrame	record	{oldA6},decrement
resultSize		equ		0
argSize			equ		*-8
ordering		ds.l	1
firstFormat		ds.w	1
lastFormat		ds.w	1
filler			ds.b	1
lineRight		ds.b	1	; high byte from pascal <2/10/88med>
DirProc			ds.l	1
;;filler2		ds.l	1	; why does pascal want this? Changed pascal interface <2/9/88med>
dirParam		ds.l	1
selector		ds.l	1
return			ds.l	1
oldA6			ds.l	1

localFrame		equ	*
				endR

FormatOrderRegs	reg	a2-a3/d3-d6				;									<7>
;____________________________________________________________

FormatOrder	proc

		import	StdUnlink

		with	FormatOrderFrame
		CheckSelector
		
		link	a6,#localFrame
		movem.l	FormatOrderRegs,-(sp)

; consistency check

		move.l	ordering(a6),a2				; get ordering
		move.w	firstFormat(a6),d3			; get start
		move.w	lastFormat(a6),d4			; get end
		cmp.w	d3,d4						; same?

		ble.s	@Exit						; lastFormat ≤ firstFormat, leave array alone.	<1.8><2.2><3>

@FillArray
;	{fill the array with initial values, going in lineDirection order}

		move.l	DirProc(a6),a3				; get dirProc
		move.l	#2,d5						; set pointer increment
		move.b	lineRight(a6),d6			; right-left?
		beq.s	@InvertLoop					; no, continue
		move.w	d4,d0						; get last element
		sub.w	d3,d0						; last-first
		add.w	d0,d0						; for word array
		add.w	d0,a2						; last element
		neg.w	d5							; invert pointer increment

@InvertLoop
		clr.b	-(sp)						; allocate return
		move.w	d3,-(sp)					; pass i
		move.l	dirParam(a6),-(sp)			; pass dirParam
		jsr		(a3)						; call it
		move.w	d3,d1						; set temp
		cmp.b	(sp)+,d6					; get result: same as lineRight?
		beq.s	@1							; yes, skip
		not.w	d1							; invert
@1
		move.w	d1,(a2)						; set value
		add.w	d5,a2						; increment array pointer
		add.w	#1,d3						; i := i+1
		cmp.w	d3,d4						; same?
		bge.s	@InvertLoop					; no, keep looping

;	{walk through the array, reversing the odd direction clusters (inverted)}
@WalkArray
		sub.w	firstFormat(a6),d4			; now # elements + 1
		move.l	#-1,d5						; mark as no first backwards element
		clr.w	d3							; i := 0
		move.l	ordering(a6),a2				; get ordering
		move.l	a2,a3						; for increment
@RevLoop
		tst.w	(a3)+						; get value
		bge.s	@BadOrder					; correct order
		tst.w	d5							; have first backwards element?
		bge.s	@RevContinue				; yes, continue
		move.w	d3,d5						; mark first backwards element
		bra.s	@RevContinue				; continue
@BadOrder
		tst.w	d5							; have first backwards element?
		blt.s	@RevContinue				; no, continue
		bsr.s	ReverseCluster				; ReverseCluster(j, i - 1)
		move.l	#-1,d5						; mark no first backwards element.
@RevContinue
		add.w	#1,d3						; inc
		cmp.w	d3,d4						; at end?
		bge.s	@RevLoop					; no, continue

		tst.w	d5							; got one?
		blt.s	@Exit						; no, continue
		bsr.s	ReverseCluster				; ReverseCluster(j, limit);

@Exit
; cleanup
		movem.l	(sp)+,FormatOrderRegs
 		CheckA6

		move	#argSize, d0				; for StdUnlink					<6/16/88ldc>
		bra		StdUnlink					; standard exit					<6/16/88ldc>


;____________________________________________________________
; PROCEDURE ReverseCluster (first, last : Integer);
; e.g. d5=3, d3 = 6 should reverse 3,4,5
;____________________________________________________________
ReverseCluster

		move.w	d3,d2					; get last + 1
		sub.w	d5,d2					; diff = first - last
		sub.w	#1,d2					; sum -1 
		asr.w	#1,d2					; middle = (sum-1)/2 (6,3 => 
		move.w	d5,d0					; get j
		add.w	d0,d0					; double for word address
		lea		0(a2,d0.w),a0			; end address
		move.w	d3,d0					; get i
		add.w	d0,d0					; double for word address
		lea		0(a2,d0.w),a1			; end address (after end of element)
; e.g. for 3,4 items, dbra = 1 (two iterations);
		
@RevClusLoop							; reverse outer elements
		move.w	(a0),d0					; temp = ordering^[i]
		not.w	d0						; restore it
		move.w	-(a1),d1				; temp2 = ordering^[j]
		not.w	d1						; restore it
		move.w	d0,(a1)					; mix
		move.w	d1,(a0)+				; ditto
		dbra	d2,@RevClusLoop			; til done

; exit
		rts
		endProc
;____________________________________________________________
; Routine IntlTokenize
;____________________________________________________________
			
;The tokenizer's job is to lexically analyze a block of characters
;(irrespective of script) and return a block of standardized tokens. The
;character block is interpreted as an expression from any normal computer
;language.
;The tokenizer lexes the following atoms:
;	Unknown symbol		(a character outside the tokenizer's vocabulary)
;	White space			(consecutive white space characters generate one token)
;	Alphabetic			(sequence of textual characters (and, optionally, digits))
;	Numeric				(sequence of digits)
;	Alternate numeric	(digits from foreign scripts)
;	Real numeric		(digits, decimal, [digits])
;	Newline	
;	Comment				(delimiters chosen by application)
;	Literal				(with delimiters chosen by the application)
;	Escape character	(disable special meaning of following character)
;	Symbols				(non-alphanumeric special symbols)
;	End of Parse		(source string ran out)
;--------------------------------------------------------------------------
;The following symbols are unique tokens:
;	(	)	[	]	{	}	«	»	+	-	*	÷	±	/	\	>	<
;	=	<=	≤	>=	≥	==	:=	≠	<>	!=	!	~	,	.	“	”	‘
;	’	"	'	;	%	^	_	&	@	|	?	π	√	∑	∫	µ	∏
;	∞	:	#	$
;--------------------------------------------------------------------------
;The tokenizer is a function:
;	FUNCTION IntlTokenize( tokenParam: tokenBlock ): tokenResult;
;where
;	tokenResult = (tokenOK, tokenOverflow, stringOverflow, badDelim, unknown);
;	tokenBlock = ^paramBlock;
;	paramBlock = RECORD
;		source: Ptr;			(*pointer to stream of characters*)
;		sourceLength: LongInt;	(*length of source stream*)
;		tokenList: Ptr; 		(*pointer to array of tokens*)
;		tokenLength: LongInt;	(*maximum length of TokenList*)
;		tokenCount: LongInt;	(*number of tokens generated by tokenizer*)
;		stringList: Ptr;		(*pointer to stream of identifiers*)
;		stringLength: LongInt;	(*length of string list*)
;		stringCount: LongInt;	(*number of bytes currently used*)
;		doString: Boolean;		(*make strings & put into StringLIst*)
;		doAppend: Boolean;		(*append to TokenList rather than replace*)
;		doAlphanumeric: Boolean;(*identifiers may include numeric*)
;		leftDelims, rightDelims: ARRAY[0..1] OF tokenType;
;		leftComment, rightComment: ARRAY[0..3] OF tokenType;
;		escapeCode: tokenType;
;		decimalCode: tokenType;	(*what symbol to use for decimal mark*)
;	END;
;	TokenRec = RECORD
;		theToken: tokenType;
;		position: Ptr;			(*pointer into original Source*)
;		length: LongInt;		(*length of text in original source*)
;		stringPosition: Ptr;	(*Pascal/C string copy of identifier*)
;	END;
;   tokenType : Integer;		(*defined below*)
;	(* The only value in tokenBlock directly modified by the tokenizer is
;		the tokenCount field. The tokenizer modifies the memory pointed to
;		by tokenList and stringList. stringList is a sequence of null-
;		terminated, even-boundaried Pascal strings that is generated if
;		doString is true but ignored otherwise; likewise, the
;		stringPosition fields of the TokenRec records are ignored if doString
;		is false. *)
;	(* The Left and right delimiters are encoded as tokenType; two may be
;		specified. If only one is needed, the second must be 127.
;		These delimit literal strings. The position of a delimiter in the
;		rightDelims array must match the position of its counterpart in 
;		the leftDelims array. As an example, the pairs “” and ‘’ are [47,49]
;		and [48,50] respectively, where the codes are given below. *)
;	(* The leftComment and rightComment are also encoded as tokenType, but
;		unlike delimiters, two tokens must be specified per comment symbol
;		(postions 0 and 1 in the array are one comment symbol).
;		If a single token only is needed then the second must be -1. One
;		or two comment symbols are available; if only one is used then the
;		second comment's tokens must both be 127. The pairs (* *) are
;		[16, 26, 127, 127], [26, 17, 127, 127] *)
;	(* The escape character is significant only when followed by a right
;		delimiter within a literal. It nullifies the special meaning of the
;		delimiter. If there is no escape character, use 127 *)
;   (* The decimal symbol is has special meaning only when flanked or
;		preceded by numeric or AltNumeric tokens. If RealNumber parsing is to
;		be disabled, decimalCode should be 127 *)
;--------------------------------------------------------------------------
;Ordinal values of tokenType are:
;-1 EndParse
;0 Unknown		1 WhiteSpace	2 LeftLitDelim	3 RightLitDelim
;4 Alpha		5 Numeric		6 NewLine		7 LeftCommentDelim
;8 RightCommentDelim			9 literal		10 Escape
;11 Alternate numeric			12 RealNumber	13 AltRealNumber
;16 (	17 )	18 [	19 ]	20 {	21 }	22 «	23 »	24 +
;25 -	26 *	27 ÷	28 ±	29 /	30 \	31 <	32 >	33 =
;34 <=	35 ≤	36 >=	37 ≥	38 ==	39 :=	40 ≠	41 <>	42 !=
;43 !	44 ~	45 ,	46 .	47 “	48 ”	49 ‘	50 ’	51 "
;52 '   53 ;    54 %    55 ^    56 _    57 &    58 @    59 |    60 ?
;61 π	62 √	63 ∑	64 ∫	65 µ	66 ∏	67 ∞	68 :	69 #
;70 $
;71 non-breaking space	
;72 fraction
;73 intlCurrency
;74 leftSingGuillemet
;75 rightSingGuillemet
;76 perThousand
;77 ellipsis
;78 centerDot
;127 <nil>
;--------------------------------------------------------------------------
;--------------------------------------------------------------------------
;---------------------------------------------------------------------------
; IMPLEMENTATION NOTES
;
;	This header file should be included in any version of tokenizer that is
;created. In the local version's file, this header file should be "included"
;as the first significant text. The local version must then implement a 
;BaseSym macro after the include. The characteristics of the macro are:
;	a) the first line is    movea.l srcA,lastA
;	b) the number of bytes in the character must be subtracted from srcL
;	c) the symbol class of the character must be placed in the low word
;		of csym. The upper word must remain undisturbed.
;	d) after BaseSym, srcA must point to the byte following the last byte
;		of the character just retrieved.
;	e) if the available bytes run out before the character does, BaseSym
;		must clean up its own portion of the stack (if any), place the
;		constant #unexpectedEndER into temp, and branch to cleanUp.
;	f) no registers except temp and the low word of csym may be modified.
;
;	Following the BaseSym macro, the tokenBase.a file must be included. This
;file contains all state transition information.
;	Finally, StringCopy must be implemented by the local version. String-
;copy's characteristics are:
;	a) it is a subroutine.
;	b) it may freely use any registers it pleases.
;	c) it creates even-boundaried, null-terminated Pascal strings corres-
;		ponding to those that each token's record points to.
;	d) assuming that the return address from StringCopy is at the top of the
;		stack, then the next word down is the current error condition. The
;		following longword is the address of the effective token list, which
;		is different from the pBlok tokenList address when doAppend is true.
;		In other words, StringCopy must use the stack value rather than the
;		parameter block value to determine the start of the token list.
;	e) it must transliterate altNum and altReal tokens into ascii real
;		strings and put the transliterated numbers, rather than the original
;		numbers, into the Pascal strings. The same goes for roman (and only
;		roman) letters. Additionally, the decimal character should be changed
;		to a canonical '.'. All other text and symbols are to be copied 
;		verbatim.
;	f) if doAppend is true, then it must compute the effective address of
;		stringList by adding stringCount to it.
;	g) it must compute the total amount of space used by strings by sub-
;		tracting stringList(pBlok) from the next legal string address
;		following the last null character of the last Pascal string, and
;		place that computed value into stringCount(pBlok).
;	h) it must detect overflow of the string space. If an overflow occurs,
;		it must place #stringOverflow in the error condition space on the stack
;		and return immediately.
;	i) it must reverse the order of digits for the zany countries that use
;		reversed digit sequences -- Arabic and Hebrew. In real numbers, the
;		decimal point should be considered part of the string of digits and
;		reversed right along with them.
;   The local version contains its own mapping tables to decide the class
;of a character.
;---------------------------------------------------------------------------
;---------------------------------------------------------------------------

TokenFrame	record  {oldA6},decrement
resultSize	equ	2
result		ds.w	1
argSize		equ		*-8
paramSpace	ds.l    1
selector	ds.l    1
return		ds.l    1
oldA6		ds.l    1
localFrame	EQU     *
			ENDR
	
;---
; put these here for reference; defined in IntlResourcesPriv.a
	IF 0 THEN
srcA	    EQU     a0
srcL	    EQU     d0
toks	    EQU     a1
tokL	    EQU     d1
holdA	    EQU     a2
cSym	    EQU     d2
lastA	    EQU     a3
ld	    	EQU     d3
map	    	EQU     a4
lc1	    	EQU     d4
pBlok	    EQU     a5
lc2	   		EQU     d5
temp	    EQU     d6
dTab	    EQU     a6
temp1	    EQU     d7

rd	    	EQU     ld
escRD	    EQU     lc1
rc	    	EQU     ld
	ENDIF
;---
	
	    EXPORT  IntlTokenize
	
	
IntlTokenize    func
	    WITH    TokenRec,tokenBlock,TokenFrame,TokenResults,itl4Rec
		CheckSelector

	    MACRO
	    BaseSym
		bsr		BaseSymRoutine		;													<5>
	    ENDM	

	    MACRO						;get before swap
	    GetSym
		bsr		GetSymRoutine		;													<5>
	    ENDM
	
	    MACRO						;swap before get
	    NextSym
		bsr		NextSymRoutine		;													<5>
	    ENDM
	
	    MACRO
	    DoToken &TokenRec
		move.w	&TokenRec,temp		;													<5>
		bsr		DoTokenRoutine		;													<5>
	    ENDM
	
	    MACRO
	    makeSym &symNum
		move.w	&symNum,-(sp)		;													<5>
		bra		makeSymRoutine		;													<5>
	    ENDM
	    
	    
	    MACRO
	    setDel  &reg				;set the bit position in the delimMap
	    
;<6/14/88ldc>	move byte, not word													<3>
		clr.w	temp	
	    move.b  &reg,temp			;compute delimiter map position
	    
;<6/14/88ldc>	skip -1, -2															<3>
	    bmi.s   @999				;skip if temp < 0
		asr.w   #3,temp				;offset address
		bset    &reg,(dTab,temp)	;set that puppy
@999
	    ENDM

entry	
	    link    a6,#0
	    movem.l d2-d7/a2-a6,-(sp)				;just save it all
	    movea.l TokenFrame.paramSpace(a6),pBlok ;parameter block address
		movea.l	itlResource(pBlok),Map			;get the itl resource block		<1/7/88med>
		move.l	(Map),temp
		bne.s	@27
		move.l	Map,-(a7)
		_LoadResource
		move.l	(Map),temp
@27
		move.l	temp,Map
	    movea.l source(pBlok),srcA
	    move.l  sourceLength(pBlok),srcL
	    movea.l tokenList(pBlok),toks
	    tst.b   doAppend(pBlok)					;is this an append or replace operation?
	    beq.s   @1
	    move.l  tokenCount(pBlok),temp    		;compute end of list
	    mulu    #tokenRecSize,temp	    		;that's how much is in there now
	    adda.l  temp, toks		   				;viola
@1	
	    move.l  toks,-(sp)						;we need to save this for StringCopy
	    move.l  tokenLength(pBlok),tokL
	    move.l  leftDelims(pBlok),ld
	    move.l  leftComment(pBlok),lc1
	    move.l  leftComment+4(pBlok),lc2
	    move.l  MapOffset(Map),temp     		;itl character map table
	    lea     (Map,temp),Map
	    lea     cleanUp,dTab
	    move.l  dTab,mapEmergOffset(Map)		;emergency exit address for itl extendFetch

	    GetSMgrCore	dTab						;								<4/27/88ldc><3>
	    adda     #smgrRecord.delimMap, dTab		;								<4/27/88ldc><3>

	    lea		-2(dTab),holdA					; holdA free for now!!!			<1/7/88med>
		clr.w	(holdA)+						;clear delimiter map positions
		clr.l	(holdA)+
		clr.l	(holdA)+
		clr.l	(holdA)+

		clr.l	(holdA)+						; need to add one more word		<6/14/88ldc><3>

	    movea.l	srcA,holdA

; start wild and wacky macro calls (expression borrowed from JT).

	    setDel  ld								;set up delimiter map
	    swap    ld
	    setDel  ld
	    swap    ld
	    setDel  lc1
	    setDel  lc2
	    clr.l   cSym							;clear 2nd byte of each char attr.
	    GetSym									;start 2-symbol pipe
	    bra.s   main							;hop over to start

cont
	    DoToken cSym
main	
	    movea.l lastA,holdA 				;retrieve address of beginning of token
	    GetSym								;get NextSym
	    move.w  cSym,temp					;compute delimMap address
	    asr.w   #3,temp						;byte offset
	    btst    cSym,(dtab,temp.w)			;is it a possible delimiter?

	    bne     delimRun					;									<05/30/89 pke><3>

notDelim									;just as fast on the average…
	    cmpi.w  #4,cSym						;…as a jump table
	    beq	    alphabetic
	    cmpi.w  #1,cSym
	    beq.s   whiteSpace
	    cmpi.w  #16,cSym
	    bge     TokSymbol
	    cmpi.w  #5,cSym
	    beq.s   TokNumeric
	    cmpi.w  #6,cSym						;check newline
	    beq.s   cont
	    cmp.w   #11,cSym					;alternate numeral
	    beq     altnum
	    tst.w   cSym						;check unknown symbol
	    beq.s   cont
	    bmi     endparse
	    bra     unknownER					;shouldn't be able to get here

;-------------------------------------------------------------------------------
whitespace
	    swap    cSym						;need to check the nextSym
@1	
	    cmpi.w  #1,cSym						;check for recurring whitespace
	    bne.s   @3							;no more whitespace
	    NextSym								;get NextSym
	    bra.s   @1
@3	
	    swap    cSym						;get cSym back into sync
	    bra     cont

;-------------------------------------------------------------------------------
TokNumeric
	    swap    cSym						;check NextSym for numeric
	    moveq.l #5,temp1					;numeric code
@nloop
	    cmp.w   temp1,cSym
	    bne.s   @done						;not numeric; bail out
	    NextSym
	    bra.s   @nloop						;cycle again
@done
	    cmp.w   decimalCode(pBlok),cSym		;could be real number
	    beq     realNumber
	    swap    cSym						;repair cSym order
	    bra     cont

;-------------------------------------------------------------------------------	
alphabetic
	    swap    cSym						;check NextSym
	    moveq.l #4,temp1					;alphabetic code
	    tst.b   doAlphanumeric(pBlok) 		;are numerics legal?
	    beq.s   @aloopA						;no
@aloopN
	    cmp.w   temp1,cSym		  			;is it alpha?
	    beq.s   @1							;no, but it might be numeric
	    cmpi.w  #5,cSym						;is it numeric?
	    beq.s   @1							;no, but could be alternate numeric
	    cmpi.w  #11,cSym
	    bne.s   @done
@1	    NextSym
	    bra.s   @aloopN						;and re-enter loop
@aloopA
	    cmp.w   temp1,cSym					;is it alpha?
	    bne.s   @done						;nope so we're done
	    NextSym
	    bra.s   @aloopA
@done
	    swap    cSym						;repair cSym order
	    DoToken #4							;create alpha token
	    bra     main

;-------------------------------------------------------------------------------	
leftLit1
	    move.l  rightDelims(pBlok),rd		;get matching delimiter
	    swap    rd							;match was in upper word
	    bra.s   litMain
leftLit
	    move.l  rightDelims(pBlok),rd		;get matching delimiter
litMain
	    move.w  escapeCode(pBlok),escRD		;need escape code
	    DoToken #2							;create leftDelim token
	    movea.l lastA,holdA 				;save beginning of literal string
	    swap    cSym						;look ahead
@litLoop
	    cmp.w   cSym,rd						;see if we found an end delimiter
	    beq     @endlit						;sure 'nuff
	    cmp.w   cSym,escRD					;see if it's an escape
	    bne.s   @1							;nope, continue
	    DoToken #9							;portion of literal before escape
	    movea.l lastA,holdA 				;beginning of escape code
	    BaseSym
	    bmi     badDelER					;don't stop now
	    DoToken #10 						;escape code
	    movea.l lastA,holdA 				;beginning of symbol following escape
	    NextSym
	    swap    cSym
	    DoToken cSym						;symbol following escape
	    movea.l lastA,holdA 				;beginning of rest of literal
	    swap    cSym
	    bra.s   @litloop					;continue
@1	    tst.w   cSym						;make sure we've not exhausted the source
	    bmi     badDelER					;hanging delimiter
	    NextSym								;get NextSym
	    bra     @litLoop					;continue
@endLit 
	    DoToken #9							;create literal token
	    move.l  leftDelims(pBlok),rd		;repair delim and comment regs
	    move.l  leftComment(pBlok),escRD	;escRD is lc1
	    move.l  lastA,holdA 				;need address of closing delimiter
	    GetSym								;start filling pipe
	    DoToken #3							;create right delimiter token
	    bra     main

;-------------------------------------------------------------------------------	
	
leftCom2A
	    move.l  rightComment+4(pBlok),rc	;get matching delimiter
	    move.w  #1,dTab		   				;this reg becomes the nest counter
	    bra.s   comMain2
leftCom2
	    move.l  lc1,lc2		    			;lc2 becomes info for nested comment
	    move.w  #1,dTab		    			;this reg becomes the nest counter
	    move.l  rightComment(pBlok),rc
comMain2
	    GetSym								;NextSym
	    DoToken #7							;create left comment symbol
	    move.l  lastA,holdA 				;beginning of literal string
	    tst.l   rc							;see if right delim is 1 or 2 symbols
	    bmi.s   endCom1						;branch accordingly
	    bra     endCom2

;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
leftCom1A
	    move.l  rightComment+4(pBlok),rc    ;get matching delimiter
	    bra.s   comMain1
leftCom1
	    move.l  rightComment(pBlok),rc
comMain1
	    move.w  #1,dTab						;for compatibility reasons (nesting)
	    DoToken #7							;create left comment symbol
	    move.l  lastA,holdA 				;beginning of literal string
	    tst.l   rc							;see if right delim is 1 or 2 symbols
	    bmi.s   endCom1						;branch accordingly
	    bra.s   endCom2


;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
endCom1
	    swap    cSym						;look at the nextSym
@cloop	
	    cmp.w   cSym,rc						;check for end
	    beq.s   @end
	    NextSym								;go for the next symbol
	    bpl.s   @cloop						;may have run out before delimiter
	    bra 	badDelER
@end	DoToken #9							;create the literal token
	    movea.l lastA,holdA 				;start of end delimiter
	    NextSym								;get one beyond end delimiter
	    swap    cSym						;fix the order
	    move.l  leftDelims(pBlok),rc		;repair register
	    DoToken #8							;create end delimiter
	    move.l  leftComment+4(pBlok),lc2

	    GetSMgrCore	dTab					;							<4/27/88ldc><3>
	    adda     #smgrRecord.delimMap, dTab	;							<4/27/88ldc><3>

	    bra     main
	
;- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
endCom2
@cloop	
	    move.l  lastA,temp1 				;effectively a 2-symbol look-behind
	    GetSym								;try the next puppy
	    cmp.l   cSym,lc2					;check for nested comment
	    beq.s   @nest
@nogo
	    cmp.l   cSym,rc						;check for end delimiter sequence
	    beq.s   @end
	    tst.w   cSym						;may have run out of symbols
	    bpl.s   @cloop						;still okay
	    bra     badDelER
@nest
	    tst.b   doNest(pBlok)				;how's the weather?
	    beq.s   @nogo
	    move.l  temp1,lastA 				;do literal token
	    DoToken #9
	    move.l  temp1,holdA 				;start of nested left comment
	    addq.w  #1,dTab						;nest counter
	    bra     comMain2
@end
	    move.l  temp1,lastA 				;fix up the pipeline
	    DoToken #9							;create literal
	    move.l  temp1,holdA 				;start address of right delimiter
	    GetSym								;start up normal piping again
	    DoToken #8							;create right delimiter
	    movea.l lastA,holdA 				;start of whatever follows
	    subq.w  #1,dTab						;see how the nesting is…
	    cmpa.w  #0,dTab
	    bne     endCom2						;…and continue if necessary
	    move.l  leftDelims(pBlok),rc		;repair register
	    move.l  leftComment+4(pBlok),lc2

	    GetSMgrCore	dTab					;							<4/27/88ldc><3>
	    adda     #smgrRecord.delimMap, dTab	;							<4/27/88ldc><3>

	    bra     main

;-------------------------------------------------------------------------------	
altnum
	    swap    cSym						;check NextSym for numeric
	    moveq.l #11,temp1					;numeric code
@nloop
	    cmp.w   temp1,cSym
	    bne.s   @done						;not numeric; bail out
	    NextSym
	    bra.s   @nloop						;cycle again
@done
	    cmp.w   decimalCode(pBlok),cSym		;could be real number
	    beq.s   realAlt
	    swap    cSym						;repair cSym order
	    bra     cont

;-------------------------------------------------------------------------------	
realNumber									;entry from altNum or numeric
	    NextSym
	    moveq.l #5,temp1
@nloop
	    cmp.w   temp1,cSym
	    bne.s   @done
	    NextSym
	    bra.s   @nloop
@done
	    swap    cSym
	    move.w  #12,cSym					;code for real number
	    bra     cont
;-------------------------------------------------------------------------------	
realAlt 			   						;entry from altNum or numeric
	    NextSym
	    moveq.l #11,temp1
@nloop
	    cmp.w   temp1,cSym
	    bne.s   @done
	    NextSym
	    bra.s   @nloop
@done
	    swap    cSym
	    move.w  #13,cSym					;code for real number
	    bra     cont
;-------------------------------------------------------------------------------	
TokSymbol
	    swap    cSym						;we'll be checking NextSym
	    cmpi.w  #33,cSym					;is NextSym "="?
	    bne     @cont
	
	    swap    cSym
	    cmpi.w  #31,cSym					;check for <=
	    bne.s   @c1
	    makeSym #34
@c1
	    cmpi.w  #32,cSym					;check for >=
	    bne.s   @c2
	    makeSym #36
@c2
	    cmpi.w  #68,cSym					;check for :=
	    bne.s   @c3
	    makeSym #39
@c3
	    cmpi.w  #33,cSym					;check for ==
	    bne.s   @c4
	    makeSym #38
@c4
	    cmpi.w  #43,cSym					;check for !=
	    bne.s   @cont1						;if that's not it then it ain't nothin'
	    makeSym #42
@cont
	    swap    cSym						;re-adjust it
	    cmpi.l  #$001F0020,cSym				;is it "<>"?
	    bne.s   @cont1
	    makeSym #41
@cont1
	    move.l  lastA,temp1 				;was not a composite symbol
	    sub.l   holdA,temp1 				;compute size
	    bra     cont						;get back into the thick of things
	
;-------------------------------------------------------------------------------	
delimRun
	    cmp.w   cSym,ld						;check for left delimiter
	    beq     leftLit
	    swap    ld							;check for other delimiter
	    cmp.w   cSym,ld
	    beq     leftLit1
	    swap    ld
	    tst.l   lc1 						;is comment sequence two symbols?
	    bpl.s   @1
	    cmp.w   cSym,lc1					;check for left comment sequence
	    bne.s   @2
	    bra     leftCom1
@1	    cmp.l   cSym,lc1
	    beq     leftCom2
@2	    tst.l   lc2 						;is other comment sequence two symbols?
	    bpl.s   @3
	    cmp.w   cSym,lc2					;check for other left comment
	    beq     leftCom1A
	    bra     notDelim
@3	    cmp.l   cSym,lc2
	    beq     leftCom2A
		bra		notDelim
	
;-------------------------------------------------------------------------------			
;itl4 call interfacing
;
extendFetch
		tst.l	srcL						;did we come because of a double-byter?
		bpl.s	@1							;yes.
		move.w	#-1,cSym					;we need a full word negative for dTab's benefit
		rts									;we came because source ran out; return home
@1
		move.l	mapExtendOffset(Map),temp	;offset of the extended fetch routine
		jmp		(Map,temp.l)				;finish the fetch
			
StringCopy
		movea.l	itlResource(pBlok),dTab		;get the itl resource block
		move.l	(dTab),dTab					;dereference!					<1/7/88med>
		move.l	strOffset(dTab),temp		;offset of the stringCopy
		jmp		(dTab,temp.l)				;finish the fetch

;-------------------------------------------------------------------------------			
;Tail End
;-------------------------------------------------------------------------------	
endParse
	    moveq.l #0,temp		    			;status if nothing went wrong
cleanUp
	    move.w  temp, -(sp) 	   			;save status
	    move.l  toks,temp		    		;find out how many tokens we created
	    sub.l   tokenList(pBlok),temp
	    divs    #tokenRecSize,temp			;divide by size of TokenRec record
	    move.l  temp,tokenCount(pBlok)		;save the value (instead of whales)…
					    ;…if something went wrong then upper word is non-zero
	    tst.b   doString(pBlok)				;do we need to create string list?
	    beq.s   @end1
	    bsr.s	StringCopy
@end1
	    move.w  (sp)+,d0					;status report
	    addq.l  #4,sp						;yank off the token start address
	    movem.l (sp)+,d2-d7/a2-a6			;restore it
	    CheckA6								; check stack
	    unlk    a6
	    movea.l (sp)+,a0					;return address
	    add.l   #argSize,sp	    			;dump original parameters
	    move.b  d0,(sp)						;status report
	    jmp     (a0)						;go home
	
;-------------------------------------------------------------------------------	
;IntlTokenize error branches
;-------------------------------------------------------------------------------	

tokenOVER
	    move.w  #tokenOverflow,temp
	    bra.s   cleanUp
	
badDelER
	    move.w  #badDelim,temp
	    bra.s   cleanUp
	
unknownER
	    move.w  #crash,temp
	    bra.s   cleanUp

;-------------------------------------------------------------------------------	
;Common routines for code savings														<5>
;-------------------------------------------------------------------------------	

NextSymRoutine						;													<5>
		swap	cSym				;													<5>
BaseSymRoutine						;													<5>
	    movea.l srcA,lastA			;save start of symbol
	    move.b  (srcA)+,cSym		;start 2-mappedChar pipe
	    subq.l  #1,srcL				;pay the dues
	    bmi.s   @999
	    move.b  0(Map,cSym.w),cSym	;map the character
	    bpl.s   @998
@999
	    bsr     extendFetch 		;we've either run out of input or it's 2-byte,
@998								; and srcL tells which
		rts							;													<5>

GetSymRoutine						;													<5>
	    movea.l srcA,lastA			;save start of symbol
	    move.b  (srcA)+,cSym		;start 2-mappedChar pipe
	    subq.l  #1,srcL				;pay the dues
	    bmi.s   @999
	    move.b  0(Map,cSym.w),cSym	;map the character
	    bpl.s   @998
@999
	    bsr     extendFetch 		;we've either run out of input or it's 2-byte,
@998								; and srcL tells which
		swap	cSym				;													<5>
		rts							;													<5>

DoTokenRoutine
; has parameter in temp
		;move first subtraction below													<5>
	    subq.l  #1,tokL				;decrement available size of token space
	    bmi.s   toTokenOVER			;ran out of room									<5>
	    move.w  temp,(toks)+		;token												<5>
	    move.l  holdA,(toks)+		;adr in srcA of token start
	    move.l  lastA,temp			;find length of token								<5>
	    sub.l   holdA,temp			;													<5>
	    move.l  temp,(toks)+		;size of token string
	    clr.l   (toks)+				;space for string ptr
		rts							;													<5>
toTokenOVER
		addq	#4,sp				; strip DoTokenRoutine return address…				<5>
									; …or makeSymRoutine parameter.
		bra		tokenOVER			; go to err handling routine						<5>

makeSymRoutine
; has DoToken parameter on the stack.
;GetSym part
	    movea.l srcA,lastA			;save start of symbol
	    move.b  (srcA)+,cSym		;start 2-mappedChar pipe
	    subq.l  #1,srcL				;pay the dues
	    bmi.s   @999
	    move.b  0(Map,cSym.w),cSym	;map the character
	    bpl.s   @998
@999
	    bsr     extendFetch 		;we've either run out of input or it's 2-byte,
@998								; and srcL tells which
		swap	cSym				;													<5>
;DoToken part
	    subq.l  #1,tokL				;decrement available size of token space
	    bmi.s   toTokenOVER			;ran out of room									<5>
	    move.w  (sp)+,(toks)+		;token												<5>
	    move.l  holdA,(toks)+		;adr in srcA of token start
	    move.l  lastA,temp			;find length of token								<5>
	    sub.l   holdA,temp			;													<5>
	    move.l  temp,(toks)+		;size of token string
	    clr.l   (toks)+				;space for string ptr
;don't return, just branch
	    bra     main

	    ENDWITH
	    ENDF
	    END