mac-rom/Toolbox/ScriptMgr/itl4Roman.a

595 lines
21 KiB
Plaintext
Raw Normal View History

;
; File: itl4Roman.a
;
; Contains: Localizable tables and code for Script Manager<65>s IntlTokenize routine and formatted
; number handling routines (Str2Format, Format2Str, FormatStr2X, FormatX2Str). This is
; the U.S. version.
;
; Written by: DRS daan Strebe
; MED Mark Davis
; LDC Lee Collins
; PKE Peter Edberg
;
; Copyright: <09> 1987-1990, 1992 by Apple Computer, Inc., all rights reserved.
;
; Change History (most recent first):
;
; <12> 11/17/92 PKE Change includes so that we can give this file out to localizers.
; <11> 4/14/92 JSM Get rid of all SysVers conditionals.
; <10> 10/30/90 PKE (stb) Added some comments. Resource data byte-for-byte identical
; with previous version.
; <9> 9/16/90 PKE Use formerly reserved field as format code (makes resource
; easier to DeRez, etc.).
; <8> 8/29/90 PKE Put length of whiteSpaceList in corresponding header field.
; <7> 7/5/90 PKE NEEDED FOR SIXPACK: Deleted extra line in MapChar table
; accidentally introduced in change <5> - it was seriously messing
; up IntlTokenize! Removed local definition of smgrSixPack since
; it is now defined in ScriptPriv.a.
; <6> 7/2/90 PKE Added guts of whiteSpaceList.
; <5> 6/26/90 PKE NEEDED FOR SIXPACK (by HyperCard): Fix old bug in MapChar for
; char code 0 (causes IntlTokenize to crash on nulls).
; <4> 6/12/90 PKE The MapChar table should also supply tokenCenterDot for the
; small center dot with code $E1.
; <3> 5/18/90 PKE Handle new token types tokenEllipsis (used by TruncText) and
; tokenCenterDot (used by AppleShare) in MapChar and unToken
; tables. Delete truncMarker table (since we use tokenEllipsis
; instead) and replace it with whiteSpaceList (needed for
; HyperCard, but not fully implemented yet).
; <2> 3/21/90 PKE Rearranged length information to be correct in itl4 header.
; Changed version number for 7.0 from $200 to $700. Updated file
; header to BBS format.
; <1> 12/18/89 CCH Adding for the first time into BBS. Changed include 'inc.sum.a'
; to load 'StandardEqu.d'. Updated file name references.
;
; (BBS versions above, EASE versions below)
; <1.8> 9/19/89 PKE Add 8 length words to header, fill in unTokenLen and
; defaultPartsLen.
; <1.7> 9/15/89 PKE Add truncMarker for 7.0. Update version number from $100 to
; $200.
; <1.6> 8/26/89 PKE Delete load 'nEqu.d'.
; <1.5> 5/30/89 PKE Fix IntlTokenize for <20> (BRC #46579); remove support in 6.0.4 for
; double-acute accent.
; <1.4> 3/5/89 PKE Add <20>,<2C>,<2C> as letters for tokenizer; added support for char set
; extensions (beyond $D8): letters, some floating accents as
; letters, plus <20> <20> <20> <20> <20> as new symbols returning unique types.
; <1.3> 2/21/89 PKE Fix up includes: always include 'inc.sum.a'.
; <1.2> 2/14/89 PKE Updated to use MPW 3.0 final ScriptEqu.a equate names
; <1.1> 1/16/89 CCH Merged 6.0.3 final sources into 7.0.
; <1.0> 11/16/88 CCH Added to EASE.
; (pre-EASE versions below)
; 9/23/88 ldc Fixed incorrect use of untokenizing macros (7.0)
; 7/21/88 ldc Added char extensions beyond $D8 (chars in Adobe font) (7.0)
; 4/7/88 ldc Added localization instructions (7.0)
; 3/19/88 med Dropped length from numberparts table: it is constant size
; 3/19/88 med Changed untokenize table: used word offsets, added missing
; values
; 3/19/88 ldc removed '&' from numberparts table
; 3/14/88 ldc added version to numberparts table
; 2/8/88 med Added alternate numbers, removed all but () from unquoteds
; 1/4/88 med Added new version from Dan (v. 1.0.A)
; 6/19/87 daan Strebe
;
;___________________________________________________________________________________________________
;--------------------------------------------------------------------------
;
;THIS FILE IS THE ONLY PART OF THE TOKENIZER THAT SHOULD CHANGE BETWEEN SISs
;
;LOCALIZATION INSTRUCTIONS:
;Select and find (Command+h) the string LOCALVERSION for instructions about modifying
;this resource for local versions.
;
;After changes have been made, you can assemble and link the modified itl4Roman.a
;with the following MPW commands:
;
;(note: this example uses French--country code = 1--as an example. The complete list of
;country codes may be found in the International Utilities chapter of Inside Macintosh,
;volume I)
;
;
;set myCountry 1
;asm itl4Roman.a -o itl4Roman.a.o -i "IncludeFilePath"
;link itl4Roman.a.o -o itl4Roman.code -rt itl4={myCountry} -t 'none' -c 'none' -sg "Roman"
;
;(note: this assumes any include files such as inc.sum.d and nEqu.d for
;itl4Roman.a are on a directory in the "IncludeFilePath")
;
;After linking, you can use resedit to copy the itl4 resource from itl4Roman.code into
;the system file.
;
;
;--------------------------------------------------------------------------
;LOCALVERSION xxxToken
;Non-US Roman systems require no change to use the procedure USToken.
;Other scripts such as Arabic and Japanese require entirely different local
;versions of the xxxToken procedure. For details, see the implementation notes
;for the StringCopy procedure used by the function IntlTokenize in ScriptMgrUtilText.a
;
;
EXPORT USToken
; new includes <12>
include 'Script.a'
include 'IntlResourcesPriv.a'
; old includes:
;; load 'StandardEqu.d'
;; include 'ScriptPriv.a'
USToken Proc
WITH TokenRec,tokenBlock,TokenResults
string asis
itl4Id equ 0 ; itl4 resource ID for US <9>
itl4Version equ $0700 ; version number <1.7><2><9>
itl4Format equ 1 ; has extended header, etc. <9>
start
dc.w 0 ;flags word
dc.l 'itl4' ;resource type
dc.w itl4Id ;resource ID number <9>
dc.w itl4Version ;version number <1.7><2><9>
dc.w itl4Format ;format <9>
dc.w 0 ;reserved <9>
dc.l 0 ;reserved, for 18 bytes total header
dc.w 8 ;# of table entries
offsets
t1 dc.l MapChar - start
t2 dc.l StringCopy - start
t3 dc.l extendFetch - start
t4 dc.l unToken - start
t5 dc.l defaultPartsTable - start
t6 dc.l whiteSpaceList - start ; <3>
dc.l 0
dc.l 0
dc.w 0 ; resLength1
dc.w 0 ; resLength2
dc.w 0 ; resLength3
dc.w unTokenEnd - unToken ; unTokenLength <2>
dc.w defaultEnd - defaultPartsTable ; defPartsLength <2>
dc.w whiteSpaceListEnd-whiteSpaceList ; whtSpListLength <2><8>
dc.w 0 ; resLength7
dc.w 0 ; resLength8
;-------------------------------------------------------------------------------------
;The following two values are referenced from MapChar and MUST NOT BE MOVED.
dc.l 0 ;emergency exit address for extendFetch
dc.l extendFetch - MapChar
MapChar
dc.b 00,00,00,00,00,00,00,00,00,01,00,00,00,06,00,00 ; $00-0F Fix for nulls <5>
dc.b 00,00,00,00,00,00,00,00,00,00,00,00,00,00,00,00 ; $10-1F
dc.b 01,43,51,69,70,54,57,52,16,17,26,24,45,25,46,29 ; $20-2F
dc.b 05,05,05,05,05,05,05,05,05,05,68,53,31,33,32,60 ; $30-3F
dc.b 58,04,04,04,04,04,04,04,04,04,04,04,04,04,04,04 ; $40-4F
dc.b 04,04,04,04,04,04,04,04,04,04,04,18,30,19,55,56 ; $50-5F
dc.b 00,04,04,04,04,04,04,04,04,04,04,04,04,04,04,04 ; $60-6F
dc.b 04,04,04,04,04,04,04,04,04,04,04,20,59,21,44,00 ; $70-7F
dc.b 04,04,04,04,04,04,04,04,04,04,04,04,04,04,04,04 ; $80-8F
dc.b 04,04,04,04,04,04,04,04,04,04,04,04,04,04,04,04 ; $90-9F
dc.b 00,00,00,00,00,78,00,04,00,00,00,00,00,40,04,04 ; $A0-AF <3>
dc.b 67,28,35,37,00,65,00,63,66,61,64,00,00,00,04,04 ; $B0-BF
dc.b 00,00,00,62,00,00,00,22,23,77,71,04,04,04,04,04 ; $C0-CF <3>
dc.b 00,00,47,48,49,50,27,00,04,04,72,73,74,75,04,04 ; $D0-DF
dc.b 00,78,00,00,76,04,04,04,04,04,04,04,04,04,04,04 ; $E0-EF <4>
dc.b 00,04,04,04,04,04,04,04,04,04,04,04,04,04,04,04 ; No <20> <03/05/89 pke>
;-------------------------------------------------------------------------------------
extendFetch
stop #0 ;it's "impossible" to get here in single byte scripts
;-------------------------------------------------------------------------------------
StringCopy
loopC EQU ld
myToken EQU holdA
myDest EQU srcA
mySource EQU lastA
limit EQU dTab
ch EQU lc2
move.l stringList(pBlok),myDest ;get address of string list
move.l tokenCount(pBlok),loopC ;set up loop counter
beq.s @out
subq.l #1,loopC
tst.b doAppend(pBlok) ;offset the string address?
beq.s @kel
add.l stringCount(pBlok),myDest ;effective end of string
@kel
move.l magicStackTokenEnd(sp),myToken ;address of token list
move.l myDest,limit ;compute end address of string list
adda.l stringLength(pBlok),limit
@loop
move.l myDest,stringPosition(myToken) ;address of string in string list
movea.l position(myToken),mySource ;address of source string
move.l length(myToken),temp ;length of string
moveq.l #0,temp1 ;clean out upper bytes
move.b temp,temp1
add.l myDest,temp1 ;what's the limit?
cmp.l limit,temp1 ;are we gonna run out of room?
bpl.s stringERROR ;uhoh
move.b temp,(myDest)+ ;make Pascal string length
beq.s @normalize ;empty string
subq.b #1,temp ;the bane of computer novices
move.w theToken(myToken),temp1
cmp.w #12,temp1 ;have to translate the decimal mark
beq.s @realLoop
@subloop
move.b (mySource)+,(myDest)+ ;copy that byte
dbra temp,@subloop ;high-tail it back if we can
bra.s @normalize
@realLoop
move.b (mySource)+,ch
cmp.b #'0',ch
blt.s @decimal
cmp.b #'9',ch
ble.s @cont2
@decimal
move.b #'.',ch
@cont2
move.b ch,(myDest)+
dbra temp,@realLoop
@normalize
move.b #0,(myDest)+ ;null terminate that baby
move.l myDest,temp ;make sure myDest is even-boundaried
addq.l #1,temp
bclr #0,temp
move.l temp,myDest ;all tidied up
add.w #tokenRecSize,myToken ;advance to next record
dbra loopC,@loop
@out
sub.l stringList(pBlok),myDest ;compute StringCount
move.l myDest,stringCount(pBlok) ;and save it
rts
stringERROR
move.w #stringOverflow,magicStackTokenEnd(sp) ;modify error parameter
rts
ENDWITH
;-------------------------------------------------------------------------------------
;UNTOKENIZE strings
;
; For 7.0, we must have tokens through tokenCenterDot (78), because AppleShare gets
; center dot from here and ReplaceText gets ellipsis from here.
;-------------------------------------------------------------------------------------
unToken
dc.w unTokenEnd-* ; length in bytes of table
dc.w 78 ;last code whose string equivalent appears here <3>
s0 dc.w unk-unToken
s1 dc.w whi-unToken
s2 dc.w llt-unToken
s3 dc.w rlt-unToken
s4 dc.w alf-unToken
s5 dc.w num-unToken
s6 dc.w nli-unToken
s7 dc.w lco-unToken
s8 dc.w rco-unToken
s9 dc.w lit-unToken
s10 dc.w esp-unToken
s11 dc.w anm-unToken
s12 dc.w rnm-unToken
s13 dc.w arn-unToken
s14 dc.w rs1-unToken
s15 dc.w rs2-unToken
s16 dc.w lpa-unToken
s17 dc.w rpa-unToken
s18 dc.w lbr-unToken
s19 dc.w rbr-unToken
s20 dc.w lcu-unToken
s21 dc.w rcu-unToken
s22 dc.w ldo-unToken
s23 dc.w rdo-unToken
s24 dc.w plu-unToken
s25 dc.w min-unToken
s26 dc.w tim-unToken
s27 dc.w div-unToken
s28 dc.w pmi-unToken
s29 dc.w sla-unToken
s30 dc.w bsl-unToken
s31 dc.w lth-unToken
s32 dc.w gth-unToken
s33 dc.w eql-unToken
s34 dc.w lte-unToken
s35 dc.w leq-unToken
s36 dc.w gte-unToken
s37 dc.w geq-unToken
s38 dc.w deq-unToken
s39 dc.w ceq-unToken
s40 dc.w neq-unToken
s41 dc.w lgr-unToken
s42 dc.w eeq-unToken
s43 dc.w exc-unToken
s44 dc.w til-unToken
s45 dc.w com-unToken
s46 dc.w per-unToken
s47 dc.w ldq-unToken
s48 dc.w rdq-unToken
s49 dc.w lsq-unToken
s50 dc.w rsq-unToken
s51 dc.w dqu-unToken
s52 dc.w squ-unToken
s53 dc.w sco-unToken
s54 dc.w psi-unToken
s55 dc.w car-unToken
s56 dc.w uli-unToken
s57 dc.w ape-unToken
s58 dc.w ats-unToken
s59 dc.w vba-unToken
s60 dc.w qma-unToken
s61 dc.w pi-unToken
s62 dc.w sqr-unToken
s63 dc.w sig-unToken
s64 dc.w int-unToken
s65 dc.w mic-unToken
s66 dc.w cpi-unToken
s67 dc.w inf-unToken
s68 dc.w col-unToken
s69 dc.w pou-unToken
s70 dc.w dsi-unToken
s71 dc.w nsp-unToken
s72 dc.w frc-unToken ; fraction <7/21/88ldc>
s73 dc.w icr-unToken ; intl currency <7/21/88ldc>
s74 dc.w lsg-unToken ; left single guillemet
s75 dc.w rsg-unToken ; right single guillemet <7/21/88ldc>
s76 dc.w prt-unToken ; per thousands <7/21/88ldc>
s77 dc.w elp-unToken ; ellipsis <3>
s78 dc.w cdt-unToken ; center dot <3>
Macro
GenPString &str
string pascal
dc.b &str
;; dc.b 0 ; add null termination
align 2 ; make sure even aligned for next value
EndMacro
Macro
BytPString &fust
lcla &n
dc.b &nbr(&syslist)
&n seta 1
while &n <= &nbr(&syslist) do
dc.b &syslist[&n]
&n seta &n+1
endw
;; dc.b 0 ; add null termination
align 2 ; make sure even aligned for next value
EndMacro
; these should never come through
rs1 GenPString '??' ; 14 reserved1
rs2 GenPString '??' ; 15 reserved2
arn GenPString '??' ; 13 alternate real number
anm GenPString '??' ; 11 alternate number (e.g. $B0-B9)
rnm GenPString '??' ; 12 real number
lit GenPString '??' ; 09 literal
alf GenPString '??' ; 04 alphabetic
num GenPString '??' ; 05 numeric
unk GenPString '??' ; 00 unknown
;LOCALVERSION Tokens
;Below are the string representations of the normalized tokens.
;Some scripts may require different character values for the token
;strings. The Arabic question mark, for example, has the hexadecimal
;value $BF, not $3F.
;
;To represent the Arabic question mark as a token then,
;change the entry for the question mark (qma)
;
; qma GenPString '?'
;to
; qma BytPString $BF
;
;
;Note that the string macro GenPString is to be used for
;the direct ASCII values. BytPString is used to create strings
;from the raw bytes that represent the characters in the string.
;Non-US Roman systems that use the standard character set require
; no change here.
;
whi GenPString ' ' ; 01 whitespace
llt GenPString '"' ; 02 literal begin ("left literal marker")
rlt GenPString '"' ; 03 literal end ("right literal marker")
nli BytPString $0D ; 06 new line
lco GenPString '/*' ; 07 open comment ("left comment")
rco GenPString '*/' ; 08 close comment ("right comment")
esp BytPString $5C ; 10 character escape (e.g. \n, \t )
lpa GenPString '(' ; 16 open parenthesis ("left parenthesis")
rpa GenPString ')' ; 17 close parenthesis ("right parenthesis")
lbr GenPString '[' ; 18 open square bracket ("left bracket")
rbr GenPString ']' ; 19 close square bracket ("right bracket")
lcu GenPString '{' ; 20 open curly bracket ("left curly bracket")
rcu GenPString '}' ; 21 close curly bracket ("right curly bracket")
ldo GenPString '<27>' ; 22 open guillemet ("left European double quote")
rdo GenPString '<27>' ; 23 close guillemet ("right European double quote")
plu GenPString '+' ; 24 plus
min GenPString '-' ; 25 minus
tim GenPString '*' ; 26 times/multiply
div GenPString '<27>' ; 27 divide
pmi GenPString '<27>' ; 28 plus/minus
sla GenPString '/' ; 29 slash
bsl BytPString $5C ; 30 backslash
lth GenPString '<' ; 31 less than
gth GenPString '>' ; 32 greater than
eql GenPString '=' ; 33 equal
lte GenPString '<=' ; 34 less than or equal (2 symbols)
leq GenPString '<27>' ; 35 less than or equal (single symbol)
gte GenPString '>=' ; 36 greater than or equal (2 symbols)
geq GenPString '<27>' ; 37 greater than or equal (single symbol)
deq GenPString '==' ; 38 double equal
ceq GenPString ':=' ; 39 colon equal
neq GenPString '<27>' ; 40 not equal
lgr GenPString '<>' ; 41 less/greater symbol (not equal in Pascal)
eeq GenPString '!=' ; 42 exclamation equal (not equal in C)
exc GenPString '!' ; 43 exclamation point
til GenPString '~' ; 44 centered tilde (as opposed to real tilde at $F7)
com GenPString ',' ; 45 comma
per GenPString '.' ; 46 period
ldq GenPString '<27>' ; 47 open double quote ("left double quote")
rdq GenPString '<27>' ; 48 close double quote ("right double quote")
lsq GenPString '<27>' ; 49 open single quote ("left single quote")
rsq GenPString '<27>' ; 50 close single quote ("right single quote")
dqu GenPString '"' ; 51 double quote
squ BytPString $27 ; 52 single quote
sco GenPString ';' ; 53 semicolon
psi GenPString '%' ; 54 percent
car GenPString '^' ; 55 caret
uli GenPString '_' ; 56 underline
ape BytPString $26 ; 57 ampersand
ats GenPString '@' ; 58 at sign
vba GenPString '|' ; 59 vertical bar
qma GenPString '?' ; 60 question mark
pi GenPString '<27>' ; 61 pi
sqr GenPString '<27>' ; 62 square root
sig GenPString '<27>' ; 63 capital sigma
int GenPString '<27>' ; 64 integral
mic GenPString '<27>' ; 65 micro
cpi GenPString '<27>' ; 66 capital pi
inf GenPString '<27>' ; 67 infinity
col GenPString ':' ; 68 colon
pou GenPString '#' ; 69 pound sign (U.S.)
dsi GenPString '$' ; 70 dollar sign
nsp BytPString $CA ; 71 non-breaking space
frc BytPString $DA ; 72 fraction <7/21/88ldc>
icr BytPString $DB ; 73 intl currency <7/21/88ldc>
lsg BytPString $DC ; 74 open single guillemet ("left single g<>") <7/21/88ldc>
rsg BytPString $DD ; 75 close single guillemet ("right single g<>") <7/21/88ldc>
prt BytPString $E4 ; 76 per thousands <7/21/88ldc>
elp BytPString $C9 ; 77 ellipsis <3>
cdt BytPString $A5 ; 78 center dot <3>
untokenEnd
;-------------------------------------------------------------------------------------
; Default Parts Table
;
; NumberParts = RECORD
; data: ARRAY[leftQuote..maxSymbols] OF WideChar;
; PePlus, PeMinus, PeMinusPlus: WideCharArr;
; altNumTable:WideCharArr;
; END;
; leftQuote, rightQuote, leadPlacer, leader, nonLeader,
; zeroLead, percent, plusSign, minusSign, thousands,
; noRoomFill, separator, escape, decPoint <unquoteds>
;
;-------------------------------------------------------------------------------------
string asis
defaultPartsTable
dc.w 1 ; version
;LOCALVERSION NumberParts Table
;The number parts table contains two-byte values for the symbols used in
;formating number strings. Each two-byte combination represents the following
;values:
; leftQuote, rightQuote, leadPlacer, leader,
; nonLeader, zeroLead, percent, plusSign,
; minusSign, thousands, reserved, separator,
; escape, decPoint <unquoteds>
;
;The US version begins with $00, $27 for the leftQuote, $00, $27 for
;the rightQuote, and ends with the unquoted right paren, $00, ')'.
;
;Localization requires replacing these default values with those representing
;symbols required locally.
;
;For example, to modify the table to represent European number formats that
;use ',' for the decimal point and '.' for the thousands place, simply
;switch these symbols in the third and fourth rows of the table.
;
; dc.b $00, '-', $00, '.', $00, $00, $00, ';'
; dc.b $00, '\', $00, ',', $00, '(', $00, ')'
@data
dc.b $00, $27, $00, $27, $00, '^', $00, '<27>'
dc.b $00, '#', $00, '0', $00, '%', $00, '+'
dc.b $00, '-', $00, ',', $00, $00, $00, ';'
dc.b $00, '\', $00, '.', $00, '(', $00, ')'
dc.b $00, $00, $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00
@PePlus
dc.w 1
dc.b $00, 'E', $00, '+', $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00, $00, $00
@PeMinus
dc.w 1
dc.b $00, 'E', $00, '-', $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00, $00, $00
@PeMinusPlus
dc.w 0
dc.b $00, 'E', $00, $00, $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00, $00, $00
;LOCALVERSION alternate numbers
;alternate characters for the numbers 0..9 can be added here
;For an Arabic system for example, the Arabic digits
;(hexadecimal $B0 to B9) could be added as follows:
;@altNumTable
; dc.w 9
; dc.b $00, $B0, $00, $B1, $00, $B2, $00, $B3, $00, $B4
; dc.b $00, $B5, $00, $B6, $00, $B7, $00, $B8, $00, $B9
@altNumTable
dc.w 9
dc.b $00, $30, $00, $31, $00, $32, $00, $33, $00, $34
dc.b $00, $35, $00, $36, $00, $37, $00, $38, $00, $39
@reserved
dc.b $00, $00, $00, $00, $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00, $00, $00
defaultEnd
;-------------------------------------------------------------------------------------
; <3> Whitespace character list for HyperCard
; <6> Filled in the guts of the table.
;-------------------------------------------------------------------------------------
string pascal
whiteSpaceList ; <3>
; Header with length and count. <6>
dc.w whiteSpaceListEnd-whiteSpaceList ; length in bytes of table
dc.w 2 ; number of space characters in table
; Offsets to whitespace strings. <6>
dc.w normalSp-whiteSpaceList
dc.w tabSp-whiteSpaceList
; Now here are the strings themselves, in Pascal style. <6>
; Non-breaking spaces should NOT be included here.
normalSp GenPString ' '
tabSp BytPString $09
whiteSpaceListEnd ; <6>
EndP
End