sys7.1-doc-wip/Toolbox/ScriptMgr/itl4Roman.a
2019-07-27 22:37:48 +08:00

595 lines
21 KiB
Plaintext
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;
; File: itl4Roman.a
;
; Contains: Localizable tables and code for Script Managers IntlTokenize routine and formatted
; number handling routines (Str2Format, Format2Str, FormatStr2X, FormatX2Str). This is
; the U.S. version.
;
; Written by: DRS daan Strebe
; MED Mark Davis
; LDC Lee Collins
; PKE Peter Edberg
;
; Copyright: © 1987-1990, 1992 by Apple Computer, Inc., all rights reserved.
;
; Change History (most recent first):
;
; <12> 11/17/92 PKE Change includes so that we can give this file out to localizers.
; <11> 4/14/92 JSM Get rid of all SysVers conditionals.
; <10> 10/30/90 PKE (stb) Added some comments. Resource data byte-for-byte identical
; with previous version.
; <9> 9/16/90 PKE Use formerly reserved field as format code (makes resource
; easier to DeRez, etc.).
; <8> 8/29/90 PKE Put length of whiteSpaceList in corresponding header field.
; <7> 7/5/90 PKE NEEDED FOR SIXPACK: Deleted extra line in MapChar table
; accidentally introduced in change <5> - it was seriously messing
; up IntlTokenize! Removed local definition of smgrSixPack since
; it is now defined in ScriptPriv.a.
; <6> 7/2/90 PKE Added guts of whiteSpaceList.
; <5> 6/26/90 PKE NEEDED FOR SIXPACK (by HyperCard): Fix old bug in MapChar for
; char code 0 (causes IntlTokenize to crash on nulls).
; <4> 6/12/90 PKE The MapChar table should also supply tokenCenterDot for the
; small center dot with code $E1.
; <3> 5/18/90 PKE Handle new token types tokenEllipsis (used by TruncText) and
; tokenCenterDot (used by AppleShare) in MapChar and unToken
; tables. Delete truncMarker table (since we use tokenEllipsis
; instead) and replace it with whiteSpaceList (needed for
; HyperCard, but not fully implemented yet).
; <2> 3/21/90 PKE Rearranged length information to be correct in itl4 header.
; Changed version number for 7.0 from $200 to $700. Updated file
; header to BBS format.
; <1> 12/18/89 CCH Adding for the first time into BBS. Changed include 'inc.sum.a'
; to load 'StandardEqu.d'. Updated file name references.
;
; (BBS versions above, EASE versions below)
; <1.8> 9/19/89 PKE Add 8 length words to header, fill in unTokenLen and
; defaultPartsLen.
; <1.7> 9/15/89 PKE Add truncMarker for 7.0. Update version number from $100 to
; $200.
; <1.6> 8/26/89 PKE Delete load 'nEqu.d'.
; <1.5> 5/30/89 PKE Fix IntlTokenize for ≤ (BRC #46579); remove support in 6.0.4 for
; double-acute accent.
; <1.4> 3/5/89 PKE Add ß,ø,Ø as letters for tokenizer; added support for char set
; extensions (beyond $D8): letters, some floating accents as
; letters, plus ‰ as new symbols returning unique types.
; <1.3> 2/21/89 PKE Fix up includes: always include 'inc.sum.a'.
; <1.2> 2/14/89 PKE Updated to use MPW 3.0 final ScriptEqu.a equate names
; <1.1> 1/16/89 CCH Merged 6.0.3 final sources into 7.0.
; <1.0> 11/16/88 CCH Added to EASE.
; (pre-EASE versions below)
; 9/23/88 ldc Fixed incorrect use of untokenizing macros (7.0)
; 7/21/88 ldc Added char extensions beyond $D8 (chars in Adobe font) (7.0)
; 4/7/88 ldc Added localization instructions (7.0)
; 3/19/88 med Dropped length from numberparts table: it is constant size
; 3/19/88 med Changed untokenize table: used word offsets, added missing
; values
; 3/19/88 ldc removed '&' from numberparts table
; 3/14/88 ldc added version to numberparts table
; 2/8/88 med Added alternate numbers, removed all but () from unquoteds
; 1/4/88 med Added new version from Dan (v. 1.0.A)
; 6/19/87 daan Strebe
;
;___________________________________________________________________________________________________
;--------------------------------------------------------------------------
;
;THIS FILE IS THE ONLY PART OF THE TOKENIZER THAT SHOULD CHANGE BETWEEN SISs
;
;LOCALIZATION INSTRUCTIONS:
;Select and find (Command+h) the string LOCALVERSION for instructions about modifying
;this resource for local versions.
;
;After changes have been made, you can assemble and link the modified itl4Roman.a
;with the following MPW commands:
;
;(note: this example uses French--country code = 1--as an example. The complete list of
;country codes may be found in the International Utilities chapter of Inside Macintosh,
;volume I)
;
;
;set myCountry 1
;asm itl4Roman.a -o itl4Roman.a.o -i "IncludeFilePath"
;link itl4Roman.a.o -o itl4Roman.code -rt itl4={myCountry} -t 'none' -c 'none' -sg "Roman"
;
;(note: this assumes any include files such as inc.sum.d and nEqu.d for
;itl4Roman.a are on a directory in the "IncludeFilePath")
;
;After linking, you can use resedit to copy the itl4 resource from itl4Roman.code into
;the system file.
;
;
;--------------------------------------------------------------------------
;LOCALVERSION xxxToken
;Non-US Roman systems require no change to use the procedure USToken.
;Other scripts such as Arabic and Japanese require entirely different local
;versions of the xxxToken procedure. For details, see the implementation notes
;for the StringCopy procedure used by the function IntlTokenize in ScriptMgrUtilText.a
;
;
EXPORT USToken
; new includes <12>
include 'Script.a'
include 'IntlResourcesPriv.a'
; old includes:
;; load 'StandardEqu.d'
;; include 'ScriptPriv.a'
USToken Proc
WITH TokenRec,tokenBlock,TokenResults
string asis
itl4Id equ 0 ; itl4 resource ID for US <9>
itl4Version equ $0700 ; version number <1.7><2><9>
itl4Format equ 1 ; has extended header, etc. <9>
start
dc.w 0 ;flags word
dc.l 'itl4' ;resource type
dc.w itl4Id ;resource ID number <9>
dc.w itl4Version ;version number <1.7><2><9>
dc.w itl4Format ;format <9>
dc.w 0 ;reserved <9>
dc.l 0 ;reserved, for 18 bytes total header
dc.w 8 ;# of table entries
offsets
t1 dc.l MapChar - start
t2 dc.l StringCopy - start
t3 dc.l extendFetch - start
t4 dc.l unToken - start
t5 dc.l defaultPartsTable - start
t6 dc.l whiteSpaceList - start ; <3>
dc.l 0
dc.l 0
dc.w 0 ; resLength1
dc.w 0 ; resLength2
dc.w 0 ; resLength3
dc.w unTokenEnd - unToken ; unTokenLength <2>
dc.w defaultEnd - defaultPartsTable ; defPartsLength <2>
dc.w whiteSpaceListEnd-whiteSpaceList ; whtSpListLength <2><8>
dc.w 0 ; resLength7
dc.w 0 ; resLength8
;-------------------------------------------------------------------------------------
;The following two values are referenced from MapChar and MUST NOT BE MOVED.
dc.l 0 ;emergency exit address for extendFetch
dc.l extendFetch - MapChar
MapChar
dc.b 00,00,00,00,00,00,00,00,00,01,00,00,00,06,00,00 ; $00-0F Fix for nulls <5>
dc.b 00,00,00,00,00,00,00,00,00,00,00,00,00,00,00,00 ; $10-1F
dc.b 01,43,51,69,70,54,57,52,16,17,26,24,45,25,46,29 ; $20-2F
dc.b 05,05,05,05,05,05,05,05,05,05,68,53,31,33,32,60 ; $30-3F
dc.b 58,04,04,04,04,04,04,04,04,04,04,04,04,04,04,04 ; $40-4F
dc.b 04,04,04,04,04,04,04,04,04,04,04,18,30,19,55,56 ; $50-5F
dc.b 00,04,04,04,04,04,04,04,04,04,04,04,04,04,04,04 ; $60-6F
dc.b 04,04,04,04,04,04,04,04,04,04,04,20,59,21,44,00 ; $70-7F
dc.b 04,04,04,04,04,04,04,04,04,04,04,04,04,04,04,04 ; $80-8F
dc.b 04,04,04,04,04,04,04,04,04,04,04,04,04,04,04,04 ; $90-9F
dc.b 00,00,00,00,00,78,00,04,00,00,00,00,00,40,04,04 ; $A0-AF <3>
dc.b 67,28,35,37,00,65,00,63,66,61,64,00,00,00,04,04 ; $B0-BF
dc.b 00,00,00,62,00,00,00,22,23,77,71,04,04,04,04,04 ; $C0-CF <3>
dc.b 00,00,47,48,49,50,27,00,04,04,72,73,74,75,04,04 ; $D0-DF
dc.b 00,78,00,00,76,04,04,04,04,04,04,04,04,04,04,04 ; $E0-EF <4>
dc.b 00,04,04,04,04,04,04,04,04,04,04,04,04,04,04,04 ; No  <03/05/89 pke>
;-------------------------------------------------------------------------------------
extendFetch
stop #0 ;it's "impossible" to get here in single byte scripts
;-------------------------------------------------------------------------------------
StringCopy
loopC EQU ld
myToken EQU holdA
myDest EQU srcA
mySource EQU lastA
limit EQU dTab
ch EQU lc2
move.l stringList(pBlok),myDest ;get address of string list
move.l tokenCount(pBlok),loopC ;set up loop counter
beq.s @out
subq.l #1,loopC
tst.b doAppend(pBlok) ;offset the string address?
beq.s @kel
add.l stringCount(pBlok),myDest ;effective end of string
@kel
move.l magicStackTokenEnd(sp),myToken ;address of token list
move.l myDest,limit ;compute end address of string list
adda.l stringLength(pBlok),limit
@loop
move.l myDest,stringPosition(myToken) ;address of string in string list
movea.l position(myToken),mySource ;address of source string
move.l length(myToken),temp ;length of string
moveq.l #0,temp1 ;clean out upper bytes
move.b temp,temp1
add.l myDest,temp1 ;what's the limit?
cmp.l limit,temp1 ;are we gonna run out of room?
bpl.s stringERROR ;uhoh
move.b temp,(myDest)+ ;make Pascal string length
beq.s @normalize ;empty string
subq.b #1,temp ;the bane of computer novices
move.w theToken(myToken),temp1
cmp.w #12,temp1 ;have to translate the decimal mark
beq.s @realLoop
@subloop
move.b (mySource)+,(myDest)+ ;copy that byte
dbra temp,@subloop ;high-tail it back if we can
bra.s @normalize
@realLoop
move.b (mySource)+,ch
cmp.b #'0',ch
blt.s @decimal
cmp.b #'9',ch
ble.s @cont2
@decimal
move.b #'.',ch
@cont2
move.b ch,(myDest)+
dbra temp,@realLoop
@normalize
move.b #0,(myDest)+ ;null terminate that baby
move.l myDest,temp ;make sure myDest is even-boundaried
addq.l #1,temp
bclr #0,temp
move.l temp,myDest ;all tidied up
add.w #tokenRecSize,myToken ;advance to next record
dbra loopC,@loop
@out
sub.l stringList(pBlok),myDest ;compute StringCount
move.l myDest,stringCount(pBlok) ;and save it
rts
stringERROR
move.w #stringOverflow,magicStackTokenEnd(sp) ;modify error parameter
rts
ENDWITH
;-------------------------------------------------------------------------------------
;UNTOKENIZE strings
;
; For 7.0, we must have tokens through tokenCenterDot (78), because AppleShare gets
; center dot from here and ReplaceText gets ellipsis from here.
;-------------------------------------------------------------------------------------
unToken
dc.w unTokenEnd-* ; length in bytes of table
dc.w 78 ;last code whose string equivalent appears here <3>
s0 dc.w unk-unToken
s1 dc.w whi-unToken
s2 dc.w llt-unToken
s3 dc.w rlt-unToken
s4 dc.w alf-unToken
s5 dc.w num-unToken
s6 dc.w nli-unToken
s7 dc.w lco-unToken
s8 dc.w rco-unToken
s9 dc.w lit-unToken
s10 dc.w esp-unToken
s11 dc.w anm-unToken
s12 dc.w rnm-unToken
s13 dc.w arn-unToken
s14 dc.w rs1-unToken
s15 dc.w rs2-unToken
s16 dc.w lpa-unToken
s17 dc.w rpa-unToken
s18 dc.w lbr-unToken
s19 dc.w rbr-unToken
s20 dc.w lcu-unToken
s21 dc.w rcu-unToken
s22 dc.w ldo-unToken
s23 dc.w rdo-unToken
s24 dc.w plu-unToken
s25 dc.w min-unToken
s26 dc.w tim-unToken
s27 dc.w div-unToken
s28 dc.w pmi-unToken
s29 dc.w sla-unToken
s30 dc.w bsl-unToken
s31 dc.w lth-unToken
s32 dc.w gth-unToken
s33 dc.w eql-unToken
s34 dc.w lte-unToken
s35 dc.w leq-unToken
s36 dc.w gte-unToken
s37 dc.w geq-unToken
s38 dc.w deq-unToken
s39 dc.w ceq-unToken
s40 dc.w neq-unToken
s41 dc.w lgr-unToken
s42 dc.w eeq-unToken
s43 dc.w exc-unToken
s44 dc.w til-unToken
s45 dc.w com-unToken
s46 dc.w per-unToken
s47 dc.w ldq-unToken
s48 dc.w rdq-unToken
s49 dc.w lsq-unToken
s50 dc.w rsq-unToken
s51 dc.w dqu-unToken
s52 dc.w squ-unToken
s53 dc.w sco-unToken
s54 dc.w psi-unToken
s55 dc.w car-unToken
s56 dc.w uli-unToken
s57 dc.w ape-unToken
s58 dc.w ats-unToken
s59 dc.w vba-unToken
s60 dc.w qma-unToken
s61 dc.w pi-unToken
s62 dc.w sqr-unToken
s63 dc.w sig-unToken
s64 dc.w int-unToken
s65 dc.w mic-unToken
s66 dc.w cpi-unToken
s67 dc.w inf-unToken
s68 dc.w col-unToken
s69 dc.w pou-unToken
s70 dc.w dsi-unToken
s71 dc.w nsp-unToken
s72 dc.w frc-unToken ; fraction <7/21/88ldc>
s73 dc.w icr-unToken ; intl currency <7/21/88ldc>
s74 dc.w lsg-unToken ; left single guillemet
s75 dc.w rsg-unToken ; right single guillemet <7/21/88ldc>
s76 dc.w prt-unToken ; per thousands <7/21/88ldc>
s77 dc.w elp-unToken ; ellipsis <3>
s78 dc.w cdt-unToken ; center dot <3>
Macro
GenPString &str
string pascal
dc.b &str
;; dc.b 0 ; add null termination
align 2 ; make sure even aligned for next value
EndMacro
Macro
BytPString &fust
lcla &n
dc.b &nbr(&syslist)
&n seta 1
while &n <= &nbr(&syslist) do
dc.b &syslist[&n]
&n seta &n+1
endw
;; dc.b 0 ; add null termination
align 2 ; make sure even aligned for next value
EndMacro
; these should never come through
rs1 GenPString '??' ; 14 reserved1
rs2 GenPString '??' ; 15 reserved2
arn GenPString '??' ; 13 alternate real number
anm GenPString '??' ; 11 alternate number (e.g. $B0-B9)
rnm GenPString '??' ; 12 real number
lit GenPString '??' ; 09 literal
alf GenPString '??' ; 04 alphabetic
num GenPString '??' ; 05 numeric
unk GenPString '??' ; 00 unknown
;LOCALVERSION Tokens
;Below are the string representations of the normalized tokens.
;Some scripts may require different character values for the token
;strings. The Arabic question mark, for example, has the hexadecimal
;value $BF, not $3F.
;
;To represent the Arabic question mark as a token then,
;change the entry for the question mark (qma)
;
; qma GenPString '?'
;to
; qma BytPString $BF
;
;
;Note that the string macro GenPString is to be used for
;the direct ASCII values. BytPString is used to create strings
;from the raw bytes that represent the characters in the string.
;Non-US Roman systems that use the standard character set require
; no change here.
;
whi GenPString ' ' ; 01 whitespace
llt GenPString '"' ; 02 literal begin ("left literal marker")
rlt GenPString '"' ; 03 literal end ("right literal marker")
nli BytPString $0D ; 06 new line
lco GenPString '/*' ; 07 open comment ("left comment")
rco GenPString '*/' ; 08 close comment ("right comment")
esp BytPString $5C ; 10 character escape (e.g. \n, \t )
lpa GenPString '(' ; 16 open parenthesis ("left parenthesis")
rpa GenPString ')' ; 17 close parenthesis ("right parenthesis")
lbr GenPString '[' ; 18 open square bracket ("left bracket")
rbr GenPString ']' ; 19 close square bracket ("right bracket")
lcu GenPString '{' ; 20 open curly bracket ("left curly bracket")
rcu GenPString '}' ; 21 close curly bracket ("right curly bracket")
ldo GenPString '«' ; 22 open guillemet ("left European double quote")
rdo GenPString '»' ; 23 close guillemet ("right European double quote")
plu GenPString '+' ; 24 plus
min GenPString '-' ; 25 minus
tim GenPString '*' ; 26 times/multiply
div GenPString '÷' ; 27 divide
pmi GenPString '±' ; 28 plus/minus
sla GenPString '/' ; 29 slash
bsl BytPString $5C ; 30 backslash
lth GenPString '<' ; 31 less than
gth GenPString '>' ; 32 greater than
eql GenPString '=' ; 33 equal
lte GenPString '<=' ; 34 less than or equal (2 symbols)
leq GenPString '≤' ; 35 less than or equal (single symbol)
gte GenPString '>=' ; 36 greater than or equal (2 symbols)
geq GenPString '≥' ; 37 greater than or equal (single symbol)
deq GenPString '==' ; 38 double equal
ceq GenPString ':=' ; 39 colon equal
neq GenPString '≠' ; 40 not equal
lgr GenPString '<>' ; 41 less/greater symbol (not equal in Pascal)
eeq GenPString '!=' ; 42 exclamation equal (not equal in C)
exc GenPString '!' ; 43 exclamation point
til GenPString '~' ; 44 centered tilde (as opposed to real tilde at $F7)
com GenPString ',' ; 45 comma
per GenPString '.' ; 46 period
ldq GenPString '“' ; 47 open double quote ("left double quote")
rdq GenPString '”' ; 48 close double quote ("right double quote")
lsq GenPString '' ; 49 open single quote ("left single quote")
rsq GenPString '' ; 50 close single quote ("right single quote")
dqu GenPString '"' ; 51 double quote
squ BytPString $27 ; 52 single quote
sco GenPString ';' ; 53 semicolon
psi GenPString '%' ; 54 percent
car GenPString '^' ; 55 caret
uli GenPString '_' ; 56 underline
ape BytPString $26 ; 57 ampersand
ats GenPString '@' ; 58 at sign
vba GenPString '|' ; 59 vertical bar
qma GenPString '?' ; 60 question mark
pi GenPString 'π' ; 61 pi
sqr GenPString '√' ; 62 square root
sig GenPString '∑' ; 63 capital sigma
int GenPString '∫' ; 64 integral
mic GenPString 'µ' ; 65 micro
cpi GenPString '∏' ; 66 capital pi
inf GenPString '∞' ; 67 infinity
col GenPString ':' ; 68 colon
pou GenPString '#' ; 69 pound sign (U.S.)
dsi GenPString '$' ; 70 dollar sign
nsp BytPString $CA ; 71 non-breaking space
frc BytPString $DA ; 72 fraction <7/21/88ldc>
icr BytPString $DB ; 73 intl currency <7/21/88ldc>
lsg BytPString $DC ; 74 open single guillemet ("left single g…") <7/21/88ldc>
rsg BytPString $DD ; 75 close single guillemet ("right single g…") <7/21/88ldc>
prt BytPString $E4 ; 76 per thousands <7/21/88ldc>
elp BytPString $C9 ; 77 ellipsis <3>
cdt BytPString $A5 ; 78 center dot <3>
untokenEnd
;-------------------------------------------------------------------------------------
; Default Parts Table
;
; NumberParts = RECORD
; data: ARRAY[leftQuote..maxSymbols] OF WideChar;
; PePlus, PeMinus, PeMinusPlus: WideCharArr;
; altNumTable:WideCharArr;
; END;
; leftQuote, rightQuote, leadPlacer, leader, nonLeader,
; zeroLead, percent, plusSign, minusSign, thousands,
; noRoomFill, separator, escape, decPoint <unquoteds>
;
;-------------------------------------------------------------------------------------
string asis
defaultPartsTable
dc.w 1 ; version
;LOCALVERSION NumberParts Table
;The number parts table contains two-byte values for the symbols used in
;formating number strings. Each two-byte combination represents the following
;values:
; leftQuote, rightQuote, leadPlacer, leader,
; nonLeader, zeroLead, percent, plusSign,
; minusSign, thousands, reserved, separator,
; escape, decPoint <unquoteds>
;
;The US version begins with $00, $27 for the leftQuote, $00, $27 for
;the rightQuote, and ends with the unquoted right paren, $00, ')'.
;
;Localization requires replacing these default values with those representing
;symbols required locally.
;
;For example, to modify the table to represent European number formats that
;use ',' for the decimal point and '.' for the thousands place, simply
;switch these symbols in the third and fourth rows of the table.
;
; dc.b $00, '-', $00, '.', $00, $00, $00, ';'
; dc.b $00, '\', $00, ',', $00, '(', $00, ')'
@data
dc.b $00, $27, $00, $27, $00, '^', $00, ' '
dc.b $00, '#', $00, '0', $00, '%', $00, '+'
dc.b $00, '-', $00, ',', $00, $00, $00, ';'
dc.b $00, '\', $00, '.', $00, '(', $00, ')'
dc.b $00, $00, $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00
@PePlus
dc.w 1
dc.b $00, 'E', $00, '+', $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00, $00, $00
@PeMinus
dc.w 1
dc.b $00, 'E', $00, '-', $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00, $00, $00
@PeMinusPlus
dc.w 0
dc.b $00, 'E', $00, $00, $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00, $00, $00
;LOCALVERSION alternate numbers
;alternate characters for the numbers 0..9 can be added here
;For an Arabic system for example, the Arabic digits
;(hexadecimal $B0 to B9) could be added as follows:
;@altNumTable
; dc.w 9
; dc.b $00, $B0, $00, $B1, $00, $B2, $00, $B3, $00, $B4
; dc.b $00, $B5, $00, $B6, $00, $B7, $00, $B8, $00, $B9
@altNumTable
dc.w 9
dc.b $00, $30, $00, $31, $00, $32, $00, $33, $00, $34
dc.b $00, $35, $00, $36, $00, $37, $00, $38, $00, $39
@reserved
dc.b $00, $00, $00, $00, $00, $00, $00, $00, $00, $00
dc.b $00, $00, $00, $00, $00, $00, $00, $00, $00, $00
defaultEnd
;-------------------------------------------------------------------------------------
; <3> Whitespace character list for HyperCard
; <6> Filled in the guts of the table.
;-------------------------------------------------------------------------------------
string pascal
whiteSpaceList ; <3>
; Header with length and count. <6>
dc.w whiteSpaceListEnd-whiteSpaceList ; length in bytes of table
dc.w 2 ; number of space characters in table
; Offsets to whitespace strings. <6>
dc.w normalSp-whiteSpaceList
dc.w tabSp-whiteSpaceList
; Now here are the strings themselves, in Pascal style. <6>
; Non-breaking spaces should NOT be included here.
normalSp GenPString ' '
tabSp BytPString $09
whiteSpaceListEnd ; <6>
EndP
End