Compare commits
93 Commits
2768a0c856
...
cb701d0796
Author | SHA1 | Date | |
---|---|---|---|
|
cb701d0796 | ||
|
59e3376f8c | ||
|
1396fb1cbf | ||
|
1f1407bb0e | ||
|
37929beede | ||
|
c1862532b8 | ||
|
4a99288be2 | ||
|
4cf55bffbc | ||
|
08e5f10ce4 | ||
|
f8c84387ca | ||
|
8514bbfff6 | ||
|
b1bcae531b | ||
|
56a3983129 | ||
|
3398ed425c | ||
|
5f1b8e872b | ||
|
9224a9b789 | ||
|
873192318f | ||
|
2be29e4b87 | ||
|
6bdaaee963 | ||
|
def1c2c253 | ||
|
a4ce2924db | ||
|
4d4f986e38 | ||
|
76362fb2c1 | ||
|
01a3863697 | ||
|
d5bfaa510a | ||
|
8e04863cda | ||
|
0ba00114d6 | ||
|
ec3c80360f | ||
|
61e4168fbc | ||
|
3f6acc9f42 | ||
|
054a4134e6 | ||
|
6844e5a271 | ||
|
337fd52125 | ||
|
2b5f5a2425 | ||
|
d81cce155d | ||
|
3bf095fb49 | ||
|
ad220442d7 | ||
|
4d4ee214b3 | ||
|
dd96385d7c | ||
|
75f1908690 | ||
|
2f644a8674 | ||
|
0ca2cd7e6a | ||
|
45b93f5300 | ||
|
c7147b2737 | ||
|
e5e7749d2e | ||
|
3d7537f7f6 | ||
|
68665d1fff | ||
|
36ab91617c | ||
|
39ebe32975 | ||
|
4b20b30330 | ||
|
3dcda5579e | ||
|
7ee3dd3d99 | ||
|
940614c032 | ||
|
70abe95ff1 | ||
|
56c8229dd5 | ||
|
c4bf86786e | ||
|
d980169ec6 | ||
|
16f3764790 | ||
|
8e2efa707e | ||
|
617bf831d8 | ||
|
66d46452ab | ||
|
ee685ec9ea | ||
|
ecc7b30844 | ||
|
5ad207f785 | ||
|
6267606293 | ||
|
651a2fb674 | ||
|
d6cb7935fe | ||
|
bc46e1e982 | ||
|
491950e57d | ||
|
7e7a6a9da2 | ||
|
798d5eca5c | ||
|
c96bb9cd0a | ||
|
612aad382f | ||
|
43cb03c572 | ||
|
6ec3549cd7 | ||
|
a4d2de2120 | ||
|
f2317d0ce7 | ||
|
094f2c5016 | ||
|
3d8bf3721e | ||
|
3f1b059de2 | ||
|
7c5699b508 | ||
|
f94a65d245 | ||
|
f49e81388c | ||
|
ca074ce387 | ||
|
e901b5baaa | ||
|
022d11fa14 | ||
|
7bc44f4a5a | ||
|
8713cb96aa | ||
|
faa2460468 | ||
|
d2401b963a | ||
|
db4187b65b | ||
|
d77ccf009e | ||
|
f39ab85222 |
14
.bazelrc
Normal file
|
@ -0,0 +1,14 @@
|
|||
build --define=warnings=off
|
||||
|
||||
build:asan --strip=never
|
||||
build:asan --copt=-fsanitize=address
|
||||
build:asan --copt=-g
|
||||
build:asan --copt=-O1
|
||||
build:asan --copt=-fno-omit-frame-pointer
|
||||
build:asan --linkopt=-fsanitize=address
|
||||
|
||||
try-import %workspace%/.user.bazelrc
|
||||
|
||||
build --copt=-fdiagnostics-color
|
||||
build --host_copt=-fdiagnostics-color
|
||||
build --host_copt=-UNDEBUG
|
|
@ -20,3 +20,4 @@ SpacesBeforeTrailingComments: 1
|
|||
Standard: Cpp11
|
||||
TabWidth: 4
|
||||
UseTab: AlignWithSpaces
|
||||
BreakBeforeBraces: Linux
|
||||
|
|
7
.gitignore
vendored
|
@ -1 +1,6 @@
|
|||
/convert_test
|
||||
/.user.bazelrc
|
||||
/bazel-*
|
||||
/gh-pages
|
||||
/.ccls-cache
|
||||
*.xcworkspace
|
||||
xcuserdata
|
||||
|
|
7
.header
Normal file
|
@ -0,0 +1,7 @@
|
|||
width = 80
|
||||
tabsize = 4
|
||||
guards = true
|
||||
copyright_notice = <<EOF
|
||||
This file is part of SyncFiles. SyncFiles is licensed under the terms of the
|
||||
Mozilla Public License, version 2.0. See LICENSE.txt for details.
|
||||
EOF
|
12
.vscode/c_cpp_properties.json
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": ["${workspaceFolder}"],
|
||||
"cStandard": "c11",
|
||||
"intelliSenseMode": "linux-clang-x64",
|
||||
"compilerPath": "/usr/bin/clang"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
19
.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"editor.rulers": [80],
|
||||
"editor.detectIndentation": false,
|
||||
"editor.tabSize": 4,
|
||||
"editor.insertSpaces": false,
|
||||
"files.associations": {
|
||||
"*.h": "c"
|
||||
},
|
||||
"[starlark]": {
|
||||
"editor.insertSpaces": true
|
||||
},
|
||||
"[json]": {
|
||||
"editor.insertSpaces": true,
|
||||
"editor.tabSize": 2
|
||||
},
|
||||
"[python]": {
|
||||
"editor.insertSpaces": true
|
||||
}
|
||||
}
|
3
BUILD.bazel
Normal file
|
@ -0,0 +1,3 @@
|
|||
load("@bazel_gazelle//:def.bzl", "gazelle")
|
||||
|
||||
gazelle(name = "gazelle")
|
16
Formats.md
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Table Formats
|
||||
|
||||
Each character map table starts with a single nonzero byte, indicating the table's format.
|
||||
|
||||
## Extended ASCII
|
||||
|
||||
Format 1 is for "extended ASCII". Encoded values 0-127 are identical to ASCII, and encoded values 128-255 are mapped to single Unicode characters.
|
||||
|
||||
The table contains 128 entries, for encoded values 128-255, with the following format:
|
||||
|
||||
u8 Length of Unicode character
|
||||
u8[] Unicode character, UTF-8
|
||||
u8 Length of normalized Unicode character, may be zero
|
||||
u8[] Unicode character in NFD normal form, UTF-8
|
||||
|
||||
The second copy of the character is only present if the character decomposes into multiple characters.
|
386
LICENSE.txt
|
@ -1,19 +1,373 @@
|
|||
Copyright 2021 Dietrich Epp
|
||||
Mozilla Public License Version 2.0
|
||||
==================================
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
1. Definitions
|
||||
--------------
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
1.1. "Contributor"
|
||||
means each individual or legal entity that creates, contributes to
|
||||
the creation of, or owns Covered Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
1.2. "Contributor Version"
|
||||
means the combination of the Contributions of others (if any) used
|
||||
by a Contributor and that particular Contributor's Contribution.
|
||||
|
||||
1.3. "Contribution"
|
||||
means Covered Software of a particular Contributor.
|
||||
|
||||
1.4. "Covered Software"
|
||||
means Source Code Form to which the initial Contributor has attached
|
||||
the notice in Exhibit A, the Executable Form of such Source Code
|
||||
Form, and Modifications of such Source Code Form, in each case
|
||||
including portions thereof.
|
||||
|
||||
1.5. "Incompatible With Secondary Licenses"
|
||||
means
|
||||
|
||||
(a) that the initial Contributor has attached the notice described
|
||||
in Exhibit B to the Covered Software; or
|
||||
|
||||
(b) that the Covered Software was made available under the terms of
|
||||
version 1.1 or earlier of the License, but not also under the
|
||||
terms of a Secondary License.
|
||||
|
||||
1.6. "Executable Form"
|
||||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. "Larger Work"
|
||||
means a work that combines Covered Software with other material, in
|
||||
a separate file or files, that is not Covered Software.
|
||||
|
||||
1.8. "License"
|
||||
means this document.
|
||||
|
||||
1.9. "Licensable"
|
||||
means having the right to grant, to the maximum extent possible,
|
||||
whether at the time of the initial grant or subsequently, any and
|
||||
all of the rights conveyed by this License.
|
||||
|
||||
1.10. "Modifications"
|
||||
means any of the following:
|
||||
|
||||
(a) any file in Source Code Form that results from an addition to,
|
||||
deletion from, or modification of the contents of Covered
|
||||
Software; or
|
||||
|
||||
(b) any new file in Source Code Form that contains any Covered
|
||||
Software.
|
||||
|
||||
1.11. "Patent Claims" of a Contributor
|
||||
means any patent claim(s), including without limitation, method,
|
||||
process, and apparatus claims, in any patent Licensable by such
|
||||
Contributor that would be infringed, but for the grant of the
|
||||
License, by the making, using, selling, offering for sale, having
|
||||
made, import, or transfer of either its Contributions or its
|
||||
Contributor Version.
|
||||
|
||||
1.12. "Secondary License"
|
||||
means either the GNU General Public License, Version 2.0, the GNU
|
||||
Lesser General Public License, Version 2.1, the GNU Affero General
|
||||
Public License, Version 3.0, or any later versions of those
|
||||
licenses.
|
||||
|
||||
1.13. "Source Code Form"
|
||||
means the form of the work preferred for making modifications.
|
||||
|
||||
1.14. "You" (or "Your")
|
||||
means an individual or a legal entity exercising rights under this
|
||||
License. For legal entities, "You" includes any entity that
|
||||
controls, is controlled by, or is under common control with You. For
|
||||
purposes of this definition, "control" means (a) the power, direct
|
||||
or indirect, to cause the direction or management of such entity,
|
||||
whether by contract or otherwise, or (b) ownership of more than
|
||||
fifty percent (50%) of the outstanding shares or beneficial
|
||||
ownership of such entity.
|
||||
|
||||
2. License Grants and Conditions
|
||||
--------------------------------
|
||||
|
||||
2.1. Grants
|
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||
non-exclusive license:
|
||||
|
||||
(a) under intellectual property rights (other than patent or trademark)
|
||||
Licensable by such Contributor to use, reproduce, make available,
|
||||
modify, display, perform, distribute, and otherwise exploit its
|
||||
Contributions, either on an unmodified basis, with Modifications, or
|
||||
as part of a Larger Work; and
|
||||
|
||||
(b) under Patent Claims of such Contributor to make, use, sell, offer
|
||||
for sale, have made, import, and otherwise transfer either its
|
||||
Contributions or its Contributor Version.
|
||||
|
||||
2.2. Effective Date
|
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution
|
||||
become effective for each Contribution on the date the Contributor first
|
||||
distributes such Contribution.
|
||||
|
||||
2.3. Limitations on Grant Scope
|
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under
|
||||
this License. No additional rights or licenses will be implied from the
|
||||
distribution or licensing of Covered Software under this License.
|
||||
Notwithstanding Section 2.1(b) above, no patent license is granted by a
|
||||
Contributor:
|
||||
|
||||
(a) for any code that a Contributor has removed from Covered Software;
|
||||
or
|
||||
|
||||
(b) for infringements caused by: (i) Your and any other third party's
|
||||
modifications of Covered Software, or (ii) the combination of its
|
||||
Contributions with other software (except as part of its Contributor
|
||||
Version); or
|
||||
|
||||
(c) under Patent Claims infringed by Covered Software in the absence of
|
||||
its Contributions.
|
||||
|
||||
This License does not grant any rights in the trademarks, service marks,
|
||||
or logos of any Contributor (except as may be necessary to comply with
|
||||
the notice requirements in Section 3.4).
|
||||
|
||||
2.4. Subsequent Licenses
|
||||
|
||||
No Contributor makes additional grants as a result of Your choice to
|
||||
distribute the Covered Software under a subsequent version of this
|
||||
License (see Section 10.2) or under the terms of a Secondary License (if
|
||||
permitted under the terms of Section 3.3).
|
||||
|
||||
2.5. Representation
|
||||
|
||||
Each Contributor represents that the Contributor believes its
|
||||
Contributions are its original creation(s) or it has sufficient rights
|
||||
to grant the rights to its Contributions conveyed by this License.
|
||||
|
||||
2.6. Fair Use
|
||||
|
||||
This License is not intended to limit any rights You have under
|
||||
applicable copyright doctrines of fair use, fair dealing, or other
|
||||
equivalents.
|
||||
|
||||
2.7. Conditions
|
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
|
||||
in Section 2.1.
|
||||
|
||||
3. Responsibilities
|
||||
-------------------
|
||||
|
||||
3.1. Distribution of Source Form
|
||||
|
||||
All distribution of Covered Software in Source Code Form, including any
|
||||
Modifications that You create or to which You contribute, must be under
|
||||
the terms of this License. You must inform recipients that the Source
|
||||
Code Form of the Covered Software is governed by the terms of this
|
||||
License, and how they can obtain a copy of this License. You may not
|
||||
attempt to alter or restrict the recipients' rights in the Source Code
|
||||
Form.
|
||||
|
||||
3.2. Distribution of Executable Form
|
||||
|
||||
If You distribute Covered Software in Executable Form then:
|
||||
|
||||
(a) such Covered Software must also be made available in Source Code
|
||||
Form, as described in Section 3.1, and You must inform recipients of
|
||||
the Executable Form how they can obtain a copy of such Source Code
|
||||
Form by reasonable means in a timely manner, at a charge no more
|
||||
than the cost of distribution to the recipient; and
|
||||
|
||||
(b) You may distribute such Executable Form under the terms of this
|
||||
License, or sublicense it under different terms, provided that the
|
||||
license for the Executable Form does not attempt to limit or alter
|
||||
the recipients' rights in the Source Code Form under this License.
|
||||
|
||||
3.3. Distribution of a Larger Work
|
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice,
|
||||
provided that You also comply with the requirements of this License for
|
||||
the Covered Software. If the Larger Work is a combination of Covered
|
||||
Software with a work governed by one or more Secondary Licenses, and the
|
||||
Covered Software is not Incompatible With Secondary Licenses, this
|
||||
License permits You to additionally distribute such Covered Software
|
||||
under the terms of such Secondary License(s), so that the recipient of
|
||||
the Larger Work may, at their option, further distribute the Covered
|
||||
Software under the terms of either this License or such Secondary
|
||||
License(s).
|
||||
|
||||
3.4. Notices
|
||||
|
||||
You may not remove or alter the substance of any license notices
|
||||
(including copyright notices, patent notices, disclaimers of warranty,
|
||||
or limitations of liability) contained within the Source Code Form of
|
||||
the Covered Software, except that You may alter any license notices to
|
||||
the extent required to remedy known factual inaccuracies.
|
||||
|
||||
3.5. Application of Additional Terms
|
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support,
|
||||
indemnity or liability obligations to one or more recipients of Covered
|
||||
Software. However, You may do so only on Your own behalf, and not on
|
||||
behalf of any Contributor. You must make it absolutely clear that any
|
||||
such warranty, support, indemnity, or liability obligation is offered by
|
||||
You alone, and You hereby agree to indemnify every Contributor for any
|
||||
liability incurred by such Contributor as a result of warranty, support,
|
||||
indemnity or liability terms You offer. You may include additional
|
||||
disclaimers of warranty and limitations of liability specific to any
|
||||
jurisdiction.
|
||||
|
||||
4. Inability to Comply Due to Statute or Regulation
|
||||
---------------------------------------------------
|
||||
|
||||
If it is impossible for You to comply with any of the terms of this
|
||||
License with respect to some or all of the Covered Software due to
|
||||
statute, judicial order, or regulation then You must: (a) comply with
|
||||
the terms of this License to the maximum extent possible; and (b)
|
||||
describe the limitations and the code they affect. Such description must
|
||||
be placed in a text file included with all distributions of the Covered
|
||||
Software under this License. Except to the extent prohibited by statute
|
||||
or regulation, such description must be sufficiently detailed for a
|
||||
recipient of ordinary skill to be able to understand it.
|
||||
|
||||
5. Termination
|
||||
--------------
|
||||
|
||||
5.1. The rights granted under this License will terminate automatically
|
||||
if You fail to comply with any of its terms. However, if You become
|
||||
compliant, then the rights granted under this License from a particular
|
||||
Contributor are reinstated (a) provisionally, unless and until such
|
||||
Contributor explicitly and finally terminates Your grants, and (b) on an
|
||||
ongoing basis, if such Contributor fails to notify You of the
|
||||
non-compliance by some reasonable means prior to 60 days after You have
|
||||
come back into compliance. Moreover, Your grants from a particular
|
||||
Contributor are reinstated on an ongoing basis if such Contributor
|
||||
notifies You of the non-compliance by some reasonable means, this is the
|
||||
first time You have received notice of non-compliance with this License
|
||||
from such Contributor, and You become compliant prior to 30 days after
|
||||
Your receipt of the notice.
|
||||
|
||||
5.2. If You initiate litigation against any entity by asserting a patent
|
||||
infringement claim (excluding declaratory judgment actions,
|
||||
counter-claims, and cross-claims) alleging that a Contributor Version
|
||||
directly or indirectly infringes any patent, then the rights granted to
|
||||
You by any and all Contributors for the Covered Software under Section
|
||||
2.1 of this License shall terminate.
|
||||
|
||||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
|
||||
end user license agreements (excluding distributors and resellers) which
|
||||
have been validly granted by You or Your distributors under this License
|
||||
prior to termination shall survive termination.
|
||||
|
||||
************************************************************************
|
||||
* *
|
||||
* 6. Disclaimer of Warranty *
|
||||
* ------------------------- *
|
||||
* *
|
||||
* Covered Software is provided under this License on an "as is" *
|
||||
* basis, without warranty of any kind, either expressed, implied, or *
|
||||
* statutory, including, without limitation, warranties that the *
|
||||
* Covered Software is free of defects, merchantable, fit for a *
|
||||
* particular purpose or non-infringing. The entire risk as to the *
|
||||
* quality and performance of the Covered Software is with You. *
|
||||
* Should any Covered Software prove defective in any respect, You *
|
||||
* (not any Contributor) assume the cost of any necessary servicing, *
|
||||
* repair, or correction. This disclaimer of warranty constitutes an *
|
||||
* essential part of this License. No use of any Covered Software is *
|
||||
* authorized under this License except under this disclaimer. *
|
||||
* *
|
||||
************************************************************************
|
||||
|
||||
************************************************************************
|
||||
* *
|
||||
* 7. Limitation of Liability *
|
||||
* -------------------------- *
|
||||
* *
|
||||
* Under no circumstances and under no legal theory, whether tort *
|
||||
* (including negligence), contract, or otherwise, shall any *
|
||||
* Contributor, or anyone who distributes Covered Software as *
|
||||
* permitted above, be liable to You for any direct, indirect, *
|
||||
* special, incidental, or consequential damages of any character *
|
||||
* including, without limitation, damages for lost profits, loss of *
|
||||
* goodwill, work stoppage, computer failure or malfunction, or any *
|
||||
* and all other commercial damages or losses, even if such party *
|
||||
* shall have been informed of the possibility of such damages. This *
|
||||
* limitation of liability shall not apply to liability for death or *
|
||||
* personal injury resulting from such party's negligence to the *
|
||||
* extent applicable law prohibits such limitation. Some *
|
||||
* jurisdictions do not allow the exclusion or limitation of *
|
||||
* incidental or consequential damages, so this exclusion and *
|
||||
* limitation may not apply to You. *
|
||||
* *
|
||||
************************************************************************
|
||||
|
||||
8. Litigation
|
||||
-------------
|
||||
|
||||
Any litigation relating to this License may be brought only in the
|
||||
courts of a jurisdiction where the defendant maintains its principal
|
||||
place of business and such litigation shall be governed by laws of that
|
||||
jurisdiction, without reference to its conflict-of-law provisions.
|
||||
Nothing in this Section shall prevent a party's ability to bring
|
||||
cross-claims or counter-claims.
|
||||
|
||||
9. Miscellaneous
|
||||
----------------
|
||||
|
||||
This License represents the complete agreement concerning the subject
|
||||
matter hereof. If any provision of this License is held to be
|
||||
unenforceable, such provision shall be reformed only to the extent
|
||||
necessary to make it enforceable. Any law or regulation which provides
|
||||
that the language of a contract shall be construed against the drafter
|
||||
shall not be used to construe this License against a Contributor.
|
||||
|
||||
10. Versions of the License
|
||||
---------------------------
|
||||
|
||||
10.1. New Versions
|
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section
|
||||
10.3, no one other than the license steward has the right to modify or
|
||||
publish new versions of this License. Each version will be given a
|
||||
distinguishing version number.
|
||||
|
||||
10.2. Effect of New Versions
|
||||
|
||||
You may distribute the Covered Software under the terms of the version
|
||||
of the License under which You originally received the Covered Software,
|
||||
or under the terms of any subsequent version published by the license
|
||||
steward.
|
||||
|
||||
10.3. Modified Versions
|
||||
|
||||
If you create software not governed by this License, and you want to
|
||||
create a new license for such software, you may create and use a
|
||||
modified version of this License if you rename the license and remove
|
||||
any references to the name of the license steward (except to note that
|
||||
such modified license differs from this License).
|
||||
|
||||
10.4. Distributing Source Code Form that is Incompatible With Secondary
|
||||
Licenses
|
||||
|
||||
If You choose to distribute Source Code Form that is Incompatible With
|
||||
Secondary Licenses under the terms of this version of the License, the
|
||||
notice described in Exhibit B of this License must be attached.
|
||||
|
||||
Exhibit A - Source Code Form License Notice
|
||||
-------------------------------------------
|
||||
|
||||
This Source Code Form is subject to the terms of the Mozilla Public
|
||||
License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
If it is not possible or desirable to put the notice in a particular
|
||||
file, then You may include the notice in a location (such as a LICENSE
|
||||
file in a relevant directory) where a recipient would be likely to look
|
||||
for such a notice.
|
||||
|
||||
You may add additional accurate notices of copyright ownership.
|
||||
|
||||
Exhibit B - "Incompatible With Secondary Licenses" Notice
|
||||
---------------------------------------------------------
|
||||
|
||||
This Source Code Form is "Incompatible With Secondary Licenses", as
|
||||
defined by the Mozilla Public License, v. 2.0.
|
||||
|
|
72
README.md
|
@ -1,71 +1,27 @@
|
|||
# SyncFiles
|
||||
|
||||
SyncFiles is a tool for MPW (Macintosh Programmer’s Workshop) which synchronizes files between a Macintosh and Unix system. It is used to copy files between a classic Macintosh development environment (e.g. MPW running on Mac System 7) and a modern Unix environment (e.g. a Basilisk II host system or an AppleShare volume).
|
||||
SyncFiles is still being written; it is currently unusable.
|
||||
|
||||
## What SyncFiles Does
|
||||
SyncFiles is a tool which synchronizes files between classic Mac OS systems and modern systems. You can use it to copy files between a classic Macintosh development environment (e.g. MPW or CodeWarrior running on Mac System 7) and a modern Unix environment (e.g. a Basilisk II host system or AppleShare volume). SyncFiles will convert line endings, convert character encodings, preserve resource forks, and set file type and creator codes.
|
||||
|
||||
- By default, only copies files which are _newer_ than the destination file (unless `-force` is specified). This means that your classic Macintosh’s system should have the clock set correctly!
|
||||
## Sharing Files
|
||||
|
||||
- Sets the modification timestamp of the destination file to match the timestamp of the source file.
|
||||
You still have to figure out how to transfer files.
|
||||
|
||||
- Only synchronizes files which match hard-coded patterns.
|
||||
- Networked Macs running Mac OS 9 or earlier: Use [Netatalk][netatalk] to run an AFP file server on a different computer on the network. Connect to the file server using the Chooser. Use SyncFiles to transfer files between the local hard drive and the file server. Note that Netatalk may require special configuration in order to work with old Macs.
|
||||
|
||||
- Converts text files to UTF-8 and LF line endings for Unix systems; converts to Mac OS Roman and CR line endings for Macintosh systems. XML files are not re-encoded.
|
||||
- Networked Macs running Mac OS X: In addition to using AFP, you can also use a Samba file server. This will be easier to set up than Netatalk. Use SyncFiles to transfer files between the local hard drive and the file server. There is a command-line version of SyncFiles for Mac OS X.
|
||||
|
||||
- For resource files, converts by copying the Macintosh resource fork to the data fork.
|
||||
- [Basilisk II][basiliskii] or [SheepShaver][sheepshaver] virtual machines: Use SyncFiles to transfer files between the VM and the host using the “host directory tree”. This requires either running Mac OS 7.6 or an older version of Mac OS with the “File System Manager 1.2” extension.
|
||||
|
||||
- Sets the file type and creator code, creating MPW Shell text files and ResEdit resource files.
|
||||
- Other virtual machines ([Mini vMac][minivmac], [QEMU][qemu]): Unknown.
|
||||
|
||||
## File Patterns
|
||||
|
||||
Copies files named Makefile, and files with the following extensions:
|
||||
|
||||
- C: `.c` `.h`
|
||||
|
||||
- C++: `.cc` `.cp` `.cpp` `.cxx` `.hh` `.hpp` `.hxx`
|
||||
|
||||
- Plain text: `.txt`
|
||||
|
||||
- Resource: `.rsrc`
|
||||
|
||||
- XML: `.xml` (CR-LF conversion only, no encoding conversion)
|
||||
|
||||
## Usage
|
||||
|
||||
Operates in push or pull mode. The tool runs from inside the classic Macintosh environment, so the “push” mode copies from Macintosh to Unix, and the “pull” mode copies from Unix to Macintosh. It is assumed that the Macintosh directory is on a normal disk volume.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
To push files from the current directory,
|
||||
|
||||
```
|
||||
SyncFiles <DestPath> -push
|
||||
```
|
||||
|
||||
To pull files to the current directory,
|
||||
|
||||
```
|
||||
SyncFiles <DestPath> -pull
|
||||
```
|
||||
|
||||
### Other Flags
|
||||
|
||||
- `-verbose`: Print lots of boring messages.
|
||||
|
||||
- `-quiet`: Print only errors and warnings.
|
||||
|
||||
- `-force`: Ignore timestamps, copy all source files to destination.
|
||||
|
||||
- `-dry-run`: Perform no actions, just print out what would be done.
|
||||
|
||||
- `-dir`: Specify an alternate Macintosh directory to push from or pull to. By default, pushes from and pulls to the current directory.
|
||||
|
||||
- `-delete`: Delete files in destination which are missing from source.
|
||||
|
||||
## Testing
|
||||
|
||||
Run `sh test.sh` to test the text conversion code.
|
||||
[netatalk]: https://netatalk.sourceforge.io/
|
||||
[basiliskii]: https://basilisk.cebix.net/
|
||||
[sheepshaver]: https://sheepshaver.cebix.net/
|
||||
[minivmac]: https://www.gryphel.com/c/minivmac/
|
||||
[qemu]: https://www.qemu.org/
|
||||
|
||||
## License
|
||||
|
||||
SyncFiles is distributed under the terms of the MIT license. See LICENSE.txt for details.
|
||||
SyncFiles is distributed under the terms of the Mozilla Public License, version 2.0. See LICENSE.txt for details.
|
||||
|
|
BIN
SyncFiles CW4.mcp
Normal file
57
WORKSPACE
Normal file
|
@ -0,0 +1,57 @@
|
|||
workspace(name = "syncfiles")
|
||||
|
||||
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
|
||||
|
||||
http_archive(
|
||||
name = "bazel_skylib",
|
||||
sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728",
|
||||
urls = [
|
||||
"https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
|
||||
"https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
|
||||
],
|
||||
)
|
||||
|
||||
load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
|
||||
|
||||
bazel_skylib_workspace()
|
||||
|
||||
http_archive(
|
||||
name = "io_bazel_rules_go",
|
||||
sha256 = "f2dcd210c7095febe54b804bb1cd3a58fe8435a909db2ec04e31542631cf715c",
|
||||
urls = [
|
||||
"https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.31.0/rules_go-v0.31.0.zip",
|
||||
"https://github.com/bazelbuild/rules_go/releases/download/v0.31.0/rules_go-v0.31.0.zip",
|
||||
],
|
||||
)
|
||||
|
||||
http_archive(
|
||||
name = "bazel_gazelle",
|
||||
sha256 = "de69a09dc70417580aabf20a28619bb3ef60d038470c7cf8442fafcf627c21cb",
|
||||
urls = [
|
||||
"https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.24.0/bazel-gazelle-v0.24.0.tar.gz",
|
||||
"https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.24.0/bazel-gazelle-v0.24.0.tar.gz",
|
||||
],
|
||||
)
|
||||
|
||||
load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains", "go_rules_dependencies")
|
||||
load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies", "go_repository")
|
||||
|
||||
go_repository(
|
||||
name = "org_golang_x_text",
|
||||
importpath = "golang.org/x/text",
|
||||
sum = "h1:i6eZZ+zk0SOf0xgBpEpPD18qWcJda6q1sxt3S0kzyUQ=",
|
||||
version = "v0.3.5",
|
||||
)
|
||||
|
||||
# go_repository(
|
||||
# name = "org_golang_x_tools",
|
||||
# importpath = "golang.org/x/tools",
|
||||
# sum = "h1:FDhOuMEY4JVRztM/gsbk+IKUQ8kj74bxZrgw87eMMVc=",
|
||||
# version = "v0.0.0-20180917221912-90fa682c2a6e",
|
||||
# )
|
||||
|
||||
go_rules_dependencies()
|
||||
|
||||
go_register_toolchains(version = "1.18")
|
||||
|
||||
gazelle_dependencies()
|
36
bazel/BUILD.bazel
Normal file
|
@ -0,0 +1,36 @@
|
|||
load("@bazel_skylib//rules:common_settings.bzl", "string_flag")
|
||||
|
||||
package(
|
||||
default_visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
string_flag(
|
||||
name = "warnings",
|
||||
build_setting_default = "on",
|
||||
values = [
|
||||
"off",
|
||||
"on",
|
||||
"error",
|
||||
],
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "warnings_off",
|
||||
flag_values = {
|
||||
":warnings": "off",
|
||||
},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "warnings_on",
|
||||
flag_values = {
|
||||
":warnings": "on",
|
||||
},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "warnings_error",
|
||||
flag_values = {
|
||||
":warnings": "error",
|
||||
},
|
||||
)
|
40
bazel/copts.bzl
Normal file
|
@ -0,0 +1,40 @@
|
|||
# Compiler flags that Bazel already passes to GCC:
|
||||
# Default:
|
||||
# -U_FORTIFY_SOURCE
|
||||
# -Wall
|
||||
# -Wunused-but-set-parameter
|
||||
# -Wno-free-nonheap-object
|
||||
# -fno-omit-frame-pointer
|
||||
# With -c dbg, adds:
|
||||
# -g
|
||||
# With -c opt, adds:
|
||||
# -g0 -O2
|
||||
|
||||
# Base C options: compile as C11. COPTS always includes these, whatever the
# warnings setting is.
|
||||
COPTS_BASE = [
|
||||
"-std=c11",
|
||||
]
|
||||
|
||||
# Warning flags that COPTS adds when //bazel:warnings is "on" or "error".
# -Werror=implicit-function-declaration turns that one diagnostic into an error
# even in "on" mode; -Wno-format-zero-length switches that warning off.
_COPTS_WARNING = [
|
||||
"-Wall",
|
||||
"-Wextra",
|
||||
"-Wpointer-arith",
|
||||
"-Wwrite-strings",
|
||||
"-Wmissing-prototypes",
|
||||
"-Wdouble-promotion",
|
||||
"-Werror=implicit-function-declaration",
|
||||
"-Winit-self",
|
||||
"-Wstrict-prototypes",
|
||||
"-Wno-format-zero-length",
|
||||
]
|
||||
|
||||
# Internal C compilation options. Use this by default for all C targets in the
|
||||
# repo.
# Always starts with COPTS_BASE. The //bazel:warnings setting then selects the
# rest: no warning flags for "off", _COPTS_WARNING for "on", and _COPTS_WARNING
# plus -Werror for "error". There is no //conditions:default branch; the three
# config_settings cover every value the flag declares.
|
||||
COPTS = (
|
||||
COPTS_BASE +
|
||||
select({
|
||||
"//bazel:warnings_off": [],
|
||||
"//bazel:warnings_on": _COPTS_WARNING,
|
||||
"//bazel:warnings_error": _COPTS_WARNING + ["-Werror"],
|
||||
})
|
||||
)
|
536
charmap/ARABIC.TXT
Normal file
|
@ -0,0 +1,536 @@
|
|||
#=======================================================================
|
||||
# File name: ARABIC.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Arabic
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-04 Update header comments. Matches internal xml
|
||||
# <c1.2> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Add comments about character display and
|
||||
# direction overrides. Update URLs, notes.
|
||||
# Matches internal utom<b4>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b1>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n10 1998-Feb-05 Show required Unicode character
|
||||
# directionality in a different way. Matches
|
||||
# internal utom<n4>, ufrm<n21>, and Text
|
||||
# Encoding Converter version 1.3. Update
|
||||
# header comments; include information on
|
||||
# loose mapping of digits.
|
||||
# n07 1997-Jul-17 Update to match internal utom<n2>, ufrm<n17>:
|
||||
# Change standard mapping for 0xC0 from U+066D
|
||||
# to U+274A. Add direction overrides to
|
||||
# mappings for 0x25, 0x2C, 0x3B, 0x3F. Add
|
||||
# information on variants.
|
||||
# n03 1995-Apr-18 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n11>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Arabic code (in hex as 0xNN).
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN),
|
||||
# possibly preceded by a tag indicating required directionality
|
||||
# (i.e. <LR>+0xNNNN or <RL>+0xNNNN).
|
||||
# Column #3 is a comment containing the Unicode name.
|
||||
#
|
||||
# The entries are in Mac OS Arabic code order.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Arabic character set uses the standard control characters at
|
||||
# 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Arabic:
|
||||
# -----------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# 1. General
|
||||
#
|
||||
# The Mac OS Arabic character set is intended to cover Arabic as
|
||||
# used in North Africa, the Arabian peninsula, and the Levant. It
|
||||
# also contains several characters needed for Urdu and/or Farsi.
|
||||
#
|
||||
# The Mac OS Arabic character set is essentially a superset of ISO
|
||||
# 8859-6. The 8859-6 code points that are interpreted differently
|
||||
# in the Mac OS Arabic set are as follows:
|
||||
# 0xA0 is NO-BREAK SPACE in 8859-6 and right-left SPACE in Mac OS
|
||||
# Arabic; NO-BREAK SPACE is 0x81 in Mac OS Arabic.
|
||||
# 0xA4 is CURRENCY SIGN in 8859-6 and right-left DOLLAR SIGN in
|
||||
# Mac OS Arabic.
|
||||
# 0xAD is SOFT HYPHEN in 8859-6 and right-left HYPHEN-MINUS in
|
||||
# Mac OS Arabic.
|
||||
# ISO 8859-6 specifies that codes 0x30-0x39 can be rendered either
|
||||
# with European digit shapes or Arabic digit shapes. This is also
|
||||
# true in Mac OS Arabic, which determines from context which digit
|
||||
# shapes to use (see below).
|
||||
#
|
||||
# The Mac OS Arabic character set uses the C1 controls area and other
|
||||
# code points which are undefined in ISO 8859-6 for additional
|
||||
# graphic characters: additional Arabic letters for Farsi and Urdu,
|
||||
# some accented Roman letters for European languages (such as French),
|
||||
# and duplicates of some of the punctuation, symbols, and digits in
|
||||
# the ASCII block. The duplicate punctuation, symbol, and digit
|
||||
# characters have right-left directionality, while the ASCII versions
|
||||
# have left-right directionality. See the next section for more
|
||||
# information on this.
|
||||
#
|
||||
# Mac OS Arabic characters 0xEB-0xF2 are non-spacing/combining marks.
|
||||
#
|
||||
# 2. Directional characters and roundtrip fidelity
|
||||
#
|
||||
# The Mac OS Arabic character set was developed in 1986-1987. At that
|
||||
# time the bidirectional line layout algorithm used in the Mac OS
|
||||
# Arabic system was fairly simple; it used only a few direction
|
||||
# classes (instead of the 19 now used in the Unicode bidirectional
|
||||
# algorithm). In order to permit users to handle some tricky layout
|
||||
# problems, certain punctuation and symbol characters were encoded
|
||||
# twice, one with a left-right direction attribute and the other with
|
||||
# a right-left direction attribute.
|
||||
#
|
||||
# For example, plus sign is encoded at 0x2B with a left-right
|
||||
# attribute, and at 0xAB with a right-left attribute. However, there
|
||||
# is only one PLUS SIGN character in Unicode. This leads to some
|
||||
# interesting problems when mapping between Mac OS Arabic and Unicode;
|
||||
# see below.
|
||||
#
|
||||
# A related problem is that even when a particular character is
|
||||
# encoded only once in Mac OS Arabic, it may have a different
|
||||
# direction attribute than the corresponding Unicode character.
|
||||
#
|
||||
# For example, the Mac OS Arabic character at 0x93 is HORIZONTAL
|
||||
# ELLIPSIS with strong right-left direction. However, the Unicode
|
||||
# character HORIZONTAL ELLIPSIS has direction class neutral.
|
||||
#
|
||||
# 3. Behavior of ASCII-range numbers in WorldScript
|
||||
#
|
||||
# Mac OS Arabic also has two sets of digit codes.
|
||||
#
|
||||
# The digits at 0x30-0x39 may be displayed using either European
|
||||
# digit forms or Arabic digit forms, depending on context. If there
|
||||
# is a "strong European" character such as a Latin letter on either
|
||||
# side of a sequence consisting of digits 0x30-0x39 and possibly comma
|
||||
# 0x2C or period 0x2E, then the characters will be displayed using
|
||||
# European forms (This will happen even if there are neutral characters
|
||||
# between the digits and the strong European character). Otherwise, the
|
||||
# digits will be displayed using Arabic forms, the comma will be
|
||||
# displayed as Arabic thousands separator, and the period as Arabic
|
||||
# decimal separator. In any case, 0x2C, 0x2E, and 0x30-0x39 are always
|
||||
# left-right.
|
||||
#
|
||||
# The digits at 0xB0-0xB9 are always displayed using Arabic digit
|
||||
# shapes, and moreover, these digits always have strong right-left
|
||||
# directionality. These are mainly intended for special layout
|
||||
# purposes such as part numbers, etc.
|
||||
#
|
||||
# 4. Font variants
|
||||
#
|
||||
# The table in this file gives the Unicode mappings for the standard
|
||||
# Mac OS Arabic encoding. This encoding is supported by the Cairo font
|
||||
# (the system font for Arabic), and is the encoding supported by the
|
||||
# text processing utilities. However, the other Arabic fonts actually
|
||||
# implement slightly different encodings; this mainly affects the code
|
||||
# points 0xAA and 0xC0. For these code points the standard Mac OS
|
||||
# Arabic encoding has the following mappings:
|
||||
# 0xAA -> <RL>+0x002A ASTERISK, right-left
|
||||
# 0xC0 -> <RL>+0x274A EIGHT TEARDROP-SPOKED PROPELLER ASTERISK,
|
||||
# right-left
|
||||
# This mapping of 0xAA is consistent with the normal convention for
|
||||
# Mac OS Arabic and Hebrew that the right-left duplicates have codes
|
||||
# that are equal to the ASCII code of the left-right character plus
|
||||
# 0x80. However, in all of the other fonts, 0xAA is MULTIPLY SIGN, and
|
||||
# right-left ASTERISK may be at a different code point. The other
|
||||
# variants are described below.
|
||||
#
|
||||
# The TrueType variant is used for most of the Arabic TrueType fonts:
|
||||
# Baghdad, Geeza, Kufi, Nadeem. It differs from the standard variant
|
||||
# in the following way:
|
||||
# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left
|
||||
# 0xC0 -> <RL>+0x002A ASTERISK, right-left
|
||||
#
|
||||
# The Thuluth variant is used for the Arabic Postscript-only fonts:
|
||||
# Thuluth and Thuluth bold. It differs from the standard variant in
|
||||
# the following way:
|
||||
# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left
|
||||
# 0xC0 -> 0x066D ARABIC FIVE POINTED STAR
|
||||
#
|
||||
# The AlBayan variant is used for the Arabic TrueType font Al Bayan.
|
||||
# It differs from the standard variant in the following way:
|
||||
# 0x81 -> no mapping (glyph just has authorship information, etc.)
|
||||
# 0xA3 -> 0xFDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM
|
||||
# 0xA4 -> 0xFDF2 ARABIC LIGATURE ALLAH ISOLATED FORM
|
||||
# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left
|
||||
# 0xDC -> <RL>+0x25CF BLACK CIRCLE, right-left
|
||||
# 0xFC -> <RL>+0x25A0 BLACK SQUARE, right-left
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# 1. Matching the direction of Mac OS Arabic characters
|
||||
#
|
||||
# When Mac OS Arabic encodes a character twice but with different
|
||||
# direction attributes for the two code points - as in the case of
|
||||
# plus sign mentioned above - we need a way to map both Mac OS Arabic
|
||||
# code points to Unicode and back again without loss of information.
|
||||
# With the plus sign, for example, mapping one of the Mac OS Arabic
|
||||
# characters to a code in the Unicode corporate use zone is
|
||||
# undesirable, since both of the plus sign characters are likely to
|
||||
# be used in text that is interchanged.
|
||||
#
|
||||
# The problem is solved with the use of direction override characters
|
||||
# and direction-dependent mappings. When mapping from Mac OS Arabic
|
||||
# to Unicode, we use direction overrides as necessary to force the
|
||||
# direction of the resulting Unicode characters.
|
||||
#
|
||||
# The required direction is indicated by a direction tag in the
|
||||
# mappings. A tag of <LR> means the corresponding Unicode character
|
||||
# must have a strong left-right context, and a tag of <RL> indicates
|
||||
# a right-left context.
|
||||
#
|
||||
# For example, the mapping of 0x2B is given as <LR>+0x002B; the
|
||||
# mapping of 0xAB is given as <RL>+0x002B. If we map an isolated
|
||||
# instance of 0x2B to Unicode, it should be mapped as follows (LRO
|
||||
# indicates LEFT-RIGHT OVERRIDE, PDF indicates POP DIRECTION
|
||||
# FORMATTING):
|
||||
#
|
||||
# 0x2B -> 0x202D (LRO) + 0x002B (PLUS SIGN) + 0x202C (PDF)
|
||||
#
|
||||
# When mapping several characters in a row that require direction
|
||||
# forcing, the overrides need only be used at the beginning and end.
|
||||
# For example:
|
||||
#
|
||||
# 0x24 0x20 0x28 0x29 -> 0x202D 0x0024 0x0020 0x0028 0x0029 0x202C
|
||||
#
|
||||
# If neutral characters that require direction forcing are already
|
||||
# between strong-direction characters with matching directionality,
|
||||
# then direction overrides need not be used. Direction overrides are
|
||||
# always needed to map the right-left digits at 0xB0-0xB9.
|
||||
#
|
||||
# When mapping from Unicode to Mac OS Arabic, the Unicode
|
||||
# bidirectional algorithm should be used to determine resolved
|
||||
# direction of the Unicode characters. The mapping from Unicode to
|
||||
# Mac OS Arabic can then be disambiguated by the use of the resolved
|
||||
# direction:
|
||||
#
|
||||
# Unicode 0x002B -> Mac OS Arabic 0x2B (if L) or 0xAB (if R)
|
||||
#
|
||||
# However, this also means the direction override characters should
|
||||
# be discarded when mapping from Unicode to Mac OS Arabic (after
|
||||
# they have been used to determine resolved direction), since the
|
||||
# direction override information is carried by the code point itself.
|
||||
#
|
||||
# Even when direction overrides are not needed for roundtrip
|
||||
# fidelity, they are sometimes used when mapping Mac OS Arabic
|
||||
# characters to Unicode in order to achieve similar text layout with
|
||||
# the resulting Unicode text. For example, the single Mac OS Arabic
|
||||
# ellipsis character has direction class right-left, and there is no
|
||||
# left-right version. However, the Unicode HORIZONTAL ELLIPSIS
|
||||
# character has direction class neutral (which means it may end up
|
||||
# with a resolved direction of left-right if surrounded by left-right
|
||||
# characters). When mapping the Mac OS Arabic ellipsis to Unicode, it
|
||||
# is surrounded with a direction override to help preserve proper
|
||||
# text layout. The resolved direction is not needed or used when
|
||||
# mapping the Unicode HORIZONTAL ELLIPSIS back to Mac OS Arabic.
|
||||
#
|
||||
# 2. Mapping the Mac OS Arabic digits
|
||||
#
|
||||
# The main table below contains mappings that should be used when
|
||||
# strict round-trip fidelity is required. However, for numeric
|
||||
# values, the mappings in that table will produce Unicode characters
|
||||
# that may appear different than the Mac OS Arabic text displayed on
|
||||
# a Mac OS system using WorldScript. This is because WorldScript
|
||||
# uses context-dependent display for the 0x30-0x39 digits.
|
||||
#
|
||||
# If roundtrip fidelity is not required, then the following
|
||||
# alternate mappings should be used when a sequence of 0x30-0x39
|
||||
# digits - possibly including 0x2C and 0x2E - occurs in an Arabic
|
||||
# context (that is, when the first "strong" character on either side
|
||||
# of the digit sequence is Arabic, or there is no strong character):
|
||||
#
|
||||
# 0x2C 0x066C # ARABIC THOUSANDS SEPARATOR
|
||||
# 0x2E 0x066B # ARABIC DECIMAL SEPARATOR
|
||||
# 0x30 0x0660 # ARABIC-INDIC DIGIT ZERO
|
||||
# 0x31 0x0661 # ARABIC-INDIC DIGIT ONE
|
||||
# 0x32 0x0662 # ARABIC-INDIC DIGIT TWO
|
||||
# 0x33 0x0663 # ARABIC-INDIC DIGIT THREE
|
||||
# 0x34 0x0664 # ARABIC-INDIC DIGIT FOUR
|
||||
# 0x35 0x0665 # ARABIC-INDIC DIGIT FIVE
|
||||
# 0x36 0x0666 # ARABIC-INDIC DIGIT SIX
|
||||
# 0x37 0x0667 # ARABIC-INDIC DIGIT SEVEN
|
||||
# 0x38 0x0668 # ARABIC-INDIC DIGIT EIGHT
|
||||
# 0x39 0x0669 # ARABIC-INDIC DIGIT NINE
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version n03 to version n07:
|
||||
#
|
||||
# - Change mapping for 0xC0 from U+066D to U+274A.
|
||||
#
|
||||
# - Add direction overrides (required directionality) to mappings
|
||||
# for 0x25, 0x2C, 0x3B, 0x3F.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 <LR>+0x0020 # SPACE, left-right
|
||||
0x21 <LR>+0x0021 # EXCLAMATION MARK, left-right
|
||||
0x22 <LR>+0x0022 # QUOTATION MARK, left-right
|
||||
0x23 <LR>+0x0023 # NUMBER SIGN, left-right
|
||||
0x24 <LR>+0x0024 # DOLLAR SIGN, left-right
|
||||
0x25 <LR>+0x0025 # PERCENT SIGN, left-right
|
||||
0x26 <LR>+0x0026 # AMPERSAND, left-right
|
||||
0x27 <LR>+0x0027 # APOSTROPHE, left-right
|
||||
0x28 <LR>+0x0028 # LEFT PARENTHESIS, left-right
|
||||
0x29 <LR>+0x0029 # RIGHT PARENTHESIS, left-right
|
||||
0x2A <LR>+0x002A # ASTERISK, left-right
|
||||
0x2B <LR>+0x002B # PLUS SIGN, left-right
|
||||
0x2C <LR>+0x002C # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR
|
||||
0x2D <LR>+0x002D # HYPHEN-MINUS, left-right
|
||||
0x2E <LR>+0x002E # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR
|
||||
0x2F <LR>+0x002F # SOLIDUS, left-right
|
||||
0x30 0x0030 # DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE
|
||||
0x3A <LR>+0x003A # COLON, left-right
|
||||
0x3B <LR>+0x003B # SEMICOLON, left-right
|
||||
0x3C <LR>+0x003C # LESS-THAN SIGN, left-right
|
||||
0x3D <LR>+0x003D # EQUALS SIGN, left-right
|
||||
0x3E <LR>+0x003E # GREATER-THAN SIGN, left-right
|
||||
0x3F <LR>+0x003F # QUESTION MARK, left-right
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B <LR>+0x005B # LEFT SQUARE BRACKET, left-right
|
||||
0x5C <LR>+0x005C # REVERSE SOLIDUS, left-right
|
||||
0x5D <LR>+0x005D # RIGHT SQUARE BRACKET, left-right
|
||||
0x5E <LR>+0x005E # CIRCUMFLEX ACCENT, left-right
|
||||
0x5F <LR>+0x005F # LOW LINE, left-right
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B <LR>+0x007B # LEFT CURLY BRACKET, left-right
|
||||
0x7C <LR>+0x007C # VERTICAL LINE, left-right
|
||||
0x7D <LR>+0x007D # RIGHT CURLY BRACKET, left-right
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 <RL>+0x00A0 # NO-BREAK SPACE, right-left
|
||||
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x06BA # ARABIC LETTER NOON GHUNNA
|
||||
0x8C <RL>+0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 <RL>+0x2026 # HORIZONTAL ELLIPSIS, right-left
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 <RL>+0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B <RL>+0x00F7 # DIVISION SIGN, right-left
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 <RL>+0x0020 # SPACE, right-left
|
||||
0xA1 <RL>+0x0021 # EXCLAMATION MARK, right-left
|
||||
0xA2 <RL>+0x0022 # QUOTATION MARK, right-left
|
||||
0xA3 <RL>+0x0023 # NUMBER SIGN, right-left
|
||||
0xA4 <RL>+0x0024 # DOLLAR SIGN, right-left
|
||||
0xA5 0x066A # ARABIC PERCENT SIGN
|
||||
0xA6 <RL>+0x0026 # AMPERSAND, right-left
|
||||
0xA7 <RL>+0x0027 # APOSTROPHE, right-left
|
||||
0xA8 <RL>+0x0028 # LEFT PARENTHESIS, right-left
|
||||
0xA9 <RL>+0x0029 # RIGHT PARENTHESIS, right-left
|
||||
0xAA <RL>+0x002A # ASTERISK, right-left
|
||||
0xAB <RL>+0x002B # PLUS SIGN, right-left
|
||||
0xAC 0x060C # ARABIC COMMA
|
||||
0xAD <RL>+0x002D # HYPHEN-MINUS, right-left
|
||||
0xAE <RL>+0x002E # FULL STOP, right-left
|
||||
0xAF <RL>+0x002F # SOLIDUS, right-left
|
||||
0xB0 <RL>+0x0660 # ARABIC-INDIC DIGIT ZERO, right-left (need override)
|
||||
0xB1 <RL>+0x0661 # ARABIC-INDIC DIGIT ONE, right-left (need override)
|
||||
0xB2 <RL>+0x0662 # ARABIC-INDIC DIGIT TWO, right-left (need override)
|
||||
0xB3 <RL>+0x0663 # ARABIC-INDIC DIGIT THREE, right-left (need override)
|
||||
0xB4 <RL>+0x0664 # ARABIC-INDIC DIGIT FOUR, right-left (need override)
|
||||
0xB5 <RL>+0x0665 # ARABIC-INDIC DIGIT FIVE, right-left (need override)
|
||||
0xB6 <RL>+0x0666 # ARABIC-INDIC DIGIT SIX, right-left (need override)
|
||||
0xB7 <RL>+0x0667 # ARABIC-INDIC DIGIT SEVEN, right-left (need override)
|
||||
0xB8 <RL>+0x0668 # ARABIC-INDIC DIGIT EIGHT, right-left (need override)
|
||||
0xB9 <RL>+0x0669 # ARABIC-INDIC DIGIT NINE, right-left (need override)
|
||||
0xBA <RL>+0x003A # COLON, right-left
|
||||
0xBB 0x061B # ARABIC SEMICOLON
|
||||
0xBC <RL>+0x003C # LESS-THAN SIGN, right-left
|
||||
0xBD <RL>+0x003D # EQUALS SIGN, right-left
|
||||
0xBE <RL>+0x003E # GREATER-THAN SIGN, right-left
|
||||
0xBF 0x061F # ARABIC QUESTION MARK
|
||||
0xC0 <RL>+0x274A # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left
|
||||
0xC1 0x0621 # ARABIC LETTER HAMZA
|
||||
0xC2 0x0622 # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0xC3 0x0623 # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
0xC4 0x0624 # ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
0xC5 0x0625 # ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
0xC6 0x0626 # ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0xC7 0x0627 # ARABIC LETTER ALEF
|
||||
0xC8 0x0628 # ARABIC LETTER BEH
|
||||
0xC9 0x0629 # ARABIC LETTER TEH MARBUTA
|
||||
0xCA 0x062A # ARABIC LETTER TEH
|
||||
0xCB 0x062B # ARABIC LETTER THEH
|
||||
0xCC 0x062C # ARABIC LETTER JEEM
|
||||
0xCD 0x062D # ARABIC LETTER HAH
|
||||
0xCE 0x062E # ARABIC LETTER KHAH
|
||||
0xCF 0x062F # ARABIC LETTER DAL
|
||||
0xD0 0x0630 # ARABIC LETTER THAL
|
||||
0xD1 0x0631 # ARABIC LETTER REH
|
||||
0xD2 0x0632 # ARABIC LETTER ZAIN
|
||||
0xD3 0x0633 # ARABIC LETTER SEEN
|
||||
0xD4 0x0634 # ARABIC LETTER SHEEN
|
||||
0xD5 0x0635 # ARABIC LETTER SAD
|
||||
0xD6 0x0636 # ARABIC LETTER DAD
|
||||
0xD7 0x0637 # ARABIC LETTER TAH
|
||||
0xD8 0x0638 # ARABIC LETTER ZAH
|
||||
0xD9 0x0639 # ARABIC LETTER AIN
|
||||
0xDA 0x063A # ARABIC LETTER GHAIN
|
||||
0xDB <RL>+0x005B # LEFT SQUARE BRACKET, right-left
|
||||
0xDC <RL>+0x005C # REVERSE SOLIDUS, right-left
|
||||
0xDD <RL>+0x005D # RIGHT SQUARE BRACKET, right-left
|
||||
0xDE <RL>+0x005E # CIRCUMFLEX ACCENT, right-left
|
||||
0xDF <RL>+0x005F # LOW LINE, right-left
|
||||
0xE0 0x0640 # ARABIC TATWEEL
|
||||
0xE1 0x0641 # ARABIC LETTER FEH
|
||||
0xE2 0x0642 # ARABIC LETTER QAF
|
||||
0xE3 0x0643 # ARABIC LETTER KAF
|
||||
0xE4 0x0644 # ARABIC LETTER LAM
|
||||
0xE5 0x0645 # ARABIC LETTER MEEM
|
||||
0xE6 0x0646 # ARABIC LETTER NOON
|
||||
0xE7 0x0647 # ARABIC LETTER HEH
|
||||
0xE8 0x0648 # ARABIC LETTER WAW
|
||||
0xE9 0x0649 # ARABIC LETTER ALEF MAKSURA
|
||||
0xEA 0x064A # ARABIC LETTER YEH
|
||||
0xEB 0x064B # ARABIC FATHATAN
|
||||
0xEC 0x064C # ARABIC DAMMATAN
|
||||
0xED 0x064D # ARABIC KASRATAN
|
||||
0xEE 0x064E # ARABIC FATHA
|
||||
0xEF 0x064F # ARABIC DAMMA
|
||||
0xF0 0x0650 # ARABIC KASRA
|
||||
0xF1 0x0651 # ARABIC SHADDA
|
||||
0xF2 0x0652 # ARABIC SUKUN
|
||||
0xF3 0x067E # ARABIC LETTER PEH
|
||||
0xF4 0x0679 # ARABIC LETTER TTEH
|
||||
0xF5 0x0686 # ARABIC LETTER TCHEH
|
||||
0xF6 0x06D5 # ARABIC LETTER AE
|
||||
0xF7 0x06A4 # ARABIC LETTER VEH
|
||||
0xF8 0x06AF # ARABIC LETTER GAF
|
||||
0xF9 0x0688 # ARABIC LETTER DDAL
|
||||
0xFA 0x0691 # ARABIC LETTER RREH
|
||||
0xFB <RL>+0x007B # LEFT CURLY BRACKET, right-left
|
||||
0xFC <RL>+0x007C # VERTICAL LINE, right-left
|
||||
0xFD <RL>+0x007D # RIGHT CURLY BRACKET, right-left
|
||||
0xFE 0x0698 # ARABIC LETTER JEH
|
||||
0xFF 0x06D2 # ARABIC LETTER YEH BARREE
|
33
charmap/BUILD.bazel
Normal file
|
@ -0,0 +1,33 @@
|
|||
# Bundles the character-map .TXT tables listed below into one publicly visible
# target.
filegroup(
|
||||
name = "data",
|
||||
srcs = [
|
||||
"ARABIC.TXT",
|
||||
"CELTIC.TXT",
|
||||
"CENTEURO.TXT",
|
||||
"CHINSIMP.TXT",
|
||||
"CHINTRAD.TXT",
|
||||
"CORPCHAR.TXT",
|
||||
"CROATIAN.TXT",
|
||||
"CYRILLIC.TXT",
|
||||
"DEVANAGA.TXT",
|
||||
"DINGBATS.TXT",
|
||||
"FARSI.TXT",
|
||||
"GAELIC.TXT",
|
||||
"GREEK.TXT",
|
||||
"GUJARATI.TXT",
|
||||
"GURMUKHI.TXT",
|
||||
"HEBREW.TXT",
|
||||
"ICELAND.TXT",
|
||||
"INUIT.TXT",
|
||||
"JAPANESE.TXT",
|
||||
"KEYBOARD.TXT",
|
||||
"KOREAN.TXT",
|
||||
"ROMAN.TXT",
|
||||
"ROMANIAN.TXT",
|
||||
"SYMBOL.TXT",
|
||||
"THAI.TXT",
|
||||
"TURKISH.TXT",
|
||||
"UKRAINE.TXT",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
328
charmap/CELTIC.TXT
Normal file
|
@ -0,0 +1,328 @@
|
|||
#=======================================================================
|
||||
# File name: CELTIC.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Celtic
|
||||
# character set to Unicode 2.1 and later
|
||||
#
|
||||
# Contacts: charsets@apple.com, everson@evertype.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c01 2005-Apr-01 First posted version. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Celtic code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Celtic code order.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Celtic character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Celtic (partly from Michael Everson):
|
||||
# -----------------------------------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# This character set was developed by Michael Everson of Everson
|
||||
# Typography (everson@evertype.com) and was used for the Irish
|
||||
# localizations of Mac OS 6.0.8 and 7.1, for the Welsh localization of
|
||||
# Mac OS 7.1, and for several fonts that can be used on any version of
|
||||
# Mac OS 7.1 or later. Note that while Apple authorized
|
||||
# the Irish and Welsh localizations mentioned above, they were not
|
||||
# systems which shipped with Apple hardware, and were not otherwise
|
||||
# supported by Apple. Fonts conforming to the Mac OS Celtic character
|
||||
# set are available from Everson Typography (http://www.evertype.com)
|
||||
# and MEU Cymru (http://www.meucymru.co.uk). Information about the use
|
||||
# of this character set is available at
|
||||
# http://www.evertype.com/celtscript/celtcode.html.
|
||||
#
|
||||
# The Mac OS Celtic encoding shares the script code smRoman (0) with
|
||||
# the standard Mac OS Roman encoding. To determine if the Celtic
|
||||
# encoding is being used in Mac OS 7-9, you should also check if the
|
||||
# system region code is 50, verIreland, or 79, verWales. Otherwise,
|
||||
# you can check for particular fonts that conform to this encoding.
|
||||
#
|
||||
# This character set is a variant of standard Mac OS Roman, adding
|
||||
# capital and small y with acute, grave, and circumflex, and capital
|
||||
# and small w with acute, grave, circumflex and diaeresis. It has 14
|
||||
# code point differences from standard Mac OS Roman (0xDE, 0xDF, 0xE2,
|
||||
# 0xE3, 0xF6-0xFF).
|
||||
#
|
||||
# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was
|
||||
# mapped to U+00A4. In Mac OS 8.5 and later versions, code point
|
||||
# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard
|
||||
# Apple fonts were updated for Mac OS 8.5 to reflect this. There is
|
||||
# a "currency sign" variant of the Mac OS Celtic encoding that still
|
||||
# maps 0xDB to U+00A4; this can be used for older fonts.
|
||||
# Note: U+20AC is new with Unicode 2.1; for earlier Unicode
|
||||
# versions, Mac OS Celtic 0xDB may be mapped to private-use
|
||||
# character U+F8A0.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE
|
||||
0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 0x2020 # DAGGER
|
||||
0xA1 0x00B0 # DEGREE SIGN
|
||||
0xA2 0x00A2 # CENT SIGN
|
||||
0xA3 0x00A3 # POUND SIGN
|
||||
0xA4 0x00A7 # SECTION SIGN
|
||||
0xA5 0x2022 # BULLET
|
||||
0xA6 0x00B6 # PILCROW SIGN
|
||||
0xA7 0x00DF # LATIN SMALL LETTER SHARP S
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x00A9 # COPYRIGHT SIGN
|
||||
0xAA 0x2122 # TRADE MARK SIGN
|
||||
0xAB 0x00B4 # ACUTE ACCENT
|
||||
0xAC 0x00A8 # DIAERESIS
|
||||
0xAD 0x2260 # NOT EQUAL TO
|
||||
0xAE 0x00C6 # LATIN CAPITAL LETTER AE
|
||||
0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0xB0 0x221E # INFINITY
|
||||
0xB1 0x00B1 # PLUS-MINUS SIGN
|
||||
0xB2 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x00A5 # YEN SIGN
|
||||
0xB5 0x00B5 # MICRO SIGN
|
||||
0xB6 0x2202 # PARTIAL DIFFERENTIAL
|
||||
0xB7 0x2211 # N-ARY SUMMATION
|
||||
0xB8 0x220F # N-ARY PRODUCT
|
||||
0xB9 0x03C0 # GREEK SMALL LETTER PI
|
||||
0xBA 0x222B # INTEGRAL
|
||||
0xBB 0x00AA # FEMININE ORDINAL INDICATOR
|
||||
0xBC 0x00BA # MASCULINE ORDINAL INDICATOR
|
||||
0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA
|
||||
0xBE 0x00E6 # LATIN SMALL LETTER AE
|
||||
0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE
|
||||
0xC0 0x00BF # INVERTED QUESTION MARK
|
||||
0xC1 0x00A1 # INVERTED EXCLAMATION MARK
|
||||
0xC2 0x00AC # NOT SIGN
|
||||
0xC3 0x221A # SQUARE ROOT
|
||||
0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK
|
||||
0xC5 0x2248 # ALMOST EQUAL TO
|
||||
0xC6 0x2206 # INCREMENT
|
||||
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0xCE 0x0152 # LATIN CAPITAL LIGATURE OE
|
||||
0xCF 0x0153 # LATIN SMALL LIGATURE OE
|
||||
0xD0 0x2013 # EN DASH
|
||||
0xD1 0x2014 # EM DASH
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x00F7 # DIVISION SIGN
|
||||
0xD7 0x25CA # LOZENGE
|
||||
0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0xDA 0x2044 # FRACTION SLASH
|
||||
0xDB 0x20AC # EURO SIGN # before Mac OS 8.5 this was U+00A4 CURRENCY SIGN
|
||||
0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0xDE 0x0176 # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
||||
0xDF 0x0177 # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
||||
0xE0 0x2021 # DOUBLE DAGGER
|
||||
0xE1 0x00B7 # MIDDLE DOT
|
||||
0xE2 0x1EF2 # LATIN CAPITAL LETTER Y WITH GRAVE
|
||||
0xE3 0x1EF3 # LATIN SMALL LETTER Y WITH GRAVE
|
||||
0xE4 0x2030 # PER MILLE SIGN
|
||||
0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0xF0 0x2663 # BLACK CLUB SUIT = shamrock # future mapping U+2618 SHAMROCK
|
||||
0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I
|
||||
0xF6 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0xF7 0x00FD # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0xF8 0x0174 # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
||||
0xF9 0x0175 # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
||||
0xFA 0x1E84 # LATIN CAPITAL LETTER W WITH DIAERESIS
|
||||
0xFB 0x1E85 # LATIN SMALL LETTER W WITH DIAERESIS
|
||||
0xFC 0x1E80 # LATIN CAPITAL LETTER W WITH GRAVE
|
||||
0xFD 0x1E81 # LATIN SMALL LETTER W WITH GRAVE
|
||||
0xFE 0x1E82 # LATIN CAPITAL LETTER W WITH ACUTE
|
||||
0xFF 0x1E83 # LATIN SMALL LETTER W WITH ACUTE
|
327
charmap/CENTEURO.TXT
Normal file
|
@ -0,0 +1,327 @@
|
|||
#=======================================================================
|
||||
# File name: CENTEURO.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Central European
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-04 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update URLs. Matches internal utom<b1>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b1>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n05 1998-Feb-05 Update header comments to new format; no
|
||||
# mapping changes. Matches internal utom<n3>,
|
||||
# ufrm<n13>, and Text Encoding Converter
|
||||
# version 1.3.
|
||||
# n03 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n5>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Central European code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Central European code order.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Central European character set uses the standard control
|
||||
# characters at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Central European:
|
||||
# ---------------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported directly in programming
|
||||
# interfaces for QuickDraw Text, the Script Manager, and related
|
||||
# Text Utilities. For other purposes it is supported via transcoding
|
||||
# to and from Unicode.
|
||||
#
|
||||
# This character set is intended to cover the following languages:
|
||||
#
|
||||
# Polish, Czech, Slovak, Hungarian, Estonian, Latvian, Lithuanian
|
||||
#
|
||||
# These are written in Latin script, but using a different set of
|
||||
# accented characters than Mac OS Roman. The Mac OS Central
|
||||
# European character set also includes a number of characters
|
||||
# needed for the Mac OS user interface and localization (e.g.
|
||||
# ellipsis, bullet, copyright sign), several typographic
|
||||
# punctuation symbols, math symbols, etc. However, it has a
|
||||
# smaller set of punctuation and symbols than Mac OS Roman. All of
|
||||
# the characters in Mac OS Central European that are also in the
|
||||
# Mac OS Roman character set are at the same code point in both
|
||||
# character sets; this improves application compatibility.
|
||||
#
|
||||
# Note: This does not have the same letter repertoire as ISO
|
||||
# 8859-2 (Latin-2); each has some accented letters that the other
|
||||
# does not have.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 0x0100 # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x82 0x0101 # LATIN SMALL LETTER A WITH MACRON
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x0104 # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x0105 # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x89 0x010C # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x010D # LATIN SMALL LETTER C WITH CARON
|
||||
0x8C 0x0106 # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x8D 0x0107 # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x0179 # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x90 0x017A # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x91 0x010E # LATIN CAPITAL LETTER D WITH CARON
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 0x010F # LATIN SMALL LETTER D WITH CARON
|
||||
0x94 0x0112 # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x95 0x0113 # LATIN SMALL LETTER E WITH MACRON
|
||||
0x96 0x0116 # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 0x0117 # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x011A # LATIN CAPITAL LETTER E WITH CARON
|
||||
0x9E 0x011B # LATIN SMALL LETTER E WITH CARON
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 0x2020 # DAGGER
|
||||
0xA1 0x00B0 # DEGREE SIGN
|
||||
0xA2 0x0118 # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0xA3 0x00A3 # POUND SIGN
|
||||
0xA4 0x00A7 # SECTION SIGN
|
||||
0xA5 0x2022 # BULLET
|
||||
0xA6 0x00B6 # PILCROW SIGN
|
||||
0xA7 0x00DF # LATIN SMALL LETTER SHARP S
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x00A9 # COPYRIGHT SIGN
|
||||
0xAA 0x2122 # TRADE MARK SIGN
|
||||
0xAB 0x0119 # LATIN SMALL LETTER E WITH OGONEK
|
||||
0xAC 0x00A8 # DIAERESIS
|
||||
0xAD 0x2260 # NOT EQUAL TO
|
||||
0xAE 0x0123 # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0xAF 0x012E # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0xB0 0x012F # LATIN SMALL LETTER I WITH OGONEK
|
||||
0xB1 0x012A # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0xB2 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x012B # LATIN SMALL LETTER I WITH MACRON
|
||||
0xB5 0x0136 # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0xB6 0x2202 # PARTIAL DIFFERENTIAL
|
||||
0xB7 0x2211 # N-ARY SUMMATION
|
||||
0xB8 0x0142 # LATIN SMALL LETTER L WITH STROKE
|
||||
0xB9 0x013B # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0xBA 0x013C # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0xBB 0x013D # LATIN CAPITAL LETTER L WITH CARON
|
||||
0xBC 0x013E # LATIN SMALL LETTER L WITH CARON
|
||||
0xBD 0x0139 # LATIN CAPITAL LETTER L WITH ACUTE
|
||||
0xBE 0x013A # LATIN SMALL LETTER L WITH ACUTE
|
||||
0xBF 0x0145 # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0xC0 0x0146 # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0xC1 0x0143 # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0xC2 0x00AC # NOT SIGN
|
||||
0xC3 0x221A # SQUARE ROOT
|
||||
0xC4 0x0144 # LATIN SMALL LETTER N WITH ACUTE
|
||||
0xC5 0x0147 # LATIN CAPITAL LETTER N WITH CARON
|
||||
0xC6 0x2206 # INCREMENT
|
||||
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x0148 # LATIN SMALL LETTER N WITH CARON
|
||||
0xCC 0x0150 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0xCE 0x0151 # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
||||
0xCF 0x014C # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0xD0 0x2013 # EN DASH
|
||||
0xD1 0x2014 # EM DASH
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x00F7 # DIVISION SIGN
|
||||
0xD7 0x25CA # LOZENGE
|
||||
0xD8 0x014D # LATIN SMALL LETTER O WITH MACRON
|
||||
0xD9 0x0154 # LATIN CAPITAL LETTER R WITH ACUTE
|
||||
0xDA 0x0155 # LATIN SMALL LETTER R WITH ACUTE
|
||||
0xDB 0x0158 # LATIN CAPITAL LETTER R WITH CARON
|
||||
0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0xDE 0x0159 # LATIN SMALL LETTER R WITH CARON
|
||||
0xDF 0x0156 # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0xE0 0x0157 # LATIN SMALL LETTER R WITH CEDILLA
|
||||
0xE1 0x0160 # LATIN CAPITAL LETTER S WITH CARON
|
||||
0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK
|
||||
0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK
|
||||
0xE4 0x0161 # LATIN SMALL LETTER S WITH CARON
|
||||
0xE5 0x015A # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0xE6 0x015B # LATIN SMALL LETTER S WITH ACUTE
|
||||
0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0xE8 0x0164 # LATIN CAPITAL LETTER T WITH CARON
|
||||
0xE9 0x0165 # LATIN SMALL LETTER T WITH CARON
|
||||
0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0xEB 0x017D # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0xEC 0x017E # LATIN SMALL LETTER Z WITH CARON
|
||||
0xED 0x016A # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0xF0 0x016B # LATIN SMALL LETTER U WITH MACRON
|
||||
0xF1 0x016E # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||
0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0xF3 0x016F # LATIN SMALL LETTER U WITH RING ABOVE
|
||||
0xF4 0x0170 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0xF5 0x0171 # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||
0xF6 0x0172 # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0xF7 0x0173 # LATIN SMALL LETTER U WITH OGONEK
|
||||
0xF8 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0xF9 0x00FD # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0xFA 0x0137 # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0xFB 0x017B # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0xFC 0x0141 # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0xFD 0x017C # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0xFE 0x0122 # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0xFF 0x02C7 # CARON
|
7914
charmap/CHINSIMP.TXT
Normal file
13911
charmap/CHINTRAD.TXT
Normal file
519
charmap/CORPCHAR.TXT
Normal file
|
@ -0,0 +1,519 @@
|
|||
#=======================================================================
|
||||
# File name: CORPCHAR.TXT
|
||||
#
|
||||
# Contents: Registry (external version) of Apple use of
|
||||
# Unicode corporate-zone characters.
|
||||
#
|
||||
# Copyright: (c) 1994-2003, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c03 2005-Apr-04 Deprecate 0xF8E6. Matches internal registry
|
||||
# <c1.3>
|
||||
# c02 2003-Feb-18 Add entry for 0xF802.
|
||||
# b4,c1 2002-Dec-19 Add entries for 0xF700-0xF747 and 0xF803-
|
||||
# 0xF84F; update replacement characters for
|
||||
# 0xF883, 0xF8AA, 0xF8B4, 0xF8B7, 0xF8BD,
|
||||
# 0xF8D7-0xF8E4, 0xF8EB-0xF8F3, 0xF8F5-
|
||||
# 0xF8FE. Deprecate 0xF8E7, 0xF8F4. Delete Mac
|
||||
# OS Greek mapping for 0xF8A0. Update URLs.
|
||||
# Matches internal registry <b7>.
|
||||
# b03 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal registry <b3> and Text Encoding
|
||||
# Converter version 1.5.
|
||||
# b02 1998-Aug-18 Expanded usage of 0xF8A0. Matches internal
|
||||
# registry <b3>.
|
||||
# n11 1998-Feb-05 Minor update to header comments
|
||||
# n09 1997-Dec-14 Update to match internal registry <n23>:
|
||||
# Add source hint 0xF850, transcoding hints
|
||||
# 0xF860-0xF86B and 0xF870-0xF872, deprecate
|
||||
# almost all other non-hint corporate
|
||||
# characters.
|
||||
# n08 1997-Jul-17 Update to match internal registry <n13>:
|
||||
# Add characters for Mac OS Chinese, Korean &
|
||||
# Farsi. Add CJK source hints. Deprecate some
|
||||
# characters in favor of combinations of
|
||||
# standard characters and transcoding hints.
|
||||
# Change header format.
|
||||
# n04 1995-Nov-15 Update to match internal registry <n8>:
|
||||
# Add characters for Mac OS Hebrew and Thai.
|
||||
# n02 1995-Apr-18 First version. Matches internal registry
|
||||
# <n5>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Two tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Unicode corporate character code point
|
||||
# (in hex as 0xNNNN)
|
||||
# Column #2 is a comment containing:
|
||||
# 1) an informal name describing the Unicode corporate character,
|
||||
# or if it is deprecated, information about what to use
|
||||
# instead.
|
||||
# 2) optionally, another '#', followed by information on which
|
||||
# Mac OS encodings use the Unicode corporate character, and -
|
||||
# if relevant - the Mac OS code points that correspond to the
|
||||
# corporate character.
|
||||
#
|
||||
# The entries are in Unicode order.
|
||||
#_______________________________________________________________________
|
||||
|
||||
# NeXT's OpenStep reserved corporate characters in the range 0xF700 to
|
||||
# 0xF8FF for transient use as keyboard function keys. The ones actually
|
||||
# assigned in NextStep are 0xF700-0xF747, as follows. These are still
|
||||
# used in the Mac OS X AppKit frameworks. Note that there is no glyph
|
||||
# associated with these, and they are not mapped or used by the Mac OS
|
||||
# Text Encoding Converter.
|
||||
0xF700 # NSUpArrowFunctionKey
|
||||
0xF701 # NSDownArrowFunctionKey
|
||||
0xF702 # NSLeftArrowFunctionKey
|
||||
0xF703 # NSRightArrowFunctionKey
|
||||
0xF704 # NSF1FunctionKey
|
||||
0xF705 # NSF2FunctionKey
|
||||
0xF706 # NSF3FunctionKey
|
||||
0xF707 # NSF4FunctionKey
|
||||
0xF708 # NSF5FunctionKey
|
||||
0xF709 # NSF6FunctionKey
|
||||
0xF70A # NSF7FunctionKey
|
||||
0xF70B # NSF8FunctionKey
|
||||
0xF70C # NSF9FunctionKey
|
||||
0xF70D # NSF10FunctionKey
|
||||
0xF70E # NSF11FunctionKey
|
||||
0xF70F # NSF12FunctionKey
|
||||
0xF710 # NSF13FunctionKey
|
||||
0xF711 # NSF14FunctionKey
|
||||
0xF712 # NSF15FunctionKey
|
||||
0xF713 # NSF16FunctionKey
|
||||
0xF714 # NSF17FunctionKey
|
||||
0xF715 # NSF18FunctionKey
|
||||
0xF716 # NSF19FunctionKey
|
||||
0xF717 # NSF20FunctionKey
|
||||
0xF718 # NSF21FunctionKey
|
||||
0xF719 # NSF22FunctionKey
|
||||
0xF71A # NSF23FunctionKey
|
||||
0xF71B # NSF24FunctionKey
|
||||
0xF71C # NSF25FunctionKey
|
||||
0xF71D # NSF26FunctionKey
|
||||
0xF71E # NSF27FunctionKey
|
||||
0xF71F # NSF28FunctionKey
|
||||
0xF720 # NSF29FunctionKey
|
||||
0xF721 # NSF30FunctionKey
|
||||
0xF722 # NSF31FunctionKey
|
||||
0xF723 # NSF32FunctionKey
|
||||
0xF724 # NSF33FunctionKey
|
||||
0xF725 # NSF34FunctionKey
|
||||
0xF726 # NSF35FunctionKey
|
||||
0xF727 # NSInsertFunctionKey
|
||||
0xF728 # NSDeleteFunctionKey
|
||||
0xF729 # NSHomeFunctionKey
|
||||
0xF72A # NSBeginFunctionKey
|
||||
0xF72B # NSEndFunctionKey
|
||||
0xF72C # NSPageUpFunctionKey
|
||||
0xF72D # NSPageDownFunctionKey
|
||||
0xF72E # NSPrintScreenFunctionKey
|
||||
0xF72F # NSScrollLockFunctionKey
|
||||
0xF730 # NSPauseFunctionKey
|
||||
0xF731 # NSSysReqFunctionKey
|
||||
0xF732 # NSBreakFunctionKey
|
||||
0xF733 # NSResetFunctionKey
|
||||
0xF734 # NSStopFunctionKey
|
||||
0xF735 # NSMenuFunctionKey
|
||||
0xF736 # NSUserFunctionKey
|
||||
0xF737 # NSSystemFunctionKey
|
||||
0xF738 # NSPrintFunctionKey
|
||||
0xF739 # NSClearLineFunctionKey
|
||||
0xF73A # NSClearDisplayFunctionKey
|
||||
0xF73B # NSInsertLineFunctionKey
|
||||
0xF73C # NSDeleteLineFunctionKey
|
||||
0xF73D # NSInsertCharFunctionKey
|
||||
0xF73E # NSDeleteCharFunctionKey
|
||||
0xF73F # NSPrevFunctionKey
|
||||
0xF740 # NSNextFunctionKey
|
||||
0xF741 # NSSelectFunctionKey
|
||||
0xF742 # NSExecuteFunctionKey
|
||||
0xF743 # NSUndoFunctionKey
|
||||
0xF744 # NSRedoFunctionKey
|
||||
0xF745 # NSFindFunctionKey
|
||||
0xF746 # NSHelpFunctionKey
|
||||
0xF747 # NSModeSwitchFunctionKey
|
||||
|
||||
# The following (11) are for mapping the Mac OS Keyboard and Mac OS Korean
|
||||
# encodings (for Mac OS Korean also see 0xF83D, 0xF840-0xF84F).
|
||||
0xF802 # lower left pencil # Keyboard-0x0F
|
||||
0xF803 # contextual menu symbol # Keyboard-0x6D
|
||||
0xF804 # eject symbol # Keyboard-0x8C
|
||||
0xF805 # black diamond minus white square # Korean-0xA658
|
||||
0xF806 # black square minus white diamond # Korean-0xA663
|
||||
0xF807 # telephone dial # Korean-0xA69F
|
||||
0xF808 # five vertical lines # Korean-0xA68F
|
||||
0xF809 # one downward-pointing black triangle over two others # Korean-0xA681
|
||||
0xF80A # two interwoven eye shapes # Korean-0xA674
|
||||
0xF80B # narrow-leaf four-petal florette # Korean-0xA696
|
||||
0xF80C # four interleaved fisheyes # Korean-0xA69A
|
||||
|
||||
# The following (51) are mainly for mapping the dingbat/fleuron repertoire
|
||||
# of the Hoefler Ornaments font, which is otherwise unmappable to Unicode.
|
||||
# 0xF83D is also used for mapping MacKorean.
|
||||
0xF80D # horizontal line thickening at center # Hoefler Ornaments glyph 6
|
||||
0xF80E # dotted X design 1 # Hoefler Ornaments glyph 7
|
||||
0xF80F # dotted X design 2 # Hoefler Ornaments glyph 8
|
||||
0xF810 # dotted X design 3 # Hoefler Ornaments glyph 9
|
||||
0xF811 # dotted X design 4 # Hoefler Ornaments glyph 10
|
||||
0xF812 # horizontal line with wasp waist at center # Hoefler Ornaments glyph 11
|
||||
0xF813 # horizontal line thickening at center, alternate # Hoefler Ornaments glyph 12
|
||||
0xF814 # half-filled fleuron 1 # Hoefler Ornaments glyph 13
|
||||
0xF815 # half-filled fleuron 2 # Hoefler Ornaments glyph 14
|
||||
0xF816 # half-filled fleuron 3 # Hoefler Ornaments glyph 15
|
||||
0xF817 # half-filled fleuron 4 # Hoefler Ornaments glyph 16
|
||||
0xF818 # half-filled fleuron 5 # Hoefler Ornaments glyph 17
|
||||
0xF819 # half-filled fleuron 6 # Hoefler Ornaments glyph 18
|
||||
0xF81A # half-filled fleuron 7 # Hoefler Ornaments glyph 19
|
||||
0xF81B # half-filled fleuron 8 # Hoefler Ornaments glyph 20
|
||||
0xF81C # half-filled fleuron 9 # Hoefler Ornaments glyph 21
|
||||
0xF81D # half-filled fleuron 10 # Hoefler Ornaments glyph 22
|
||||
0xF81E # half-filled fleuron 11 # Hoefler Ornaments glyph 23
|
||||
0xF81F # half-filled fleuron 12 # Hoefler Ornaments glyph 24
|
||||
0xF820 # half-filled fleuron 13 # Hoefler Ornaments glyph 25
|
||||
0xF821 # half-filled fleuron 14 # Hoefler Ornaments glyph 26
|
||||
0xF822 # half-filled fleuron 15 # Hoefler Ornaments glyph 27
|
||||
0xF823 # half-filled fleuron 16 # Hoefler Ornaments glyph 28
|
||||
0xF824 # half-filled dingbat 1 # Hoefler Ornaments glyph 29
|
||||
0xF825 # half-filled dingbat 2 # Hoefler Ornaments glyph 30
|
||||
0xF826 # half-filled dingbat 3 # Hoefler Ornaments glyph 31
|
||||
0xF827 # filled fleuron 1 # Hoefler Ornaments glyph 34
|
||||
0xF828 # filled fleuron 2 # Hoefler Ornaments glyph 35
|
||||
0xF829 # filled fleuron 3 # Hoefler Ornaments glyph 36
|
||||
0xF82A # filled fleuron 4 # Hoefler Ornaments glyph 37
|
||||
0xF82B # filled fleuron 5 # Hoefler Ornaments glyph 38
|
||||
0xF82C # filled fleuron 6 # Hoefler Ornaments glyph 39
|
||||
0xF82D # filled fleuron 7 # Hoefler Ornaments glyph 40
|
||||
0xF82E # filled fleuron 8 # Hoefler Ornaments glyph 41
|
||||
0xF82F # filled fleuron 9 # Hoefler Ornaments glyph 42
|
||||
0xF830 # filled fleuron 10 # Hoefler Ornaments glyph 43
|
||||
0xF831 # filled fleuron 11 # Hoefler Ornaments glyph 44
|
||||
0xF832 # filled fleuron 12 # Hoefler Ornaments glyph 45
|
||||
0xF833 # filled fleuron 13 # Hoefler Ornaments glyph 46
|
||||
0xF834 # filled fleuron 14 # Hoefler Ornaments glyph 47
|
||||
0xF835 # filled fleuron 15 # Hoefler Ornaments glyph 48
|
||||
0xF836 # filled fleuron 16 # Hoefler Ornaments glyph 49
|
||||
0xF837 # filled dingbat 1 # Hoefler Ornaments glyph 50
|
||||
0xF838 # filled dingbat 2 # Hoefler Ornaments glyph 51
|
||||
0xF839 # filled dingbat 3 # Hoefler Ornaments glyph 52
|
||||
0xF83A # sun with face # Hoefler Ornaments glyph 53
|
||||
0xF83B # moon with face # Hoefler Ornaments glyph 54
|
||||
0xF83C # crown # Hoefler Ornaments glyph 55
|
||||
0xF83D # fleur-de-lis # Korean-0xA642, Hoefler Ornaments glyph 57
|
||||
0xF83E # sailing ship # Hoefler Ornaments glyph 58
|
||||
0xF83F # fleuron 17 # Hoefler Ornaments glyph 59
|
||||
|
||||
# The following (16) are for mapping the Mac OS Korean encoding
|
||||
# (also see 0xF805-0xF80C, 0xF83D).
|
||||
0xF840 # three asterisks aligned vertically # Korean-0xA16E
|
||||
0xF841 # left right up down arrow # Korean-0xA894
|
||||
0xF842 # downwards wave arrow # Korean-0xAC54
|
||||
0xF843 # leftwards white arrow from wall (cf. U+21F0) # Korean-0xAC42
|
||||
0xF844 # black leftwards arrowhead (cf. U+27A4) # Korean-0xAC49
|
||||
0xF845 # black-feathered leftwards arrow (cf. U+27B5) # Korean-0xAC5F
|
||||
0xF846 # leftwards arrowhead with tail of spreading ripples # Korean-0xA867
|
||||
0xF847 # rightwards arrowhead with tail of spreading ripples # Korean-0xA868
|
||||
0xF848 # large white leftwards arrow with white fins # Korean-0xA89D
|
||||
0xF849 # large white rightwards arrow with white fins # Korean-0xA89C
|
||||
0xF84A # leftwards arrow with bow # Korean-0xAC4B
|
||||
0xF84B # rightwards arrow with bow # Korean-0xAC4A
|
||||
0xF84C # pentagon # Korean-0xA747
|
||||
0xF84D # trapezoid # Korean-0xA74B
|
||||
0xF84E # quadrilateral with shorter right side # Korean-0xA74C
|
||||
0xF84F # quadrilateral with shorter left side # Korean-0xA74D
|
||||
|
||||
# The block of 16 characters 0xF850-0xF85F is for source hint characters.
|
||||
# These have no display (like zero-width no-break space). If they appear
|
||||
# in text, they can only be mapped to tables that include them. If a run
|
||||
# of Unicode characters such as Han characters could otherwise be mapped
|
||||
# to any of several encodings, including one of these hint characters can
|
||||
# force the text to be mapped only to an encoding whose mapping table
|
||||
# includes the hint character. Once they have forced mapping to a particular
|
||||
# encoding, they no longer apply (they don't need to be cancelled); if a
|
||||
# subsequent character cannot be mapped to that encoding, it may be mapped
|
||||
# to another encoding. Currently source hints are mainly defined for CJK
|
||||
# source disambiguation.
|
||||
# NOTE: These are only defined for application developers who have requested
|
||||
# them. The Mac OS Text Encoding Converter does not generate these when
|
||||
# converting from other CJK encodings to Unicode. However, it will handle
|
||||
# these characters correctly when converting from Unicode to other encodings.
|
||||
0xF850 # source hint: Reset, try all candidate encodings in preferred order.
|
||||
0xF85C # source hint: Chinese simplified
|
||||
0xF85D # source hint: Chinese traditional
|
||||
0xF85E # source hint: Japanese
|
||||
0xF85F # source hint: Korean
|
||||
|
||||
# The block of 32 characters 0xF860-0xF87F is for transcoding hints.
|
||||
# These are used in combination with standard Unicode characters to force
|
||||
# them to be treated in a special way for mapping to other encodings;
|
||||
# they have no other effect.
|
||||
#
|
||||
# 0xF870-0xF87F are "variant tags" - they are like combining characters,
|
||||
# and can follow a standard Unicode character (or a sequence consisting of a base
|
||||
# character and other combining characters) to tag it so that it will be
|
||||
# unique, treated in a special way for transcoding. These always terminate
|
||||
# a sequence of combining characters.
|
||||
#
|
||||
# 0xF860-0xF86B are "grouping hints" - they precede a group of two to
|
||||
# four standard Unicode characters to indicate that they are treated as a
|
||||
# group for transcoding. This grouping overrides any other combining
|
||||
# behavior.
|
||||
#
|
||||
# Here are the ones defined so far:
|
||||
0xF860 # transcoding hint: group next 2 characters # Japanese,Korean
|
||||
0xF861 # transcoding hint: group next 3 characters # Japanese,Korean
|
||||
0xF862 # transcoding hint: group next 4 characters # Japanese,Korean
|
||||
0xF863 # transcoding hint: group next 4 characters, alt1 # Korean
|
||||
0xF864 # transcoding hint: group next 4 characters, alt2 # Korean
|
||||
0xF865 # transcoding hint: group next 4 characters, alt3 # Korean
|
||||
0xF866 # transcoding hint: group next 4 characters, alt4 # Korean
|
||||
0xF867 # transcoding hint: group next 2 characters, alt1 # Korean
|
||||
0xF868 # transcoding hint: group next 2 characters, alt2 # Korean
|
||||
0xF869 # transcoding hint: group next 2 characters, alt3 # Korean
|
||||
0xF86A # transcoding hint: group next 2 characters, RL # Hebrew
|
||||
0xF86B # transcoding hint: group next 4 characters, RL # Farsi variant
|
||||
#
|
||||
0xF870 # transcoding hint: variant tag 16 # Symbol, Korean
|
||||
0xF871 # transcoding hint: variant tag 15 # Symbol, Korean
|
||||
0xF872 # transcoding hint: variant tag 14 # Symbol
|
||||
0xF873 # transcoding hint: variant tag 13 # Korean, Thai
|
||||
0xF874 # transcoding hint: variant tag 12 # Korean, Thai
|
||||
0xF875 # transcoding hint: variant tag 11 # Korean, Thai
|
||||
0xF876 # transcoding hint: variant tag 10 # Korean
|
||||
0xF877 # transcoding hint: variant tag 9 # Korean
|
||||
0xF878 # transcoding hint: variant tag 8 # Korean
|
||||
0xF879 # transcoding hint: variant tag 7 # Korean
|
||||
0xF87A # transcoding hint: variant tag 6 # Korean
|
||||
0xF87B # transcoding hint: variant tag 5 # Korean
|
||||
0xF87C # transcoding hint: variant tag 4 # ChineseTrad, Korean, Dingbats
|
||||
0xF87D # transcoding hint: variant tag 3 # ChineseTrad
|
||||
0xF87E # transcoding hint: variant tag 2 # Chinese,Japanese
|
||||
0xF87F # transcoding hint: variant tag 1 # CJK,Symbol,Dingbats,Hebrew
|
||||
|
||||
# The following (2) are metrics "characters" so applications can get the
|
||||
# height and width of double-byte character glyphs by measuring the glyph of a
|
||||
# one-byte character (e.g. calling CharWidth for character 0x82 in a Chinese
|
||||
# Traditional font); this approach assumes that the glyphs for all double-byte
|
||||
# characters in a font have the same metrics, which is currently true. Note
|
||||
# that the width-metric character glyphs are used differently for TrueType and
|
||||
# old-style bitmap fonts; for TrueType fonts the metric glyph width is equal
|
||||
# to the full width of a double-byte character glyph, while for FBIT/FDEF
|
||||
# bitmap fonts the metric glyph width is half the width of a double-byte
|
||||
# character glyph.
|
||||
0xF880 # height-metric character for double-byte fonts # Chinese Simp&Trad-0x81
|
||||
0xF881 # width-metric character for double-byte fonts # Chinese Simp&Trad-0x82
|
||||
|
||||
# The following (2) are for the TrueType variant of Mac OS Farsi.
|
||||
# NOTE: 0xF883 is deprecated, but is still loosely mapped to 0xA4 in the
|
||||
# Mac OS Farsi TrueType variant.
|
||||
0xF882 # Arabic ligature "peace on him" # Farsi(TrueType variant)-0x8B
|
||||
0xF883 # deprecated, use 0xFDFC (3.2) or 0xF86B+0x0631+0x06CC+0x0627+0x0644 # Farsi(TrueType variant)-0xA4
|
||||
|
||||
# The following (22) are for the Mac OS Thai encoding.
|
||||
# In this encoding, positional variants of upper vowels, tone marks,
|
||||
# and other marks are normally handled automatically by WorldScript I.
|
||||
# However, the Thai-DTP keyboard allows the codes for the positional
|
||||
# variants to be entered directly, so they must be treated as
|
||||
# characters. When the abstract character is treated as a positional
|
||||
# variant, it has the right (and high, if relevant) position.
|
||||
# NOTE: These are now all deprecated in favor of combinations of standard
|
||||
# characters and transcoding hints. The deprecated characters will still
|
||||
# be loosely mapped to the appropriate Mac OS Thai character.
|
||||
0xF884 # deprecated, use 0x0E31+0xF874 # Thai-0x92
|
||||
0xF885 # deprecated, use 0x0E34+0xF874 # Thai-0x94
|
||||
0xF886 # deprecated, use 0x0E35+0xF874 # Thai-0x95
|
||||
0xF887 # deprecated, use 0x0E36+0xF874 # Thai-0x96
|
||||
0xF888 # deprecated, use 0x0E37+0xF874 # Thai-0x97
|
||||
0xF889 # deprecated, use 0x0E47+0xF874 # Thai-0x93
|
||||
0xF88A # deprecated, use 0x0E48+0xF874 # Thai-0x98
|
||||
0xF88B # deprecated, use 0x0E48+0xF873 # Thai-0x88
|
||||
0xF88C # deprecated, use 0x0E48+0xF875 # Thai-0x83
|
||||
0xF88D # deprecated, use 0x0E49+0xF874 # Thai-0x99
|
||||
0xF88E # deprecated, use 0x0E49+0xF873 # Thai-0x89
|
||||
0xF88F # deprecated, use 0x0E49+0xF875 # Thai-0x84
|
||||
0xF890 # deprecated, use 0x0E4A+0xF874 # Thai-0x9A
|
||||
0xF891 # deprecated, use 0x0E4A+0xF873 # Thai-0x8A
|
||||
0xF892 # deprecated, use 0x0E4A+0xF875 # Thai-0x85
|
||||
0xF893 # deprecated, use 0x0E4B+0xF874 # Thai-0x9B
|
||||
0xF894 # deprecated, use 0x0E4B+0xF873 # Thai-0x8B
|
||||
0xF895 # deprecated, use 0x0E4B+0xF875 # Thai-0x86
|
||||
0xF896 # deprecated, use 0x0E4C+0xF874 # Thai-0x9C
|
||||
0xF897 # deprecated, use 0x0E4C+0xF873 # Thai-0x8C
|
||||
0xF898 # deprecated, use 0x0E4C+0xF875 # Thai-0x87
|
||||
0xF899 # deprecated, use 0x0E4D+0xF874 # Thai-0x8F
|
||||
|
||||
# The following (6) are for the Mac OS Hebrew encoding. Four of
|
||||
# these are for the obsolete "canoral" codes that were used before
|
||||
# System 7.1/Worldscript to control positioning of nikud marks (points).
|
||||
# In the future these 4 code points may be redefined.
|
||||
# NOTE: Some of these are deprecated in favor of a combination of standard
|
||||
# character and transcoding hint. The deprecated characters will still
|
||||
# be loosely mapped to the appropriate Mac OS Hebrew character.
|
||||
0xF89A # deprecated, use 0xF86A+0x05DC+0x05B9 # Hebrew-0xC0
|
||||
0xF89B # Hebrew canoral 1 # Hebrew-0xC2
|
||||
0xF89C # Hebrew canoral 2 # Hebrew-0xC3
|
||||
0xF89D # Hebrew canoral 3 # Hebrew-0xC4
|
||||
0xF89E # Hebrew canoral 4 # Hebrew-0xC5
|
||||
0xF89F # deprecated, use 0x05B8+0xF87F # Hebrew-0xDE
|
||||
|
||||
# The following (1) is for mapping the single undefined code point in
|
||||
# the Mac OS Turkish encoding (formerly also Mac OS Greek), thus permitting full
|
||||
# round-trip fidelity. This character is also used for mapping EURO SIGN
|
||||
# when mapping to Unicode 1.1 (e.g. for Mac OS Roman and Symbol).
|
||||
0xF8A0 # undefined1, also EURO SIGN for Unicode 1.1 # Turkish-0xF5, Roman-0xDB, Symbol-0xA0
|
||||
|
||||
# The following (54) are for the Mac OS Japanese encoding.
|
||||
# part 1 - Apple corporate Unicode chars for Mac OS Japanese extended
|
||||
# characters not in Unicode.
|
||||
# NOTE: These are now all deprecated in favor of combinations of standard
|
||||
# characters and transcoding hints. The deprecated characters will still
|
||||
# be loosely mapped to the appropriate Mac OS Japanese character.
|
||||
0xF8A1 # deprecated, use 0xF860+0x0030+0x002E # Jpn-0x8591
|
||||
0xF8A2 # deprecated, use 0xF862+0x0058+0x0049+0x0049+0x0049 # Jpn-0x85AB
|
||||
0xF8A3 # deprecated, use 0xF861+0x0058+0x0049+0x0056 # Jpn-0x85AC
|
||||
0xF8A4 # deprecated, use 0xF860+0x0058+0x0056 # Jpn-0x85AD
|
||||
0xF8A5 # deprecated, use 0xF862+0x0078+0x0069+0x0069+0x0069 # Jpn-0x85BF
|
||||
0xF8A6 # deprecated, use 0xF861+0x0078+0x0069+0x0076 # Jpn-0x85C0
|
||||
0xF8A7 # deprecated, use 0xF860+0x0078+0x0076 # Jpn-0x85C1
|
||||
0xF8A8 # deprecated, use 0xFF4D+0xF87F # Jpn-0x8645
|
||||
0xF8A9 # deprecated, use 0xFF47+0xF87F # Jpn-0x864B
|
||||
0xF8AA # deprecated, use 0x2113 # Jpn-0x8650
|
||||
0xF8AB # deprecated, use 0xF860+0x0054+0x0042 # Jpn-0x865D
|
||||
0xF8AC # deprecated, use 0xF861+0x0046+0x0041+0x0058 # Jpn-0x869E
|
||||
0xF8AD # deprecated, use 0xF860+0x2193+0x2191 # Jpn-0x86CE
|
||||
0xF8AE # deprecated, use 0x21E8+0xF87A # Jpn-0x86D3
|
||||
0xF8AF # deprecated, use 0x21E6+0xF87A # Jpn-0x86D4
|
||||
0xF8B0 # deprecated, use 0x21E7+0xF87A # Jpn-0x86D5
|
||||
0xF8B1 # deprecated, use 0x21E9+0xF87A # Jpn-0x86D6
|
||||
0xF8B2 # deprecated, use 0xF862+0x6709+0x9650+0x4F1A+0x793E # Jpn-0x87FB
|
||||
0xF8B3 # deprecated, use 0xF862+0x8CA1+0x56E3+0x6CD5+0x4EBA # Jpn-0x87FC
|
||||
0xF8B4 # deprecated, use 0x301F # Jpn-0x8855
|
||||
# part 2 - Apple corporate Unicode chars for Mac OS Japanese vertical
|
||||
# forms not in Unicode.
|
||||
# NOTE: These are now all deprecated in favor of combinations of standard
|
||||
# characters and transcoding hints. The deprecated characters will still
|
||||
# be loosely mapped to the appropriate Mac OS Japanese character.
|
||||
0xF8B5 # deprecated, use 0x3001+0xF87E # Jpn-0xEB41
|
||||
0xF8B6 # deprecated, use 0x3002+0xF87E # Jpn-0xEB42
|
||||
0xF8B7 # deprecated, use 0xFFE3+0xF87E # Jpn-0xEB50
|
||||
0xF8B8 # deprecated, use 0x30FC+0xF87E # Jpn-0xEB5B
|
||||
0xF8B9 # deprecated, use 0x2010+0xF87E # Jpn-0xEB5D
|
||||
0xF8BA # deprecated, use 0x301C+0xF87E # Jpn-0xEB60
|
||||
0xF8BB # deprecated, use 0x2016+0xF87E # Jpn-0xEB61
|
||||
0xF8BC # deprecated, use 0xFF5C+0xF87E # Jpn-0xEB62
|
||||
0xF8BD # deprecated, use 0x2026+0xF87E # Jpn-0xEB63
|
||||
0xF8BE # deprecated, use 0xFF3B+0xF87E # Jpn-0xEB6D
|
||||
0xF8BF # deprecated, use 0xFF3D+0xF87E # Jpn-0xEB6E
|
||||
0xF8C0 # deprecated, use 0xFF1D+0xF87E # Jpn-0xEB81
|
||||
0xF8C1 # deprecated, use 0x3041+0xF87E # Jpn-0xEC9F
|
||||
0xF8C2 # deprecated, use 0x3043+0xF87E # Jpn-0xECA1
|
||||
0xF8C3 # deprecated, use 0x3045+0xF87E # Jpn-0xECA3
|
||||
0xF8C4 # deprecated, use 0x3047+0xF87E # Jpn-0xECA5
|
||||
0xF8C5 # deprecated, use 0x3049+0xF87E # Jpn-0xECA7
|
||||
0xF8C6 # deprecated, use 0x3063+0xF87E # Jpn-0xECC1
|
||||
0xF8C7 # deprecated, use 0x3083+0xF87E # Jpn-0xECE1
|
||||
0xF8C8 # deprecated, use 0x3085+0xF87E # Jpn-0xECE3
|
||||
0xF8C9 # deprecated, use 0x3087+0xF87E # Jpn-0xECE5
|
||||
0xF8CA # deprecated, use 0x308E+0xF87E # Jpn-0xECEC
|
||||
0xF8CB # deprecated, use 0x30A1+0xF87E # Jpn-0xED40
|
||||
0xF8CC # deprecated, use 0x30A3+0xF87E # Jpn-0xED42
|
||||
0xF8CD # deprecated, use 0x30A5+0xF87E # Jpn-0xED44
|
||||
0xF8CE # deprecated, use 0x30A7+0xF87E # Jpn-0xED46
|
||||
0xF8CF # deprecated, use 0x30A9+0xF87E # Jpn-0xED48
|
||||
0xF8D0 # deprecated, use 0x30C3+0xF87E # Jpn-0xED62
|
||||
0xF8D1 # deprecated, use 0x30E3+0xF87E # Jpn-0xED83
|
||||
0xF8D2 # deprecated, use 0x30E5+0xF87E # Jpn-0xED85
|
||||
0xF8D3 # deprecated, use 0x30E7+0xF87E # Jpn-0xED87
|
||||
0xF8D4 # deprecated, use 0x30EE+0xF87E # Jpn-0xED8E
|
||||
0xF8D5 # deprecated, use 0x30F5+0xF87E # Jpn-0xED95
|
||||
0xF8D6 # deprecated, use 0x30F6+0xF87E # Jpn-0xED96
|
||||
|
||||
# The following (14) are for the Mac OS Dingbats encoding.
|
||||
# NOTE: These are now all deprecated in favor of standard characters or
|
||||
# combinations of standard characters and transcoding hints. The
|
||||
# deprecated characters will still be loosely mapped to the appropriate
|
||||
# Mac OS Dingbats character.
|
||||
0xF8D7 # deprecated, use 0x2768 (3.2) or 0x0028 # Dingbats-0x80
|
||||
0xF8D8 # deprecated, use 0x2769 (3.2) or 0x0029 # Dingbats-0x81
|
||||
0xF8D9 # deprecated, use 0x276A (3.2) or 0x0028+0xF87F # Dingbats-0x82
|
||||
0xF8DA # deprecated, use 0x276B (3.2) or 0x0029+0xF87F # Dingbats-0x83
|
||||
0xF8DB # deprecated, use 0x276C (3.2) or 0x3008 # Dingbats-0x84
|
||||
0xF8DC # deprecated, use 0x276D (3.2) or 0x3009 # Dingbats-0x85
|
||||
0xF8DD # deprecated, use 0x276E (3.2) or 0x2039 # Dingbats-0x86
|
||||
0xF8DE # deprecated, use 0x276F (3.2) or 0x203A # Dingbats-0x87
|
||||
0xF8DF # deprecated, use 0x2770 (3.2) or 0x3008+0xF87C # Dingbats-0x88
|
||||
0xF8E0 # deprecated, use 0x2771 (3.2) or 0x3009+0xF87C # Dingbats-0x89
|
||||
0xF8E1 # deprecated, use 0x2772 (3.2) or 0x3014 # Dingbats-0x8A
|
||||
0xF8E2 # deprecated, use 0x2773 (3.2) or 0x3015 # Dingbats-0x8B
|
||||
0xF8E3 # deprecated, use 0x2774 (3.2) or 0x007B # Dingbats-0x8C
|
||||
0xF8E4 # deprecated, use 0x2775 (3.2) or 0x007D # Dingbats-0x8D
|
||||
|
||||
# The following (26) are for the Mac OS Symbol encoding.
|
||||
# NOTE: Some of these are deprecated in favor of combinations of standard
|
||||
# characters and transcoding hints. The deprecated characters will still
|
||||
# be loosely mapped to the appropriate Mac OS Symbol character.
|
||||
0xF8E5 # radical extender # Symbol-0x60
|
||||
0xF8E6 # deprecated, use 0x23D0 (4.0) # Symbol-0xBD
|
||||
0xF8E7 # deprecated, use 0x23AF (3.2) # Symbol-0xBE
|
||||
0xF8E8 # deprecated, use 0x00AE+0xF87F # Symbol-0xE2
|
||||
0xF8E9 # deprecated, use 0x00A9+0xF87F # Symbol-0xE3
|
||||
0xF8EA # deprecated, use 0x2122+0xF87F # Symbol-0xE4
|
||||
0xF8EB # deprecated, use 0x239B (3.2) or 0x0028+0xF870 # Symbol-0xE6
|
||||
0xF8EC # deprecated, use 0x239C (3.2) or 0x0028+0xF871 # Symbol-0xE7
|
||||
0xF8ED # deprecated, use 0x239D (3.2) or 0x0028+0xF872 # Symbol-0xE8
|
||||
0xF8EE # deprecated, use 0x23A1 (3.2) or 0x005B+0xF870 # Symbol-0xE9
|
||||
0xF8EF # deprecated, use 0x23A2 (3.2) or 0x005B+0xF871 # Symbol-0xEA
|
||||
0xF8F0 # deprecated, use 0x23A3 (3.2) or 0x005B+0xF872 # Symbol-0xEB
|
||||
0xF8F1 # deprecated, use 0x23A7 (3.2) or 0x007B+0xF870 # Symbol-0xEC
|
||||
0xF8F2 # deprecated, use 0x23A8 (3.2) or 0x007B+0xF871 # Symbol-0xED
|
||||
0xF8F3 # deprecated, use 0x23A9 (3.2) or 0x007B+0xF872 # Symbol-0xEE
|
||||
0xF8F4 # deprecated, use 0x23AA (3.2) # Symbol-0xEF
|
||||
0xF8F5 # deprecated, use 0x23AE (3.2) or 0x222B+0xF871 # Symbol-0xF4
|
||||
0xF8F6 # deprecated, use 0x239E (3.2) or 0x0029+0xF870 # Symbol-0xF6
|
||||
0xF8F7 # deprecated, use 0x239F (3.2) or 0x0029+0xF871 # Symbol-0xF7
|
||||
0xF8F8 # deprecated, use 0x23A0 (3.2) or 0x0029+0xF872 # Symbol-0xF8
|
||||
0xF8F9 # deprecated, use 0x23A4 (3.2) or 0x005D+0xF870 # Symbol-0xF9
|
||||
0xF8FA # deprecated, use 0x23A5 (3.2) or 0x005D+0xF871 # Symbol-0xFA
|
||||
0xF8FB # deprecated, use 0x23A6 (3.2) or 0x005D+0xF872 # Symbol-0xFB
|
||||
0xF8FC # deprecated, use 0x23AB (3.2) or 0x007D+0xF870 # Symbol-0xFC
|
||||
0xF8FD # deprecated, use 0x23AC (3.2) or 0x007D+0xF871 # Symbol-0xFD
|
||||
0xF8FE # deprecated, use 0x23AD (3.2) or 0x007D+0xF872 # Symbol-0xFE
|
||||
|
||||
# The following (1) is for the Mac OS Roman encoding
|
||||
# (also used in Symbol & Croatian).
|
||||
# NOTE: The graphic image associated with the Apple logo character is
|
||||
# not authorized for use without permission of Apple, and unauthorized
|
||||
# use might constitute trademark infringement.
|
||||
0xF8FF # Apple logo # Roman-0xF0, Symbol-0xF0, Croatian-0xD8
|
351
charmap/CROATIAN.TXT
Normal file
|
@ -0,0 +1,351 @@
|
|||
#=======================================================================
|
||||
# File name: CROATIAN.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Croatian
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-04 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal
|
||||
# utom<b3>.
|
||||
# b02 1999-Sep-22 Encoding changed for Mac OS 8.5; change
|
||||
# mapping of 0xDB from CURRENCY SIGN to EURO
|
||||
# SIGN. Update contact e-mail address. Matches
|
||||
# internal utom<b2>, ufrm<b2>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n07 1998-Feb-05 Minor update to header comments
|
||||
# n05 1997-Dec-14 Update to match internal utom<5>, ufrm<16>:
|
||||
# Change standard mapping for 0xBD from U+2126
|
||||
# to its canonical decomposition, U+03A9.
|
||||
# n03 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<6>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Croatian code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Croatian code order.
|
||||
#
|
||||
# One of these mappings requires the use of a corporate character.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Croatian character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Croatian:
|
||||
# -------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# Mac OS Croatian is used for Croatian and Slovene.
|
||||
#
|
||||
# The Mac OS Croatian encoding shares the script code smRoman
|
||||
# (0) with the standard Mac OS Roman encoding. To determine if
|
||||
# the Croatian encoding is being used, you must check if the
|
||||
# system region code is 68, verCroatia (or 25, verYugoCroatian,
|
||||
# only used in older systems).
|
||||
#
|
||||
# This character set is a variant of standard Mac OS Roman
|
||||
# encoding, adding five accented letter case pairs to handle
|
||||
# Croatian. It has 20 code point differences from standard
|
||||
# Mac OS Roman, but only 10 differences in repertoire.
|
||||
#
|
||||
# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was
|
||||
# mapped to U+00A4. In Mac OS 8.5 and later versions, code point
|
||||
# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard
|
||||
# Apple fonts are updated for Mac OS 8.5 to reflect this. There is
|
||||
# a "currency sign" variant of the Mac OS Croatian encoding that
|
||||
# still maps 0xDB to U+00A4; this can be used for older fonts.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# The following corporate zone Unicode character is used in this
|
||||
# mapping:
|
||||
#
|
||||
# 0xF8FF Apple logo
|
||||
#
|
||||
# NOTE: The graphic image associated with the Apple logo character
|
||||
# is not authorized for use without permission of Apple, and
|
||||
# unauthorized use might constitute trademark infringement.
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version n07 to version b02:
|
||||
#
|
||||
# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from
|
||||
# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC).
|
||||
#
|
||||
# Changes from version n03 to version n05:
|
||||
#
|
||||
# - Change mapping of 0xBD from U+2126 to its canonical
|
||||
# decomposition, U+03A9.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE
|
||||
0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 0x2020 # DAGGER
|
||||
0xA1 0x00B0 # DEGREE SIGN
|
||||
0xA2 0x00A2 # CENT SIGN
|
||||
0xA3 0x00A3 # POUND SIGN
|
||||
0xA4 0x00A7 # SECTION SIGN
|
||||
0xA5 0x2022 # BULLET
|
||||
0xA6 0x00B6 # PILCROW SIGN
|
||||
0xA7 0x00DF # LATIN SMALL LETTER SHARP S
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x0160 # LATIN CAPITAL LETTER S WITH CARON
|
||||
0xAA 0x2122 # TRADE MARK SIGN
|
||||
0xAB 0x00B4 # ACUTE ACCENT
|
||||
0xAC 0x00A8 # DIAERESIS
|
||||
0xAD 0x2260 # NOT EQUAL TO
|
||||
0xAE 0x017D # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0xB0 0x221E # INFINITY
|
||||
0xB1 0x00B1 # PLUS-MINUS SIGN
|
||||
0xB2 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x2206 # INCREMENT
|
||||
0xB5 0x00B5 # MICRO SIGN
|
||||
0xB6 0x2202 # PARTIAL DIFFERENTIAL
|
||||
0xB7 0x2211 # N-ARY SUMMATION
|
||||
0xB8 0x220F # N-ARY PRODUCT
|
||||
0xB9 0x0161 # LATIN SMALL LETTER S WITH CARON
|
||||
0xBA 0x222B # INTEGRAL
|
||||
0xBB 0x00AA # FEMININE ORDINAL INDICATOR
|
||||
0xBC 0x00BA # MASCULINE ORDINAL INDICATOR
|
||||
0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA
|
||||
0xBE 0x017E # LATIN SMALL LETTER Z WITH CARON
|
||||
0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE
|
||||
0xC0 0x00BF # INVERTED QUESTION MARK
|
||||
0xC1 0x00A1 # INVERTED EXCLAMATION MARK
|
||||
0xC2 0x00AC # NOT SIGN
|
||||
0xC3 0x221A # SQUARE ROOT
|
||||
0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK
|
||||
0xC5 0x2248 # ALMOST EQUAL TO
|
||||
0xC6 0x0106 # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC8 0x010C # LATIN CAPITAL LETTER C WITH CARON
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0xCE 0x0152 # LATIN CAPITAL LIGATURE OE
|
||||
0xCF 0x0153 # LATIN SMALL LIGATURE OE
|
||||
0xD0 0x0110 # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0xD1 0x2014 # EM DASH
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x00F7 # DIVISION SIGN
|
||||
0xD7 0x25CA # LOZENGE
|
||||
0xD8 0xF8FF # Apple logo
|
||||
0xD9 0x00A9 # COPYRIGHT SIGN
|
||||
0xDA 0x2044 # FRACTION SLASH
|
||||
0xDB 0x20AC # EURO SIGN
|
||||
0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0xDE 0x00C6 # LATIN CAPITAL LETTER AE
|
||||
0xDF 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xE0 0x2013 # EN DASH
|
||||
0xE1 0x00B7 # MIDDLE DOT
|
||||
0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK
|
||||
0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK
|
||||
0xE4 0x2030 # PER MILLE SIGN
|
||||
0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0xE6 0x0107 # LATIN SMALL LETTER C WITH ACUTE
|
||||
0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0xE8 0x010D # LATIN SMALL LETTER C WITH CARON
|
||||
0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0xF0 0x0111 # LATIN SMALL LETTER D WITH STROKE
|
||||
0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I
|
||||
0xF6 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0xF7 0x02DC # SMALL TILDE
|
||||
0xF8 0x00AF # MACRON
|
||||
0xF9 0x03C0 # GREEK SMALL LETTER PI
|
||||
0xFA 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0xFB 0x02DA # RING ABOVE
|
||||
0xFC 0x00B8 # CEDILLA
|
||||
0xFD 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0xFE 0x00E6 # LATIN SMALL LETTER AE
|
||||
0xFF 0x02C7 # CARON
|
352
charmap/CYRILLIC.TXT
Normal file
|
@ -0,0 +1,352 @@
|
|||
#=======================================================================
|
||||
# File name: CYRILLIC.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Cyrillic
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c03 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal
|
||||
# utom<b2>.
|
||||
# b02 1999-Sep-22 Encoding changed for Mac OS 9.0 to merge
|
||||
# with Mac OS Ukrainian and support EURO SIGN;
|
||||
# Change mappings for 0xA2, 0xB6, and 0xFF.
|
||||
# Update contact e-mail address. Matches
|
||||
# internal utom<b2>, ufrm<b2>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n05 1998-Feb-05 Update header comments to new format; no
|
||||
# mapping changes. Matches internal utom<n3>,
|
||||
# ufrm<n13>, and Text Encoding Converter
|
||||
# version 1.3.
|
||||
# n03 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n5>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Cyrillic code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Cyrillic code order.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Cyrillic character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Cyrillic:
|
||||
# -------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported directly in programming
|
||||
# interfaces for QuickDraw Text, the Script Manager, and related
|
||||
# Text Utilities. For other purposes it is supported via transcoding
|
||||
# to and from Unicode.
|
||||
#
|
||||
# This is the "Euro sign" version of Mac Cyrillic for Mac OS 9.0 and
|
||||
# later. Before Mac OS 9.0, there were two separate Slavic Cyrillic
|
||||
# encodings:
|
||||
#
|
||||
# 1. The Cyrillic currency sign variant (used for localized Russian
|
||||
# and Bulgarian systems), which had the following:
|
||||
# 0xA2 U+00A2 CENT SIGN
|
||||
# 0xB6 U+2202 PARTIAL DIFFERENTIAL
|
||||
# 0xFF U+00A4 CURRENCY SIGN
|
||||
#
|
||||
# 2. The Ukrainian currency sign variant (used for localized Ukrainian
|
||||
# systems and the pre-9.0 Cyrillic Language Kit), which had the
|
||||
# following:
|
||||
# 0xA2 U+0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
# 0xB6 U+0491 CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
# 0xFF U+00A4 CURRENCY SIGN
|
||||
#
|
||||
# This new Cyrillic Euro sign version is based on the old Ukrainian
|
||||
# currency sign variant, with 0xFF changed to be EURO SIGN.
|
||||
#
|
||||
# The Mac OS Cyrillic encoding includes the Cyrillic letter repertoire
|
||||
# of ISO 8859-5 (although not at the same code points). This covers
|
||||
# most of the Slavic languages written in Cyrillic script.
|
||||
#
|
||||
# The Mac OS Cyrillic encoding also includes a number of characters
|
||||
# needed for the Mac OS user interface and localization (e.g.
|
||||
# ellipsis, bullet, copyright sign). All of the characters in Mac OS
|
||||
# Cyrillic that are also in the Mac OS Roman encoding are at the
|
||||
# same code point in both; this improves application compatibility.
|
||||
#
|
||||
# Note: There is a common Ukrainian glyph variation in which the glyph
|
||||
# for CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I may or may not
|
||||
# have a dot above.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version n05 to version b02:
|
||||
#
|
||||
# - Encoding changed for Mac OS 9.0 to merge with Mac OS Ukrainian and
|
||||
# support EURO SIGN. 0xA2 changed from U+00A2 to U+0490; 0xB6 changed
|
||||
# from U+2202 to U+0491; 0xFF changed from U+00A4 to U+20AC.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x0410 # CYRILLIC CAPITAL LETTER A
|
||||
0x81 0x0411 # CYRILLIC CAPITAL LETTER BE
|
||||
0x82 0x0412 # CYRILLIC CAPITAL LETTER VE
|
||||
0x83 0x0413 # CYRILLIC CAPITAL LETTER GHE
|
||||
0x84 0x0414 # CYRILLIC CAPITAL LETTER DE
|
||||
0x85 0x0415 # CYRILLIC CAPITAL LETTER IE
|
||||
0x86 0x0416 # CYRILLIC CAPITAL LETTER ZHE
|
||||
0x87 0x0417 # CYRILLIC CAPITAL LETTER ZE
|
||||
0x88 0x0418 # CYRILLIC CAPITAL LETTER I
|
||||
0x89 0x0419 # CYRILLIC CAPITAL LETTER SHORT I
|
||||
0x8A 0x041A # CYRILLIC CAPITAL LETTER KA
|
||||
0x8B 0x041B # CYRILLIC CAPITAL LETTER EL
|
||||
0x8C 0x041C # CYRILLIC CAPITAL LETTER EM
|
||||
0x8D 0x041D # CYRILLIC CAPITAL LETTER EN
|
||||
0x8E 0x041E # CYRILLIC CAPITAL LETTER O
|
||||
0x8F 0x041F # CYRILLIC CAPITAL LETTER PE
|
||||
0x90 0x0420 # CYRILLIC CAPITAL LETTER ER
|
||||
0x91 0x0421 # CYRILLIC CAPITAL LETTER ES
|
||||
0x92 0x0422 # CYRILLIC CAPITAL LETTER TE
|
||||
0x93 0x0423 # CYRILLIC CAPITAL LETTER U
|
||||
0x94 0x0424 # CYRILLIC CAPITAL LETTER EF
|
||||
0x95 0x0425 # CYRILLIC CAPITAL LETTER HA
|
||||
0x96 0x0426 # CYRILLIC CAPITAL LETTER TSE
|
||||
0x97 0x0427 # CYRILLIC CAPITAL LETTER CHE
|
||||
0x98 0x0428 # CYRILLIC CAPITAL LETTER SHA
|
||||
0x99 0x0429 # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x9A 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
0x9B 0x042B # CYRILLIC CAPITAL LETTER YERU
|
||||
0x9C 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
0x9D 0x042D # CYRILLIC CAPITAL LETTER E
|
||||
0x9E 0x042E # CYRILLIC CAPITAL LETTER YU
|
||||
0x9F 0x042F # CYRILLIC CAPITAL LETTER YA
|
||||
0xA0 0x2020 # DAGGER
|
||||
0xA1 0x00B0 # DEGREE SIGN
|
||||
0xA2 0x0490 # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
0xA3 0x00A3 # POUND SIGN
|
||||
0xA4 0x00A7 # SECTION SIGN
|
||||
0xA5 0x2022 # BULLET
|
||||
0xA6 0x00B6 # PILCROW SIGN
|
||||
0xA7 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x00A9 # COPYRIGHT SIGN
|
||||
0xAA 0x2122 # TRADE MARK SIGN
|
||||
0xAB 0x0402 # CYRILLIC CAPITAL LETTER DJE
|
||||
0xAC 0x0452 # CYRILLIC SMALL LETTER DJE
|
||||
0xAD 0x2260 # NOT EQUAL TO
|
||||
0xAE 0x0403 # CYRILLIC CAPITAL LETTER GJE
|
||||
0xAF 0x0453 # CYRILLIC SMALL LETTER GJE
|
||||
0xB0 0x221E # INFINITY
|
||||
0xB1 0x00B1 # PLUS-MINUS SIGN
|
||||
0xB2 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0xB5 0x00B5 # MICRO SIGN
|
||||
0xB6 0x0491 # CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
0xB7 0x0408 # CYRILLIC CAPITAL LETTER JE
|
||||
0xB8 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0xB9 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
0xBA 0x0407 # CYRILLIC CAPITAL LETTER YI
|
||||
0xBB 0x0457 # CYRILLIC SMALL LETTER YI
|
||||
0xBC 0x0409 # CYRILLIC CAPITAL LETTER LJE
|
||||
0xBD 0x0459 # CYRILLIC SMALL LETTER LJE
|
||||
0xBE 0x040A # CYRILLIC CAPITAL LETTER NJE
|
||||
0xBF 0x045A # CYRILLIC SMALL LETTER NJE
|
||||
0xC0 0x0458 # CYRILLIC SMALL LETTER JE
|
||||
0xC1 0x0405 # CYRILLIC CAPITAL LETTER DZE
|
||||
0xC2 0x00AC # NOT SIGN
|
||||
0xC3 0x221A # SQUARE ROOT
|
||||
0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK
|
||||
0xC5 0x2248 # ALMOST EQUAL TO
|
||||
0xC6 0x2206 # INCREMENT
|
||||
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x040B # CYRILLIC CAPITAL LETTER TSHE
|
||||
0xCC 0x045B # CYRILLIC SMALL LETTER TSHE
|
||||
0xCD 0x040C # CYRILLIC CAPITAL LETTER KJE
|
||||
0xCE 0x045C # CYRILLIC SMALL LETTER KJE
|
||||
0xCF 0x0455 # CYRILLIC SMALL LETTER DZE
|
||||
0xD0 0x2013 # EN DASH
|
||||
0xD1 0x2014 # EM DASH
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x00F7 # DIVISION SIGN
|
||||
0xD7 0x201E # DOUBLE LOW-9 QUOTATION MARK
|
||||
0xD8 0x040E # CYRILLIC CAPITAL LETTER SHORT U
|
||||
0xD9 0x045E # CYRILLIC SMALL LETTER SHORT U
|
||||
0xDA 0x040F # CYRILLIC CAPITAL LETTER DZHE
|
||||
0xDB 0x045F # CYRILLIC SMALL LETTER DZHE
|
||||
0xDC 0x2116 # NUMERO SIGN
|
||||
0xDD 0x0401 # CYRILLIC CAPITAL LETTER IO
|
||||
0xDE 0x0451 # CYRILLIC SMALL LETTER IO
|
||||
0xDF 0x044F # CYRILLIC SMALL LETTER YA
|
||||
0xE0 0x0430 # CYRILLIC SMALL LETTER A
|
||||
0xE1 0x0431 # CYRILLIC SMALL LETTER BE
|
||||
0xE2 0x0432 # CYRILLIC SMALL LETTER VE
|
||||
0xE3 0x0433 # CYRILLIC SMALL LETTER GHE
|
||||
0xE4 0x0434 # CYRILLIC SMALL LETTER DE
|
||||
0xE5 0x0435 # CYRILLIC SMALL LETTER IE
|
||||
0xE6 0x0436 # CYRILLIC SMALL LETTER ZHE
|
||||
0xE7 0x0437 # CYRILLIC SMALL LETTER ZE
|
||||
0xE8 0x0438 # CYRILLIC SMALL LETTER I
|
||||
0xE9 0x0439 # CYRILLIC SMALL LETTER SHORT I
|
||||
0xEA 0x043A # CYRILLIC SMALL LETTER KA
|
||||
0xEB 0x043B # CYRILLIC SMALL LETTER EL
|
||||
0xEC 0x043C # CYRILLIC SMALL LETTER EM
|
||||
0xED 0x043D # CYRILLIC SMALL LETTER EN
|
||||
0xEE 0x043E # CYRILLIC SMALL LETTER O
|
||||
0xEF 0x043F # CYRILLIC SMALL LETTER PE
|
||||
0xF0 0x0440 # CYRILLIC SMALL LETTER ER
|
||||
0xF1 0x0441 # CYRILLIC SMALL LETTER ES
|
||||
0xF2 0x0442 # CYRILLIC SMALL LETTER TE
|
||||
0xF3 0x0443 # CYRILLIC SMALL LETTER U
|
||||
0xF4 0x0444 # CYRILLIC SMALL LETTER EF
|
||||
0xF5 0x0445 # CYRILLIC SMALL LETTER HA
|
||||
0xF6 0x0446 # CYRILLIC SMALL LETTER TSE
|
||||
0xF7 0x0447 # CYRILLIC SMALL LETTER CHE
|
||||
0xF8 0x0448 # CYRILLIC SMALL LETTER SHA
|
||||
0xF9 0x0449 # CYRILLIC SMALL LETTER SHCHA
|
||||
0xFA 0x044A # CYRILLIC SMALL LETTER HARD SIGN
|
||||
0xFB 0x044B # CYRILLIC SMALL LETTER YERU
|
||||
0xFC 0x044C # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
0xFD 0x044D # CYRILLIC SMALL LETTER E
|
||||
0xFE 0x044E # CYRILLIC SMALL LETTER YU
|
||||
0xFF 0x20AC # EURO SIGN
|
447
charmap/DEVANAGA.TXT
Normal file
|
@ -0,0 +1,447 @@
|
|||
#=======================================================================
|
||||
# File name: DEVANAGA.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Devanagari
|
||||
# encoding to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments; add section on
|
||||
# roundtrip considerations. Matches internal
|
||||
# xml <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update URLs. Matches internal utom<b1>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b1>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n04 1998-Feb-05 First version; matches internal utom<n9>,
|
||||
# ufrm<n15>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Devanagari code or code sequence
|
||||
# (in hex as 0xNN or 0xNN+0xNN)
|
||||
# Column #2 is the corresponding Unicode or Unicode sequence
|
||||
# (in hex as 0xNNNN or 0xNNNN+0xNNNN).
|
||||
# Column #3 is a comment containing the Unicode name or sequence
|
||||
# of names. In some cases an additional comment follows the
|
||||
# Unicode name(s).
|
||||
#
|
||||
# The entries are in two sections. The first section is for pairs of
|
||||
# Mac OS Devanagari code points that must be mapped in a special way.
|
||||
# The second section maps individual code points.
|
||||
#
|
||||
# Within each section, the entries are in Mac OS Devanagari code order.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Devanagari character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Devanagari:
|
||||
# ---------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# Mac OS Devanagari is based on IS 13194:1991 (ISCII-91), with the
|
||||
# addition of several punctuation and symbol characters. However,
|
||||
# Mac OS Devanagari does not support the ATR (attribute) mechanism of
|
||||
# ISCII-91.
|
||||
#
|
||||
# 1. ISCII-91 features in Mac OS Devanagari include:
|
||||
#
|
||||
# a) Overloading of nukta
|
||||
#
|
||||
# In addition to using the nukta (0xE9) like a combining dot below,
|
||||
# nukta is overloaded to function as a general character modifier.
|
||||
# In this role, certain code points followed by 0xE9 are treated as
|
||||
# a two-byte code point representing a character which may be
|
||||
# rather different than the characters represented by either of
|
||||
# the code points alone. For example, the character DEVANAGARI OM
|
||||
# (U+0950) is represented in ISCII-91 as candrabindu + nukta.
|
||||
#
|
||||
# b) Explicit halant and soft halant
|
||||
#
|
||||
# A double halant (0xE8 + 0xE8) constitutes an "explicit halant",
|
||||
# which will always appear as a halant instead of causing formation
|
||||
# of a ligature or half-form consonant.
|
||||
#
|
||||
# Halant followed by nukta (0xE8 + 0xE9) constitutes a "soft
|
||||
# halant", which prevents formation of a ligature and instead
|
||||
# retains the half-form of the first consonant.
|
||||
#
|
||||
# c) Invisible consonant
|
||||
#
|
||||
# The byte 0xD9 (called INV in ISCII-91) is an invisible consonant:
|
||||
# It behaves like a consonant but has no visible appearance. It is
|
||||
# intended to be used (often in combination with halant) to display
|
||||
# dependent forms in isolation, such as the RA forms or consonant
|
||||
# half-forms.
|
||||
#
|
||||
# d) Extensions for Vedic, etc.
|
||||
#
|
||||
# The byte 0xF0 (called EXT in ISCII-91) followed by any byte in
|
||||
# the range 0xA1-0xEE constitutes a two-byte code point which can
|
||||
# be used to represent additional characters for Vedic (or other
|
||||
# extensions); 0xF0 followed by any other byte value constitutes
|
||||
# malformed text. Mac OS Devanagari supports this mechanism, but
|
||||
# does not currently map any of these two-byte code points to
|
||||
# anything.
|
||||
#
|
||||
# 2. Mac OS Devanagari additions
|
||||
#
|
||||
# Mac OS Devanagari adds characters using the code points
|
||||
# 0x80-0x8A and 0x90-0x91 (the latter are some Devanagari additions
|
||||
# from Unicode).
|
||||
#
|
||||
# 3. Unused code points
|
||||
#
|
||||
# The following code points are currently unused, and are not shown
|
||||
# here: 0x8B-0x8F, 0x92-0xA0, 0xEB-0xEF, 0xFB-0xFF. In addition,
|
||||
# 0xF0 is not shown here, but it has a special function as described
|
||||
# above.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# 1. Mapping the byte pairs
|
||||
#
|
||||
# If one of the following byte values is encountered when mapping
|
||||
# Mac OS Devanagari text - 0xA1, 0xA6, 0xA7, 0xAA, 0xDB, 0xDC, 0xDF,
|
||||
# 0xE8, or 0xEA - then the next byte (if there is one) should be
|
||||
# examined. If the next byte is 0xE9 - or also 0xE8, if the first
|
||||
# byte was 0xE8 - then the byte pair should be mapped using the
|
||||
# first section of the mapping table below. Otherwise, each byte
|
||||
# should be mapped using the second section of the mapping table
|
||||
# below.
|
||||
#
|
||||
# - The Unicode Standard, Version 2.0, specifies how explicit
|
||||
# halant and soft halant should be represented in Unicode;
|
||||
# these mappings are used below.
|
||||
#
|
||||
# If the byte value 0xF0 is encountered when mapping Mac OS
|
||||
# Devanagari text, then the next byte should be examined. If there
|
||||
# is no next byte (e.g. 0xF0 at end of buffer), the mapping
|
||||
# process should indicate incomplete character. If there is a next
|
||||
# byte but it is not in the range 0xA1-0xEE, the mapping process
|
||||
# should indicate malformed text. Otherwise, the mapping process
|
||||
# should treat the byte pair as a valid two-byte code point with no
|
||||
# mapping (e.g. map it to QUESTION MARK, REPLACEMENT CHARACTER,
|
||||
# etc.).
|
||||
#
|
||||
# 2. Mapping the invisible consonant
|
||||
#
|
||||
# It has been suggested that INV in ISCII-91 should map to ZERO
|
||||
# WIDTH NON-JOINER in Unicode. However, this causes problems with
|
||||
# roundtrip fidelity: The ISCII-91 sequences 0xE8+0xE8 and 0xE8+0xD9
|
||||
# would map to the same sequence of Unicode characters. We have
|
||||
# instead mapped INV to LEFT-TO-RIGHT MARK, which avoids these
|
||||
# problems.
|
||||
#
|
||||
# 3. Additional loose mappings from Unicode
|
||||
#
|
||||
# These are not preserved in roundtrip mappings.
|
||||
#
|
||||
# U+0958 0xB3+0xE9 # DEVANAGARI LETTER QA
|
||||
# U+0959 0xB4+0xE9 # DEVANAGARI LETTER KHHA
|
||||
# U+095A 0xB5+0xE9 # DEVANAGARI LETTER GHHA
|
||||
# U+095B 0xBA+0xE9 # DEVANAGARI LETTER ZA
|
||||
# U+095C 0xBF+0xE9 # DEVANAGARI LETTER DDDHA
|
||||
# U+095D 0xC0+0xE9 # DEVANAGARI LETTER RHA
|
||||
# U+095E 0xC9+0xE9 # DEVANAGARI LETTER FA
|
||||
#
|
||||
# 4. Roundtrip considerations when mapping to decomposed Unicode
|
||||
#
|
||||
# Both ISCII-91 (hence Mac OS Devanagari) and Unicode provide multiple
|
||||
# ways of representing certain Devanagari consonants. For example,
|
||||
# DEVANAGARI LETTER NNNA can be represented in Unicode as the single
|
||||
# character 0x0929 or as the sequence 0x0928 0x093C; similarly, this
|
||||
# consonant can be represented in Mac OS Devanagari as 0xC7 or as the
|
||||
# sequence 0xC6 0xE9. This leads to some roundtrip problems. First
|
||||
# note that we have the following mappings without such problems:
|
||||
#
|
||||
# ISCII/ standard decomposition of reverse mapping
|
||||
# Mac OS Unicode mapping standard mapping of decomposition
|
||||
# ------ ----------------------- ---------------- ----------------
|
||||
# 0xC6 0x0928 ... LETTER NA 0x0928 (same) 0xC6
|
||||
# 0xCD 0x092F ... LETTER YA 0x092F (same) 0xCD
|
||||
# 0xCF 0x0930 ... LETTER RA 0x0930 (same) 0xCF
|
||||
# 0xD2 0x0933 ... LETTER LLA 0x0933 (same) 0xD2
|
||||
# 0xE9 0x093C ... SIGN NUKTA 0x093C (same) 0xE9
|
||||
#
|
||||
# However, those mappings above cause roundtrip problems for the
|
||||
# following mappings if they are decomposed:
|
||||
#
|
||||
# ISCII/ standard decomposition of reverse mapping
|
||||
# Mac OS Unicode mapping standard mapping of decomposition
|
||||
# ------ ----------------------- ---------------- ----------------
|
||||
# 0xC7 0x0929 ... LETTER NNNA 0x0928 0x093C 0xC6 0xE9
|
||||
# 0xCE 0x095F ... LETTER YYA 0x092F 0x093C 0xCD 0xE9
|
||||
# 0xD0 0x0931 ... LETTER RRA 0x0930 0x093C 0xCF 0xE9
|
||||
# 0xD3 0x0934 ... LETTER LLLA 0x0933 0x093C 0xD2 0xE9
|
||||
#
|
||||
# One solution is to use a grouping transcoding hint with the four
|
||||
# decompositions above to mark the decomposed sequence for special
|
||||
# treatment in transcoding. This yields the following mappings to
|
||||
# decomposed Unicode:
|
||||
#
|
||||
# ISCII/ decomposed
|
||||
# Mac OS Unicode mapping
|
||||
# ------ ----------------
|
||||
# 0xC7 0xF860 0x0928 0x093C
|
||||
# 0xCE 0xF860 0x092F 0x093C
|
||||
# 0xD0 0xF860 0x0930 0x093C
|
||||
# 0xD3 0xF860 0x0933 0x093C
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
##################
|
||||
|
||||
# Section 1: Map the following byte pairs as indicated:
|
||||
# (ZWNJ means ZERO WIDTH NON-JOINER, ZWJ means ZERO WIDTH JOINER)
|
||||
# (Also see note about 0xF0 in comments above)
|
||||
|
||||
0xA1+0xE9 0x0950 # DEVANAGARI OM
|
||||
0xA6+0xE9 0x090C # DEVANAGARI LETTER VOCALIC L
|
||||
0xA7+0xE9 0x0961 # DEVANAGARI LETTER VOCALIC LL
|
||||
0xAA+0xE9 0x0960 # DEVANAGARI LETTER VOCALIC RR
|
||||
0xDB+0xE9 0x0962 # DEVANAGARI VOWEL SIGN VOCALIC L
|
||||
0xDC+0xE9 0x0963 # DEVANAGARI VOWEL SIGN VOCALIC LL
|
||||
0xDF+0xE9 0x0944 # DEVANAGARI VOWEL SIGN VOCALIC RR
|
||||
0xE8+0xE8 0x094D+0x200C # DEVANAGARI SIGN VIRAMA + ZWNJ # explicit halant
|
||||
0xE8+0xE9 0x094D+0x200D # DEVANAGARI SIGN VIRAMA + ZWJ # soft halant
|
||||
0xEA+0xE9 0x093D # DEVANAGARI SIGN AVAGRAHA
|
||||
|
||||
# Section 2: Map the remaining bytes as follows:
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00D7 # MULTIPLICATION SIGN
|
||||
0x81 0x2212 # MINUS SIGN
|
||||
0x82 0x2013 # EN DASH
|
||||
0x83 0x2014 # EM DASH
|
||||
0x84 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0x85 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0x86 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0x87 0x2022 # BULLET
|
||||
0x88 0x00A9 # COPYRIGHT SIGN
|
||||
0x89 0x00AE # REGISTERED SIGN
|
||||
0x8A 0x2122 # TRADE MARK SIGN
|
||||
#
|
||||
0x90 0x0965 # DEVANAGARI DOUBLE DANDA
|
||||
0x91 0x0970 # DEVANAGARI ABBREVIATION SIGN
|
||||
#
|
||||
0xA1 0x0901 # DEVANAGARI SIGN CANDRABINDU
|
||||
0xA2 0x0902 # DEVANAGARI SIGN ANUSVARA
|
||||
0xA3 0x0903 # DEVANAGARI SIGN VISARGA
|
||||
0xA4 0x0905 # DEVANAGARI LETTER A
|
||||
0xA5 0x0906 # DEVANAGARI LETTER AA
|
||||
0xA6 0x0907 # DEVANAGARI LETTER I
|
||||
0xA7 0x0908 # DEVANAGARI LETTER II
|
||||
0xA8 0x0909 # DEVANAGARI LETTER U
|
||||
0xA9 0x090A # DEVANAGARI LETTER UU
|
||||
0xAA 0x090B # DEVANAGARI LETTER VOCALIC R
|
||||
0xAB 0x090E # DEVANAGARI LETTER SHORT E
|
||||
0xAC 0x090F # DEVANAGARI LETTER E
|
||||
0xAD 0x0910 # DEVANAGARI LETTER AI
|
||||
0xAE 0x090D # DEVANAGARI LETTER CANDRA E
|
||||
0xAF 0x0912 # DEVANAGARI LETTER SHORT O
|
||||
0xB0 0x0913 # DEVANAGARI LETTER O
|
||||
0xB1 0x0914 # DEVANAGARI LETTER AU
|
||||
0xB2 0x0911 # DEVANAGARI LETTER CANDRA O
|
||||
0xB3 0x0915 # DEVANAGARI LETTER KA
|
||||
0xB4 0x0916 # DEVANAGARI LETTER KHA
|
||||
0xB5 0x0917 # DEVANAGARI LETTER GA
|
||||
0xB6 0x0918 # DEVANAGARI LETTER GHA
|
||||
0xB7 0x0919 # DEVANAGARI LETTER NGA
|
||||
0xB8 0x091A # DEVANAGARI LETTER CA
|
||||
0xB9 0x091B # DEVANAGARI LETTER CHA
|
||||
0xBA 0x091C # DEVANAGARI LETTER JA
|
||||
0xBB 0x091D # DEVANAGARI LETTER JHA
|
||||
0xBC 0x091E # DEVANAGARI LETTER NYA
|
||||
0xBD 0x091F # DEVANAGARI LETTER TTA
|
||||
0xBE 0x0920 # DEVANAGARI LETTER TTHA
|
||||
0xBF 0x0921 # DEVANAGARI LETTER DDA
|
||||
0xC0 0x0922 # DEVANAGARI LETTER DDHA
|
||||
0xC1 0x0923 # DEVANAGARI LETTER NNA
|
||||
0xC2 0x0924 # DEVANAGARI LETTER TA
|
||||
0xC3 0x0925 # DEVANAGARI LETTER THA
|
||||
0xC4 0x0926 # DEVANAGARI LETTER DA
|
||||
0xC5 0x0927 # DEVANAGARI LETTER DHA
|
||||
0xC6 0x0928 # DEVANAGARI LETTER NA
|
||||
0xC7 0x0929 # DEVANAGARI LETTER NNNA
|
||||
0xC8 0x092A # DEVANAGARI LETTER PA
|
||||
0xC9 0x092B # DEVANAGARI LETTER PHA
|
||||
0xCA 0x092C # DEVANAGARI LETTER BA
|
||||
0xCB 0x092D # DEVANAGARI LETTER BHA
|
||||
0xCC 0x092E # DEVANAGARI LETTER MA
|
||||
0xCD 0x092F # DEVANAGARI LETTER YA
|
||||
0xCE 0x095F # DEVANAGARI LETTER YYA
|
||||
0xCF 0x0930 # DEVANAGARI LETTER RA
|
||||
0xD0 0x0931 # DEVANAGARI LETTER RRA
|
||||
0xD1 0x0932 # DEVANAGARI LETTER LA
|
||||
0xD2 0x0933 # DEVANAGARI LETTER LLA
|
||||
0xD3 0x0934 # DEVANAGARI LETTER LLLA
|
||||
0xD4 0x0935 # DEVANAGARI LETTER VA
|
||||
0xD5 0x0936 # DEVANAGARI LETTER SHA
|
||||
0xD6 0x0937 # DEVANAGARI LETTER SSA
|
||||
0xD7 0x0938 # DEVANAGARI LETTER SA
|
||||
0xD8 0x0939 # DEVANAGARI LETTER HA
|
||||
0xD9 0x200E # LEFT-TO-RIGHT MARK # invisible consonant
|
||||
0xDA 0x093E # DEVANAGARI VOWEL SIGN AA
|
||||
0xDB 0x093F # DEVANAGARI VOWEL SIGN I
|
||||
0xDC 0x0940 # DEVANAGARI VOWEL SIGN II
|
||||
0xDD 0x0941 # DEVANAGARI VOWEL SIGN U
|
||||
0xDE 0x0942 # DEVANAGARI VOWEL SIGN UU
|
||||
0xDF 0x0943 # DEVANAGARI VOWEL SIGN VOCALIC R
|
||||
0xE0 0x0946 # DEVANAGARI VOWEL SIGN SHORT E
|
||||
0xE1 0x0947 # DEVANAGARI VOWEL SIGN E
|
||||
0xE2 0x0948 # DEVANAGARI VOWEL SIGN AI
|
||||
0xE3 0x0945 # DEVANAGARI VOWEL SIGN CANDRA E
|
||||
0xE4 0x094A # DEVANAGARI VOWEL SIGN SHORT O
|
||||
0xE5 0x094B # DEVANAGARI VOWEL SIGN O
|
||||
0xE6 0x094C # DEVANAGARI VOWEL SIGN AU
|
||||
0xE7 0x0949 # DEVANAGARI VOWEL SIGN CANDRA O
|
||||
0xE8 0x094D # DEVANAGARI SIGN VIRAMA # halant
|
||||
0xE9 0x093C # DEVANAGARI SIGN NUKTA
|
||||
0xEA 0x0964 # DEVANAGARI DANDA
|
||||
#
|
||||
0xF1 0x0966 # DEVANAGARI DIGIT ZERO
|
||||
0xF2 0x0967 # DEVANAGARI DIGIT ONE
|
||||
0xF3 0x0968 # DEVANAGARI DIGIT TWO
|
||||
0xF4 0x0969 # DEVANAGARI DIGIT THREE
|
||||
0xF5 0x096A # DEVANAGARI DIGIT FOUR
|
||||
0xF6 0x096B # DEVANAGARI DIGIT FIVE
|
||||
0xF7 0x096C # DEVANAGARI DIGIT SIX
|
||||
0xF8 0x096D # DEVANAGARI DIGIT SEVEN
|
||||
0xF9 0x096E # DEVANAGARI DIGIT EIGHT
|
||||
0xFA 0x096F # DEVANAGARI DIGIT NINE
|
329
charmap/DINGBATS.TXT
Normal file
|
@ -0,0 +1,329 @@
|
|||
#=======================================================================
|
||||
# File name: DINGBATS.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Dingbats
|
||||
# character set to Unicode 3.2 and later.
|
||||
#
|
||||
# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update mappings for 0x80-0x8D to use new
|
||||
# Unicode 3.2 characters. Update URLs, notes.
|
||||
# Matches internal utom<b2>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b1>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n05 1998-Feb-05 Update to match internal utom<n4>, ufrm<n14>,
|
||||
# and Text Encoding Converter version 1.3:
|
||||
# Change all mappings to single corporate-zone
|
||||
# Unicodes to either use standard Unicodes
|
||||
# or standard Unicodes plus transcoding hints;
|
||||
# see details below. Also update header
|
||||
# comments to new format.
|
||||
# n03 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n4>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Dingbats code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode or Unicode sequence
|
||||
# (in hex as 0xNNNN).
|
||||
# Column #3 is a comment containing the Unicode name.
|
||||
# In some cases an additional comment follows the Unicode name.
|
||||
#
|
||||
# The entries are in Mac OS Dingbats code order.
|
||||
#
|
||||
# Some of these mappings require the use of corporate characters.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Dingbats character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Dingbats:
|
||||
# -------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported directly in programming
|
||||
# interfaces for QuickDraw Text, the Script Manager, and related
|
||||
# Text Utilities. For other purposes it is supported via transcoding
|
||||
# to and from Unicode.
|
||||
#
|
||||
# The Mac OS Dingbats encoding shares the script code smRoman
|
||||
# (0) with the standard Mac OS Roman encoding. To determine if
|
||||
# the Dingbats encoding is being used, you must check if the
|
||||
# font name is "Zapf Dingbats".
|
||||
#
|
||||
# The layout of the Dingbats character set is identical to or
|
||||
# a superset of the layout of the Adobe Zapf Dingbats encoding
|
||||
# vector.
|
||||
#
|
||||
# The following code points are unused, and are not shown here:
|
||||
# 0x8E-0xA0, 0xF0, 0xFF.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version b02 to version b03/c01:
|
||||
#
|
||||
# - The mappings for the following Mac OS Dingbats characters
|
||||
# were changed to use standard Unicode characters added for
|
||||
# Unicode 3.2: 0x80-0x8D.
|
||||
#
|
||||
# Changes from version n03 to version n05:
|
||||
#
|
||||
# - The mappings for the following Mac OS Dingbats characters
|
||||
# were changed from single corporate-zone Unicode characters
|
||||
# to standard Unicode characters:
|
||||
# 0x80-0x81, 0x84-0x87, 0x8A-0x8D.
|
||||
#
|
||||
# - The mappings for the following Mac OS Dingbats characters
|
||||
# were changed from single corporate-zone Unicode characters
|
||||
# to combinations of a standard Unicode and a transcoding hint:
|
||||
# 0x82-0x83, 0x88-0x89.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x2701 # UPPER BLADE SCISSORS
|
||||
0x22 0x2702 # BLACK SCISSORS
|
||||
0x23 0x2703 # LOWER BLADE SCISSORS
|
||||
0x24 0x2704 # WHITE SCISSORS
|
||||
0x25 0x260E # BLACK TELEPHONE
|
||||
0x26 0x2706 # TELEPHONE LOCATION SIGN
|
||||
0x27 0x2707 # TAPE DRIVE
|
||||
0x28 0x2708 # AIRPLANE
|
||||
0x29 0x2709 # ENVELOPE
|
||||
0x2A 0x261B # BLACK RIGHT POINTING INDEX
|
||||
0x2B 0x261E # WHITE RIGHT POINTING INDEX
|
||||
0x2C 0x270C # VICTORY HAND
|
||||
0x2D 0x270D # WRITING HAND
|
||||
0x2E 0x270E # LOWER RIGHT PENCIL
|
||||
0x2F 0x270F # PENCIL
|
||||
0x30 0x2710 # UPPER RIGHT PENCIL
|
||||
0x31 0x2711 # WHITE NIB
|
||||
0x32 0x2712 # BLACK NIB
|
||||
0x33 0x2713 # CHECK MARK
|
||||
0x34 0x2714 # HEAVY CHECK MARK
|
||||
0x35 0x2715 # MULTIPLICATION X
|
||||
0x36 0x2716 # HEAVY MULTIPLICATION X
|
||||
0x37 0x2717 # BALLOT X
|
||||
0x38 0x2718 # HEAVY BALLOT X
|
||||
0x39 0x2719 # OUTLINED GREEK CROSS
|
||||
0x3A 0x271A # HEAVY GREEK CROSS
|
||||
0x3B 0x271B # OPEN CENTRE CROSS
|
||||
0x3C 0x271C # HEAVY OPEN CENTRE CROSS
|
||||
0x3D 0x271D # LATIN CROSS
|
||||
0x3E 0x271E # SHADOWED WHITE LATIN CROSS
|
||||
0x3F 0x271F # OUTLINED LATIN CROSS
|
||||
0x40 0x2720 # MALTESE CROSS
|
||||
0x41 0x2721 # STAR OF DAVID
|
||||
0x42 0x2722 # FOUR TEARDROP-SPOKED ASTERISK
|
||||
0x43 0x2723 # FOUR BALLOON-SPOKED ASTERISK
|
||||
0x44 0x2724 # HEAVY FOUR BALLOON-SPOKED ASTERISK
|
||||
0x45 0x2725 # FOUR CLUB-SPOKED ASTERISK
|
||||
0x46 0x2726 # BLACK FOUR POINTED STAR
|
||||
0x47 0x2727 # WHITE FOUR POINTED STAR
|
||||
0x48 0x2605 # BLACK STAR
|
||||
0x49 0x2729 # STRESS OUTLINED WHITE STAR
|
||||
0x4A 0x272A # CIRCLED WHITE STAR
|
||||
0x4B 0x272B # OPEN CENTRE BLACK STAR
|
||||
0x4C 0x272C # BLACK CENTRE WHITE STAR
|
||||
0x4D 0x272D # OUTLINED BLACK STAR
|
||||
0x4E 0x272E # HEAVY OUTLINED BLACK STAR
|
||||
0x4F 0x272F # PINWHEEL STAR
|
||||
0x50 0x2730 # SHADOWED WHITE STAR
|
||||
0x51 0x2731 # HEAVY ASTERISK
|
||||
0x52 0x2732 # OPEN CENTRE ASTERISK
|
||||
0x53 0x2733 # EIGHT SPOKED ASTERISK
|
||||
0x54 0x2734 # EIGHT POINTED BLACK STAR
|
||||
0x55 0x2735 # EIGHT POINTED PINWHEEL STAR
|
||||
0x56 0x2736 # SIX POINTED BLACK STAR
|
||||
0x57 0x2737 # EIGHT POINTED RECTILINEAR BLACK STAR
|
||||
0x58 0x2738 # HEAVY EIGHT POINTED RECTILINEAR BLACK STAR
|
||||
0x59 0x2739 # TWELVE POINTED BLACK STAR
|
||||
0x5A 0x273A # SIXTEEN POINTED ASTERISK
|
||||
0x5B 0x273B # TEARDROP-SPOKED ASTERISK
|
||||
0x5C 0x273C # OPEN CENTRE TEARDROP-SPOKED ASTERISK
|
||||
0x5D 0x273D # HEAVY TEARDROP-SPOKED ASTERISK
|
||||
0x5E 0x273E # SIX PETALLED BLACK AND WHITE FLORETTE
|
||||
0x5F 0x273F # BLACK FLORETTE
|
||||
0x60 0x2740 # WHITE FLORETTE
|
||||
0x61 0x2741 # EIGHT PETALLED OUTLINED BLACK FLORETTE
|
||||
0x62 0x2742 # CIRCLED OPEN CENTRE EIGHT POINTED STAR
|
||||
0x63 0x2743 # HEAVY TEARDROP-SPOKED PINWHEEL ASTERISK
|
||||
0x64 0x2744 # SNOWFLAKE
|
||||
0x65 0x2745 # TIGHT TRIFOLIATE SNOWFLAKE
|
||||
0x66 0x2746 # HEAVY CHEVRON SNOWFLAKE
|
||||
0x67 0x2747 # SPARKLE
|
||||
0x68 0x2748 # HEAVY SPARKLE
|
||||
0x69 0x2749 # BALLOON-SPOKED ASTERISK
|
||||
0x6A 0x274A # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
|
||||
0x6B 0x274B # HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
|
||||
0x6C 0x25CF # BLACK CIRCLE
|
||||
0x6D 0x274D # SHADOWED WHITE CIRCLE
|
||||
0x6E 0x25A0 # BLACK SQUARE
|
||||
0x6F 0x274F # LOWER RIGHT DROP-SHADOWED WHITE SQUARE
|
||||
0x70 0x2750 # UPPER RIGHT DROP-SHADOWED WHITE SQUARE
|
||||
0x71 0x2751 # LOWER RIGHT SHADOWED WHITE SQUARE
|
||||
0x72 0x2752 # UPPER RIGHT SHADOWED WHITE SQUARE
|
||||
0x73 0x25B2 # BLACK UP-POINTING TRIANGLE
|
||||
0x74 0x25BC # BLACK DOWN-POINTING TRIANGLE
|
||||
0x75 0x25C6 # BLACK DIAMOND
|
||||
0x76 0x2756 # BLACK DIAMOND MINUS WHITE X
|
||||
0x77 0x25D7 # RIGHT HALF BLACK CIRCLE
|
||||
0x78 0x2758 # LIGHT VERTICAL BAR
|
||||
0x79 0x2759 # MEDIUM VERTICAL BAR
|
||||
0x7A 0x275A # HEAVY VERTICAL BAR
|
||||
0x7B 0x275B # HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT
|
||||
0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT
|
||||
0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT
|
||||
0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
|
||||
#
|
||||
0x80 0x2768 # MEDIUM LEFT PARENTHESIS ORNAMENT # for Unicode 3.2 and later
|
||||
0x81 0x2769 # MEDIUM RIGHT PARENTHESIS ORNAMENT # for Unicode 3.2 and later
|
||||
0x82 0x276A # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT # for Unicode 3.2 and later
|
||||
0x83 0x276B # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT # for Unicode 3.2 and later
|
||||
0x84 0x276C # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later
|
||||
0x85 0x276D # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later
|
||||
0x86 0x276E # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT # for Unicode 3.2 and later
|
||||
0x87 0x276F # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT # for Unicode 3.2 and later
|
||||
0x88 0x2770 # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later
|
||||
0x89 0x2771 # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later
|
||||
0x8A 0x2772 # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT # for Unicode 3.2 and later
|
||||
0x8B 0x2773 # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT # for Unicode 3.2 and later
|
||||
0x8C 0x2774 # MEDIUM LEFT CURLY BRACKET ORNAMENT # for Unicode 3.2 and later
|
||||
0x8D 0x2775 # MEDIUM RIGHT CURLY BRACKET ORNAMENT # for Unicode 3.2 and later
|
||||
#
|
||||
0xA1 0x2761 # CURVED STEM PARAGRAPH SIGN ORNAMENT
|
||||
0xA2 0x2762 # HEAVY EXCLAMATION MARK ORNAMENT
|
||||
0xA3 0x2763 # HEAVY HEART EXCLAMATION MARK ORNAMENT
|
||||
0xA4 0x2764 # HEAVY BLACK HEART
|
||||
0xA5 0x2765 # ROTATED HEAVY BLACK HEART BULLET
|
||||
0xA6 0x2766 # FLORAL HEART
|
||||
0xA7 0x2767 # ROTATED FLORAL HEART BULLET
|
||||
0xA8 0x2663 # BLACK CLUB SUIT
|
||||
0xA9 0x2666 # BLACK DIAMOND SUIT
|
||||
0xAA 0x2665 # BLACK HEART SUIT
|
||||
0xAB 0x2660 # BLACK SPADE SUIT
|
||||
0xAC 0x2460 # CIRCLED DIGIT ONE
|
||||
0xAD 0x2461 # CIRCLED DIGIT TWO
|
||||
0xAE 0x2462 # CIRCLED DIGIT THREE
|
||||
0xAF 0x2463 # CIRCLED DIGIT FOUR
|
||||
0xB0 0x2464 # CIRCLED DIGIT FIVE
|
||||
0xB1 0x2465 # CIRCLED DIGIT SIX
|
||||
0xB2 0x2466 # CIRCLED DIGIT SEVEN
|
||||
0xB3 0x2467 # CIRCLED DIGIT EIGHT
|
||||
0xB4 0x2468 # CIRCLED DIGIT NINE
|
||||
0xB5 0x2469 # CIRCLED NUMBER TEN
|
||||
0xB6 0x2776 # DINGBAT NEGATIVE CIRCLED DIGIT ONE
|
||||
0xB7 0x2777 # DINGBAT NEGATIVE CIRCLED DIGIT TWO
|
||||
0xB8 0x2778 # DINGBAT NEGATIVE CIRCLED DIGIT THREE
|
||||
0xB9 0x2779 # DINGBAT NEGATIVE CIRCLED DIGIT FOUR
|
||||
0xBA 0x277A # DINGBAT NEGATIVE CIRCLED DIGIT FIVE
|
||||
0xBB 0x277B # DINGBAT NEGATIVE CIRCLED DIGIT SIX
|
||||
0xBC 0x277C # DINGBAT NEGATIVE CIRCLED DIGIT SEVEN
|
||||
0xBD 0x277D # DINGBAT NEGATIVE CIRCLED DIGIT EIGHT
|
||||
0xBE 0x277E # DINGBAT NEGATIVE CIRCLED DIGIT NINE
|
||||
0xBF 0x277F # DINGBAT NEGATIVE CIRCLED NUMBER TEN
|
||||
0xC0 0x2780 # DINGBAT CIRCLED SANS-SERIF DIGIT ONE
|
||||
0xC1 0x2781 # DINGBAT CIRCLED SANS-SERIF DIGIT TWO
|
||||
0xC2 0x2782 # DINGBAT CIRCLED SANS-SERIF DIGIT THREE
|
||||
0xC3 0x2783 # DINGBAT CIRCLED SANS-SERIF DIGIT FOUR
|
||||
0xC4 0x2784 # DINGBAT CIRCLED SANS-SERIF DIGIT FIVE
|
||||
0xC5 0x2785 # DINGBAT CIRCLED SANS-SERIF DIGIT SIX
|
||||
0xC6 0x2786 # DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN
|
||||
0xC7 0x2787 # DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT
|
||||
0xC8 0x2788 # DINGBAT CIRCLED SANS-SERIF DIGIT NINE
|
||||
0xC9 0x2789 # DINGBAT CIRCLED SANS-SERIF NUMBER TEN
|
||||
0xCA 0x278A # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE
|
||||
0xCB 0x278B # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO
|
||||
0xCC 0x278C # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE
|
||||
0xCD 0x278D # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR
|
||||
0xCE 0x278E # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE
|
||||
0xCF 0x278F # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX
|
||||
0xD0 0x2790 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN
|
||||
0xD1 0x2791 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT
|
||||
0xD2 0x2792 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE
|
||||
0xD3 0x2793 # DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
|
||||
0xD4 0x2794 # HEAVY WIDE-HEADED RIGHTWARDS ARROW
|
||||
0xD5 0x2192 # RIGHTWARDS ARROW
|
||||
0xD6 0x2194 # LEFT RIGHT ARROW
|
||||
0xD7 0x2195 # UP DOWN ARROW
|
||||
0xD8 0x2798 # HEAVY SOUTH EAST ARROW
|
||||
0xD9 0x2799 # HEAVY RIGHTWARDS ARROW
|
||||
0xDA 0x279A # HEAVY NORTH EAST ARROW
|
||||
0xDB 0x279B # DRAFTING POINT RIGHTWARDS ARROW
|
||||
0xDC 0x279C # HEAVY ROUND-TIPPED RIGHTWARDS ARROW
|
||||
0xDD 0x279D # TRIANGLE-HEADED RIGHTWARDS ARROW
|
||||
0xDE 0x279E # HEAVY TRIANGLE-HEADED RIGHTWARDS ARROW
|
||||
0xDF 0x279F # DASHED TRIANGLE-HEADED RIGHTWARDS ARROW
|
||||
0xE0 0x27A0 # HEAVY DASHED TRIANGLE-HEADED RIGHTWARDS ARROW
|
||||
0xE1 0x27A1 # BLACK RIGHTWARDS ARROW
|
||||
0xE2 0x27A2 # THREE-D TOP-LIGHTED RIGHTWARDS ARROWHEAD
|
||||
0xE3 0x27A3 # THREE-D BOTTOM-LIGHTED RIGHTWARDS ARROWHEAD
|
||||
0xE4 0x27A4 # BLACK RIGHTWARDS ARROWHEAD
|
||||
0xE5 0x27A5 # HEAVY BLACK CURVED DOWNWARDS AND RIGHTWARDS ARROW
|
||||
0xE6 0x27A6 # HEAVY BLACK CURVED UPWARDS AND RIGHTWARDS ARROW
|
||||
0xE7 0x27A7 # SQUAT BLACK RIGHTWARDS ARROW
|
||||
0xE8 0x27A8 # HEAVY CONCAVE-POINTED BLACK RIGHTWARDS ARROW
|
||||
0xE9 0x27A9 # RIGHT-SHADED WHITE RIGHTWARDS ARROW
|
||||
0xEA 0x27AA # LEFT-SHADED WHITE RIGHTWARDS ARROW
|
||||
0xEB 0x27AB # BACK-TILTED SHADOWED WHITE RIGHTWARDS ARROW
|
||||
0xEC 0x27AC # FRONT-TILTED SHADOWED WHITE RIGHTWARDS ARROW
|
||||
0xED 0x27AD # HEAVY LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
|
||||
0xEE 0x27AE # HEAVY UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
|
||||
0xEF 0x27AF # NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
|
||||
#
|
||||
0xF1 0x27B1 # NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
|
||||
0xF2 0x27B2 # CIRCLED HEAVY WHITE RIGHTWARDS ARROW
|
||||
0xF3 0x27B3 # WHITE-FEATHERED RIGHTWARDS ARROW
|
||||
0xF4 0x27B4 # BLACK-FEATHERED SOUTH EAST ARROW
|
||||
0xF5 0x27B5 # BLACK-FEATHERED RIGHTWARDS ARROW
|
||||
0xF6 0x27B6 # BLACK-FEATHERED NORTH EAST ARROW
|
||||
0xF7 0x27B7 # HEAVY BLACK-FEATHERED SOUTH EAST ARROW
|
||||
0xF8 0x27B8 # HEAVY BLACK-FEATHERED RIGHTWARDS ARROW
|
||||
0xF9 0x27B9 # HEAVY BLACK-FEATHERED NORTH EAST ARROW
|
||||
0xFA 0x27BA # TEARDROP-BARBED RIGHTWARDS ARROW
|
||||
0xFB 0x27BB # HEAVY TEARDROP-SHANKED RIGHTWARDS ARROW
|
||||
0xFC 0x27BC # WEDGE-TAILED RIGHTWARDS ARROW
|
||||
0xFD 0x27BD # HEAVY WEDGE-TAILED RIGHTWARDS ARROW
|
||||
0xFE 0x27BE # OPEN-OUTLINED RIGHTWARDS ARROW
|
521
charmap/FARSI.TXT
Normal file
|
@ -0,0 +1,521 @@
|
|||
#=======================================================================
|
||||
# File name: FARSI.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Farsi
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1997-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Add comments about character display and
|
||||
# direction overrides. Update URLs, notes.
|
||||
# Matches internal utom<b3>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b1>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n04 1998-Feb-05 Show required Unicode character
|
||||
# directionality in a different way. Matches
|
||||
# internal utom<n3>, ufrm<n9>, and Text
|
||||
# Encoding Converter version 1.3. Update
|
||||
# header comments; include information on
|
||||
# loose mapping of digits, and changes to
|
||||
# mapping for the TrueType variant.
|
||||
# n01 1997-Jul-17 First version. Matches internal utom<n1>,
|
||||
# ufrm<n2>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Farsi code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN),
|
||||
# possibly preceded by a tag indicating required directionality
|
||||
# (i.e. <LR>+0xNNNN or <RL>+0xNNNN).
|
||||
# Column #3 is a comment containing the Unicode name.
|
||||
#
|
||||
# The entries are in Mac OS Farsi code order.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Farsi character set uses the standard control characters at
|
||||
# 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Farsi:
|
||||
# ----------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# 1. General
|
||||
#
|
||||
# The Mac OS Farsi character set is based on the Mac OS Arabic
|
||||
# character set. The main difference is in the right-to-left digits
|
||||
# 0xB0-0xB9: For Mac OS Arabic these correspond to right-left
|
||||
# versions of the Unicode ARABIC-INDIC DIGITs 0660-0669; for
|
||||
# Mac OS Farsi these correspond to right-left versions of the
|
||||
# Unicode EXTENDED ARABIC-INDIC DIGITs 06F0-06F9. The other
|
||||
# difference is in the nature of the font variants.
|
||||
#
|
||||
# For more information, see the comments in the mapping table for
|
||||
# Mac OS Arabic.
|
||||
#
|
||||
# Mac OS Farsi characters 0xEB-0xF2 are non-spacing/combining marks.
|
||||
#
|
||||
# 2. Directional characters and roundtrip fidelity
|
||||
#
|
||||
# The Mac OS Arabic character set (on which Mac OS Farsi is based)
|
||||
# was developed in 1986-1987. At that time the bidirectional line
|
||||
# layout algorithm used in the Mac OS Arabic system was fairly simple;
|
||||
# it used only a few direction classes (instead of the 19 now used in
|
||||
# the Unicode bidirectional algorithm). In order to permit users to
|
||||
# handle some tricky layout problems, certain punctuation and symbol
|
||||
# characters were encoded twice, one with a left-right direction
|
||||
# attribute and the other with a right-left direction attribute. This
|
||||
# is the case in Mac OS Farsi too.
|
||||
#
|
||||
# For example, plus sign is encoded at 0x2B with a left-right
|
||||
# attribute, and at 0xAB with a right-left attribute. However, there
|
||||
# is only one PLUS SIGN character in Unicode. This leads to some
|
||||
# interesting problems when mapping between Mac OS Farsi and Unicode;
|
||||
# see below.
|
||||
#
|
||||
# A related problem is that even when a particular character is
|
||||
# encoded only once in Mac OS Farsi, it may have a different
|
||||
# direction attribute than the corresponding Unicode character.
|
||||
#
|
||||
# For example, the Mac OS Farsi character at 0x93 is HORIZONTAL
|
||||
# ELLIPSIS with strong right-left direction. However, the Unicode
|
||||
# character HORIZONTAL ELLIPSIS has direction class neutral.
|
||||
#
|
||||
# 3. Behavior of ASCII-range numbers in WorldScript
|
||||
#
|
||||
# Mac OS Farsi also has two sets of digit codes.
|
||||
|
||||
# The digits at 0x30-0x39 may be displayed using either European
|
||||
# digit forms or Persian digit forms, depending on context. If there
|
||||
# is a "strong European" character such as a Latin letter on either
|
||||
# side of a sequence consisting of digits 0x30-0x39 and possibly comma
|
||||
# 0x2C or period 0x2E, then the characters will be displayed using
|
||||
# European forms (This will happen even if there are neutral characters
|
||||
# between the digits and the strong European character). Otherwise, the
|
||||
# digits will be displayed using Persian forms, the comma will be
|
||||
# displayed as Arabic thousands separator, and the period as Arabic
|
||||
# decimal separator. In any case, 0x2C, 0x2E, and 0x30-0x39 are always
|
||||
# left-right.
|
||||
#
|
||||
# The digits at 0xB0-0xB9 are always displayed using Persian digit
|
||||
# shapes, and moreover, these digits always have strong right-left
|
||||
# directionality. These are mainly intended for special layout
|
||||
# purposes such as part numbers, etc.
|
||||
#
|
||||
# 4. Font variants
|
||||
#
|
||||
# The table in this file gives the Unicode mappings for the standard
|
||||
# Mac OS Farsi encoding. This encoding is supported by the Tehran font
|
||||
# (the system font for Farsi), and is the encoding supported by the
|
||||
# text processing utilities. However, the other Farsi fonts actually
|
||||
# implement a somewhat different encoding; this affects nine code
|
||||
# points including 0xAA and 0xC0 (which are also affected by font
|
||||
# variants in Mac OS Arabic). For these nine code points the standard
|
||||
# Mac OS Farsi encoding has the following mappings:
|
||||
# 0x8B -> 0x06BA ARABIC LETTER NOON GHUNNA (Urdu)
|
||||
# 0xA4 -> <RL>+0x0024 DOLLAR SIGN, right-left
|
||||
# 0xAA -> <RL>+0x002A ASTERISK, right-left
|
||||
# 0xC0 -> <RL>+0x274A EIGHT TEARDROP-SPOKED PROPELLER ASTERISK,
|
||||
# right-left
|
||||
# 0xF4 -> 0x0679 ARABIC LETTER TTEH (Urdu)
|
||||
# 0xF7 -> 0x06A4 ARABIC LETTER VEH (for transliteration)
|
||||
# 0xF9 -> 0x0688 ARABIC LETTER DDAL (Urdu)
|
||||
# 0xFA -> 0x0691 ARABIC LETTER RREH (Urdu)
|
||||
# 0xFF -> 0x06D2 ARABIC LETTER YEH BARREE (Urdu)
|
||||
#
|
||||
# The TrueType variant is used for the Farsi TrueType fonts: Ashfahan,
|
||||
# Amir, Kamran, Mashad, NadeemFarsi. It differs from the standard
|
||||
# variant in the following ways:
|
||||
# 0x8B -> 0xF882 Arabic ligature "peace on him" (corporate char.)
|
||||
# 0xA4 -> 0xFDFC RIAL SIGN (added in Unicode 3.2)
|
||||
# 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left
|
||||
# 0xC0 -> <RL>+0x002A ASTERISK, right-left
|
||||
# 0xF4 -> <RL>+0x00B0 DEGREE SIGN, right-left
|
||||
# 0xF7 -> 0xFDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM
|
||||
# 0xF9 -> <RL>+0x25CF BLACK CIRCLE, right-left
|
||||
# 0xFA -> <RL>+0x25A0 BLACK SQUARE, right-left
|
||||
# 0xFF -> <RL>+0x25B2 BLACK UP-POINTING TRIANGLE, right-left
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# 1. Matching the direction of Mac OS Farsi characters
|
||||
#
|
||||
# When Mac OS Farsi encodes a character twice but with different
|
||||
# direction attributes for the two code points - as in the case of
|
||||
# plus sign mentioned above - we need a way to map both Mac OS Farsi
|
||||
# code points to Unicode and back again without loss of information.
|
||||
# With the plus sign, for example, mapping one of the Mac OS Farsi
|
||||
# characters to a code in the Unicode corporate use zone is
|
||||
# undesirable, since both of the plus sign characters are likely to
|
||||
# be used in text that is interchanged.
|
||||
#
|
||||
# The problem is solved with the use of direction override characters
|
||||
# and direction-dependent mappings. When mapping from Mac OS Farsi
|
||||
# to Unicode, we use direction overrides as necessary to force the
|
||||
# direction of the resulting Unicode characters.
|
||||
#
|
||||
# The required direction is indicated by a direction tag in the
|
||||
# mappings. A tag of <LR> means the corresponding Unicode character
|
||||
# must have a strong left-right context, and a tag of <RL> indicates
|
||||
# a right-left context.
|
||||
#
|
||||
# For example, the mapping of 0x2B is given as <LR>+0x002B; the
|
||||
# mapping of 0xAB is given as <RL>+0x002B. If we map an isolated
|
||||
# instance of 0x2B to Unicode, it should be mapped as follows (LRO
|
||||
# indicates LEFT-RIGHT OVERRIDE, PDF indicates POP DIRECTION
|
||||
# FORMATTING):
|
||||
#
|
||||
# 0x2B -> 0x202D (LRO) + 0x002B (PLUS SIGN) + 0x202C (PDF)
|
||||
#
|
||||
# When mapping several characters in a row that require direction
|
||||
# forcing, the overrides need only be used at the beginning and end.
|
||||
# For example:
|
||||
#
|
||||
# 0x24 0x20 0x28 0x29 -> 0x202D 0x0024 0x0020 0x0028 0x0029 0x202C
|
||||
#
|
||||
# If neutral characters that require direction forcing are already
|
||||
# between strong-direction characters with matching directionality,
|
||||
# then direction overrides need not be used. Direction overrides are
|
||||
# always needed to map the right-left digits at 0xB0-0xB9.
|
||||
#
|
||||
# When mapping from Unicode to Mac OS Farsi, the Unicode
|
||||
# bidirectional algorithm should be used to determine resolved
|
||||
# direction of the Unicode characters. The mapping from Unicode to
|
||||
# Mac OS Farsi can then be disambiguated by the use of the resolved
|
||||
# direction:
|
||||
#
|
||||
# Unicode 0x002B -> Mac OS Farsi 0x2B (if L) or 0xAB (if R)
|
||||
#
|
||||
# However, this also means the direction override characters should
|
||||
# be discarded when mapping from Unicode to Mac OS Farsi (after
|
||||
# they have been used to determine resolved direction), since the
|
||||
# direction override information is carried by the code point itself.
|
||||
#
|
||||
# Even when direction overrides are not needed for roundtrip
|
||||
# fidelity, they are sometimes used when mapping Mac OS Farsi
|
||||
# characters to Unicode in order to achieve similar text layout with
|
||||
# the resulting Unicode text. For example, the single Mac OS Farsi
|
||||
# ellipsis character has direction class right-left, and there is no
|
||||
# left-right version. However, the Unicode HORIZONTAL ELLIPSIS
|
||||
# character has direction class neutral (which means it may end up
|
||||
# with a resolved direction of left-right if surrounded by left-right
|
||||
# characters). When mapping the Mac OS Farsi ellipsis to Unicode, it
|
||||
# is surrounded with a direction override to help preserve proper
|
||||
# text layout. The resolved direction is not needed or used when
|
||||
# mapping the Unicode HORIZONTAL ELLIPSIS back to Mac OS Farsi.
|
||||
#
|
||||
# 2. Mapping the Mac OS Farsi digits
|
||||
#
|
||||
# The main table below contains mappings that should be used when
|
||||
# strict round-trip fidelity is required. However, for numeric
|
||||
# values, the mappings in that table will produce Unicode characters
|
||||
# that may appear different than the Mac OS Farsi text displayed on
|
||||
# a Mac OS system using WorldScript. This is because WorldScript
|
||||
# uses context-dependent display for the 0x30-0x39 digits.
|
||||
#
|
||||
# If roundtrip fidelity is not required, then the following
|
||||
# alternate mappings should be used when a sequence of 0x30-0x39
|
||||
# digits - possibly including 0x2C and 0x2E - occurs in an Arabic
|
||||
# context (that is, when the first "strong" character on either side
|
||||
# of the digit sequence is Arabic, or there is no strong character):
|
||||
#
|
||||
# 0x2C 0x066C # ARABIC THOUSANDS SEPARATOR
|
||||
# 0x2E 0x066B # ARABIC DECIMAL SEPARATOR
|
||||
# 0x30 0x06F0 # EXTENDED ARABIC-INDIC DIGIT ZERO
|
||||
# 0x31 0x06F1 # EXTENDED ARABIC-INDIC DIGIT ONE
|
||||
# 0x32 0x06F2 # EXTENDED ARABIC-INDIC DIGIT TWO
|
||||
# 0x33 0x06F3 # EXTENDED ARABIC-INDIC DIGIT THREE
|
||||
# 0x34 0x06F4 # EXTENDED ARABIC-INDIC DIGIT FOUR
|
||||
# 0x35 0x06F5 # EXTENDED ARABIC-INDIC DIGIT FIVE
|
||||
# 0x36 0x06F6 # EXTENDED ARABIC-INDIC DIGIT SIX
|
||||
# 0x37 0x06F7 # EXTENDED ARABIC-INDIC DIGIT SEVEN
|
||||
# 0x38 0x06F8 # EXTENDED ARABIC-INDIC DIGIT EIGHT
|
||||
# 0x39 0x06F9 # EXTENDED ARABIC-INDIC DIGIT NINE
|
||||
#
|
||||
# 3. Use of corporate-zone Unicodes (mapping the TrueType variant)
|
||||
#
|
||||
# The following corporate zone Unicode character is used in this
|
||||
# mapping:
|
||||
#
|
||||
# 0xF882 Arabic ligature "peace on him"
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version b02 to version b03/c01:
|
||||
#
|
||||
# - Update mapping of 0xA4 in TrueType variant to use new Unicode
|
||||
# character U+FDFC RIAL SIGN added for Unicode 3.2
|
||||
#
|
||||
# Changes from version n01 to version n04:
|
||||
#
|
||||
# - Change mapping of 0xA4 in TrueType variant (just described in
|
||||
# header comment) from single corporate character to use
|
||||
# grouping hint
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 <LR>+0x0020 # SPACE, left-right
|
||||
0x21 <LR>+0x0021 # EXCLAMATION MARK, left-right
|
||||
0x22 <LR>+0x0022 # QUOTATION MARK, left-right
|
||||
0x23 <LR>+0x0023 # NUMBER SIGN, left-right
|
||||
0x24 <LR>+0x0024 # DOLLAR SIGN, left-right
|
||||
0x25 <LR>+0x0025 # PERCENT SIGN, left-right
|
||||
0x26 <LR>+0x0026 # AMPERSAND, left-right
|
||||
0x27 <LR>+0x0027 # APOSTROPHE, left-right
|
||||
0x28 <LR>+0x0028 # LEFT PARENTHESIS, left-right
|
||||
0x29 <LR>+0x0029 # RIGHT PARENTHESIS, left-right
|
||||
0x2A <LR>+0x002A # ASTERISK, left-right
|
||||
0x2B <LR>+0x002B # PLUS SIGN, left-right
|
||||
0x2C <LR>+0x002C # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR
|
||||
0x2D <LR>+0x002D # HYPHEN-MINUS, left-right
|
||||
0x2E <LR>+0x002E # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR
|
||||
0x2F <LR>+0x002F # SOLIDUS, left-right
|
||||
0x30 0x0030 # DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE
|
||||
0x3A <LR>+0x003A # COLON, left-right
|
||||
0x3B <LR>+0x003B # SEMICOLON, left-right
|
||||
0x3C <LR>+0x003C # LESS-THAN SIGN, left-right
|
||||
0x3D <LR>+0x003D # EQUALS SIGN, left-right
|
||||
0x3E <LR>+0x003E # GREATER-THAN SIGN, left-right
|
||||
0x3F <LR>+0x003F # QUESTION MARK, left-right
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B <LR>+0x005B # LEFT SQUARE BRACKET, left-right
|
||||
0x5C <LR>+0x005C # REVERSE SOLIDUS, left-right
|
||||
0x5D <LR>+0x005D # RIGHT SQUARE BRACKET, left-right
|
||||
0x5E <LR>+0x005E # CIRCUMFLEX ACCENT, left-right
|
||||
0x5F <LR>+0x005F # LOW LINE, left-right
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B <LR>+0x007B # LEFT CURLY BRACKET, left-right
|
||||
0x7C <LR>+0x007C # VERTICAL LINE, left-right
|
||||
0x7D <LR>+0x007D # RIGHT CURLY BRACKET, left-right
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 <RL>+0x00A0 # NO-BREAK SPACE, right-left
|
||||
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x06BA # ARABIC LETTER NOON GHUNNA
|
||||
0x8C <RL>+0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 <RL>+0x2026 # HORIZONTAL ELLIPSIS, right-left
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 <RL>+0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B <RL>+0x00F7 # DIVISION SIGN, right-left
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 <RL>+0x0020 # SPACE, right-left
|
||||
0xA1 <RL>+0x0021 # EXCLAMATION MARK, right-left
|
||||
0xA2 <RL>+0x0022 # QUOTATION MARK, right-left
|
||||
0xA3 <RL>+0x0023 # NUMBER SIGN, right-left
|
||||
0xA4 <RL>+0x0024 # DOLLAR SIGN, right-left
|
||||
0xA5 0x066A # ARABIC PERCENT SIGN
|
||||
0xA6 <RL>+0x0026 # AMPERSAND, right-left
|
||||
0xA7 <RL>+0x0027 # APOSTROPHE, right-left
|
||||
0xA8 <RL>+0x0028 # LEFT PARENTHESIS, right-left
|
||||
0xA9 <RL>+0x0029 # RIGHT PARENTHESIS, right-left
|
||||
0xAA <RL>+0x002A # ASTERISK, right-left
|
||||
0xAB <RL>+0x002B # PLUS SIGN, right-left
|
||||
0xAC 0x060C # ARABIC COMMA
|
||||
0xAD <RL>+0x002D # HYPHEN-MINUS, right-left
|
||||
0xAE <RL>+0x002E # FULL STOP, right-left
|
||||
0xAF <RL>+0x002F # SOLIDUS, right-left
|
||||
0xB0 <RL>+0x06F0 # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override)
|
||||
0xB1 <RL>+0x06F1 # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override)
|
||||
0xB2 <RL>+0x06F2 # EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override)
|
||||
0xB3 <RL>+0x06F3 # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override)
|
||||
0xB4 <RL>+0x06F4 # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override)
|
||||
0xB5 <RL>+0x06F5 # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override)
|
||||
0xB6 <RL>+0x06F6 # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override)
|
||||
0xB7 <RL>+0x06F7 # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override)
|
||||
0xB8 <RL>+0x06F8 # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override)
|
||||
0xB9 <RL>+0x06F9 # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override)
|
||||
0xBA <RL>+0x003A # COLON, right-left
|
||||
0xBB 0x061B # ARABIC SEMICOLON
|
||||
0xBC <RL>+0x003C # LESS-THAN SIGN, right-left
|
||||
0xBD <RL>+0x003D # EQUALS SIGN, right-left
|
||||
0xBE <RL>+0x003E # GREATER-THAN SIGN, right-left
|
||||
0xBF 0x061F # ARABIC QUESTION MARK
|
||||
0xC0 <RL>+0x274A # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left
|
||||
0xC1 0x0621 # ARABIC LETTER HAMZA
|
||||
0xC2 0x0622 # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0xC3 0x0623 # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
0xC4 0x0624 # ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
0xC5 0x0625 # ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
0xC6 0x0626 # ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0xC7 0x0627 # ARABIC LETTER ALEF
|
||||
0xC8 0x0628 # ARABIC LETTER BEH
|
||||
0xC9 0x0629 # ARABIC LETTER TEH MARBUTA
|
||||
0xCA 0x062A # ARABIC LETTER TEH
|
||||
0xCB 0x062B # ARABIC LETTER THEH
|
||||
0xCC 0x062C # ARABIC LETTER JEEM
|
||||
0xCD 0x062D # ARABIC LETTER HAH
|
||||
0xCE 0x062E # ARABIC LETTER KHAH
|
||||
0xCF 0x062F # ARABIC LETTER DAL
|
||||
0xD0 0x0630 # ARABIC LETTER THAL
|
||||
0xD1 0x0631 # ARABIC LETTER REH
|
||||
0xD2 0x0632 # ARABIC LETTER ZAIN
|
||||
0xD3 0x0633 # ARABIC LETTER SEEN
|
||||
0xD4 0x0634 # ARABIC LETTER SHEEN
|
||||
0xD5 0x0635 # ARABIC LETTER SAD
|
||||
0xD6 0x0636 # ARABIC LETTER DAD
|
||||
0xD7 0x0637 # ARABIC LETTER TAH
|
||||
0xD8 0x0638 # ARABIC LETTER ZAH
|
||||
0xD9 0x0639 # ARABIC LETTER AIN
|
||||
0xDA 0x063A # ARABIC LETTER GHAIN
|
||||
0xDB <RL>+0x005B # LEFT SQUARE BRACKET, right-left
|
||||
0xDC <RL>+0x005C # REVERSE SOLIDUS, right-left
|
||||
0xDD <RL>+0x005D # RIGHT SQUARE BRACKET, right-left
|
||||
0xDE <RL>+0x005E # CIRCUMFLEX ACCENT, right-left
|
||||
0xDF <RL>+0x005F # LOW LINE, right-left
|
||||
0xE0 0x0640 # ARABIC TATWEEL
|
||||
0xE1 0x0641 # ARABIC LETTER FEH
|
||||
0xE2 0x0642 # ARABIC LETTER QAF
|
||||
0xE3 0x0643 # ARABIC LETTER KAF
|
||||
0xE4 0x0644 # ARABIC LETTER LAM
|
||||
0xE5 0x0645 # ARABIC LETTER MEEM
|
||||
0xE6 0x0646 # ARABIC LETTER NOON
|
||||
0xE7 0x0647 # ARABIC LETTER HEH
|
||||
0xE8 0x0648 # ARABIC LETTER WAW
|
||||
0xE9 0x0649 # ARABIC LETTER ALEF MAKSURA
|
||||
0xEA 0x064A # ARABIC LETTER YEH
|
||||
0xEB 0x064B # ARABIC FATHATAN
|
||||
0xEC 0x064C # ARABIC DAMMATAN
|
||||
0xED 0x064D # ARABIC KASRATAN
|
||||
0xEE 0x064E # ARABIC FATHA
|
||||
0xEF 0x064F # ARABIC DAMMA
|
||||
0xF0 0x0650 # ARABIC KASRA
|
||||
0xF1 0x0651 # ARABIC SHADDA
|
||||
0xF2 0x0652 # ARABIC SUKUN
|
||||
0xF3 0x067E # ARABIC LETTER PEH
|
||||
0xF4 0x0679 # ARABIC LETTER TTEH
|
||||
0xF5 0x0686 # ARABIC LETTER TCHEH
|
||||
0xF6 0x06D5 # ARABIC LETTER AE
|
||||
0xF7 0x06A4 # ARABIC LETTER VEH
|
||||
0xF8 0x06AF # ARABIC LETTER GAF
|
||||
0xF9 0x0688 # ARABIC LETTER DDAL
|
||||
0xFA 0x0691 # ARABIC LETTER RREH
|
||||
0xFB <RL>+0x007B # LEFT CURLY BRACKET, right-left
|
||||
0xFC <RL>+0x007C # VERTICAL LINE, right-left
|
||||
0xFD <RL>+0x007D # RIGHT CURLY BRACKET, right-left
|
||||
0xFE 0x0698 # ARABIC LETTER JEH
|
||||
0xFF 0x06D2 # ARABIC LETTER YEH BARREE
|
337
charmap/GAELIC.TXT
Normal file
|
@ -0,0 +1,337 @@
|
|||
#=======================================================================
|
||||
# File name: GAELIC.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Gaelic
|
||||
# character set to Unicode 3.0 and later
|
||||
#
|
||||
# Contacts: charsets@apple.com, everson@evertype.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c01 2005-Apr-01 First posted version. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Gaelic code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Gaelic code order.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Gaelic character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Gaelic (partly from Michael Everson):
|
||||
# -----------------------------------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# This character set was developed by Michael Everson of Everson
|
||||
# Typography (everson@evertype.com) and was used for fonts in his
|
||||
# Celtic Utilities and CeltScript font packages for the Mac, as well
|
||||
# as some fonts included with the Irish localizations of Mac OS 6.0.8
|
||||
# and 7.1. Note that while Apple authorized this Irish localization,
|
||||
# it was not a system which shipped with Apple hardware, and was not
|
||||
# otherwise supported by Apple. Fonts conforming to the Mac OS Gaelic
|
||||
# character set are available from Everson Typography
|
||||
# (http://www.evertype.com/celtscript/). Information about the use of
|
||||
# this character set is available at
|
||||
# http://www.evertype.com/celtscript/celtcode.html.
|
||||
#
|
||||
# The Mac OS Gaelic encoding shares the script code smRoman (0) with
|
||||
# the standard Mac OS Roman encoding. To determine if the Gaelic
|
||||
# encoding is being used in Mac OS 7-9, you should also check if the
|
||||
# system region code is 81. Otherwise, you can check for particular
|
||||
# fonts that conform to this encoding (since in practice Gaelic fonts
|
||||
# are used with the ordinary US or UK system versions).
|
||||
#
|
||||
# This character set is a variant of standard Mac OS Roman, adding
|
||||
# capital and small y with acute, grave, and circumflex; capital and
|
||||
# small w with acute, grave, circumflex and diaeresis; capital and
|
||||
# small b, c, d, f, g, m, p, s, t with dot above; tironian et; small
|
||||
# long r, small long s, and small long s with dot above. It has 36
|
||||
# code point differences from standard Mac OS Roman.
|
||||
#
|
||||
# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was
|
||||
# mapped to U+00A4. In Mac OS 8.5 and later versions, code point
|
||||
# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard
|
||||
# Apple fonts are updated for Mac OS 8.5 to reflect this. There is
|
||||
# a "currency sign" variant of the Mac OS Gaelic encoding that still
|
||||
# maps 0xDB to U+00A4; this can be used for older fonts.
|
||||
# Note: U+20AC is new with Unicode 2.1; for earlier Unicode
|
||||
# versions, Mac OS Gaelic 0xDB may be mapped to private-use
|
||||
# character U+F8A0.
|
||||
#
|
||||
# Before Unicode 3.0, code point 0xE4 was PER MILLE SIGN, and was
|
||||
# mapped to U+2030. Since August 1998, code point 0xE4 is changed
|
||||
# to TIRONIAN SIGN ET and maps to U+204A. There is a "per mille
|
||||
# sign" variant of the Mac OS Gaelic encoding that still
|
||||
# maps 0xE4 to U+2030; this can be used for older fonts.
|
||||
# Note: U+204A is new with Unicode 3.0; for earlier Unicode
|
||||
# versions, Mac OS Gaelic 0xE4 was unified with AMPERSAND.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE
|
||||
0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 0x2020 # DAGGER
|
||||
0xA1 0x00B0 # DEGREE SIGN
|
||||
0xA2 0x00A2 # CENT SIGN
|
||||
0xA3 0x00A3 # POUND SIGN
|
||||
0xA4 0x00A7 # SECTION SIGN
|
||||
0xA5 0x2022 # BULLET
|
||||
0xA6 0x00B6 # PILCROW SIGN
|
||||
0xA7 0x00DF # LATIN SMALL LETTER SHARP S
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x00A9 # COPYRIGHT SIGN
|
||||
0xAA 0x2122 # TRADE MARK SIGN
|
||||
0xAB 0x00B4 # ACUTE ACCENT
|
||||
0xAC 0x00A8 # DIAERESIS
|
||||
0xAD 0x2260 # NOT EQUAL TO
|
||||
0xAE 0x00C6 # LATIN CAPITAL LETTER AE
|
||||
0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0xB0 0x1E02 # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
||||
0xB1 0x00B1 # PLUS-MINUS SIGN
|
||||
0xB2 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x1E03 # LATIN SMALL LETTER B WITH DOT ABOVE
|
||||
0xB5 0x010A # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||
0xB6 0x010B # LATIN SMALL LETTER C WITH DOT ABOVE
|
||||
0xB7 0x1E0A # LATIN CAPITAL LETTER D WITH DOT ABOVE
|
||||
0xB8 0x1E0B # LATIN SMALL LETTER D WITH DOT ABOVE
|
||||
0xB9 0x1E1E # LATIN CAPITAL LETTER F WITH DOT ABOVE
|
||||
0xBA 0x1E1F # LATIN SMALL LETTER F WITH DOT ABOVE
|
||||
0xBB 0x0120 # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||
0xBC 0x0121 # LATIN SMALL LETTER G WITH DOT ABOVE
|
||||
0xBD 0x1E40 # LATIN CAPITAL LETTER M WITH DOT ABOVE
|
||||
0xBE 0x00E6 # LATIN SMALL LETTER AE
|
||||
0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE
|
||||
0xC0 0x1E41 # LATIN SMALL LETTER M WITH DOT ABOVE
|
||||
0xC1 0x1E56 # LATIN CAPITAL LETTER P WITH DOT ABOVE
|
||||
0xC2 0x1E57 # LATIN SMALL LETTER P WITH DOT ABOVE
|
||||
0xC3 0x027C # LATIN SMALL LETTER R WITH LONG LEG
|
||||
0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK
|
||||
0xC5 0x017F # LATIN SMALL LETTER LONG S
|
||||
0xC6 0x1E60 # LATIN CAPITAL LETTER S WITH DOT ABOVE
|
||||
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0xCE 0x0152 # LATIN CAPITAL LIGATURE OE
|
||||
0xCF 0x0153 # LATIN SMALL LIGATURE OE
|
||||
0xD0 0x2013 # EN DASH
|
||||
0xD1 0x2014 # EM DASH
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x1E61 # LATIN SMALL LETTER S WITH DOT ABOVE
|
||||
0xD7 0x1E9B # LATIN SMALL LETTER LONG S WITH DOT ABOVE
|
||||
0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0xDA 0x1E6A # LATIN CAPITAL LETTER T WITH DOT ABOVE
|
||||
0xDB 0x20AC # EURO SIGN # before Mac OS 8.5 this was U+00A4 CURRENCY SIGN
|
||||
0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0xDE 0x0176 # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
||||
0xDF 0x0177 # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
||||
0xE0 0x1E6B # LATIN SMALL LETTER T WITH DOT ABOVE
|
||||
0xE1 0x00B7 # MIDDLE DOT
|
||||
0xE2 0x1EF2 # LATIN CAPITAL LETTER Y WITH GRAVE
|
||||
0xE3 0x1EF3 # LATIN SMALL LETTER Y WITH GRAVE
|
||||
0xE4 0x204A # TIRONIAN SIGN ET # change from MacCeltic for Unicode 3.0; before Aug. 1998 this was U+2030 PER MILLE SIGN
|
||||
0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0xF0 0x2663 # BLACK CLUB SUIT = shamrock # future mapping U+2618 SHAMROCK
|
||||
0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I
|
||||
0xF6 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0xF7 0x00FD # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0xF8 0x0174 # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
||||
0xF9 0x0175 # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
||||
0xFA 0x1E84 # LATIN CAPITAL LETTER W WITH DIAERESIS
|
||||
0xFB 0x1E85 # LATIN SMALL LETTER W WITH DIAERESIS
|
||||
0xFC 0x1E80 # LATIN CAPITAL LETTER W WITH GRAVE
|
||||
0xFD 0x1E81 # LATIN SMALL LETTER W WITH GRAVE
|
||||
0xFE 0x1E82 # LATIN CAPITAL LETTER W WITH ACUTE
|
||||
0xFF 0x1E83 # LATIN SMALL LETTER W WITH ACUTE
|
355
charmap/GREEK.TXT
Normal file
|
@ -0,0 +1,355 @@
|
|||
#=======================================================================
|
||||
# File name: GREEK.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Greek
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update to match changes in Mac OS Greek
|
||||
# encoding for Mac OS 9.2.2 and later.
|
||||
# Update URLs, notes. Matches internal
|
||||
# utom<b3>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b1>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n06 1998-Feb-05 Update to match internal utom<n4>, ufrm<n17>,
|
||||
# and Text Encoding Converter versions 1.3:
|
||||
# Change mapping for 0xAF from U+0387 to its
|
||||
# canonical decomposition, U+00B7. Also
|
||||
# update header comments to new format.
|
||||
# n04 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n7>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Greek code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Greek code order.
|
||||
#
|
||||
# One of these mappings requires the use of a corporate character.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Greek character set uses the standard control characters at
|
||||
# 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Greek:
|
||||
# ----------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# Although a Mac OS script code is defined for Greek (smGreek = 6),
|
||||
# the Greek localized system does not currently use it (the font
|
||||
# family IDs are in the Mac OS Roman range). To determine if the
|
||||
# Greek encoding is being used when the script code is smRoman (0),
|
||||
# you must check if the system region code is 20, verGreece.
|
||||
#
|
||||
# The Mac OS Greek encoding is a superset of the repertoire of
|
||||
# ISO 8859-7 (although characters are not at the same code points),
|
||||
# except that LEFT & RIGHT SINGLE QUOTATION MARK replace the
|
||||
# MODIFIER LETTER REVERSED COMMA & APOSTROPHE (spacing versions of
|
||||
# Greek rough & smooth breathing marks) that are in ISO 8859-7.
|
||||
# The added characters in Mac OS Greek include more punctuation and
|
||||
# symbols and several accented Latin letters.
|
||||
#
|
||||
# Before Mac OS 9.2.2, code point 0x9C was SOFT HYPHEN (U+00AD), and
|
||||
# code point 0xFF was undefined. In Mac OS 9.2.2 and later versions,
|
||||
# SOFT HYPHEN was moved to 0xFF, and code point 0x9C was changed to be
|
||||
# EURO SIGN (U+20AC); the standard Apple fonts are updated for Mac OS
|
||||
# 9.2.2 to reflect this. There is a "no Euro sign" variant of the Mac
|
||||
# OS Greek encoding that uses the older mapping; this can be used for
|
||||
# older fonts.
|
||||
#
|
||||
# This "no Euro sign" variant of Mac OS Greek was the character set
|
||||
# used by Mac OS Greek systems before 9.2.2 except for system 6.0.7,
|
||||
# which used a variant character set but was quickly replaced with
|
||||
# Greek system 6.0.7.1 using the "no Euro sign" character set
|
||||
# documented here. Greek system 4.1 used a variant Greek set that had
|
||||
# ISO 8859-7 in 0xA0-0xFF (with some holes filled in with DTP
|
||||
# characters), and Mac OS Roman accented Roman letters in 0x80-0x9F.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version b02 to version b03/c01:
|
||||
#
|
||||
# - The Mac OS Greek encoding changed for Mac OS 9.2.2 and later
|
||||
# as follows:
|
||||
# 0x9C, changed from 0x00AD SOFT HYPHEN to 0x20AC EURO SIGN
|
||||
# 0xFF, changed from undefined to 0x00AD SOFT HYPHEN
|
||||
#
|
||||
# Changes from version n04 to version n06:
|
||||
#
|
||||
# - Change mapping of 0xAF from U+0387 to its canonical
|
||||
# decomposition, U+00B7.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 0x00B9 # SUPERSCRIPT ONE
|
||||
0x82 0x00B2 # SUPERSCRIPT TWO
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00B3 # SUPERSCRIPT THREE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x0385 # GREEK DIALYTIKA TONOS
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x0384 # GREEK TONOS
|
||||
0x8C 0x00A8 # DIAERESIS
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00A3 # POUND SIGN
|
||||
0x93 0x2122 # TRADE MARK SIGN
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x2022 # BULLET
|
||||
0x97 0x00BD # VULGAR FRACTION ONE HALF
|
||||
0x98 0x2030 # PER MILLE SIGN
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B 0x00A6 # BROKEN BAR
|
||||
0x9C 0x20AC # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 0x2020 # DAGGER
|
||||
0xA1 0x0393 # GREEK CAPITAL LETTER GAMMA
|
||||
0xA2 0x0394 # GREEK CAPITAL LETTER DELTA
|
||||
0xA3 0x0398 # GREEK CAPITAL LETTER THETA
|
||||
0xA4 0x039B # GREEK CAPITAL LETTER LAMDA
|
||||
0xA5 0x039E # GREEK CAPITAL LETTER XI
|
||||
0xA6 0x03A0 # GREEK CAPITAL LETTER PI
|
||||
0xA7 0x00DF # LATIN SMALL LETTER SHARP S
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x00A9 # COPYRIGHT SIGN
|
||||
0xAA 0x03A3 # GREEK CAPITAL LETTER SIGMA
|
||||
0xAB 0x03AA # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
0xAC 0x00A7 # SECTION SIGN
|
||||
0xAD 0x2260 # NOT EQUAL TO
|
||||
0xAE 0x00B0 # DEGREE SIGN
|
||||
0xAF 0x00B7 # MIDDLE DOT
|
||||
0xB0 0x0391 # GREEK CAPITAL LETTER ALPHA
|
||||
0xB1 0x00B1 # PLUS-MINUS SIGN
|
||||
0xB2 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x00A5 # YEN SIGN
|
||||
0xB5 0x0392 # GREEK CAPITAL LETTER BETA
|
||||
0xB6 0x0395 # GREEK CAPITAL LETTER EPSILON
|
||||
0xB7 0x0396 # GREEK CAPITAL LETTER ZETA
|
||||
0xB8 0x0397 # GREEK CAPITAL LETTER ETA
|
||||
0xB9 0x0399 # GREEK CAPITAL LETTER IOTA
|
||||
0xBA 0x039A # GREEK CAPITAL LETTER KAPPA
|
||||
0xBB 0x039C # GREEK CAPITAL LETTER MU
|
||||
0xBC 0x03A6 # GREEK CAPITAL LETTER PHI
|
||||
0xBD 0x03AB # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
0xBE 0x03A8 # GREEK CAPITAL LETTER PSI
|
||||
0xBF 0x03A9 # GREEK CAPITAL LETTER OMEGA
|
||||
0xC0 0x03AC # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||
0xC1 0x039D # GREEK CAPITAL LETTER NU
|
||||
0xC2 0x00AC # NOT SIGN
|
||||
0xC3 0x039F # GREEK CAPITAL LETTER OMICRON
|
||||
0xC4 0x03A1 # GREEK CAPITAL LETTER RHO
|
||||
0xC5 0x2248 # ALMOST EQUAL TO
|
||||
0xC6 0x03A4 # GREEK CAPITAL LETTER TAU
|
||||
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x03A5 # GREEK CAPITAL LETTER UPSILON
|
||||
0xCC 0x03A7 # GREEK CAPITAL LETTER CHI
|
||||
0xCD 0x0386 # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0xCE 0x0388 # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0xCF 0x0153 # LATIN SMALL LIGATURE OE
|
||||
0xD0 0x2013 # EN DASH
|
||||
0xD1 0x2015 # HORIZONTAL BAR
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x00F7 # DIVISION SIGN
|
||||
0xD7 0x0389 # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
0xD8 0x038A # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
0xD9 0x038C # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
0xDA 0x038E # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
0xDB 0x03AD # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||
0xDC 0x03AE # GREEK SMALL LETTER ETA WITH TONOS
|
||||
0xDD 0x03AF # GREEK SMALL LETTER IOTA WITH TONOS
|
||||
0xDE 0x03CC # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0xDF 0x038F # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0xE0 0x03CD # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0xE1 0x03B1 # GREEK SMALL LETTER ALPHA
|
||||
0xE2 0x03B2 # GREEK SMALL LETTER BETA
|
||||
0xE3 0x03C8 # GREEK SMALL LETTER PSI
|
||||
0xE4 0x03B4 # GREEK SMALL LETTER DELTA
|
||||
0xE5 0x03B5 # GREEK SMALL LETTER EPSILON
|
||||
0xE6 0x03C6 # GREEK SMALL LETTER PHI
|
||||
0xE7 0x03B3 # GREEK SMALL LETTER GAMMA
|
||||
0xE8 0x03B7 # GREEK SMALL LETTER ETA
|
||||
0xE9 0x03B9 # GREEK SMALL LETTER IOTA
|
||||
0xEA 0x03BE # GREEK SMALL LETTER XI
|
||||
0xEB 0x03BA # GREEK SMALL LETTER KAPPA
|
||||
0xEC 0x03BB # GREEK SMALL LETTER LAMDA
|
||||
0xED 0x03BC # GREEK SMALL LETTER MU
|
||||
0xEE 0x03BD # GREEK SMALL LETTER NU
|
||||
0xEF 0x03BF # GREEK SMALL LETTER OMICRON
|
||||
0xF0 0x03C0 # GREEK SMALL LETTER PI
|
||||
0xF1 0x03CE # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0xF2 0x03C1 # GREEK SMALL LETTER RHO
|
||||
0xF3 0x03C3 # GREEK SMALL LETTER SIGMA
|
||||
0xF4 0x03C4 # GREEK SMALL LETTER TAU
|
||||
0xF5 0x03B8 # GREEK SMALL LETTER THETA
|
||||
0xF6 0x03C9 # GREEK SMALL LETTER OMEGA
|
||||
0xF7 0x03C2 # GREEK SMALL LETTER FINAL SIGMA
|
||||
0xF8 0x03C7 # GREEK SMALL LETTER CHI
|
||||
0xF9 0x03C5 # GREEK SMALL LETTER UPSILON
|
||||
0xFA 0x03B6 # GREEK SMALL LETTER ZETA
|
||||
0xFB 0x03CA # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||
0xFC 0x03CB # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||
0xFD 0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0xFE 0x03B0 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
0xFF 0x00AD # SOFT HYPHEN # before Mac OS 9.2.2, was undefined
|
383
charmap/GUJARATI.TXT
Normal file
|
@ -0,0 +1,383 @@
|
|||
#=======================================================================
|
||||
# File name: GUJARATI.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Gujarati
|
||||
# encoding to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1997-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update URLs. Matches internal utom<b1>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b1>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n02 1998-Feb-05 First version; matches internal utom<n4>,
|
||||
# ufrm<n5>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Gujarati code or code sequence
|
||||
# (in hex as 0xNN or 0xNN+0xNN)
|
||||
# Column #2 is the corresponding Unicode or Unicode sequence
|
||||
# (in hex as 0xNNNN or 0xNNNN+0xNNNN).
|
||||
# Column #3 is a comment containing the Unicode name or sequence
|
||||
# of names. In some cases an additional comment follows the
|
||||
# Unicode name(s).
|
||||
#
|
||||
# The entries are in two sections. The first section is for pairs of
|
||||
# Mac OS Gujarati code points that must be mapped in a special way.
|
||||
# The second section maps individual code points.
|
||||
#
|
||||
# Within each section, the entries are in Mac OS Gujarati code order.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Gujarati character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Gujarati:
|
||||
# -------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# Mac OS Gujarati is based on IS 13194:1991 (ISCII-91), with the
|
||||
# addition of several punctuation and symbol characters. However,
|
||||
# Mac OS Gujarati does not support the ATR (attribute) mechanism of
|
||||
# ISCII-91.
|
||||
#
|
||||
# 1. ISCII-91 features in Mac OS Gujarati include:
|
||||
#
|
||||
# a) Overloading of nukta
|
||||
#
|
||||
# In addition to using the nukta (0xE9) like a combining dot below,
|
||||
# nukta is overloaded to function as a general character modifier.
|
||||
# In this role, certain code points followed by 0xE9 are treated as
|
||||
# a two-byte code point representing a character which may be
|
||||
# rather different than the characters represented by either of
|
||||
# the code points alone. For example, the character GUJARATI OM
|
||||
# (U+0AD0) is represented in ISCII-91 as candrabindu + nukta.
|
||||
#
|
||||
# b) Explicit halant and soft halant
|
||||
#
|
||||
# A double halant (0xE8 + 0xE8) constitutes an "explicit halant",
|
||||
# which will always appear as a halant instead of causing formation
|
||||
# of a ligature or half-form consonant.
|
||||
#
|
||||
# Halant followed by nukta (0xE8 + 0xE9) constitutes a "soft
|
||||
# halant", which prevents formation of a ligature and instead
|
||||
# retains the half-form of the first consonant.
|
||||
#
|
||||
# c) Invisible consonant
|
||||
#
|
||||
# The byte 0xD9 (called INV in ISCII-91) is an invisible consonant:
|
||||
# It behaves like a consonant but has no visible appearance. It is
|
||||
# intended to be used (often in combination with halant) to display
|
||||
# dependent forms in isolation, such as the RA forms or consonant
|
||||
# half-forms.
|
||||
#
|
||||
# d) Extensions for Vedic, etc.
|
||||
#
|
||||
# The byte 0xF0 (called EXT in ISCII-91) followed by any byte in
|
||||
# the range 0xA1-0xEE constitutes a two-byte code point which can
|
||||
# be used to represent additional characters for Vedic (or other
|
||||
# extensions); 0xF0 followed by any other byte value constitutes
|
||||
# malformed text. Mac OS Gujarati supports this mechanism, but
|
||||
# does not currently map any of these two-byte code points to
|
||||
# anything.
|
||||
#
|
||||
# 2. Mac OS Gujarati additions
|
||||
#
|
||||
# Mac OS Gujarati adds characters using the code points
|
||||
# 0x80-0x8A and 0x90.
|
||||
#
|
||||
# 3. Unused code points
|
||||
#
|
||||
# The following code points are currently unused, and are not shown
|
||||
# here: 0x8B-0x8F, 0x91-0xA0, 0xAB, 0xAF, 0xC7, 0xCE, 0xD0, 0xD3,
|
||||
# 0xE0, 0xE4, 0xEB-0xEF, 0xFB-0xFF. In addition, 0xF0 is not shown
|
||||
# here, but it has a special function as described above.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# 1. Mapping the byte pairs
|
||||
#
|
||||
# If one of the following byte values is encountered when mapping
|
||||
# Mac OS Gujarati text - 0xA1, 0xAA, 0xDF, or 0xE8 - then the next
|
||||
# byte (if there is one) should be examined. If the next byte is
|
||||
# 0xE9 - or also 0xE8, if the first byte was 0xE8 - then the byte
|
||||
# pair should be mapped using the first section of the mapping
|
||||
# table below. Otherwise, each byte should be mapped using the
|
||||
# second section of the mapping table below.
|
||||
#
|
||||
# - The Unicode Standard, Version 2.0, specifies how explicit
|
||||
# halant and soft halant should be represented in Unicode;
|
||||
# these mappings are used below.
|
||||
#
|
||||
# If the byte value 0xF0 is encountered when mapping Mac OS
|
||||
# Gujarati text, then the next byte should be examined. If there
|
||||
# is no next byte (e.g. 0xF0 at end of buffer), the mapping
|
||||
# process should indicate incomplete character. If there is a next
|
||||
# byte but it is not in the range 0xA1-0xEE, the mapping process
|
||||
# should indicate malformed text. Otherwise, the mapping process
|
||||
# should treat the byte pair as a valid two-byte code point with no
|
||||
# mapping (e.g. map it to QUESTION MARK, REPLACEMENT CHARACTER,
|
||||
# etc.).
|
||||
#
|
||||
# 2. Mapping the invisible consonant
|
||||
#
|
||||
# It has been suggested that INV in ISCII-91 should map to ZERO
|
||||
# WIDTH NON-JOINER in Unicode. However, this causes problems with
|
||||
# roundtrip fidelity: The ISCII-91 sequences 0xE8+0xE8 and 0xE8+0xD9
|
||||
# would map to the same sequence of Unicode characters. We have
|
||||
# instead mapped INV to LEFT-TO-RIGHT MARK, which avoids these
|
||||
# problems.
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
##################
|
||||
|
||||
# Section 1: Map the following byte pairs as indicated:
|
||||
# (ZWNJ means ZERO WIDTH NON-JOINER, ZWJ means ZERO WIDTH JOINER)
|
||||
# (Also see note about 0xF0 in comments above)
|
||||
|
||||
0xA1+0xE9 0x0AD0 # GUJARATI OM
|
||||
0xAA+0xE9 0x0AE0 # GUJARATI LETTER VOCALIC RR
|
||||
0xDF+0xE9 0x0AC4 # GUJARATI VOWEL SIGN VOCALIC RR
|
||||
0xE8+0xE8 0x0ACD+0x200C # GUJARATI SIGN VIRAMA + ZWNJ # explicit halant
|
||||
0xE8+0xE9 0x0ACD+0x200D # GUJARATI SIGN VIRAMA + ZWJ # soft halant
|
||||
|
||||
# Section 2: Map the remaining bytes as follows:
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00D7 # MULTIPLICATION SIGN
|
||||
0x81 0x2212 # MINUS SIGN
|
||||
0x82 0x2013 # EN DASH
|
||||
0x83 0x2014 # EM DASH
|
||||
0x84 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0x85 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0x86 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0x87 0x2022 # BULLET
|
||||
0x88 0x00A9 # COPYRIGHT SIGN
|
||||
0x89 0x00AE # REGISTERED SIGN
|
||||
0x8A 0x2122 # TRADE MARK SIGN
|
||||
#
|
||||
0x90 0x0965 # DEVANAGARI DOUBLE DANDA
|
||||
#
|
||||
0xA1 0x0A81 # GUJARATI SIGN CANDRABINDU
|
||||
0xA2 0x0A82 # GUJARATI SIGN ANUSVARA
|
||||
0xA3 0x0A83 # GUJARATI SIGN VISARGA
|
||||
0xA4 0x0A85 # GUJARATI LETTER A
|
||||
0xA5 0x0A86 # GUJARATI LETTER AA
|
||||
0xA6 0x0A87 # GUJARATI LETTER I
|
||||
0xA7 0x0A88 # GUJARATI LETTER II
|
||||
0xA8 0x0A89 # GUJARATI LETTER U
|
||||
0xA9 0x0A8A # GUJARATI LETTER UU
|
||||
0xAA 0x0A8B # GUJARATI LETTER VOCALIC R
|
||||
#
|
||||
0xAC 0x0A8F # GUJARATI LETTER E
|
||||
0xAD 0x0A90 # GUJARATI LETTER AI
|
||||
0xAE 0x0A8D # GUJARATI VOWEL CANDRA E
|
||||
#
|
||||
0xB0 0x0A93 # GUJARATI LETTER O
|
||||
0xB1 0x0A94 # GUJARATI LETTER AU
|
||||
0xB2 0x0A91 # GUJARATI VOWEL CANDRA O
|
||||
0xB3 0x0A95 # GUJARATI LETTER KA
|
||||
0xB4 0x0A96 # GUJARATI LETTER KHA
|
||||
0xB5 0x0A97 # GUJARATI LETTER GA
|
||||
0xB6 0x0A98 # GUJARATI LETTER GHA
|
||||
0xB7 0x0A99 # GUJARATI LETTER NGA
|
||||
0xB8 0x0A9A # GUJARATI LETTER CA
|
||||
0xB9 0x0A9B # GUJARATI LETTER CHA
|
||||
0xBA 0x0A9C # GUJARATI LETTER JA
|
||||
0xBB 0x0A9D # GUJARATI LETTER JHA
|
||||
0xBC 0x0A9E # GUJARATI LETTER NYA
|
||||
0xBD 0x0A9F # GUJARATI LETTER TTA
|
||||
0xBE 0x0AA0 # GUJARATI LETTER TTHA
|
||||
0xBF 0x0AA1 # GUJARATI LETTER DDA
|
||||
0xC0 0x0AA2 # GUJARATI LETTER DDHA
|
||||
0xC1 0x0AA3 # GUJARATI LETTER NNA
|
||||
0xC2 0x0AA4 # GUJARATI LETTER TA
|
||||
0xC3 0x0AA5 # GUJARATI LETTER THA
|
||||
0xC4 0x0AA6 # GUJARATI LETTER DA
|
||||
0xC5 0x0AA7 # GUJARATI LETTER DHA
|
||||
0xC6 0x0AA8 # GUJARATI LETTER NA
|
||||
#
|
||||
0xC8 0x0AAA # GUJARATI LETTER PA
|
||||
0xC9 0x0AAB # GUJARATI LETTER PHA
|
||||
0xCA 0x0AAC # GUJARATI LETTER BA
|
||||
0xCB 0x0AAD # GUJARATI LETTER BHA
|
||||
0xCC 0x0AAE # GUJARATI LETTER MA
|
||||
0xCD 0x0AAF # GUJARATI LETTER YA
|
||||
#
|
||||
0xCF 0x0AB0 # GUJARATI LETTER RA
|
||||
#
|
||||
0xD1 0x0AB2 # GUJARATI LETTER LA
|
||||
0xD2 0x0AB3 # GUJARATI LETTER LLA
|
||||
#
|
||||
0xD4 0x0AB5 # GUJARATI LETTER VA
|
||||
0xD5 0x0AB6 # GUJARATI LETTER SHA
|
||||
0xD6 0x0AB7 # GUJARATI LETTER SSA
|
||||
0xD7 0x0AB8 # GUJARATI LETTER SA
|
||||
0xD8 0x0AB9 # GUJARATI LETTER HA
|
||||
0xD9 0x200E # LEFT-TO-RIGHT MARK # invisible consonant
|
||||
0xDA 0x0ABE # GUJARATI VOWEL SIGN AA
|
||||
0xDB 0x0ABF # GUJARATI VOWEL SIGN I
|
||||
0xDC 0x0AC0 # GUJARATI VOWEL SIGN II
|
||||
0xDD 0x0AC1 # GUJARATI VOWEL SIGN U
|
||||
0xDE 0x0AC2 # GUJARATI VOWEL SIGN UU
|
||||
0xDF 0x0AC3 # GUJARATI VOWEL SIGN VOCALIC R
|
||||
#
|
||||
0xE1 0x0AC7 # GUJARATI VOWEL SIGN E
|
||||
0xE2 0x0AC8 # GUJARATI VOWEL SIGN AI
|
||||
0xE3 0x0AC5 # GUJARATI VOWEL SIGN CANDRA E
|
||||
#
|
||||
0xE5 0x0ACB # GUJARATI VOWEL SIGN O
|
||||
0xE6 0x0ACC # GUJARATI VOWEL SIGN AU
|
||||
0xE7 0x0AC9 # GUJARATI VOWEL SIGN CANDRA O
|
||||
0xE8 0x0ACD # GUJARATI SIGN VIRAMA # halant
|
||||
0xE9 0x0ABC # GUJARATI SIGN NUKTA
|
||||
0xEA 0x0964 # DEVANAGARI DANDA
|
||||
#
|
||||
0xF1 0x0AE6 # GUJARATI DIGIT ZERO
|
||||
0xF2 0x0AE7 # GUJARATI DIGIT ONE
|
||||
0xF3 0x0AE8 # GUJARATI DIGIT TWO
|
||||
0xF4 0x0AE9 # GUJARATI DIGIT THREE
|
||||
0xF5 0x0AEA # GUJARATI DIGIT FOUR
|
||||
0xF6 0x0AEB # GUJARATI DIGIT FIVE
|
||||
0xF7 0x0AEC # GUJARATI DIGIT SIX
|
||||
0xF8 0x0AED # GUJARATI DIGIT SEVEN
|
||||
0xF9 0x0AEE # GUJARATI DIGIT EIGHT
|
||||
0xFA 0x0AEF # GUJARATI DIGIT NINE
|
441
charmap/GURMUKHI.TXT
Normal file
|
@ -0,0 +1,441 @@
|
|||
#=======================================================================
|
||||
# File name: GURMUKHI.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Gurmukhi
|
||||
# encoding to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1997-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Change mappings for 0x91, 0xD5 based on
|
||||
# new decomposition rules. Update URLs,
|
||||
# notes. Matches internal utom<b2>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b1>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n02 1998-Feb-05 First version; matches internal utom<n5>,
|
||||
# ufrm<n6>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Gurmukhi code or code sequence
|
||||
# (in hex as 0xNN or 0xNN+0xNN)
|
||||
# Column #2 is the corresponding Unicode or Unicode sequence
|
||||
# (in hex as 0xNNNN or 0xNNNN+0xNNNN).
|
||||
# Column #3 is a comment containing the Unicode name or sequence
|
||||
# of names. In some cases an additional comment follows the
|
||||
# Unicode name(s).
|
||||
#
|
||||
# The entries are in two sections. The first section is for pairs of
|
||||
# Mac OS Gurmukhi code points that must be mapped in a special way.
|
||||
# The second section maps individual code points.
|
||||
#
|
||||
# Within each section, the entries are in Mac OS Gurmukhi code order.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Gurmukhi character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Gurmukhi:
|
||||
# -------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# Mac OS Gurmukhi is based on IS 13194:1991 (ISCII-91), with the
|
||||
# addition of several punctuation and symbol characters. However,
|
||||
# Mac OS Gurmukhi does not support the ATR (attribute) mechanism of
|
||||
# ISCII-91.
|
||||
#
|
||||
# 1. ISCII-91 features in Mac OS Gurmukhi include:
|
||||
#
|
||||
# a) Explicit halant and soft halant
|
||||
#
|
||||
# A double halant (0xE8 + 0xE8) constitutes an "explicit halant",
|
||||
# which will always appear as a halant instead of causing formation
|
||||
# of a ligature or half-form consonant.
|
||||
#
|
||||
# Halant followed by nukta (0xE8 + 0xE9) constitutes a "soft
|
||||
# halant", which prevents formation of a ligature and instead
|
||||
# retains the half-form of the first consonant.
|
||||
#
|
||||
# b) Invisible consonant
|
||||
#
|
||||
# The byte 0xD9 (called INV in ISCII-91) is an invisible consonant:
|
||||
# It behaves like a consonant but has no visible appearance. It is
|
||||
# intended to be used (often in combination with halant) to display
|
||||
# dependent forms in isolation, such as the RA forms or consonant
|
||||
# half-forms.
|
||||
#
|
||||
# c) Extensions for Vedic, etc.
|
||||
#
|
||||
# The byte 0xF0 (called EXT in ISCII-91) followed by any byte in
|
||||
# the range 0xA1-0xEE constitutes a two-byte code point which can
|
||||
# be used to represent additional characters for Vedic (or other
|
||||
# extensions); 0xF0 followed by any other byte value constitutes
|
||||
# malformed text. Mac OS Gurmukhi supports this mechanism, but
|
||||
# does not currently map any of these two-byte code points to
|
||||
# anything.
|
||||
#
|
||||
# 2. Mac OS Gurmukhi additions
|
||||
#
|
||||
# Mac OS Gurmukhi adds characters using the code points
|
||||
# 0x80-0x8A and 0x90-0x94 (the latter are some Gurmukhi additions).
|
||||
#
|
||||
# 3. Unused code points
|
||||
#
|
||||
# The following code points are currently unused, and are not shown
|
||||
# here: 0x8B-0x8F, 0x95-0xA1, 0xA3, 0xAA-0xAB, 0xAE-0xAF, 0xB2,
|
||||
# 0xC7, 0xCE, 0xD0, 0xD2-0xD3, 0xD6, 0xDF-0xE0, 0xE3-0xE4, 0xE7,
|
||||
# 0xEB-0xEF, 0xFB-0xFF. In addition, 0xF0 is not shown here, but it
|
||||
# has a special function as described above.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# 1. Mapping the byte pairs
|
||||
#
|
||||
# If the byte value 0xE8 is encountered when mapping Mac OS
|
||||
# Gurmukhi text, then the next byte (if there is one) should be
|
||||
# examined. If the next byte is 0xE8 or 0xE9, then the byte pair
|
||||
# should be mapped using the first section of the mapping table
|
||||
# below. Otherwise, each byte should be mapped using the second
|
||||
# section of the mapping table below.
|
||||
#
|
||||
# - The Unicode Standard, Version 2.0, specifies how explicit
|
||||
# halant and soft halant should be represented in Unicode;
|
||||
# these mappings are used below.
|
||||
#
|
||||
# If the byte value 0xF0 is encountered when mapping Mac OS
|
||||
# Gurmukhi text, then the next byte should be examined. If there
|
||||
# is no next byte (e.g. 0xF0 at end of buffer), the mapping
|
||||
# process should indicate incomplete character. If there is a next
|
||||
# byte but it is not in the range 0xA1-0xEE, the mapping process
|
||||
# should indicate malformed text. Otherwise, the mapping process
|
||||
# should treat the byte pair as a valid two-byte code point with no
|
||||
# mapping (e.g. map it to QUESTION MARK, REPLACEMENT CHARACTER,
|
||||
# etc.).
|
||||
#
|
||||
# 2. Mapping the invisible consonant
|
||||
#
|
||||
# It has been suggested that INV in ISCII-91 should map to ZERO
|
||||
# WIDTH NON-JOINER in Unicode. However, this causes problems with
|
||||
# roundtrip fidelity: The ISCII-91 sequences 0xE8+0xE8 and 0xE8+0xD9
|
||||
# would map to the same sequence of Unicode characters. We have
|
||||
# instead mapped INV to LEFT-TO-RIGHT MARK, which avoids these
|
||||
# problems.
|
||||
#
|
||||
# 3. Mappings using corporate characters
|
||||
#
|
||||
# Mapping the GURMUKHI LETTER SHA 0xD5 presents an interesting
|
||||
# problem. At first glance, we could map it to the single Unicode
|
||||
# character 0x0A36.
|
||||
#
|
||||
# However, our goal is that the mappings provided here should also
|
||||
# be able to generate the mappings to maximally decomposed Unicode
|
||||
# by simple recursive substitution of the canonical decompositions
|
||||
# in the Unicode database. We want mapping tables derived this way
|
||||
# to retain full roundtrip fidelity.
|
||||
#
|
||||
# Since the canonical decomposition of 0x0A36 is 0x0A38+0x0A3C,
|
||||
# the decomposition mapping for 0xD5 would be identical with the
|
||||
# decomposition mapping for 0xD7+0xE9, and roundtrip fidelity would
|
||||
# be lost.
|
||||
#
|
||||
# We solve this problem by using a grouping hint (one of the set of
|
||||
# transcoding hints defined by Apple).
|
||||
#
|
||||
# Apple has defined a block of 32 corporate characters as "transcoding
|
||||
# hints." These are used in combination with standard Unicode characters
|
||||
# to force them to be treated in a special way for mapping to other
|
||||
# encodings; they have no other effect. Sixteen of these transcoding
|
||||
# hints are "grouping hints" - they indicate that the next 2-4 Unicode
|
||||
# characters should be treated as a single entity for transcoding. The
|
||||
# other sixteen transcoding hints are "variant tags" - they are like
|
||||
# combining characters, and can follow a standard Unicode character (or a sequence
|
||||
# consisting of a base character and other combining characters) to
|
||||
# cause it to be treated in a special way for transcoding. These always
|
||||
# terminate a combining-character sequence.
|
||||
#
|
||||
# The transcoding hint used in this mapping table is:
|
||||
# 0xF860 group next 2 characters
|
||||
#
|
||||
# Then we can map 0xD5 as follows:
|
||||
# 0xD5 -> 0xF860+0x0A38+0x0A3C
|
||||
#
|
||||
# We could also have used a variant tag such as 0xF87F and mapped it
|
||||
# this way:
|
||||
# 0xD5 -> 0x0A36+0xF87F
|
||||
#
|
||||
# 4. Additional loose mappings from Unicode
|
||||
#
|
||||
# These are not preserved in roundtrip mappings.
|
||||
#
|
||||
# 0A59 -> 0xB4+0xE9 # GURMUKHI LETTER KHHA
|
||||
# 0A5A -> 0xB5+0xE9 # GURMUKHI LETTER GHHA
|
||||
# 0A5B -> 0xBA+0xE9 # GURMUKHI LETTER ZA
|
||||
# 0A5E -> 0xC9+0xE9 # GURMUKHI LETTER FA
|
||||
#
|
||||
# 0A70 -> 0xA2 # GURMUKHI TIPPI
|
||||
#
|
||||
# Loose mappings from Unicode should also map U+0A71 (GURMUKHI ADDAK)
|
||||
# followed by any Gurmukhi consonant to the equivalent ISCII-91
|
||||
# consonant plus halant plus the consonant again. For example:
|
||||
#
|
||||
# 0A71+0A15 -> 0xB3+0xE8+0xB3
|
||||
# 0A71+0A16 -> 0xB4+0xE8+0xB4
|
||||
# ...
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version b02 to version b03/c01:
|
||||
#
|
||||
# - Change mapping of 0x91 from 0xF860+0x0A21+0x0A3C to 0x0A5C GURMUKHI
|
||||
# LETTER RRA, now that the canonical decomposition of 0x0A5C to
|
||||
# 0x0A21+0x0A3C has been deleted
|
||||
#
|
||||
# - Change mapping of 0xD5 from 0x0A36 GURMUKHI LETTER SHA to
|
||||
# 0xF860+0x0A38+0x0A3C, now that a canonical decomposition of 0x0A36
|
||||
# to 0x0A38+0x0A3C has been added.
|
||||
#
|
||||
##################
|
||||
|
||||
# Section 1: Map the following byte pairs as indicated:
|
||||
# (ZWNJ means ZERO WIDTH NON-JOINER, ZWJ means ZERO WIDTH JOINER)
|
||||
# (Also see note about 0xF0 in comments above)
|
||||
|
||||
0xE8+0xE8 0x0A4D+0x200C # GURMUKHI SIGN VIRAMA + ZWNJ # explicit halant
|
||||
0xE8+0xE9 0x0A4D+0x200D # GURMUKHI SIGN VIRAMA + ZWJ # soft halant
|
||||
|
||||
# Section 2: Map the remaining bytes as follows:
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00D7 # MULTIPLICATION SIGN
|
||||
0x81 0x2212 # MINUS SIGN
|
||||
0x82 0x2013 # EN DASH
|
||||
0x83 0x2014 # EM DASH
|
||||
0x84 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0x85 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0x86 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0x87 0x2022 # BULLET
|
||||
0x88 0x00A9 # COPYRIGHT SIGN
|
||||
0x89 0x00AE # REGISTERED SIGN
|
||||
0x8A 0x2122 # TRADE MARK SIGN
|
||||
#
|
||||
0x90 0x0A71 # GURMUKHI ADDAK
|
||||
0x91 0x0A5C # GURMUKHI LETTER RRA
|
||||
0x92 0x0A73 # GURMUKHI URA
|
||||
0x93 0x0A72 # GURMUKHI IRI
|
||||
0x94 0x0A74 # GURMUKHI EK ONKAR
|
||||
#
|
||||
0xA2 0x0A02 # GURMUKHI SIGN BINDI
|
||||
#
|
||||
0xA4 0x0A05 # GURMUKHI LETTER A
|
||||
0xA5 0x0A06 # GURMUKHI LETTER AA
|
||||
0xA6 0x0A07 # GURMUKHI LETTER I
|
||||
0xA7 0x0A08 # GURMUKHI LETTER II
|
||||
0xA8 0x0A09 # GURMUKHI LETTER U
|
||||
0xA9 0x0A0A # GURMUKHI LETTER UU
|
||||
#
|
||||
0xAC 0x0A0F # GURMUKHI LETTER EE
|
||||
0xAD 0x0A10 # GURMUKHI LETTER AI
|
||||
#
|
||||
0xB0 0x0A13 # GURMUKHI LETTER OO
|
||||
0xB1 0x0A14 # GURMUKHI LETTER AU
|
||||
#
|
||||
0xB3 0x0A15 # GURMUKHI LETTER KA
|
||||
0xB4 0x0A16 # GURMUKHI LETTER KHA
|
||||
0xB5 0x0A17 # GURMUKHI LETTER GA
|
||||
0xB6 0x0A18 # GURMUKHI LETTER GHA
|
||||
0xB7 0x0A19 # GURMUKHI LETTER NGA
|
||||
0xB8 0x0A1A # GURMUKHI LETTER CA
|
||||
0xB9 0x0A1B # GURMUKHI LETTER CHA
|
||||
0xBA 0x0A1C # GURMUKHI LETTER JA
|
||||
0xBB 0x0A1D # GURMUKHI LETTER JHA
|
||||
0xBC 0x0A1E # GURMUKHI LETTER NYA
|
||||
0xBD 0x0A1F # GURMUKHI LETTER TTA
|
||||
0xBE 0x0A20 # GURMUKHI LETTER TTHA
|
||||
0xBF 0x0A21 # GURMUKHI LETTER DDA
|
||||
0xC0 0x0A22 # GURMUKHI LETTER DDHA
|
||||
0xC1 0x0A23 # GURMUKHI LETTER NNA
|
||||
0xC2 0x0A24 # GURMUKHI LETTER TA
|
||||
0xC3 0x0A25 # GURMUKHI LETTER THA
|
||||
0xC4 0x0A26 # GURMUKHI LETTER DA
|
||||
0xC5 0x0A27 # GURMUKHI LETTER DHA
|
||||
0xC6 0x0A28 # GURMUKHI LETTER NA
|
||||
#
|
||||
0xC8 0x0A2A # GURMUKHI LETTER PA
|
||||
0xC9 0x0A2B # GURMUKHI LETTER PHA
|
||||
0xCA 0x0A2C # GURMUKHI LETTER BA
|
||||
0xCB 0x0A2D # GURMUKHI LETTER BHA
|
||||
0xCC 0x0A2E # GURMUKHI LETTER MA
|
||||
0xCD 0x0A2F # GURMUKHI LETTER YA
|
||||
#
|
||||
0xCF 0x0A30 # GURMUKHI LETTER RA
|
||||
#
|
||||
0xD1 0x0A32 # GURMUKHI LETTER LA
|
||||
#
|
||||
0xD4 0x0A35 # GURMUKHI LETTER VA
|
||||
0xD5 0xF860+0x0A38+0x0A3C # GURMUKHI LETTER SHA
|
||||
#
|
||||
0xD7 0x0A38 # GURMUKHI LETTER SA
|
||||
0xD8 0x0A39 # GURMUKHI LETTER HA
|
||||
0xD9 0x200E # LEFT-TO-RIGHT MARK # invisible consonant
|
||||
0xDA 0x0A3E # GURMUKHI VOWEL SIGN AA
|
||||
0xDB 0x0A3F # GURMUKHI VOWEL SIGN I
|
||||
0xDC 0x0A40 # GURMUKHI VOWEL SIGN II
|
||||
0xDD 0x0A41 # GURMUKHI VOWEL SIGN U
|
||||
0xDE 0x0A42 # GURMUKHI VOWEL SIGN UU
|
||||
#
|
||||
0xE1 0x0A47 # GURMUKHI VOWEL SIGN EE
|
||||
0xE2 0x0A48 # GURMUKHI VOWEL SIGN AI
|
||||
#
|
||||
0xE5 0x0A4B # GURMUKHI VOWEL SIGN OO
|
||||
0xE6 0x0A4C # GURMUKHI VOWEL SIGN AU
|
||||
#
|
||||
0xE8 0x0A4D # GURMUKHI SIGN VIRAMA # halant
|
||||
0xE9 0x0A3C # GURMUKHI SIGN NUKTA
|
||||
0xEA 0x0964 # DEVANAGARI DANDA
|
||||
#
|
||||
0xF1 0x0A66 # GURMUKHI DIGIT ZERO
|
||||
0xF2 0x0A67 # GURMUKHI DIGIT ONE
|
||||
0xF3 0x0A68 # GURMUKHI DIGIT TWO
|
||||
0xF4 0x0A69 # GURMUKHI DIGIT THREE
|
||||
0xF5 0x0A6A # GURMUKHI DIGIT FOUR
|
||||
0xF6 0x0A6B # GURMUKHI DIGIT FIVE
|
||||
0xF7 0x0A6C # GURMUKHI DIGIT SIX
|
||||
0xF8 0x0A6D # GURMUKHI DIGIT SEVEN
|
||||
0xF9 0x0A6E # GURMUKHI DIGIT EIGHT
|
||||
0xFA 0x0A6F # GURMUKHI DIGIT NINE
|
601
charmap/HEBREW.TXT
Normal file
|
@ -0,0 +1,601 @@
|
|||
#=======================================================================
|
||||
# File name: HEBREW.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Hebrew
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments; add section on
|
||||
# roundtrip considerations. Matches internal
|
||||
# xml <c1.4> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Don't require left-right context for digits
|
||||
# 0x30-0x39. Change mapping of 0x81 to use
|
||||
# decomposition. Reverse the mappings of 0xA8,
|
||||
# 0xA9. Update URLs, notes. Matches internal
|
||||
# utom<b7>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b1>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n03 1998-Feb-05 Show required Unicode character
|
||||
# directionality in a different way. Update
|
||||
# mappings for 0xC0 and 0xDE to use
|
||||
# transcoding hints; matches internal utom<n6>,
|
||||
# ufrm<n20>, and Text Encoding Converter
|
||||
# version 1.3. Rewrite header comments.
|
||||
# n01 1995-Nov-15 First version. Matches internal ufrm<n8>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Hebrew code (in hex as 0xNN).
|
||||
# Column #2 is the corresponding Unicode or Unicode sequence (in
|
||||
# hex as 0xNNNN, 0xNNNN+0xNNNN, etc.). Sequences of up to 3
|
||||
# Unicode characters are used here. A single Unicode character
|
||||
# may be preceded by a tag indicating required directionality
|
||||
# (i.e. <LR>+0xNNNN or <RL>+0xNNNN).
|
||||
# Column #3 is a comment containing the Unicode name.
|
||||
#
|
||||
# The entries are in Mac OS Hebrew code order.
|
||||
#
|
||||
# Some of these mappings require the use of corporate characters.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Hebrew character set uses the standard control characters at
|
||||
# 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Hebrew:
|
||||
# -----------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# 1. General
|
||||
#
|
||||
# The Mac OS Hebrew character set supports the Hebrew and Yiddish
|
||||
# languages. It incorporates the Hebrew letter repertoire of
|
||||
# ISO 8859-8, and uses the same code points for them, 0xE0-0xFA.
|
||||
# It also incorporates the ASCII character set. In addition, the
|
||||
# Mac OS Hebrew character set includes the following:
|
||||
#
|
||||
# - Hebrew points (nikud marks) at 0xC6, 0xCB-0xCF and 0xD8-0xDF.
|
||||
# These are non-spacing combining marks. Note that the RAFE point
|
||||
# at 0xD8 is not displayed correctly in some fonts, and cannot be
|
||||
# typed using the keyboard layouts in the current Hebrew localized
|
||||
# systems. Also note: The character given in Unicode as QAMATS
|
||||
# (U+05B8) actually refers to two different sounds, depending on
|
||||
# context. For example, when ALEF is followed by QAMATS, the QAMATS
|
||||
# can actually refer to two different sounds depending on the
|
||||
# following letters. The Mac OS Hebrew character set separately
|
||||
# encodes these two sounds for the same graphic shape, as "qamats"
|
||||
# (0xCB) and "qamats qatan" (0xDE). The "qamats" character is more
|
||||
# common, so it is mapped to the Unicode QAMATS; "qamats qatan" can
|
||||
# only be used with a limited number of characters, and it is
|
||||
# mapped using a corporate-zone variant tag (see below).
|
||||
#
|
||||
# - Various Hebrew ligatures at 0x81, 0xC0, 0xC7, 0xC8, 0xD6, and
|
||||
# 0xD7. Also note that the Yiddish YOD YOD PATAH ligature at 0x81
|
||||
# is missing in some fonts.
|
||||
#
|
||||
# - The NEW SHEQEL SIGN at 0xA6.
|
||||
#
|
||||
# - Latin characters with diacritics at 0x80 and 0x82-0x9F. However,
|
||||
# most of these cannot be typed using the keyboard layouts in the
|
||||
# Hebrew localized systems.
|
||||
#
|
||||
# - Right-left versions of certain ASCII punctuation, symbols and
|
||||
# digits: 0xA0-0xA5, 0xA7-0xBF, 0xFB-0xFF. See below.
|
||||
#
|
||||
# - Miscellaneous additional punctuation at 0xC1, 0xC9, 0xCA, and
|
||||
# 0xD0-0xD5. There is a variant of the Hebrew encoding in which
|
||||
# the LEFT SINGLE QUOTATION MARK at 0xD4 is replaced by FIGURE
|
||||
# SPACE. The glyphs for some of the other punctuation characters
|
||||
# are missing in some fonts.
|
||||
#
|
||||
# - Four obsolete characters at 0xC2-0xC5 known as canorals (not to
|
||||
# be confused with cantillation marks!). These were used for
|
||||
# manual positioning of nikud marks before System 7.1 (at which
|
||||
# point nikud positioning became automatic with WorldScript).
|
||||
#
|
||||
# 2. Directional characters and roundtrip fidelity
|
||||
#
|
||||
# The Mac OS Hebrew character set was developed around 1987. At that
|
||||
# time the bidirectional line layout algorithm used in the Mac OS
|
||||
# Hebrew system was fairly simple; it used only a few direction
|
||||
# classes (instead of the 19 now used in the Unicode bidirectional
|
||||
# algorithm). In order to permit users to handle some tricky layout
|
||||
# problems, certain punctuation, symbol, and digit characters have
|
||||
# duplicate code points, one with a left-right direction attribute and
|
||||
# the other with a right-left direction attribute.
|
||||
#
|
||||
# For example, plus sign is encoded at 0x2B with a left-right
|
||||
# attribute, and at 0xAB with a right-left attribute. However, there
|
||||
# is only one PLUS SIGN character in Unicode. This leads to some
|
||||
# interesting problems when mapping between Mac OS Hebrew and Unicode;
|
||||
# see below.
|
||||
#
|
||||
# A related problem is that even when a particular character is
|
||||
# encoded only once in Mac OS Hebrew, it may have a different
|
||||
# direction attribute than the corresponding Unicode character.
|
||||
#
|
||||
# For example, the Mac OS Hebrew character at 0xC9 is HORIZONTAL
|
||||
# ELLIPSIS with strong right-left direction. However, the Unicode
|
||||
# character HORIZONTAL ELLIPSIS has direction class neutral.
|
||||
#
|
||||
# 3. Font variants
|
||||
#
|
||||
# The table in this file gives the Unicode mappings for the standard
|
||||
# Mac OS Hebrew encoding. This encoding is supported by many of the
|
||||
# Apple fonts (including all of the fonts in the Hebrew Language Kit),
|
||||
# and is the encoding supported by the text processing utilities.
|
||||
# However, some TrueType fonts provided with the localized Hebrew
|
||||
# system implement a slightly different encoding; the difference is
|
||||
# only in one code point, 0xD4. For the standard variant, this is:
|
||||
# 0xD4 -> <RL>+0x2018 LEFT SINGLE QUOTATION MARK, right-left
|
||||
#
|
||||
# The TrueType variant is used by the following TrueType fonts from
|
||||
# the localized system: Caesarea, Carmel Book, Gilboa, Ramat Sharon,
|
||||
# and Sinai Book. For these, 0xD4 is as follows:
|
||||
# 0xD4 -> <RL>+0x2007 FIGURE SPACE, right-left
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# 1. Matching the direction of Mac OS Hebrew characters
|
||||
#
|
||||
# When Mac OS Hebrew encodes a character twice but with different
|
||||
# direction attributes for the two code points - as in the case of
|
||||
# plus sign mentioned above - we need a way to map both Mac OS Hebrew
|
||||
# code points to Unicode and back again without loss of information.
|
||||
# With the plus sign, for example, mapping one of the Mac OS Hebrew
|
||||
# characters to a code in the Unicode corporate use zone is
|
||||
# undesirable, since both of the plus sign characters are likely to
|
||||
# be used in text that is interchanged.
|
||||
#
|
||||
# The problem is solved with the use of direction override characters
|
||||
# and direction-dependent mappings. When mapping from Mac OS Hebrew
|
||||
# to Unicode, we use direction overrides as necessary to force the
|
||||
# direction of the resulting Unicode characters.
|
||||
#
|
||||
# The required direction is indicated by a direction tag in the
|
||||
# mappings. A tag of <LR> means the corresponding Unicode character
|
||||
# must have a strong left-right context, and a tag of <RL> indicates
|
||||
# a right-left context.
|
||||
#
|
||||
# For example, the mapping of 0x2B is given as <LR>+0x002B; the
|
||||
# mapping of 0xAB is given as <RL>+0x002B. If we map an isolated
|
||||
# instance of 0x2B to Unicode, it should be mapped as follows (LRO
|
||||
# indicates LEFT-RIGHT OVERRIDE, PDF indicates POP DIRECTION
|
||||
# FORMATTING):
|
||||
#
|
||||
# 0x2B -> 0x202D (LRO) + 0x002B (PLUS SIGN) + 0x202C (PDF)
|
||||
#
|
||||
# When mapping several characters in a row that require direction
|
||||
# forcing, the overrides need only be used at the beginning and end.
|
||||
# For example:
|
||||
#
|
||||
# 0x24 0x20 0x28 0x29 -> 0x202D 0x0024 0x0020 0x0028 0x0029 0x202C
|
||||
#
|
||||
# If neutral characters that require direction forcing are already
|
||||
# between strong-direction characters with matching directionality,
|
||||
# then direction overrides need not be used. Direction overrides are
|
||||
# always needed to map the right-left digits at 0xB0-0xB9.
|
||||
#
|
||||
# When mapping from Unicode to Mac OS Hebrew, the Unicode
|
||||
# bidirectional algorithm should be used to determine resolved
|
||||
# direction of the Unicode characters. The mapping from Unicode to
|
||||
# Mac OS Hebrew can then be disambiguated by the use of the resolved
|
||||
# direction:
|
||||
#
|
||||
# Unicode 0x002B -> Mac OS Hebrew 0x2B (if L) or 0xAB (if R)
|
||||
#
|
||||
# However, this also means the direction override characters should
|
||||
# be discarded when mapping from Unicode to Mac OS Hebrew (after
|
||||
# they have been used to determine resolved direction), since the
|
||||
# direction override information is carried by the code point itself.
|
||||
#
|
||||
# Even when direction overrides are not needed for roundtrip
|
||||
# fidelity, they are sometimes used when mapping Mac OS Hebrew
|
||||
# characters to Unicode in order to achieve similar text layout with
|
||||
# the resulting Unicode text. For example, the single Mac OS Hebrew
|
||||
# ellipsis character has direction class right-left, and there is no
|
||||
# left-right version. However, the Unicode HORIZONTAL ELLIPSIS
|
||||
# character has direction class neutral (which means it may end up
|
||||
# with a resolved direction of left-right if surrounded by left-right
|
||||
# characters). When mapping the Mac OS Hebrew ellipsis to Unicode, it
|
||||
# is surrounded with a direction override to help preserve proper
|
||||
# text layout. The resolved direction is not needed or used when
|
||||
# mapping the Unicode HORIZONTAL ELLIPSIS back to Mac OS Hebrew.
|
||||
#
|
||||
# 2. Use of corporate-zone Unicodes
|
||||
#
|
||||
# The goals in the mappings provided here are:
|
||||
# - Ensure roundtrip mapping from every character in the Mac OS
|
||||
# Hebrew character set to Unicode and back
|
||||
# - Use standard Unicode characters as much as possible, to
|
||||
# maximize interchangeability of the resulting Unicode text.
|
||||
# Whenever possible, avoid having content carried by private-use
|
||||
# characters.
|
||||
#
|
||||
# Some of the characters in the Mac OS Hebrew character set do not
|
||||
# correspond to distinct, single Unicode characters. To map these
|
||||
# and satisfy both goals above, we employ various strategies.
|
||||
#
|
||||
# a) If possible, use private use characters in combination with
|
||||
# standard Unicode characters to mark variants of the standard
|
||||
# Unicode character.
|
||||
#
|
||||
# Apple has defined a block of 32 corporate characters as "transcoding
|
||||
# hints." These are used in combination with standard Unicode characters
|
||||
# to force them to be treated in a special way for mapping to other
|
||||
# encodings; they have no other effect. Sixteen of these transcoding
|
||||
# hints are "grouping hints" - they indicate that the next 2-4 Unicode
|
||||
# characters should be treated as a single entity for transcoding. The
|
||||
# other sixteen transcoding hints are "variant tags" - they are like
|
||||
# combining characters, and can follow a standard Unicode (or a sequence
|
||||
# consisting of a base character and other combining characters) to
|
||||
# cause it to be treated in a special way for transcoding. These always
|
||||
# terminate a combining-character sequence.
|
||||
#
|
||||
# Two transcoding hints are used in this mapping table: a grouping hint
|
||||
# and a variant tag:
|
||||
# hint:
|
||||
# 0xF86A group next 2 characters, right-left directionality
|
||||
# 0xF87F variant tag
|
||||
#
|
||||
# In Mac OS Hebrew, 0xC0 is a ligature for lamed holam. This can also
|
||||
# be represented in Mac OS Hebrew as 0xEC+0xDD, using separate
|
||||
# characters for lamed and holam. The latter sequence is mapped to
|
||||
# Unicode as 0x05DC+0x05B9, i.e. as the sequence HEBREW LETTER LAMED +
|
||||
# HEBREW POINT HOLAM. We want to map the ligature 0xC0 using the same
|
||||
# standard Unicode characters, but for round-trip fidelity we need to
|
||||
# distinguish it from the mapping of the sequence 0xEC+0xDD. Thus for
|
||||
# 0xC0 we use a grouping hint, and map as follows:
|
||||
#
|
||||
# 0xC0 -> 0xF86A+0x05DC+0x05B9
|
||||
#
|
||||
# The variant tag is used for "qamats qatan" to mark it as an alternate
|
||||
# for HEBREW POINT QAMATS, as follows:
|
||||
#
|
||||
# 0xDE -> 0x05B8+0xF87F
|
||||
#
|
||||
# b) Otherwise, use private use characters by themselves to map Mac OS
|
||||
# Hebrew characters which have no relationship to any standard Unicode
|
||||
# character.
|
||||
#
|
||||
# The following additional corporate zone Unicode characters are used
|
||||
# for this purpose here (to map the obsolete "canorals", see above):
|
||||
#
|
||||
# 0xF89B Hebrew canoral 1
|
||||
# 0xF89C Hebrew canoral 2
|
||||
# 0xF89D Hebrew canoral 3
|
||||
# 0xF89E Hebrew canoral 4
|
||||
#
|
||||
# 3. Roundtrip considerations when mapping to decomposed Unicode
|
||||
#
|
||||
# Both Mac OS Hebrew and Unicode provide multiple ways of representing
|
||||
# certain letter-and-point combinations. For example, HEBREW LETTER
|
||||
# VAV WITH HOLAM can be represented in Unicode as the single character
|
||||
# 0xFB4B or as the sequence 0x05D5 0x05B9; similarly, it can be
|
||||
# represented in Mac OS Hebrew as 0xC7 or as the sequence 0xE5 0xDD.
|
||||
# This leads to some roundtrip problems. First note that we have the
|
||||
# following mappings without such problems:
|
||||
#
|
||||
# Mac standard decomp. of reverse map
|
||||
# OS Unicode mapping std. mapping of decomp.
|
||||
# ---- ---------------------------------- ------------- -----------
|
||||
# 0xC6 0x05BC ... POINT DAGESH OR MAPIQ 0x05BC (same) 0xC6
|
||||
# 0xE5 0x05D5 ... LETTER VAV 0x05D5 (same) 0xE5
|
||||
# 0xDD 0x05B9 ... POINT HOLAM 0x05B9 (same) 0xDD
|
||||
#
|
||||
# However, those mappings above cause roundtrip problems for the
|
||||
# following mappings if they are decomposed:
|
||||
#
|
||||
# Mac standard decomp. of reverse map
|
||||
# OS Unicode mapping std. mapping of decomp.
|
||||
# ---- ---------------------------------- ------------- -----------
|
||||
# 0xC7 0xFB4B ... LETTER VAV WITH HOLAM 0x05D5 0x05B9 0xE5 0xDD
|
||||
# 0xC8 0xFB35 ... LETTER VAV WITH DAGESH 0x05D5 0x05BC 0xE5 0xC6
|
||||
#
|
||||
# One solution is to use a grouping transcoding hint with the two
|
||||
# decompositions above to mark the decomposed sequence for special
|
||||
# treatment in transcoding. This yields the following mappings to
|
||||
# decomposed Unicode:
|
||||
#
|
||||
# Mac decomposed
|
||||
# OS Unicode mapping
|
||||
# ---- --------------------
|
||||
# 0xC7 0xF86A 0x05D5 0x05B9
|
||||
# 0xC8 0xF86A 0x05D5 0x05BC
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version b02 to version b03/c01:
|
||||
#
|
||||
# - Stop specifying left-right context for digits 0x30-0x39, since the
|
||||
# corresponding Unicodes 0x0030-0x0039 already have left-right
|
||||
# directionality.
|
||||
#
|
||||
# - Change mapping of 0x81 from 0xFB1F HEBREW LIGATURE YIDDISH YOD YOD
|
||||
# PATAH to its canonical decomposition 0x05F2+0x05B7 to improve
|
||||
# cross-platform compatibility (Windows doesn't handle 0xFB1F)
|
||||
#
|
||||
# - Interchange the mappings of 0xA8 and 0xA9 to obtain the correct
|
||||
# open/close behavior; they work differently than in Mac Arabic.
|
||||
# The old mapping was
|
||||
# 0xA8 <RL>+0x0028 # LEFT PARENTHESIS, right-left
|
||||
# 0xA9 <RL>+0x0029 # RIGHT PARENTHESIS, right-left
|
||||
# and the new mapping is
|
||||
# 0xA8 <RL>+0x0029 # RIGHT PARENTHESIS, right-left
|
||||
# 0xA9 <RL>+0x0028 # LEFT PARENTHESIS, right-left
|
||||
#
|
||||
# Changes from version n01 to version n03:
|
||||
#
|
||||
# - Change mapping for 0xC0 from single corporate character to
|
||||
# grouping hint plus standard Unicodes
|
||||
#
|
||||
# - Change mapping for 0xDE from single corporate character to
|
||||
# standard Unicode plus variant tag
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 <LR>+0x0020 # SPACE, left-right
|
||||
0x21 <LR>+0x0021 # EXCLAMATION MARK, left-right
|
||||
0x22 <LR>+0x0022 # QUOTATION MARK, left-right
|
||||
0x23 <LR>+0x0023 # NUMBER SIGN, left-right
|
||||
0x24 <LR>+0x0024 # DOLLAR SIGN, left-right
|
||||
0x25 <LR>+0x0025 # PERCENT SIGN, left-right
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 <LR>+0x0027 # APOSTROPHE, left-right
|
||||
0x28 <LR>+0x0028 # LEFT PARENTHESIS, left-right
|
||||
0x29 <LR>+0x0029 # RIGHT PARENTHESIS, left-right
|
||||
0x2A <LR>+0x002A # ASTERISK, left-right
|
||||
0x2B <LR>+0x002B # PLUS SIGN, left-right
|
||||
0x2C <LR>+0x002C # COMMA, left-right
|
||||
0x2D <LR>+0x002D # HYPHEN-MINUS, left-right
|
||||
0x2E <LR>+0x002E # FULL STOP, left-right
|
||||
0x2F <LR>+0x002F # SOLIDUS, left-right
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A <LR>+0x003A # COLON, left-right
|
||||
0x3B <LR>+0x003B # SEMICOLON, left-right
|
||||
0x3C <LR>+0x003C # LESS-THAN SIGN, left-right
|
||||
0x3D <LR>+0x003D # EQUALS SIGN, left-right
|
||||
0x3E <LR>+0x003E # GREATER-THAN SIGN, left-right
|
||||
0x3F <LR>+0x003F # QUESTION MARK, left-right
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B <LR>+0x005B # LEFT SQUARE BRACKET, left-right
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D <LR>+0x005D # RIGHT SQUARE BRACKET, left-right
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B <LR>+0x007B # LEFT CURLY BRACKET, left-right
|
||||
0x7C <LR>+0x007C # VERTICAL LINE, left-right
|
||||
0x7D <LR>+0x007D # RIGHT CURLY BRACKET, left-right
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 0x05F2+0x05B7 # HEBREW LIGATURE YIDDISH YOD YOD PATAH
|
||||
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE
|
||||
0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 <RL>+0x0020 # SPACE, right-left
|
||||
0xA1 <RL>+0x0021 # EXCLAMATION MARK, right-left
|
||||
0xA2 <RL>+0x0022 # QUOTATION MARK, right-left
|
||||
0xA3 <RL>+0x0023 # NUMBER SIGN, right-left
|
||||
0xA4 <RL>+0x0024 # DOLLAR SIGN, right-left
|
||||
0xA5 <RL>+0x0025 # PERCENT SIGN, right-left
|
||||
0xA6 0x20AA # NEW SHEQEL SIGN
|
||||
0xA7 <RL>+0x0027 # APOSTROPHE, right-left
|
||||
0xA8 <RL>+0x0029 # RIGHT PARENTHESIS, right-left # close parenthesis
|
||||
0xA9 <RL>+0x0028 # LEFT PARENTHESIS, right-left # open parenthesis
|
||||
0xAA <RL>+0x002A # ASTERISK, right-left
|
||||
0xAB <RL>+0x002B # PLUS SIGN, right-left
|
||||
0xAC <RL>+0x002C # COMMA, right-left
|
||||
0xAD <RL>+0x002D # HYPHEN-MINUS, right-left
|
||||
0xAE <RL>+0x002E # FULL STOP, right-left
|
||||
0xAF <RL>+0x002F # SOLIDUS, right-left
|
||||
0xB0 <RL>+0x0030 # DIGIT ZERO, right-left (need override)
|
||||
0xB1 <RL>+0x0031 # DIGIT ONE, right-left (need override)
|
||||
0xB2 <RL>+0x0032 # DIGIT TWO, right-left (need override)
|
||||
0xB3 <RL>+0x0033 # DIGIT THREE, right-left (need override)
|
||||
0xB4 <RL>+0x0034 # DIGIT FOUR, right-left (need override)
|
||||
0xB5 <RL>+0x0035 # DIGIT FIVE, right-left (need override)
|
||||
0xB6 <RL>+0x0036 # DIGIT SIX, right-left (need override)
|
||||
0xB7 <RL>+0x0037 # DIGIT SEVEN, right-left (need override)
|
||||
0xB8 <RL>+0x0038 # DIGIT EIGHT, right-left (need override)
|
||||
0xB9 <RL>+0x0039 # DIGIT NINE, right-left (need override)
|
||||
0xBA <RL>+0x003A # COLON, right-left
|
||||
0xBB <RL>+0x003B # SEMICOLON, right-left
|
||||
0xBC <RL>+0x003C # LESS-THAN SIGN, right-left
|
||||
0xBD <RL>+0x003D # EQUALS SIGN, right-left
|
||||
0xBE <RL>+0x003E # GREATER-THAN SIGN, right-left
|
||||
0xBF <RL>+0x003F # QUESTION MARK, right-left
|
||||
0xC0 0xF86A+0x05DC+0x05B9 # Hebrew ligature lamed holam
|
||||
0xC1 <RL>+0x201E # DOUBLE LOW-9 QUOTATION MARK, right-left
|
||||
0xC2 0xF89B # Hebrew canoral 1
|
||||
0xC3 0xF89C # Hebrew canoral 2
|
||||
0xC4 0xF89D # Hebrew canoral 3
|
||||
0xC5 0xF89E # Hebrew canoral 4
|
||||
0xC6 0x05BC # HEBREW POINT DAGESH OR MAPIQ
|
||||
0xC7 0xFB4B # HEBREW LETTER VAV WITH HOLAM
|
||||
0xC8 0xFB35 # HEBREW LETTER VAV WITH DAGESH
|
||||
0xC9 <RL>+0x2026 # HORIZONTAL ELLIPSIS, right-left
|
||||
0xCA <RL>+0x00A0 # NO-BREAK SPACE, right-left
|
||||
0xCB 0x05B8 # HEBREW POINT QAMATS
|
||||
0xCC 0x05B7 # HEBREW POINT PATAH
|
||||
0xCD 0x05B5 # HEBREW POINT TSERE
|
||||
0xCE 0x05B6 # HEBREW POINT SEGOL
|
||||
0xCF 0x05B4 # HEBREW POINT HIRIQ
|
||||
0xD0 <RL>+0x2013 # EN DASH, right-left
|
||||
0xD1 <RL>+0x2014 # EM DASH, right-left
|
||||
0xD2 <RL>+0x201C # LEFT DOUBLE QUOTATION MARK, right-left
|
||||
0xD3 <RL>+0x201D # RIGHT DOUBLE QUOTATION MARK, right-left
|
||||
0xD4 <RL>+0x2018 # LEFT SINGLE QUOTATION MARK, right-left
|
||||
0xD5 <RL>+0x2019 # RIGHT SINGLE QUOTATION MARK, right-left
|
||||
0xD6 0xFB2A # HEBREW LETTER SHIN WITH SHIN DOT
|
||||
0xD7 0xFB2B # HEBREW LETTER SHIN WITH SIN DOT
|
||||
0xD8 0x05BF # HEBREW POINT RAFE
|
||||
0xD9 0x05B0 # HEBREW POINT SHEVA
|
||||
0xDA 0x05B2 # HEBREW POINT HATAF PATAH
|
||||
0xDB 0x05B1 # HEBREW POINT HATAF SEGOL
|
||||
0xDC 0x05BB # HEBREW POINT QUBUTS
|
||||
0xDD 0x05B9 # HEBREW POINT HOLAM
|
||||
0xDE 0x05B8+0xF87F # HEBREW POINT QAMATS, alternate form "qamats qatan"
|
||||
0xDF 0x05B3 # HEBREW POINT HATAF QAMATS
|
||||
0xE0 0x05D0 # HEBREW LETTER ALEF
|
||||
0xE1 0x05D1 # HEBREW LETTER BET
|
||||
0xE2 0x05D2 # HEBREW LETTER GIMEL
|
||||
0xE3 0x05D3 # HEBREW LETTER DALET
|
||||
0xE4 0x05D4 # HEBREW LETTER HE
|
||||
0xE5 0x05D5 # HEBREW LETTER VAV
|
||||
0xE6 0x05D6 # HEBREW LETTER ZAYIN
|
||||
0xE7 0x05D7 # HEBREW LETTER HET
|
||||
0xE8 0x05D8 # HEBREW LETTER TET
|
||||
0xE9 0x05D9 # HEBREW LETTER YOD
|
||||
0xEA 0x05DA # HEBREW LETTER FINAL KAF
|
||||
0xEB 0x05DB # HEBREW LETTER KAF
|
||||
0xEC 0x05DC # HEBREW LETTER LAMED
|
||||
0xED 0x05DD # HEBREW LETTER FINAL MEM
|
||||
0xEE 0x05DE # HEBREW LETTER MEM
|
||||
0xEF 0x05DF # HEBREW LETTER FINAL NUN
|
||||
0xF0 0x05E0 # HEBREW LETTER NUN
|
||||
0xF1 0x05E1 # HEBREW LETTER SAMEKH
|
||||
0xF2 0x05E2 # HEBREW LETTER AYIN
|
||||
0xF3 0x05E3 # HEBREW LETTER FINAL PE
|
||||
0xF4 0x05E4 # HEBREW LETTER PE
|
||||
0xF5 0x05E5 # HEBREW LETTER FINAL TSADI
|
||||
0xF6 0x05E6 # HEBREW LETTER TSADI
|
||||
0xF7 0x05E7 # HEBREW LETTER QOF
|
||||
0xF8 0x05E8 # HEBREW LETTER RESH
|
||||
0xF9 0x05E9 # HEBREW LETTER SHIN
|
||||
0xFA 0x05EA # HEBREW LETTER TAV
|
||||
0xFB <RL>+0x007D # RIGHT CURLY BRACKET, right-left
|
||||
0xFC <RL>+0x005D # RIGHT SQUARE BRACKET, right-left
|
||||
0xFD <RL>+0x007B # LEFT CURLY BRACKET, right-left
|
||||
0xFE <RL>+0x005B # LEFT SQUARE BRACKET, right-left
|
||||
0xFF <RL>+0x007C # VERTICAL LINE, right-left
|
369
charmap/ICELAND.TXT
Normal file
|
@ -0,0 +1,369 @@
|
|||
#=======================================================================
|
||||
# File name: ICELAND.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Icelandic
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal
|
||||
# utom<b3>.
|
||||
# b02 1999-Sep-22 Encoding changed for Mac OS 8.5; change
|
||||
# mapping of 0xDB from CURRENCY SIGN to EURO
|
||||
# SIGN. Update contact e-mail address. Matches
|
||||
# internal utom<b2>, ufrm<b2>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n06 1998-Feb-05 Minor update to header comments, add
|
||||
# information on font variants
|
||||
# n03 1997-Dec-14 Update to match internal utom<n4>, ufrm<n16>:
|
||||
# Change standard mapping for 0xBD from U+2126
|
||||
# to its canonical decomposition, U+03A9.
|
||||
# n02 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n5>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Icelandic code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Icelandic code order.
|
||||
#
|
||||
# One of these mappings requires the use of a corporate character.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Icelandic character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Icelandic:
|
||||
# --------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# 1. General
|
||||
#
|
||||
# Mac OS Icelandic is used for Icelandic and Faroese.
|
||||
#
|
||||
# The Mac OS Icelandic encoding shares the script code smRoman
|
||||
# (0) with the standard Mac OS Roman encoding. To determine if
|
||||
# the Icelandic encoding is being used, you must also check if
|
||||
# the system region code is 21, verIceland.
|
||||
#
|
||||
# This character set is a variant of standard Mac OS Roman,
|
||||
# adding upper and lower eth, thorn, and Y acute. It has 6 code
|
||||
# point differences from standard Mac OS Roman.
|
||||
#
|
||||
# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was
|
||||
# mapped to U+00A4. In Mac OS 8.5 and later versions, code point
|
||||
# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard
|
||||
# Apple fonts are updated for Mac OS 8.5 to reflect this. There are
|
||||
# "currency sign" variants of the Mac OS Icelandic encoding that
|
||||
# still map 0xDB to U+00A4; these can be used for older fonts.
|
||||
#
|
||||
# 2. Font variants
|
||||
#
|
||||
# The table in this file gives the Unicode mappings for the standard
|
||||
# Mac OS Icelandic encoding. This encoding is supported by the
|
||||
# Icelandic versions of the fonts Chicago, Geneva, Monaco, and New
|
||||
# York, and is the encoding supported by the text processing
|
||||
# utilities. However, other TrueType fonts implement a slightly
|
||||
# different encoding; the difference is only in two code points.
|
||||
# For the standard variant, these are:
|
||||
# 0xBB -> 0x00AA FEMININE ORDINAL INDICATOR
|
||||
# 0xBC -> 0x00BA MASCULINE ORDINAL INDICATOR
|
||||
#
|
||||
# For the TrueType variant (used by the Icelandic versions of the
|
||||
# fonts Courier, Helvetica, Palatino, and Times), these are:
|
||||
# 0xBB -> 0xFB01 LATIN SMALL LIGATURE FI
|
||||
# 0xBC -> 0xFB02 LATIN SMALL LIGATURE FL
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# The following corporate zone Unicode character is used in this
|
||||
# mapping:
|
||||
#
|
||||
# 0xF8FF Apple logo
|
||||
#
|
||||
# NOTE: The graphic image associated with the Apple logo character
|
||||
# is not authorized for use without permission of Apple, and
|
||||
# unauthorized use might constitute trademark infringement.
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version n06 to version b02:
|
||||
#
|
||||
# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from
|
||||
# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC).
|
||||
#
|
||||
# Changes from version n02 to version n03:
|
||||
#
|
||||
# - Change mapping of 0xBD from U+2126 to its canonical
|
||||
# decomposition, U+03A9.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE
|
||||
0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0xA1 0x00B0 # DEGREE SIGN
|
||||
0xA2 0x00A2 # CENT SIGN
|
||||
0xA3 0x00A3 # POUND SIGN
|
||||
0xA4 0x00A7 # SECTION SIGN
|
||||
0xA5 0x2022 # BULLET
|
||||
0xA6 0x00B6 # PILCROW SIGN
|
||||
0xA7 0x00DF # LATIN SMALL LETTER SHARP S
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x00A9 # COPYRIGHT SIGN
|
||||
0xAA 0x2122 # TRADE MARK SIGN
|
||||
0xAB 0x00B4 # ACUTE ACCENT
|
||||
0xAC 0x00A8 # DIAERESIS
|
||||
0xAD 0x2260 # NOT EQUAL TO
|
||||
0xAE 0x00C6 # LATIN CAPITAL LETTER AE
|
||||
0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0xB0 0x221E # INFINITY
|
||||
0xB1 0x00B1 # PLUS-MINUS SIGN
|
||||
0xB2 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x00A5 # YEN SIGN
|
||||
0xB5 0x00B5 # MICRO SIGN
|
||||
0xB6 0x2202 # PARTIAL DIFFERENTIAL
|
||||
0xB7 0x2211 # N-ARY SUMMATION
|
||||
0xB8 0x220F # N-ARY PRODUCT
|
||||
0xB9 0x03C0 # GREEK SMALL LETTER PI
|
||||
0xBA 0x222B # INTEGRAL
|
||||
0xBB 0x00AA # FEMININE ORDINAL INDICATOR
|
||||
0xBC 0x00BA # MASCULINE ORDINAL INDICATOR
|
||||
0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA
|
||||
0xBE 0x00E6 # LATIN SMALL LETTER AE
|
||||
0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE
|
||||
0xC0 0x00BF # INVERTED QUESTION MARK
|
||||
0xC1 0x00A1 # INVERTED EXCLAMATION MARK
|
||||
0xC2 0x00AC # NOT SIGN
|
||||
0xC3 0x221A # SQUARE ROOT
|
||||
0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK
|
||||
0xC5 0x2248 # ALMOST EQUAL TO
|
||||
0xC6 0x2206 # INCREMENT
|
||||
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0xCE 0x0152 # LATIN CAPITAL LIGATURE OE
|
||||
0xCF 0x0153 # LATIN SMALL LIGATURE OE
|
||||
0xD0 0x2013 # EN DASH
|
||||
0xD1 0x2014 # EM DASH
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x00F7 # DIVISION SIGN
|
||||
0xD7 0x25CA # LOZENGE
|
||||
0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0xDA 0x2044 # FRACTION SLASH
|
||||
0xDB 0x20AC # EURO SIGN
|
||||
0xDC 0x00D0 # LATIN CAPITAL LETTER ETH
|
||||
0xDD 0x00F0 # LATIN SMALL LETTER ETH
|
||||
0xDE 0x00DE # LATIN CAPITAL LETTER THORN
|
||||
0xDF 0x00FE # LATIN SMALL LETTER THORN
|
||||
0xE0 0x00FD # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0xE1 0x00B7 # MIDDLE DOT
|
||||
0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK
|
||||
0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK
|
||||
0xE4 0x2030 # PER MILLE SIGN
|
||||
0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0xF0 0xF8FF # Apple logo
|
||||
0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I
|
||||
0xF6 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0xF7 0x02DC # SMALL TILDE
|
||||
0xF8 0x00AF # MACRON
|
||||
0xF9 0x02D8 # BREVE
|
||||
0xFA 0x02D9 # DOT ABOVE
|
||||
0xFB 0x02DA # RING ABOVE
|
||||
0xFC 0x00B8 # CEDILLA
|
||||
0xFD 0x02DD # DOUBLE ACUTE ACCENT
|
||||
0xFE 0x02DB # OGONEK
|
||||
0xFF 0x02C7 # CARON
|
322
charmap/INUIT.TXT
Normal file
|
@ -0,0 +1,322 @@
|
|||
#=======================================================================
|
||||
# File name: INUIT.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Inuit
|
||||
# character set to Unicode 3.0 and later
|
||||
#
|
||||
# Contacts: charsets@apple.com, everson@evertype.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c01 2005-Apr-01 First posted version. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Inuit code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Inuit code order.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Inuit character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Inuit (partly from Michael Everson):
|
||||
# ----------------------------------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# This character set was developed by Michael Everson of Everson
|
||||
# Typography (everson@evertype.com) and was used for the Inuktitut
|
||||
# localizations of Mac OS, as well as for the Inuktitut utilities
|
||||
# package from Everson Typography. Note that while Apple authorized
|
||||
# the Inuktitut localization mentioned above, it was not shipped with
|
||||
# Apple hardware, and was not otherwise supported by Apple. Fonts
|
||||
# conforming to the Mac OS Inuit character set are available from
|
||||
# Everson Typography (http://www.evertype.com/software/apple/).
|
||||
# Information about the use of this character set is available at
|
||||
# http://www.evertype.com/standards/iu/.
|
||||
#
|
||||
# The Mac OS Inuit character set shares the script code smEthiopic
|
||||
# (28) with the Ethiopic encoding. To determine if the Inuktitut
|
||||
# encoding is being used, you must also check if the system region
|
||||
# code is 78, verNunavut.
|
||||
#
|
||||
# The Mac OS Inuit character set includes the full syllabic letter
|
||||
# repertoire required for Inuktitut; it is a subset of the Unified
|
||||
# Canadian Aboriginal Syllabics set encoded in Unicode. The encoding
|
||||
# is InuitSCII, designed by Doug Hitch for the Government of the
|
||||
# Northwest Territories.
|
||||
#
|
||||
# The Mac OS Inuit character set also includes a number of characters
|
||||
# that were needed for the classic Mac OS user interface and
|
||||
# localization (e.g. ellipsis, bullet, copyright sign). All of the
|
||||
# characters in Mac OS Inuit that are also in the Mac OS Roman
|
||||
# encoding are at the same code point in both; this improves
|
||||
# application compatibility.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x1403 # CANADIAN SYLLABICS I
|
||||
0x81 0x1404 # CANADIAN SYLLABICS II
|
||||
0x82 0x1405 # CANADIAN SYLLABICS O
|
||||
0x83 0x1406 # CANADIAN SYLLABICS OO
|
||||
0x84 0x140A # CANADIAN SYLLABICS A
|
||||
0x85 0x140B # CANADIAN SYLLABICS AA
|
||||
0x86 0x1431 # CANADIAN SYLLABICS PI
|
||||
0x87 0x1432 # CANADIAN SYLLABICS PII
|
||||
0x88 0x1433 # CANADIAN SYLLABICS PO
|
||||
0x89 0x1434 # CANADIAN SYLLABICS POO
|
||||
0x8A 0x1438 # CANADIAN SYLLABICS PA
|
||||
0x8B 0x1439 # CANADIAN SYLLABICS PAA
|
||||
0x8C 0x1449 # CANADIAN SYLLABICS P
|
||||
0x8D 0x144E # CANADIAN SYLLABICS TI
|
||||
0x8E 0x144F # CANADIAN SYLLABICS TII
|
||||
0x8F 0x1450 # CANADIAN SYLLABICS TO
|
||||
0x90 0x1451 # CANADIAN SYLLABICS TOO
|
||||
0x91 0x1455 # CANADIAN SYLLABICS TA
|
||||
0x92 0x1456 # CANADIAN SYLLABICS TAA
|
||||
0x93 0x1466 # CANADIAN SYLLABICS T
|
||||
0x94 0x146D # CANADIAN SYLLABICS KI
|
||||
0x95 0x146E # CANADIAN SYLLABICS KII
|
||||
0x96 0x146F # CANADIAN SYLLABICS KO
|
||||
0x97 0x1470 # CANADIAN SYLLABICS KOO
|
||||
0x98 0x1472 # CANADIAN SYLLABICS KA
|
||||
0x99 0x1473 # CANADIAN SYLLABICS KAA
|
||||
0x9A 0x1483 # CANADIAN SYLLABICS K
|
||||
0x9B 0x148B # CANADIAN SYLLABICS CI
|
||||
0x9C 0x148C # CANADIAN SYLLABICS CII
|
||||
0x9D 0x148D # CANADIAN SYLLABICS CO
|
||||
0x9E 0x148E # CANADIAN SYLLABICS COO
|
||||
0x9F 0x1490 # CANADIAN SYLLABICS CA
|
||||
0xA0 0x1491 # CANADIAN SYLLABICS CAA
|
||||
0xA1 0x00B0 # DEGREE SIGN
|
||||
0xA2 0x14A1 # CANADIAN SYLLABICS C
|
||||
0xA3 0x14A5 # CANADIAN SYLLABICS MI
|
||||
0xA4 0x14A6 # CANADIAN SYLLABICS MII
|
||||
0xA5 0x2022 # BULLET
|
||||
0xA6 0x00B6 # PILCROW SIGN
|
||||
0xA7 0x14A7 # CANADIAN SYLLABICS MO
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x00A9 # COPYRIGHT SIGN
|
||||
0xAA 0x2122 # TRADE MARK SIGN
|
||||
0xAB 0x14A8 # CANADIAN SYLLABICS MOO
|
||||
0xAC 0x14AA # CANADIAN SYLLABICS MA
|
||||
0xAD 0x14AB # CANADIAN SYLLABICS MAA
|
||||
0xAE 0x14BB # CANADIAN SYLLABICS M
|
||||
0xAF 0x14C2 # CANADIAN SYLLABICS NI
|
||||
0xB0 0x14C3 # CANADIAN SYLLABICS NII
|
||||
0xB1 0x14C4 # CANADIAN SYLLABICS NO
|
||||
0xB2 0x14C5 # CANADIAN SYLLABICS NOO
|
||||
0xB3 0x14C7 # CANADIAN SYLLABICS NA
|
||||
0xB4 0x14C8 # CANADIAN SYLLABICS NAA
|
||||
0xB5 0x14D0 # CANADIAN SYLLABICS N
|
||||
0xB6 0x14EF # CANADIAN SYLLABICS SI
|
||||
0xB7 0x14F0 # CANADIAN SYLLABICS SII
|
||||
0xB8 0x14F1 # CANADIAN SYLLABICS SO
|
||||
0xB9 0x14F2 # CANADIAN SYLLABICS SOO
|
||||
0xBA 0x14F4 # CANADIAN SYLLABICS SA
|
||||
0xBB 0x14F5 # CANADIAN SYLLABICS SAA
|
||||
0xBC 0x1505 # CANADIAN SYLLABICS S
|
||||
0xBD 0x14D5 # CANADIAN SYLLABICS LI
|
||||
0xBE 0x14D6 # CANADIAN SYLLABICS LII
|
||||
0xBF 0x14D7 # CANADIAN SYLLABICS LO
|
||||
0xC0 0x14D8 # CANADIAN SYLLABICS LOO
|
||||
0xC1 0x14DA # CANADIAN SYLLABICS LA
|
||||
0xC2 0x14DB # CANADIAN SYLLABICS LAA
|
||||
0xC3 0x14EA # CANADIAN SYLLABICS L
|
||||
0xC4 0x1528 # CANADIAN SYLLABICS YI
|
||||
0xC5 0x1529 # CANADIAN SYLLABICS YII
|
||||
0xC6 0x152A # CANADIAN SYLLABICS YO
|
||||
0xC7 0x152B # CANADIAN SYLLABICS YOO
|
||||
0xC8 0x152D # CANADIAN SYLLABICS YA
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x152E # CANADIAN SYLLABICS YAA
|
||||
0xCC 0x153E # CANADIAN SYLLABICS Y
|
||||
0xCD 0x1555 # CANADIAN SYLLABICS FI
|
||||
0xCE 0x1556 # CANADIAN SYLLABICS FII
|
||||
0xCF 0x1557 # CANADIAN SYLLABICS FO
|
||||
0xD0 0x2013 # EN DASH
|
||||
0xD1 0x2014 # EM DASH
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x1558 # CANADIAN SYLLABICS FOO
|
||||
0xD7 0x1559 # CANADIAN SYLLABICS FA
|
||||
0xD8 0x155A # CANADIAN SYLLABICS FAA
|
||||
0xD9 0x155D # CANADIAN SYLLABICS F
|
||||
0xDA 0x1546 # CANADIAN SYLLABICS RI
|
||||
0xDB 0x1547 # CANADIAN SYLLABICS RII
|
||||
0xDC 0x1548 # CANADIAN SYLLABICS RO
|
||||
0xDD 0x1549 # CANADIAN SYLLABICS ROO
|
||||
0xDE 0x154B # CANADIAN SYLLABICS RA
|
||||
0xDF 0x154C # CANADIAN SYLLABICS RAA
|
||||
0xE0 0x1550 # CANADIAN SYLLABICS R
|
||||
0xE1 0x157F # CANADIAN SYLLABICS QI
|
||||
0xE2 0x1580 # CANADIAN SYLLABICS QII
|
||||
0xE3 0x1581 # CANADIAN SYLLABICS QO
|
||||
0xE4 0x1582 # CANADIAN SYLLABICS QOO
|
||||
0xE5 0x1583 # CANADIAN SYLLABICS QA
|
||||
0xE6 0x1584 # CANADIAN SYLLABICS QAA
|
||||
0xE7 0x1585 # CANADIAN SYLLABICS Q
|
||||
0xE8 0x158F # CANADIAN SYLLABICS NGI
|
||||
0xE9 0x1590 # CANADIAN SYLLABICS NGII
|
||||
0xEA 0x1591 # CANADIAN SYLLABICS NGO
|
||||
0xEB 0x1592 # CANADIAN SYLLABICS NGOO
|
||||
0xEC 0x1593 # CANADIAN SYLLABICS NGA
|
||||
0xED 0x1594 # CANADIAN SYLLABICS NGAA
|
||||
0xEE 0x1595 # CANADIAN SYLLABICS NG
|
||||
0xEF 0x1671 # CANADIAN SYLLABICS NNGI
|
||||
0xF0 0x1672 # CANADIAN SYLLABICS NNGII
|
||||
0xF1 0x1673 # CANADIAN SYLLABICS NNGO
|
||||
0xF2 0x1674 # CANADIAN SYLLABICS NNGOO
|
||||
0xF3 0x1675 # CANADIAN SYLLABICS NNGA
|
||||
0xF4 0x1676 # CANADIAN SYLLABICS NNGAA
|
||||
0xF5 0x1596 # CANADIAN SYLLABICS NNG
|
||||
0xF6 0x15A0 # CANADIAN SYLLABICS LHI
|
||||
0xF7 0x15A1 # CANADIAN SYLLABICS LHII
|
||||
0xF8 0x15A2 # CANADIAN SYLLABICS LHO
|
||||
0xF9 0x15A3 # CANADIAN SYLLABICS LHOO
|
||||
0xFA 0x15A4 # CANADIAN SYLLABICS LHA
|
||||
0xFB 0x15A5 # CANADIAN SYLLABICS LHAA
|
||||
0xFC 0x15A6 # CANADIAN SYLLABICS LH
|
||||
0xFD 0x157C # CANADIAN SYLLABICS NUNAVUT H
|
||||
0xFE 0x0141 # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0xFF 0x0142 # LATIN SMALL LETTER L WITH STROKE
|
7728
charmap/JAPANESE.TXT
Normal file
234
charmap/KEYBOARD.TXT
Normal file
|
@ -0,0 +1,234 @@
|
|||
#=======================================================================
|
||||
# File name: KEYBOARD.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Keyboard
|
||||
# character set to Unicode 4.0 and later.
|
||||
#
|
||||
# Copyright: (c) 2001-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Change mappings for 0x09, 0x0F, 0x8C; add
|
||||
# Mac OS X-only mappings for 0x8D-0x8F.
|
||||
# Update header comments, including
|
||||
# clarification of Mac OS X usage. Matches
|
||||
# internal xml <c1.2> and Text Encoding
|
||||
# Converter 2.0.
|
||||
# b1,c1 2002-Dec-19 First version. Matches internal utom<b6>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Keyboard code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode or Unicode sequence
|
||||
# (in hex as 0xNNNN or 0xNNNN+0xNNNN, etc.).
|
||||
# Column #3 is a comment containing the Unicode name.
|
||||
# In some cases an additional comment follows the Unicode name.
|
||||
#
|
||||
# The entries are in Mac OS Keyboard code order.
|
||||
#
|
||||
# Some of these mappings require the use of corporate characters.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# The Mac OS Keyboard character set uses the ranges normally set aside
|
||||
# for controls, so those ranges are present in this table.
|
||||
#
|
||||
# Notes on Mac OS Keyboard:
|
||||
# -------------------------
|
||||
#
|
||||
# This is the encoding for the legacy font named ".Keyboard". Before
|
||||
# Mac OS X, this font was used by the user-interface system to display
|
||||
# glyphs for special keys on the keyboard. In Mac OS X, that font is
|
||||
# not present and this mapping is not associated with a font; it is
|
||||
# only used as a way to map from a set of Menu Manager constants to
|
||||
# associated Unicode sequences. As such, new mappings added for Mac OS
|
||||
# X only may be one-way mappings: From the Keyboard glyph "encoding"
|
||||
# to Unicode, but not back.
|
||||
#
|
||||
# The Mac OS Keyboard encoding shares the script code smRoman
|
||||
# (0) with the Mac OS Roman encoding. To determine if the Keyboard
|
||||
# encoding is being used in Mac OS 8 or Mac OS 9, you must check if
|
||||
# the font name is ".Keyboard".
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# The goals in the mappings provided here are:
|
||||
# - For mappings used in Mac OS 8 and Mac OS 9, ensure roundtrip
|
||||
# mapping from every character in the Mac OS Keyboard character set
|
||||
# to Unicode and back. This consideration does not apply to mappings
|
||||
# added for Mac OS X only (noted below).
|
||||
# - Use standard Unicode characters as much as possible, to
|
||||
# maximize interchangeability of the resulting Unicode text.
|
||||
# Whenever possible, avoid having content carried by private-use
|
||||
# characters.
|
||||
#
|
||||
# Some of the characters in the Mac OS Keyboard character set do not
|
||||
# correspond to distinct, single Unicode characters. To map these
|
||||
# and satisfy both goals above, we employ various strategies.
|
||||
#
|
||||
# a) If possible, use private use characters in combination with
|
||||
# standard Unicode characters to mark variants of the standard
|
||||
# Unicode character.
|
||||
#
|
||||
# Apple has defined a block of 32 corporate characters as "transcoding
|
||||
# hints." These are used in combination with standard Unicode
|
||||
# characters to force them to be treated in a special way for mapping
|
||||
# to other encodings; they have no other effect. Sixteen of these
|
||||
# transcoding hints are "grouping hints" - they indicate that the next
|
||||
# 2-4 Unicode characters should be treated as a single entity for
|
||||
# transcoding. The other sixteen transcoding hints are "variant tags"
|
||||
# - they are like combining characters, and can follow a standard
|
||||
# Unicode (or a sequence consisting of a base character and other
|
||||
# combining characters) to cause it to be treated in a special way for
|
||||
# transcoding. These always terminate a combining-character sequence.
|
||||
#
|
||||
# The transcoding hints used in this mapping table are two
|
||||
# grouping tags, 0xF860-61, and one variant tag, 0xF87F. Since these
|
||||
# are combined with standard Unicode characters, some characters in
|
||||
# the Mac OS Keyboard character set map to a sequence of two to four
|
||||
# Unicodes instead of a single Unicode character.
|
||||
#
|
||||
# For example, the Mac OS Keyboard character at 0x6F, representing the
|
||||
# F1 key, is mapped to Unicode using the grouping tag F860 (group next
|
||||
# two) followed by U+0046 (LATIN CAPITAL LETTER F) and U+0031 (DIGIT
|
||||
# ONE).
|
||||
#
|
||||
# b) Otherwise, use private use characters by themselves to map Mac OS
|
||||
# Keyboard characters which have no relationship to any standard
|
||||
# Unicode character.
|
||||
#
|
||||
# The following additional corporate zone Unicode characters are
|
||||
# used for this purpose here:
|
||||
#
|
||||
# 0xF802 Lower left pencil
|
||||
# 0xF803 Contextual menu key symbol
|
||||
# 0xF8FF Apple logo
|
||||
#
|
||||
# NOTE: The graphic image associated with the Apple logo character
|
||||
# is not authorized for use without permission of Apple, and
|
||||
# unauthorized use might constitute trademark infringement.
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version c01 to version c02:
|
||||
#
|
||||
# - Mapping for 0x09 changed from 0x0009 (wrong) to 0x2423
|
||||
# - Mapping for 0x0F changed from 0x270E (wrong) to 0xF802
|
||||
# - Mapping for 0x8C changed from 0xF804 to 0x23CF (Unicode 4.0)
|
||||
# - Add Mac OS X-only mappings for 0x8D-0x8F
|
||||
#
|
||||
##################
|
||||
|
||||
0x00 0x0000 # control - NUL
|
||||
#
|
||||
0x02 0x21E5 # RIGHTWARDS ARROW TO BAR # Tab right (left-to-right text)
|
||||
0x03 0x21E4 # LEFTWARDS ARROW TO BAR # Tab left (right-to-left text)
|
||||
0x04 0x2324 # UP ARROWHEAD BETWEEN TWO HORIZONTAL BARS # Enter key
|
||||
0x05 0x21E7 # UPWARDS WHITE ARROW # Shift key
|
||||
0x06 0x2303 # UP ARROWHEAD # Control key
|
||||
0x07 0x2325 # OPTION KEY # Option key
|
||||
0x08 0x0008 # control - BS
|
||||
0x09 0x2423 # OPEN BOX # Space key (Mac OS X mapping, duplicates mapping for 0x61, hence no round-trip)
|
||||
0x0A 0x2326 # ERASE TO THE RIGHT # Delete right (right-to-left text)
|
||||
0x0B 0x21A9 # LEFTWARDS ARROW WITH HOOK # Return key (left-to-right text)
|
||||
0x0C 0x21AA # RIGHTWARDS ARROW WITH HOOK # Return key (right-to-left text)
|
||||
0x0D 0x000D # control - CR
|
||||
#
|
||||
0x0F 0xF802 # lower left pencil
|
||||
0x10 0x21E3 # DOWNWARDS DASHED ARROW
|
||||
0x11 0x2318 # PLACE OF INTEREST SIGN # Command key
|
||||
0x12 0x2713 # CHECK MARK
|
||||
0x13 0x25C6 # BLACK DIAMOND
|
||||
0x14 0xF8FF # Apple logo
|
||||
#
|
||||
0x17 0x232B # ERASE TO THE LEFT # Delete left (left-to-right text)
|
||||
0x18 0x21E0 # LEFTWARDS DASHED ARROW
|
||||
0x19 0x21E1 # UPWARDS DASHED ARROW
|
||||
0x1A 0x21E2 # RIGHTWARDS DASHED ARROW
|
||||
0x1B 0x238B # BROKEN CIRCLE WITH NORTHWEST ARROW # Escape key; for Unicode 3.0 and later
|
||||
0x1C 0x2327 # X IN A RECTANGLE BOX # Clear key
|
||||
#
|
||||
0x20 0x0020 # SPACE
|
||||
#
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
#
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
#
|
||||
0x61 0x2423 # OPEN BOX # Blank key
|
||||
0x62 0x21DE # UPWARDS ARROW WITH DOUBLE STROKE # Page up key
|
||||
0x63 0x21EA # UPWARDS WHITE ARROW FROM BAR # Caps lock key
|
||||
0x64 0x2190 # LEFTWARDS ARROW
|
||||
0x65 0x2192 # RIGHTWARDS ARROW
|
||||
0x66 0x2196 # NORTH WEST ARROW
|
||||
0x67 0x003F+0x20DD # QUESTION MARK + COMBINING ENCLOSING CIRCLE # Help key
|
||||
0x68 0x2191 # UPWARDS ARROW
|
||||
0x69 0x2198 # SOUTH EAST ARROW
|
||||
0x6A 0x2193 # DOWNWARDS ARROW
|
||||
0x6B 0x21DF # DOWNWARDS ARROW WITH DOUBLE STROKE # Page down key
|
||||
0x6C 0xF8FF+0xF87F # Apple logo, outline
|
||||
0x6D 0xF803 # Contextual menu key symbol
|
||||
0x6E 0x2758+0x20DD # LIGHT VERTICAL BAR + COMBINING ENCLOSING CIRCLE # Power key
|
||||
0x6F 0xF860+0x0046+0x0031 # group_2 + F + 1 # F1 key
|
||||
0x70 0xF860+0x0046+0x0032 # group_2 + F + 2 # F2 key
|
||||
0x71 0xF860+0x0046+0x0033 # group_2 + F + 3 # F3 key
|
||||
0x72 0xF860+0x0046+0x0034 # group_2 + F + 4 # F4 key
|
||||
0x73 0xF860+0x0046+0x0035 # group_2 + F + 5 # F5 key
|
||||
0x74 0xF860+0x0046+0x0036 # group_2 + F + 6 # F6 key
|
||||
0x75 0xF860+0x0046+0x0037 # group_2 + F + 7 # F7 key
|
||||
0x76 0xF860+0x0046+0x0038 # group_2 + F + 8 # F8 key
|
||||
0x77 0xF860+0x0046+0x0039 # group_2 + F + 9 # F9 key
|
||||
0x78 0xF861+0x0046+0x0031+0x0030 # group_3 + F + 1 + 0 # F10 key
|
||||
0x79 0xF861+0x0046+0x0031+0x0031 # group_3 + F + 1 + 1 # F11 key
|
||||
0x7A 0xF861+0x0046+0x0031+0x0032 # group_3 + F + 1 + 2 # F12 key
|
||||
#
|
||||
0x87 0xF861+0x0046+0x0031+0x0033 # group_3 + F + 1 + 3 # F13 key
|
||||
0x88 0xF861+0x0046+0x0031+0x0034 # group_3 + F + 1 + 4 # F14 key
|
||||
0x89 0xF861+0x0046+0x0031+0x0035 # group_3 + F + 1 + 5 # F15 key
|
||||
0x8A 0x2388 # HELM SYMBOL # Control key (ISO standard), Unicode 3.0 and later
|
||||
0x8B 0x2387 # ALTERNATIVE KEY SYMBOL # Unicode 3.0 and later
|
||||
0x8C 0x23CF # EJECT SYMBOL # Unicode 4.0 and later, Mac OS X only
|
||||
0x8D 0x82F1+0x6570 # Japanese "eisu" key symbol # Mac OS X only
|
||||
0x8E 0x304B+0x306A # Japanese "kana" key symbol # Mac OS X only
|
||||
0x8F 0xF861+0x0046+0x0031+0x0036 # group_3 + F + 1 + 6 # F16 key, Mac OS X only
|
||||
#
|
9942
charmap/KOREAN.TXT
Normal file
370
charmap/ROMAN.TXT
Normal file
|
@ -0,0 +1,370 @@
|
|||
#=======================================================================
|
||||
# File name: ROMAN.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Roman
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b4,c1 2002-Dec-19 Update URLs, notes. Matches internal
|
||||
# utom<b5>.
|
||||
# b03 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b4>, ufrm<b3>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; change
|
||||
# mapping of 0xDB from CURRENCY SIGN to
|
||||
# EURO SIGN. Matches internal utom<b3>,
|
||||
# ufrm<b3>.
|
||||
# n08 1998-Feb-05 Minor update to header comments
|
||||
# n06 1997-Dec-14 Add warning about future changes to 0xDB
|
||||
# from CURRENCY SIGN to EURO SIGN. Clarify
|
||||
# some header information
|
||||
# n04 1997-Dec-01 Update to match internal utom<n3>, ufrm<n22>:
|
||||
# Change standard mapping for 0xBD from U+2126
|
||||
# to its canonical decomposition, U+03A9.
|
||||
# n03 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n9>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Roman code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Roman code order.
|
||||
#
|
||||
# One of these mappings requires the use of a corporate character.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Roman character set uses the standard control characters at
|
||||
# 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Roman:
|
||||
# ----------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported directly in programming
|
||||
# interfaces for QuickDraw Text, the Script Manager, and related
|
||||
# Text Utilities. For other purposes it is supported via transcoding
|
||||
# to and from Unicode.
|
||||
#
|
||||
# This character set is used for at least the following Mac OS
|
||||
# localizations: U.S., British, Canadian French, French, Swiss
|
||||
# French, German, Swiss German, Italian, Swiss Italian, Dutch,
|
||||
# Swedish, Norwegian, Danish, Finnish, Spanish, Catalan,
|
||||
# Portuguese, Brazilian, and the default International system.
|
||||
#
|
||||
# Variants of Mac OS Roman are used for Croatian, Icelandic,
|
||||
# Turkish, Romanian, and other encodings. Separate mapping tables
|
||||
# are available for these encodings.
|
||||
#
|
||||
# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was
|
||||
# mapped to U+00A4. In Mac OS 8.5 and later versions, code point
|
||||
# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard
|
||||
# Apple fonts are updated for Mac OS 8.5 to reflect this. There is
|
||||
# a "currency sign" variant of the Mac OS Roman encoding that still
|
||||
# maps 0xDB to U+00A4; this can be used for older fonts.
|
||||
#
|
||||
# Before Mac OS 8.5, the ROM bitmap versions of the fonts Chicago,
|
||||
# New York, Geneva, and Monaco did not implement the full Mac OS
|
||||
# Roman character set; they only supported character codes up to
|
||||
# 0xD8. The TrueType versions of these fonts have always implemented
|
||||
# the full character set, as with the bitmap and TrueType versions
|
||||
# of the other standard Roman fonts.
|
||||
#
|
||||
# In all Mac OS encodings, fonts such as Chicago which are used
|
||||
# as "system" fonts (for menus, dialogs, etc.) have four glyphs
|
||||
# at code points 0x11-0x14 for transient use by the Menu Manager.
|
||||
# These glyphs are not intended as characters for use in normal
|
||||
# text, and the associated code points are not generally
|
||||
# interpreted as associated with these glyphs; they are usually
|
||||
# interpreted (if at all) as the control codes DC1-DC4.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# The following corporate zone Unicode character is used in this
|
||||
# mapping:
|
||||
#
|
||||
# 0xF8FF Apple logo
|
||||
#
|
||||
# NOTE: The graphic image associated with the Apple logo character
|
||||
# is not authorized for use without permission of Apple, and
|
||||
# unauthorized use might constitute trademark infringement.
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version n08 to version b02:
|
||||
#
|
||||
# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from
|
||||
# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC).
|
||||
#
|
||||
# Changes from version n03 to version n04:
|
||||
#
|
||||
# - Change mapping of 0xBD from U+2126 to its canonical
|
||||
# decomposition, U+03A9.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE
|
||||
0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 0x2020 # DAGGER
|
||||
0xA1 0x00B0 # DEGREE SIGN
|
||||
0xA2 0x00A2 # CENT SIGN
|
||||
0xA3 0x00A3 # POUND SIGN
|
||||
0xA4 0x00A7 # SECTION SIGN
|
||||
0xA5 0x2022 # BULLET
|
||||
0xA6 0x00B6 # PILCROW SIGN
|
||||
0xA7 0x00DF # LATIN SMALL LETTER SHARP S
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x00A9 # COPYRIGHT SIGN
|
||||
0xAA 0x2122 # TRADE MARK SIGN
|
||||
0xAB 0x00B4 # ACUTE ACCENT
|
||||
0xAC 0x00A8 # DIAERESIS
|
||||
0xAD 0x2260 # NOT EQUAL TO
|
||||
0xAE 0x00C6 # LATIN CAPITAL LETTER AE
|
||||
0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0xB0 0x221E # INFINITY
|
||||
0xB1 0x00B1 # PLUS-MINUS SIGN
|
||||
0xB2 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x00A5 # YEN SIGN
|
||||
0xB5 0x00B5 # MICRO SIGN
|
||||
0xB6 0x2202 # PARTIAL DIFFERENTIAL
|
||||
0xB7 0x2211 # N-ARY SUMMATION
|
||||
0xB8 0x220F # N-ARY PRODUCT
|
||||
0xB9 0x03C0 # GREEK SMALL LETTER PI
|
||||
0xBA 0x222B # INTEGRAL
|
||||
0xBB 0x00AA # FEMININE ORDINAL INDICATOR
|
||||
0xBC 0x00BA # MASCULINE ORDINAL INDICATOR
|
||||
0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA
|
||||
0xBE 0x00E6 # LATIN SMALL LETTER AE
|
||||
0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE
|
||||
0xC0 0x00BF # INVERTED QUESTION MARK
|
||||
0xC1 0x00A1 # INVERTED EXCLAMATION MARK
|
||||
0xC2 0x00AC # NOT SIGN
|
||||
0xC3 0x221A # SQUARE ROOT
|
||||
0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK
|
||||
0xC5 0x2248 # ALMOST EQUAL TO
|
||||
0xC6 0x2206 # INCREMENT
|
||||
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0xCE 0x0152 # LATIN CAPITAL LIGATURE OE
|
||||
0xCF 0x0153 # LATIN SMALL LIGATURE OE
|
||||
0xD0 0x2013 # EN DASH
|
||||
0xD1 0x2014 # EM DASH
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x00F7 # DIVISION SIGN
|
||||
0xD7 0x25CA # LOZENGE
|
||||
0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0xDA 0x2044 # FRACTION SLASH
|
||||
0xDB 0x20AC # EURO SIGN
|
||||
0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0xDE 0xFB01 # LATIN SMALL LIGATURE FI
|
||||
0xDF 0xFB02 # LATIN SMALL LIGATURE FL
|
||||
0xE0 0x2021 # DOUBLE DAGGER
|
||||
0xE1 0x00B7 # MIDDLE DOT
|
||||
0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK
|
||||
0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK
|
||||
0xE4 0x2030 # PER MILLE SIGN
|
||||
0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0xF0 0xF8FF # Apple logo
|
||||
0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I
|
||||
0xF6 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0xF7 0x02DC # SMALL TILDE
|
||||
0xF8 0x00AF # MACRON
|
||||
0xF9 0x02D8 # BREVE
|
||||
0xFA 0x02D9 # DOT ABOVE
|
||||
0xFB 0x02DA # RING ABOVE
|
||||
0xFC 0x00B8 # CEDILLA
|
||||
0xFD 0x02DD # DOUBLE ACUTE ACCENT
|
||||
0xFE 0x02DB # OGONEK
|
||||
0xFF 0x02C7 # CARON
|
365
charmap/ROMANIAN.TXT
Normal file
|
@ -0,0 +1,365 @@
|
|||
#=======================================================================
|
||||
# File name: ROMANIAN.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Romanian
|
||||
# character set to Unicode 3.0 and later.
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.2> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update mappings for 0xAF, 0xBF, 0xDE, 0xDF
|
||||
# to use new composed characters added in
|
||||
# Unicode 3.0. Update URLs, notes. Matches
|
||||
# internal utom<b3>.
|
||||
# b02 1999-Sep-22 Encoding changed for Mac OS 8.5; change
|
||||
# mapping of 0xDB from CURRENCY SIGN to EURO
|
||||
# SIGN. Update contact e-mail address. Matches
|
||||
# internal utom<b2>, ufrm<b2>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n05 1998-Feb-05 Minor update to header comments
|
||||
# n03 1997-Dec-14 Update to match internal utom<n5>, ufrm<n16>:
|
||||
# Change standard mapping for 0xBD from U+2126
|
||||
# to its canonical decomposition, U+03A9.
|
||||
# Change mapping of 0xAF,0xBF,0xDE,0xDF from
|
||||
# composed S/T WITH CEDILLA to S/T with
|
||||
# COMBINING COMMA BELOW (to match our
|
||||
# decomposition mappings).
|
||||
# n02 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n4>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Romanian code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Romanian code order.
|
||||
#
|
||||
# One of these mappings requires the use of a corporate character.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Romanian character set uses the standard control characters at
|
||||
# 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Romanian:
|
||||
# -------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# Mac OS Romanian is used only for Romanian.
|
||||
#
|
||||
# The Mac OS Romanian encoding shares the script code smRoman
|
||||
# (0) with the standard Mac OS Roman encoding. To determine if
|
||||
# the Romanian encoding is being used, you must also check if the
|
||||
# system region code is 39, verRomania.
|
||||
#
|
||||
# This character set is a variant of standard Mac OS Roman, adding
|
||||
# upper and lower A breve, S comma below, and T comma below. It
|
||||
# has 6 code point differences from standard Mac OS Roman.
|
||||
#
|
||||
# Before Mac OS 8.5, code point 0xDB was CURRENCY SIGN, and was
|
||||
# mapped to U+00A4. In Mac OS 8.5 and later versions, code point
|
||||
# 0xDB is changed to EURO SIGN and maps to U+20AC; the standard
|
||||
# Apple fonts are updated for Mac OS 8.5 to reflect this. There is
|
||||
# a "currency sign" variant of the Mac OS Romanian encoding that
|
||||
# still maps 0xDB to U+00A4; this can be used for older fonts.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# The following corporate zone Unicode character is used in this
|
||||
# mapping:
|
||||
#
|
||||
# 0xF8FF Apple logo
|
||||
#
|
||||
# NOTE: The graphic image associated with the Apple logo character
|
||||
# is not authorized for use without permission of Apple, and
|
||||
# unauthorized use might constitute trademark infringement.
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version b02 to version b03/c01:
|
||||
#
|
||||
# - Update the mappings for 0xAF, 0xBF, 0xDE, 0xDF to use new
|
||||
# composed Unicode characters 0x0218-0x021B added in Unicode 3.0;
|
||||
# the previous mappings were to the equivalent decomposition
|
||||
# sequences.
|
||||
#
|
||||
# Changes from version n05 to version b02:
|
||||
#
|
||||
# - Encoding changed for Mac OS 8.5; change mapping of 0xDB from
|
||||
# CURRENCY SIGN (U+00A4) to EURO SIGN (U+20AC).
|
||||
#
|
||||
# Changes from version n02 to version n03:
|
||||
#
|
||||
# - Change mapping of 0xBD from U+2126 to its canonical
|
||||
# decomposition, U+03A9.
|
||||
# - Change mapping of 0xAF,0xBF,0xDE,0xDF from composed S or T
|
||||
# WITH CEDILLA to S or T with COMBINING COMMA BELOW (to match
|
||||
# our decomposition mappings).
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE
|
||||
0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 0x2020 # DAGGER
|
||||
0xA1 0x00B0 # DEGREE SIGN
|
||||
0xA2 0x00A2 # CENT SIGN
|
||||
0xA3 0x00A3 # POUND SIGN
|
||||
0xA4 0x00A7 # SECTION SIGN
|
||||
0xA5 0x2022 # BULLET
|
||||
0xA6 0x00B6 # PILCROW SIGN
|
||||
0xA7 0x00DF # LATIN SMALL LETTER SHARP S
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x00A9 # COPYRIGHT SIGN
|
||||
0xAA 0x2122 # TRADE MARK SIGN
|
||||
0xAB 0x00B4 # ACUTE ACCENT
|
||||
0xAC 0x00A8 # DIAERESIS
|
||||
0xAD 0x2260 # NOT EQUAL TO
|
||||
0xAE 0x0102 # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0xAF 0x0218 # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later
|
||||
0xB0 0x221E # INFINITY
|
||||
0xB1 0x00B1 # PLUS-MINUS SIGN
|
||||
0xB2 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x00A5 # YEN SIGN
|
||||
0xB5 0x00B5 # MICRO SIGN
|
||||
0xB6 0x2202 # PARTIAL DIFFERENTIAL
|
||||
0xB7 0x2211 # N-ARY SUMMATION
|
||||
0xB8 0x220F # N-ARY PRODUCT
|
||||
0xB9 0x03C0 # GREEK SMALL LETTER PI
|
||||
0xBA 0x222B # INTEGRAL
|
||||
0xBB 0x00AA # FEMININE ORDINAL INDICATOR
|
||||
0xBC 0x00BA # MASCULINE ORDINAL INDICATOR
|
||||
0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA
|
||||
0xBE 0x0103 # LATIN SMALL LETTER A WITH BREVE
|
||||
0xBF 0x0219 # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later
|
||||
0xC0 0x00BF # INVERTED QUESTION MARK
|
||||
0xC1 0x00A1 # INVERTED EXCLAMATION MARK
|
||||
0xC2 0x00AC # NOT SIGN
|
||||
0xC3 0x221A # SQUARE ROOT
|
||||
0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK
|
||||
0xC5 0x2248 # ALMOST EQUAL TO
|
||||
0xC6 0x2206 # INCREMENT
|
||||
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0xCE 0x0152 # LATIN CAPITAL LIGATURE OE
|
||||
0xCF 0x0153 # LATIN SMALL LIGATURE OE
|
||||
0xD0 0x2013 # EN DASH
|
||||
0xD1 0x2014 # EM DASH
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x00F7 # DIVISION SIGN
|
||||
0xD7 0x25CA # LOZENGE
|
||||
0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0xDA 0x2044 # FRACTION SLASH
|
||||
0xDB 0x20AC # EURO SIGN
|
||||
0xDC 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0xDD 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0xDE 0x021A # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later
|
||||
0xDF 0x021B # LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later
|
||||
0xE0 0x2021 # DOUBLE DAGGER
|
||||
0xE1 0x00B7 # MIDDLE DOT
|
||||
0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK
|
||||
0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK
|
||||
0xE4 0x2030 # PER MILLE SIGN
|
||||
0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0xF0 0xF8FF # Apple logo
|
||||
0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0xF5 0x0131 # LATIN SMALL LETTER DOTLESS I
|
||||
0xF6 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0xF7 0x02DC # SMALL TILDE
|
||||
0xF8 0x00AF # MACRON
|
||||
0xF9 0x02D8 # BREVE
|
||||
0xFA 0x02D9 # DOT ABOVE
|
||||
0xFB 0x02DA # RING ABOVE
|
||||
0xFC 0x00B8 # CEDILLA
|
||||
0xFD 0x02DD # DOUBLE ACUTE ACCENT
|
||||
0xFE 0x02DB # OGONEK
|
||||
0xFF 0x02C7 # CARON
|
590
charmap/ReadMe.txt
Normal file
|
@ -0,0 +1,590 @@
|
|||
#=======================================================================
|
||||
# File name: README.TXT
|
||||
#
|
||||
# Contents: Background information on Unicode mapping tables for
|
||||
# Mac OS legacy text encodings
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-04 Update discussion of roundtrip fidelity,
|
||||
# delete discussion of mappings dependent on
|
||||
# symmetric swapping (no longer supported),
|
||||
# provide information on how legacy encodings
|
||||
# are supported in Mac OS X.
|
||||
# b3,c1 2002-Dec-19 Add Keyboard font encoding. Update URLs,
|
||||
# notes.
|
||||
# b02 1999-Sep-22 Update information on Cyrillic. Update
|
||||
# contact e-mail address.
|
||||
# n07 1998-Feb-05 Rewrite to provide additional information
|
||||
# relevant to using the accompanying mapping
|
||||
# tables, and to delete some extraneous
|
||||
# information. Delete Bulgarian (no special
|
||||
# encoding, uses standard Cyrillic), add
|
||||
# Farsi, Devanagari, Gurmukhi, Gujarati,
|
||||
# Celtic, Gaelic, Inuit, Tibetan.
|
||||
# n04 1995-Nov-15 Update info for Hebrew and Thai
|
||||
# n03 1995-Apr-15 First version (after fixing some typos).
|
||||
#
|
||||
##################
|
||||
|
||||
0. Preliminaries
|
||||
----------------
|
||||
|
||||
For maximum interchangeability, this file and the accompanying Mac OS
|
||||
mapping tables use only ASCII characters. They are intended to be
|
||||
displayed in a monospaced font.
|
||||
|
||||
Apple, the Apple logo, Mac, and Macintosh are trademarks of Apple
|
||||
Computer, Inc., registered in the United States and other countries.
|
||||
QuickDraw and TrueType are trademarks of Apple Computer, Inc. Unicode is
|
||||
a trademark of Unicode Inc. PostScript is a trademark of Adobe Systems
|
||||
Inc., which may be registered in certain jurisdictions. IBM is a
|
||||
registered trademark of International Business Machines Corporation. ITC
|
||||
Zapf Dingbats is a registered trademark of the International Typeface
|
||||
Corporation. For the sake of brevity, throughout this document and the
|
||||
accompanying tables, "Macintosh" can be used to refer to Macintosh
|
||||
computers and "Unicode" can be used to refer to the Unicode standard.
|
||||
|
||||
Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
either express or implied, with respect to this document and the
|
||||
accompanying tables, their quality, accuracy, or fitness for a
|
||||
particular purpose. In no event will Apple be liable for direct,
|
||||
indirect, special, incidental, or consequential damages resulting from
|
||||
any defect or inaccuracy in this document or the accompanying tables.
|
||||
|
||||
1. Introduction
|
||||
---------------
|
||||
|
||||
This document summarizes some Unicode mapping considerations that are
|
||||
relevant for the accompanying mapping tables. It also provides an
|
||||
overview of Mac OS legacy encodings.
|
||||
|
||||
These mapping tables and character lists are subject to change. The
|
||||
latest tables should be available from the following:
|
||||
|
||||
<http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
|
||||
2. Round-trip fidelity and overview of mapping techniques
|
||||
---------------------------------------------------------
|
||||
|
||||
For a particular set of national and international standards, Unicode
|
||||
provides round-trip fidelity: Text in one of those encodings can be
|
||||
mapped to Unicode and back again, yielding the original characters.
|
||||
Characters which are distinct in one of these source standards have a
|
||||
distinct counterpart in Unicode. Note that this counterpart might not be
|
||||
a single Unicode character; as is pointed out in "The Unicode Standard,
|
||||
Version 2.0" (page 2-10), "sometimes a single code value in another
|
||||
standard corresponds to a sequence of code values in the Unicode
|
||||
Standard, or vice versa."
|
||||
|
||||
However, Unicode does not attempt to provide round-trip fidelity for
|
||||
most vendor standards. Nevertheless, Apple and other platform vendors
|
||||
may need to provide such round-trip fidelity for their current platform
|
||||
encodings and/or legacy platform encodings (this can be important in
|
||||
file systems, for example). In order to do this, Apple makes use of some
|
||||
Unicode characters in the corporate-use zone (the upper end of the
|
||||
private use area).
|
||||
|
||||
Corporate-zone characters must be used with care. Indiscriminate use of
|
||||
such characters can result in text which is not easily interchanged with
|
||||
other systems, since these characters have no standard meaning outside a
|
||||
particular platform. The mappings provided here are intended to minimize
|
||||
the use of private use characters, or to use them in such a way that
|
||||
basic text content will not be lost if the corporate zone characters are
|
||||
dropped when text is transferred to another system.
|
||||
|
||||
The tables provided here have three goals, in the following order of
|
||||
importance:
|
||||
1. Provide 100% round-trip mapping from a Mac OS legacy encoding to
|
||||
Unicode and back.
|
||||
2. Map characters in a Mac OS encoding into the Unicode characters that
|
||||
best represent the interpretation and usage of the Mac OS characters.
|
||||
3. When mapping text in a Mac OS encoding to Unicode using the tables,
|
||||
the resulting Unicode text should be as interchangeable as possible.
|
||||
|
||||
To satisfy these goals, the mappings use a variety of techniques. First
|
||||
we attempt to achieve round-trip mappings using any standard Unicode
|
||||
feature at our disposal, without resorting to corporate-zone characters.
|
||||
This can include the following techniques:
|
||||
- Use of all Unicode characters defined in Unicode 2.1 and later,
|
||||
including compatibility characters.
|
||||
- Mapping a single character in a Mac OS encoding to a sequence of
|
||||
standard Unicode characters, or vice versa. This requires grouping
|
||||
characters into appropriate chunks for lookup before mapping them
|
||||
(this mainly applies to sequences of Unicode characters).
|
||||
- Using Unicode direction overrides to force direction attributes when
|
||||
mapping to Unicode. This requires resolution of Unicode character
|
||||
direction, and use of this information, when mapping from Unicode back
|
||||
to certain Mac OS encodings.
|
||||
The requirements imposed on Unicode handling are necessary for other,
|
||||
non-transcoding operations in a full Unicode implementation anyway, so
|
||||
requiring them for transcoding should not impose much of a burden.
|
||||
|
||||
Next, if round-trip fidelity cannot be achieved using the above
|
||||
techniques, we attempt to use corporate-zone characters only as
|
||||
"transcoding hints" (more on this below). These are combined with one or
|
||||
more standard Unicode characters to mark them as special for
|
||||
transcoding, but have no other function and can be deleted with no loss
|
||||
of basic text content (only of round-trip fidelity).
|
||||
|
||||
Finally, if a character in a Mac OS encoding is unrelated to any Unicode
|
||||
character or Unicode character sequence, we may map it to a single
|
||||
corporate-zone Unicode code point.
|
||||
|
||||
These techniques are described in more detail in the following sections.
|
||||
|
||||
Some clients of these tables may have a different set of goals. For
|
||||
example, some clients may prefer to avoid compatibility characters,
|
||||
perhaps sacrificing round-trip fidelity if necessary. In most cases it
|
||||
is fairly easy to construct other types of mappings from the mappings
|
||||
given here. In particular, the Unicode mappings here have been designed
|
||||
so that if they are converted to a restricted form of NFD (a form that
|
||||
does NOT decompose or normalize Unicode characters in the ranges
|
||||
2000-2FFF or F900-FAFF), the resulting mappings still provide roundtrip
|
||||
fidelity. (For certain characters in the Mac OS Hebrew and Devanagari
|
||||
encodings, the decomposition mappings must use a grouping transcoding
|
||||
hint to ensure roundtrip fidelity; more details on this are provided in
|
||||
the mapping tables for those encodings.)
|
||||
|
||||
There is one more round-trip issue that should be mentioned. If a
|
||||
Unicode character or sequence can be mapped at all into a particular Mac
|
||||
OS encoding, then the reverse mapping back to Unicode should yield the
|
||||
original Unicode character or sequence (except for possible differences
|
||||
in direction overrides or other Unicode characters with General Category
|
||||
Cf). The tables here also provide this. For a related issue, see the
|
||||
next section.
|
||||
|
||||
3. Mapping tolerance: Strict and loose
|
||||
--------------------------------------
|
||||
|
||||
In many character sets, a single character may have multiple semantics,
|
||||
either by explicit definition, ambiguous definition, or established
|
||||
usage. For example, the JIS character 0x2142, or 0x8161 in Shift-JIS,
|
||||
is specified in the JIS X0208 standard to have two meanings: "double
|
||||
vertical line" and "parallel". Each of these meanings corresponds to a
|
||||
different Unicode character: 0x2016 DOUBLE VERTICAL LINE and 0x2225
|
||||
PARALLEL TO. When mapping from Unicode to Shift-JIS, it is normally
|
||||
desirable to map both of these Unicode characters to the single
|
||||
Shift-JIS character. However, when mapping the Shift-JIS character to
|
||||
Unicode, we can choose only one of the possible Unicode characters.
|
||||
|
||||
For two encodings X and Y, we can define a set of "strict" mappings
|
||||
from one to the other as follows: If text in X can be mapped to Y using
|
||||
the strict mappings from X to Y, then the resulting text can be mapped
|
||||
back using the strict mappings from Y to X to end up with the original
|
||||
text from X. Similarly, if text in Y can be mapped to X using the strict
|
||||
mappings from Y to X, then the resulting text can be mapped back using
|
||||
the strict mappings from X to Y to end up with the original text from Y.
|
||||
|
||||
There may be several characters in one encoding that all map to a
|
||||
single character in another encoding, but only one of these mappings
|
||||
can be strict; the others are "loose".
|
||||
|
||||
The mappings given in the accompanying tables are strict mappings.
|
||||
However, the Mac OS Text Encoding Converter also supports loose
|
||||
mappings and fallback mappings. Some of the accompanying tables provide
|
||||
suggestions about possible loose mappings.
|
||||
|
||||
4. Mapping a Mac encoding character to a Unicode sequence or vice versa
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
In some cases, a character in a Mac OS legacy encoding maps to a
|
||||
sequence of Unicode characters. For example, the Mac OS Japanese
|
||||
encoding includes a character for the circled CJK ideograph "big".
|
||||
Although Unicode encodes other circled ideographs as single characters,
|
||||
it does not encode this one. However, this character can be
|
||||
unambiguously represented in Unicode as the Unicode sequence
|
||||
0x5927+0x20DD, the CJK ideograph for "big" followed by COMBINING
|
||||
ENCLOSING CIRCLE.
|
||||
|
||||
To handle the reverse mapping, a transcoding process must group the
|
||||
Unicode sequence 0x5927+0x20DD as a single element for lookup (the
|
||||
Mac OS Text Encoding Converter does this).
|
||||
|
||||
In a few cases, a sequence of characters in a Mac OS legacy encoding
|
||||
must be grouped for mapping to a single Unicode character or a sequence
|
||||
of Unicode characters. For example, in Mac OS Devanagari (based on
|
||||
ISCII-91), DEVANAGARI LETTER VOCALIC L is represented as 0xA6+0xE9;
|
||||
but this is represented in Unicode by the single character 0x090C.
|
||||
Furthermore, explicit halant is represented in Mac OS Devanagari as
|
||||
0xE8+0xE8 (double halant) and in Unicode as 0x094D+0x200C (VIRAMA
|
||||
plus ZERO WIDTH NON-JOINER). The latter can also be considered as
|
||||
a context-dependent mapping of 0xE8, halant.
|
||||
|
||||
Loose mappings from Unicode to a Mac OS encoding often map a single
|
||||
Unicode character to a sequence of characters in the Mac OS encoding. For example,
|
||||
the Unicode character 0x00BD VULGAR FRACTION ONE HALF cannot be mapped
|
||||
into the Mac OS Roman character set as a single character, but it has a
|
||||
loose mapping to the sequence 0x31+0xDA+0x32, "digit one" + "fraction
|
||||
slash" + "digit two".
|
||||
|
||||
In some cases a Unicode character such as a direction override may
|
||||
simply be discarded when mapping to a Mac OS encoding, since the
|
||||
information carried by the override may be represented in a different
|
||||
way by the Mac OS encoding. See the next section for an example.
|
||||
|
||||
5. Mappings that depend on directionality (or other attributes)
|
||||
---------------------------------------------------------------
|
||||
|
||||
Strict mappings from Unicode to Mac OS legacy encodings may depend on
|
||||
resolved character direction. Loose mappings may depend on additional
|
||||
attributes such as whether the text should use vertical form codes if
|
||||
available (i.e. whether the text is intended for vertical display on a
|
||||
system that cannot automatically substitute vertical forms).
|
||||
|
||||
a) Resolved character direction
|
||||
|
||||
The Mac OS Arabic and Hebrew character sets were developed in 1986-1987.
|
||||
At that time the bidirectional line layout algorithm used in the Mac OS
|
||||
was fairly simple; it used only a few direction classes (instead of the
|
||||
19 now used in the Unicode bidirectional algorithm). In order to permit
|
||||
users to handle some tricky layout problems, certain punctuation and
|
||||
symbol characters have duplicate code points, one with a left-right
|
||||
direction attribute and the other with a right-left direction attribute.
|
||||
|
||||
For example, plus sign is encoded at 0x2B with a left-right attribute,
|
||||
and at 0xAB with a right-left attribute. However, there is only one PLUS
|
||||
SIGN character in Unicode. This leads to some interesting problems when
|
||||
mapping between Mac OS Arabic or Hebrew and Unicode.
|
||||
|
||||
We need a way to map both of these plus signs to Unicode and back. Using
|
||||
a single corporate character for one of these plus signs is not a good
|
||||
solution, since both of the plus sign characters are likely to be used
|
||||
in text that is interchanged, and thus content would be lost.
|
||||
|
||||
The problem is solved with the use of direction override characters and
|
||||
direction-dependent mappings. When mapping from Mac OS Arabic or Hebrew
|
||||
to Unicode, we use direction overrides as necessary to force the
|
||||
direction of the resulting Unicode characters. When mapping back from
|
||||
Unicode, the Unicode bidirectional algorithm should be used to determine
|
||||
resolved direction of the Unicode characters. The mapping from Unicode
|
||||
to Mac OS Arabic or Hebrew can then be disambiguated as necessary by
|
||||
using the resolved direction.
|
||||
|
||||
For example, when mapping from Mac OS Arabic or Hebrew, we can use
|
||||
LEFT-RIGHT OVERRIDE (LRO), RIGHT-LEFT OVERRIDE (RLO), and POP DIRECTION
|
||||
FORMATTING (PDF) as follows:
|
||||
|
||||
0x2B -> 0x202D (LRO) + 0x002B (PLUS SIGN) + 0x202C (PDF)
|
||||
0xAB -> 0x202E (RLO) + 0x002B (PLUS SIGN) + 0x202C (PDF)
|
||||
|
||||
When mapping back, we resolve the direction of the Unicode character
|
||||
0x002B, and use this information to determine which of the Mac OS
|
||||
encoding characters to use:
|
||||
|
||||
0x002B -> 0x2B (if LR) or 0xAB (if RL)
|
||||
|
||||
After direction overrides have been used in this way to force a
|
||||
particular resolved direction, they may be discarded when mapping from
|
||||
Unicode to Mac OS Arabic and Hebrew (since the information they carried
|
||||
in Unicode is represented in the Mac OS encoding by the code point of
|
||||
the plus sign).
|
||||
|
||||
Even when not required for round-trip fidelity, direction overrides
|
||||
may be used when mapping from a Mac OS encoding to Unicode in order to
|
||||
preserve proper text layout. For example, the single Mac OS Arabic
|
||||
ellipsis character has direction class right-left, while the Unicode
|
||||
HORIZONTAL ELLIPSIS character has direction class neutral. When
|
||||
mapping the Mac OS ellipsis to Unicode, it is surrounded with a
|
||||
direction override to help preserve proper text layout. However,
|
||||
resolved direction is not needed or used when mapping the Unicode
|
||||
HORIZONTAL ELLIPSIS back to Mac OS Arabic.
|
||||
|
||||
b) Horizontal or vertical display
|
||||
|
||||
The Mac OS Japanese encoding includes separately-encoded vertical forms
|
||||
for some punctuation and kana. When Unicode characters in the CJK
|
||||
punctuation and kana ranges are mapped to Mac OS Japanese characters and
|
||||
(1) those characters are intended for vertical display, (2) they will be
|
||||
displayed in an environment that does not provide automatic vertical
|
||||
form substitution, and (3) loose mappings are desired, the Unicode
|
||||
characters can be mapped to the corresponding vertical form codes in the
|
||||
Mac OS Japanese encoding.
|
||||
|
||||
This does not affect mapping of the Unicode vertical presentation forms
|
||||
(which always map to the Mac OS Japanese vertical form codes).
|
||||
|
||||
6. Use of corporate characters
|
||||
------------------------------
|
||||
|
||||
Apple has defined a block of 32 corporate characters as "transcoding
|
||||
hints." These are used in combination with standard Unicode characters
|
||||
to force them to be treated in a special way for mapping to other
|
||||
encodings; they have no other effect. Sixteen of these transcoding
|
||||
hints are "grouping hints" - they indicate that the next 2-4 Unicode
|
||||
characters should be treated as a single entity for transcoding. The
|
||||
other sixteen transcoding hints are "variant tags" - they are like
|
||||
combining characters, and can follow a standard Unicode character (or a sequence
|
||||
consisting of a base character and other combining characters) to
|
||||
cause it to be treated in a special way for transcoding. These always
|
||||
terminate a combining-character sequence.
|
||||
|
||||
Whenever possible, mappings that require corporate-zone characters
|
||||
use standard Unicode characters in combination with a single
|
||||
transcoding hint (no mapping uses more than one transcoding hint).
|
||||
For these mappings, even if the corporate-zone characters are lost in
|
||||
interchange, the basic text content will be preserved.
|
||||
|
||||
However, some characters in a Mac OS encoding - such as the Apple
|
||||
logo character - bear no relation to any standard Unicode character.
|
||||
In these cases, the Mac OS character is mapped to a single corporate
|
||||
zone character defined by Apple. Fewer than 40 corporate characters
|
||||
are used in this way.
|
||||
|
||||
All of the corporate characters defined by Apple are listed in the
|
||||
accompanying file "CORPCHAR.TXT", including old Apple corporate
|
||||
character assignments which are now deprecated (but which are still
|
||||
supported as loose mappings by the Mac OS Text Encoding Converter).
|
||||
|
||||
7. Font variants
|
||||
----------------
|
||||
|
||||
For some Mac OS legacy encodings, certain fonts used with that encoding
|
||||
may actually implement a slight variant of the standard encoding
|
||||
specified in the accompanying mapping tables. The header comments in the
|
||||
mapping table files for each encoding describe any font variants
|
||||
associated with that encoding.
|
||||
|
||||
8. Encodings in Mac OS X
|
||||
------------------------
|
||||
|
||||
The Mac OS X Cocoa and Carbon environments use Unicode as the primary
|
||||
text encoding. Some legacy programming interfaces in the Carbon
|
||||
environment - e.g. Quickdraw Text, the Script Manager, and related
|
||||
Text Utilities - use and support the following subset of Mac OS legacy
|
||||
encodings:
|
||||
Roman
|
||||
Central European
|
||||
Cyrillic
|
||||
Chinese Traditional
|
||||
Chinese Simplified
|
||||
Japanese
|
||||
Korean
|
||||
|
||||
Other legacy Mac OS encodings are supported in Carbon and Cocoa via
|
||||
transcoding using the Mac OS Text Encoding Converter or other
|
||||
transcoding interfaces; the character repertoires of all Mac OS
|
||||
legacy encodings are supported in Unicode on Mac OS X.
|
||||
|
||||
Additional legacy encodings are also supported in the Classic
|
||||
environment under Mac OS X.
|
||||
|
||||
9. Mac OS legacy encodings
|
||||
--------------------------
|
||||
|
||||
Mac OS versions 7.1 and later supported multiple encodings via the
|
||||
Script Manager, QuickDraw Text and related Text Utilities. These
|
||||
system components distinguish these encodings primarily by script code:
|
||||
font family IDs are grouped into ranges, and each range is associated
|
||||
with a script code.
|
||||
|
||||
In some cases, there are several encodings that share a single script
|
||||
code. Usually these are closely related. To distinguish among these,
|
||||
additional information is required, such as font name or system
|
||||
region code (locale code).
|
||||
|
||||
The encodings described here (and in the accompanying tables) are the
|
||||
legacy encodings used in Mac OS versions 7.1 and later. In some cases,
|
||||
certain earlier system versions have used different encodings. Not all
|
||||
of these encodings are directly supported in Mac OS X, but Mac OS X
|
||||
does support transcoding between all of these encodings and Unicode.
|
||||
|
||||
In all Mac OS legacy encodings, character codes 0x00-0x7F are identical
|
||||
to ASCII, except that
|
||||
- in Mac OS Japanese, reverse solidus is replaced by yen sign
|
||||
- in Mac OS Arabic, Farsi, and Hebrew, some of the punctuation in this
|
||||
range is treated as having strong left-right directionality,
|
||||
although the corresponding Unicode characters have neutral
|
||||
directionality
|
||||
- in the three symbol glyphs encodings (Symbol, Dingbats, and Keyboard
|
||||
glyphs), a different mapping is used for the ASCII range. The
|
||||
Keyboard glyphs encoding even has a special mapping for the control
|
||||
characters range 0x00-0x1F.
|
||||
Fonts used as "system" fonts (for menus, dialogs, etc.) had four glyphs
|
||||
at code points 0x11-0x14 for transient use by the Menu Manager. These
|
||||
glyphs were not intended as characters for use in normal text, and the
|
||||
associated code points are not generally interpreted as associated with
|
||||
these glyphs. (However, a "system font variant" mapping table could
|
||||
provide mappings for these).
|
||||
|
||||
Note that in general, character sets cannot be determined from font
|
||||
layouts (they are not the same thing!). This is very noticeable with
|
||||
Arabic, Hebrew, and Devanagari, for example.
|
||||
|
||||
The following is a list of legacy Mac OS encodings. The accompanying
|
||||
tables provide mappings from these encodings to Unicode.
|
||||
|
||||
a) Mac OS encodings for script code 0, smRoman.
|
||||
|
||||
* Roman - this is the default for script code 0 (when the special
|
||||
cases listed below do not apply). It covers several western European
|
||||
languages, and includes math operators and various symbols.
|
||||
|
||||
* Symbol - this is the encoding for the font named "Symbol". It includes
|
||||
Greek letters, math operators, and miscellaneous symbols. The layout
|
||||
of the Symbol character set is identical to the layout of the Adobe
|
||||
Symbol encoding vector, with the addition of the Apple logo at 0xF0
|
||||
and the EURO SIGN at 0xA0.
|
||||
|
||||
* Dingbats - this is the encoding for the font named "Zapf Dingbats".
|
||||
The layout of the Dingbats character set is identical to or a superset
|
||||
of the layout of the Adobe Zapf Dingbats encoding vector.
|
||||
|
||||
* Keyboard glyphs - this is the encoding for the legacy font named
|
||||
".Keyboard". Before Mac OS X, this font was used by the user-interface
|
||||
system to display glyphs for special keys on the keyboard. In Mac OS
|
||||
X, this mapping is not associated with a font; it is only used as a
|
||||
way to map from a set of Menu Manager constants to associated Unicode
|
||||
sequences. As such, new mappings added for Mac OS X only may be
|
||||
one-way mappings: From the Keyboard glyph "encoding" to Unicode, but
|
||||
not back.
|
||||
|
||||
* Turkish - this is the encoding if the script code is 0 and the system
|
||||
region code is 24, verTurkey. It has 7 code point differences from
|
||||
Mac OS Roman.
|
||||
|
||||
* Croatian - this is the encoding if the script code is 0 and the system
|
||||
region code is any of the following:
|
||||
68, verCroatia
|
||||
66, verSlovenian
|
||||
25, verYugoCroatian (only used in older systems)
|
||||
It has 20 code point differences from standard Roman, but only 10
|
||||
differences in repertoire.
|
||||
|
||||
* Icelandic - this is the encoding if the script code is 0 and the
|
||||
system region code is either of the following:
|
||||
21, verIceland
|
||||
47, verFaroeIsl
|
||||
It has 6 code point differences from standard Roman. It also has one
|
||||
font variant.
|
||||
|
||||
* Romanian - this is the encoding if the script code is 0 and the system
|
||||
region code is 39, verRomania. It has 6 code point differences from
|
||||
standard Roman.
|
||||
|
||||
* Celtic - this is the encoding if the script code is 0 and the system
|
||||
region code is any of the following:
|
||||
50, verIreland
|
||||
75, verScottishGaelic
|
||||
76, verManxGaelic
|
||||
77, verBreton
|
||||
79, verWelsh
|
||||
It is a variant of Mac OS Roman with a few extra accented characters
|
||||
for Welsh.
|
||||
|
||||
* Gaelic - this is the encoding if the script code is 0 and the system
|
||||
region code is 81, verIrishGaelicScript. It is a variant of Mac OS
|
||||
Roman, and supports the older Irish orthography using dot above.
|
||||
|
||||
* Greek (monotonic) - this is the encoding if the script code is 0 and
|
||||
the system region code is 20, verGreece. Although a script code is
|
||||
defined for Greek, the Greek localized system does not use it (the
|
||||
font family IDs are in the smRoman range). This encoding is based on
|
||||
the ISO/IEC 8859-7 repertoire with additional Roman characters for
|
||||
French and German, as well as additional symbols. Greek system 4.1
|
||||
used a different encoding that matched 8859-7 code points for Greek
|
||||
letters. Greek system 6.0.7 also used a variant of the standard
|
||||
encoding, but it was quickly replaced by Greek system 6.0.7.1 which
|
||||
used the standard encoding.
|
||||
|
||||
See also the Central European encoding under script code 29 below.
|
||||
|
||||
b) Mac OS encodings for script code 1, smJapanese.
|
||||
|
||||
* Japanese - this is the default for script code 1. It is based on a
|
||||
Shift-JIS implementation of JIS X0208-1990 ("fullwidth") and
|
||||
JIS X0201-1976 ("halfwidth"), with 5 additional one-byte characters
|
||||
and one modified character, a set of Apple extension characters which
|
||||
include many industry standard extensions, and separate codes for
|
||||
vertical forms of some punctuation and kana. There are several font
|
||||
variants.
|
||||
|
||||
c) Mac OS encodings for script code 2, smTradChinese.
|
||||
|
||||
* Chinese Traditional - this is an extension of Big-5.
|
||||
|
||||
d) Mac OS encodings for script code 3, smKorean.
|
||||
|
||||
* Korean - this is an extension of EUC-KR.
|
||||
|
||||
e) Mac OS encodings for script code 4, smArabic.
|
||||
|
||||
* Arabic - This is the default for script code 4 (when the special
|
||||
case listed below does not apply). It is based on the ISO/IEC 8859-6
|
||||
repertoire, with additional Arabic letters for Persian and Urdu and
|
||||
with accented Roman letters for European languages. It has the
|
||||
interesting feature mentioned above that certain ASCII punctuation
|
||||
and symbol characters are encoded twice, once for each direction. It
|
||||
has several font variants.
|
||||
|
||||
* Farsi - This is the encoding if the script code is 4 and the system
|
||||
region code is 48, verIran. It is similar to Mac OS Arabic, but has
|
||||
the "extended" or Persian digits instead of the standard Arabic
|
||||
digits. It has one font variant.
|
||||
|
||||
f) Mac OS encodings for script code 5, smHebrew.
|
||||
|
||||
* Hebrew - This is based on the ISO/IEC 8859-8 Hebrew letter repertoire,
|
||||
but adds Hebrew points, some Hebrew ligatures, some accented Roman
|
||||
letters for European languages, and some non-ASCII punctuation. As
|
||||
with Mac OS Arabic, certain ASCII punctuation and symbol characters
|
||||
are encoded twice, once for each direction. This is also true for the
|
||||
European digits. This has one font variant.
|
||||
|
||||
g) Mac OS encodings for script code 6, smGreek.
|
||||
|
||||
None currently - see smRoman.
|
||||
|
||||
h) Mac OS encodings for script code 7, smCyrillic.
|
||||
|
||||
* Cyrillic - This is based on the ISO/IEC 8859-5 Cyrillic character
|
||||
repertoire plus an additional case pair for Ukrainian.
|
||||
|
||||
i) Mac OS encodings for script code 9, smDevanagari.
|
||||
|
||||
* Devanagari - This is based on IS 13194:1991 (ISCII-91), and adds some
|
||||
punctuation and symbols.
|
||||
|
||||
j) Mac OS encodings for script code 10, smGurmukhi.
|
||||
|
||||
* Gurmukhi - This is based on IS 13194:1991 (ISCII-91), and adds some
|
||||
punctuation and symbols.
|
||||
|
||||
k) Mac OS encodings for script code 11, smGujarati.
|
||||
|
||||
* Gujarati - This is based on IS 13194:1991 (ISCII-91), and adds some
|
||||
punctuation and symbols.
|
||||
|
||||
l) Mac OS encodings for script code 21, smThai.
|
||||
|
||||
* Thai - This is based on TIS 620-2533, except that three of the
|
||||
TIS 620-2533 characters are replaced with other characters. Some
|
||||
undefined code points in TIS 620-2533 are used for additional
|
||||
punctuation characters.
|
||||
|
||||
m) Mac OS encodings for script code 25, smSimpChinese.
|
||||
|
||||
* Chinese Simplified - this is an extension of EUC-CN.
|
||||
|
||||
n) Mac OS encodings for script code 26, smTibetan.
|
||||
|
||||
* Tibetan
|
||||
|
||||
o) Mac OS encodings for script code 28, smEthiopic.
|
||||
|
||||
* Inuit - this is the encoding if the script code is 28 and the
|
||||
system region code is 78, verNunavut (for Inuktitut language).
|
||||
There is no script code for Inuit, so it shares the script code
|
||||
with Ethiopic.
|
||||
|
||||
p) Mac OS encodings for script code 29, smCentralEuroRoman.
|
||||
|
||||
* Central European - This is similar to standard Roman, but with a
|
||||
different (and larger) set of European characters and with fewer
|
||||
symbols. It is used for Polish, Czech, Slovak, Hungarian, Estonian,
|
||||
Latvian, and Lithuanian.
|
405
charmap/SYMBOL.TXT
Normal file
|
@ -0,0 +1,405 @@
|
|||
#=======================================================================
|
||||
# File name: SYMBOL.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Symbol
|
||||
# character set to Unicode 4.0 and later.
|
||||
#
|
||||
# Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Change mappings for 0xBD, 0xE0. Update
|
||||
# header comments. Matches internal xml <c1.2>
|
||||
# and Text Encoding Converter 2.0.
|
||||
# b4,c1 2002-Dec-19 Update mappings for encoded glyph fragments
|
||||
# 0xBE, 0xE6-EF, 0xF4, 0xF6-FE to use new
|
||||
# Unicode 3.2 characters instead of sequences
|
||||
# involving corporate-use characters. Update
|
||||
# URLs, notes. Matches internal utom<b4>.
|
||||
# b03 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b3>, ufrm<b3>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# b02 1998-Aug-18 Encoding changed for Mac OS 8.5; add new
|
||||
# mapping from 0xA0 to EURO SIGN. Matches
|
||||
# internal utom<b3>, ufrm<b3>.
|
||||
# n05 1998-Feb-05 Update to match internal utom<n5>, ufrm<n15>
|
||||
# and Text Encoding Converter version 1.3:
|
||||
# Use standard Unicodes plus transcoding hints
|
||||
# instead of single corporate characters, also
|
||||
# change mappings for 0xE1 & 0xF1 from U+2329
|
||||
# & U+232A to their canonical decompositions;
|
||||
# see details below. Also update header
|
||||
# comments to new format.
|
||||
# n03 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n4>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Symbol code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode or Unicode sequence
|
||||
# (in hex as 0xNNNN or 0xNNNN+0xNNNN).
|
||||
# Column #3 is a comment containing the Unicode name.
|
||||
# In some cases an additional comment follows the Unicode name.
|
||||
#
|
||||
# The entries are in Mac OS Symbol code order.
|
||||
#
|
||||
# Some of these mappings require the use of corporate characters.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Symbol character set uses the standard control characters
|
||||
# at 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Symbol:
|
||||
# -----------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported directly in programming
|
||||
# interfaces for QuickDraw Text, the Script Manager, and related
|
||||
# Text Utilities. For other purposes it is supported via transcoding
|
||||
# to and from Unicode.
|
||||
#
|
||||
# The Mac OS Symbol encoding shares the script code smRoman
|
||||
# (0) with the Mac OS Roman encoding. To determine if the Symbol
|
||||
# encoding is being used, you must check if the font name is
|
||||
# "Symbol".
|
||||
#
|
||||
# Before Mac OS 8.5, code point 0xA0 was unused. In Mac OS 8.5
|
||||
# and later versions, code point 0xA0 is EURO SIGN and maps to
|
||||
# U+20AC (the Symbol font is updated for Mac OS 8.5 to reflect
|
||||
# this).
|
||||
#
|
||||
# The layout of the Mac OS Symbol character set is identical to
|
||||
# the layout of the Adobe Symbol encoding vector, with the
|
||||
# addition of the Apple logo character at 0xF0.
|
||||
#
|
||||
# This character set encodes a number of glyph fragments. Some are
|
||||
# used as extenders: 0x60 is used to extend radical signs, 0xBD and
|
||||
# 0xBE are used to extend vertical and horizontal arrows, etc. In
|
||||
# addition, there are top, bottom, and center sections for
|
||||
# parentheses, brackets, integral signs, and other signs that may
|
||||
# extend vertically for 2 or more lines of normal text. As of
|
||||
# Unicode 3.2, most of these are now encoded in Unicode; a few are
|
||||
# not, so these are mapped using corporate-zone Unicode characters
|
||||
# (see below).
|
||||
#
|
||||
# In addition, Symbol separately encodes both serif and sans-serif
|
||||
# forms for copyright, trademark, and registered signs. Unicode
|
||||
# encodes only the abstract characters, so one set of these (the
|
||||
# sans-serif forms) are also mapped using corporate-zone Unicode
|
||||
# characters (see below).
|
||||
#
|
||||
# The following code points are unused, and are not shown here:
|
||||
# 0x80-0x9F, 0xFF.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# The goals in the mappings provided here are:
|
||||
# - Ensure roundtrip mapping from every character in the Mac OS
|
||||
# Symbol character set to Unicode and back
|
||||
# - Use standard Unicode characters as much as possible, to
|
||||
# maximize interchangeability of the resulting Unicode text.
|
||||
# Whenever possible, avoid having content carried by private-use
|
||||
# characters.
|
||||
#
|
||||
# Some of the characters in the Mac OS Symbol character set do not
|
||||
# correspond to distinct, single Unicode characters. To map these
|
||||
# and satisfy both goals above, we employ various strategies.
|
||||
#
|
||||
# a) If possible, use private use characters in combination with
|
||||
# standard Unicode characters to mark variants of the standard
|
||||
# Unicode character.
|
||||
#
|
||||
# Apple has defined a block of 32 corporate characters as "transcoding
|
||||
# hints." These are used in combination with standard Unicode
|
||||
# characters to force them to be treated in a special way for mapping
|
||||
# to other encodings; they have no other effect. Sixteen of these
|
||||
# transcoding hints are "grouping hints" - they indicate that the next
|
||||
# 2-4 Unicode characters should be treated as a single entity for
|
||||
# transcoding. The other sixteen transcoding hints are "variant tags"
|
||||
# - they are like combining characters, and can follow a standard
|
||||
# Unicode character (or a sequence consisting of a base character and other
|
||||
# combining characters) to cause it to be treated in a special way for
|
||||
# transcoding. These always terminate a combining-character sequence.
|
||||
#
|
||||
# The transcoding hint used in this mapping table is the
|
||||
# variant tag 0xF87F. Since this is combined with standard Unicode
|
||||
# characters, some characters in the Mac OS Symbol character set map
|
||||
# to a sequence of two Unicodes instead of a single Unicode character.
|
||||
#
|
||||
# For example, the Mac OS Symbol character at 0xE2 is an alternate,
|
||||
# sans-serif form of the REGISTERED SIGN (the standard mapping is for
|
||||
# the abstract character at 0xD2, which here has a serif form). So 0xE2
|
||||
# is mapped to 0x00AE (REGISTERED SIGN) + 0xF87F (a variant tag).
|
||||
#
|
||||
# b) Otherwise, use private use characters by themselves to map
|
||||
# Mac OS Symbol characters which have no relationship to any standard
|
||||
# Unicode character.
|
||||
#
|
||||
# The following additional corporate zone Unicode characters are
|
||||
# used for this purpose here:
|
||||
#
|
||||
# 0xF8E5 radical extender
|
||||
# 0xF8FF Apple logo
|
||||
#
|
||||
# NOTE: The graphic image associated with the Apple logo character
|
||||
# is not authorized for use without permission of Apple, and
|
||||
# unauthorized use might constitute trademark infringement.
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version c01 to version c02:
|
||||
#
|
||||
# - Update mappings for 0xBD from 0xF8E6 to 0x23D0 (use new Unicode
|
||||
# 4.0 char)
|
||||
# - Correct mapping for 0xE0 from 0x22C4 to 0x25CA
|
||||
#
|
||||
# Changes from version b02 to version b03/c01:
|
||||
#
|
||||
# - Update mappings for encoded glyph fragments 0xBE, 0xE6-EF, 0xF4,
|
||||
# 0xF6-FE to use new Unicode 3.2 characters instead of using either
|
||||
# single corporate-use characters (e.g. 0xBE was mapped to 0xF8E7) or
|
||||
# sequences combining a standard Unicode character with a transcoding
|
||||
# hint (e.g. 0xE6 was mapped to 0x0028+0xF870).
|
||||
#
|
||||
# Changes from version n05 to version b02:
|
||||
#
|
||||
# - Encoding changed for Mac OS 8.5; 0xA0 now maps to 0x20AC, EURO
|
||||
# SIGN. 0xA0 was unmapped in earlier versions.
|
||||
#
|
||||
# Changes from version n03 to version n05:
|
||||
#
|
||||
# - Change strict mapping for 0xE1 & 0xF1 from U+2329 & U+232A
|
||||
# to their canonical decompositions, U+3008 & U+3009.
|
||||
#
|
||||
# - Change mapping for the following to use standard Unicode +
|
||||
# transcoding hint, instead of single corporate-zone
|
||||
# character: 0xE2-0xE4, 0xE6-0xEE, 0xF4, 0xF6-0xFE.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x2200 # FOR ALL
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x2203 # THERE EXISTS
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x220D # SMALL CONTAINS AS MEMBER
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x2217 # ASTERISK OPERATOR
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x2212 # MINUS SIGN
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x2245 # APPROXIMATELY EQUAL TO
|
||||
0x41 0x0391 # GREEK CAPITAL LETTER ALPHA
|
||||
0x42 0x0392 # GREEK CAPITAL LETTER BETA
|
||||
0x43 0x03A7 # GREEK CAPITAL LETTER CHI
|
||||
0x44 0x0394 # GREEK CAPITAL LETTER DELTA
|
||||
0x45 0x0395 # GREEK CAPITAL LETTER EPSILON
|
||||
0x46 0x03A6 # GREEK CAPITAL LETTER PHI
|
||||
0x47 0x0393 # GREEK CAPITAL LETTER GAMMA
|
||||
0x48 0x0397 # GREEK CAPITAL LETTER ETA
|
||||
0x49 0x0399 # GREEK CAPITAL LETTER IOTA
|
||||
0x4A 0x03D1 # GREEK THETA SYMBOL
|
||||
0x4B 0x039A # GREEK CAPITAL LETTER KAPPA
|
||||
0x4C 0x039B # GREEK CAPITAL LETTER LAMDA
|
||||
0x4D 0x039C # GREEK CAPITAL LETTER MU
|
||||
0x4E 0x039D # GREEK CAPITAL LETTER NU
|
||||
0x4F 0x039F # GREEK CAPITAL LETTER OMICRON
|
||||
0x50 0x03A0 # GREEK CAPITAL LETTER PI
|
||||
0x51 0x0398 # GREEK CAPITAL LETTER THETA
|
||||
0x52 0x03A1 # GREEK CAPITAL LETTER RHO
|
||||
0x53 0x03A3 # GREEK CAPITAL LETTER SIGMA
|
||||
0x54 0x03A4 # GREEK CAPITAL LETTER TAU
|
||||
0x55 0x03A5 # GREEK CAPITAL LETTER UPSILON
|
||||
0x56 0x03C2 # GREEK SMALL LETTER FINAL SIGMA
|
||||
0x57 0x03A9 # GREEK CAPITAL LETTER OMEGA
|
||||
0x58 0x039E # GREEK CAPITAL LETTER XI
|
||||
0x59 0x03A8 # GREEK CAPITAL LETTER PSI
|
||||
0x5A 0x0396 # GREEK CAPITAL LETTER ZETA
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x2234 # THEREFORE
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x22A5 # UP TACK
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0xF8E5 # radical extender # corporate char
|
||||
0x61 0x03B1 # GREEK SMALL LETTER ALPHA
|
||||
0x62 0x03B2 # GREEK SMALL LETTER BETA
|
||||
0x63 0x03C7 # GREEK SMALL LETTER CHI
|
||||
0x64 0x03B4 # GREEK SMALL LETTER DELTA
|
||||
0x65 0x03B5 # GREEK SMALL LETTER EPSILON
|
||||
0x66 0x03C6 # GREEK SMALL LETTER PHI
|
||||
0x67 0x03B3 # GREEK SMALL LETTER GAMMA
|
||||
0x68 0x03B7 # GREEK SMALL LETTER ETA
|
||||
0x69 0x03B9 # GREEK SMALL LETTER IOTA
|
||||
0x6A 0x03D5 # GREEK PHI SYMBOL
|
||||
0x6B 0x03BA # GREEK SMALL LETTER KAPPA
|
||||
0x6C 0x03BB # GREEK SMALL LETTER LAMDA
|
||||
0x6D 0x03BC # GREEK SMALL LETTER MU
|
||||
0x6E 0x03BD # GREEK SMALL LETTER NU
|
||||
0x6F 0x03BF # GREEK SMALL LETTER OMICRON
|
||||
0x70 0x03C0 # GREEK SMALL LETTER PI
|
||||
0x71 0x03B8 # GREEK SMALL LETTER THETA
|
||||
0x72 0x03C1 # GREEK SMALL LETTER RHO
|
||||
0x73 0x03C3 # GREEK SMALL LETTER SIGMA
|
||||
0x74 0x03C4 # GREEK SMALL LETTER TAU
|
||||
0x75 0x03C5 # GREEK SMALL LETTER UPSILON
|
||||
0x76 0x03D6 # GREEK PI SYMBOL
|
||||
0x77 0x03C9 # GREEK SMALL LETTER OMEGA
|
||||
0x78 0x03BE # GREEK SMALL LETTER XI
|
||||
0x79 0x03C8 # GREEK SMALL LETTER PSI
|
||||
0x7A 0x03B6 # GREEK SMALL LETTER ZETA
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x223C # TILDE OPERATOR
|
||||
#
|
||||
0xA0 0x20AC # EURO SIGN
|
||||
0xA1 0x03D2 # GREEK UPSILON WITH HOOK SYMBOL
|
||||
0xA2 0x2032 # PRIME # minute
|
||||
0xA3 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xA4 0x2044 # FRACTION SLASH
|
||||
0xA5 0x221E # INFINITY
|
||||
0xA6 0x0192 # LATIN SMALL LETTER F WITH HOOK
|
||||
0xA7 0x2663 # BLACK CLUB SUIT
|
||||
0xA8 0x2666 # BLACK DIAMOND SUIT
|
||||
0xA9 0x2665 # BLACK HEART SUIT
|
||||
0xAA 0x2660 # BLACK SPADE SUIT
|
||||
0xAB 0x2194 # LEFT RIGHT ARROW
|
||||
0xAC 0x2190 # LEFTWARDS ARROW
|
||||
0xAD 0x2191 # UPWARDS ARROW
|
||||
0xAE 0x2192 # RIGHTWARDS ARROW
|
||||
0xAF 0x2193 # DOWNWARDS ARROW
|
||||
0xB0 0x00B0 # DEGREE SIGN
|
||||
0xB1 0x00B1 # PLUS-MINUS SIGN
|
||||
0xB2 0x2033 # DOUBLE PRIME # second
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x00D7 # MULTIPLICATION SIGN
|
||||
0xB5 0x221D # PROPORTIONAL TO
|
||||
0xB6 0x2202 # PARTIAL DIFFERENTIAL
|
||||
0xB7 0x2022 # BULLET
|
||||
0xB8 0x00F7 # DIVISION SIGN
|
||||
0xB9 0x2260 # NOT EQUAL TO
|
||||
0xBA 0x2261 # IDENTICAL TO
|
||||
0xBB 0x2248 # ALMOST EQUAL TO
|
||||
0xBC 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xBD 0x23D0 # VERTICAL LINE EXTENSION (for arrows) # for Unicode 4.0 and later
|
||||
0xBE 0x23AF # HORIZONTAL LINE EXTENSION (for arrows) # for Unicode 3.2 and later
|
||||
0xBF 0x21B5 # DOWNWARDS ARROW WITH CORNER LEFTWARDS
|
||||
0xC0 0x2135 # ALEF SYMBOL
|
||||
0xC1 0x2111 # BLACK-LETTER CAPITAL I
|
||||
0xC2 0x211C # BLACK-LETTER CAPITAL R
|
||||
0xC3 0x2118 # SCRIPT CAPITAL P
|
||||
0xC4 0x2297 # CIRCLED TIMES
|
||||
0xC5 0x2295 # CIRCLED PLUS
|
||||
0xC6 0x2205 # EMPTY SET
|
||||
0xC7 0x2229 # INTERSECTION
|
||||
0xC8 0x222A # UNION
|
||||
0xC9 0x2283 # SUPERSET OF
|
||||
0xCA 0x2287 # SUPERSET OF OR EQUAL TO
|
||||
0xCB 0x2284 # NOT A SUBSET OF
|
||||
0xCC 0x2282 # SUBSET OF
|
||||
0xCD 0x2286 # SUBSET OF OR EQUAL TO
|
||||
0xCE 0x2208 # ELEMENT OF
|
||||
0xCF 0x2209 # NOT AN ELEMENT OF
|
||||
0xD0 0x2220 # ANGLE
|
||||
0xD1 0x2207 # NABLA
|
||||
0xD2 0x00AE # REGISTERED SIGN # serif
|
||||
0xD3 0x00A9 # COPYRIGHT SIGN # serif
|
||||
0xD4 0x2122 # TRADE MARK SIGN # serif
|
||||
0xD5 0x220F # N-ARY PRODUCT
|
||||
0xD6 0x221A # SQUARE ROOT
|
||||
0xD7 0x22C5 # DOT OPERATOR
|
||||
0xD8 0x00AC # NOT SIGN
|
||||
0xD9 0x2227 # LOGICAL AND
|
||||
0xDA 0x2228 # LOGICAL OR
|
||||
0xDB 0x21D4 # LEFT RIGHT DOUBLE ARROW
|
||||
0xDC 0x21D0 # LEFTWARDS DOUBLE ARROW
|
||||
0xDD 0x21D1 # UPWARDS DOUBLE ARROW
|
||||
0xDE 0x21D2 # RIGHTWARDS DOUBLE ARROW
|
||||
0xDF 0x21D3 # DOWNWARDS DOUBLE ARROW
|
||||
0xE0 0x25CA # LOZENGE # previously mapped to 0x22C4 DIAMOND OPERATOR
|
||||
0xE1 0x3008 # LEFT ANGLE BRACKET
|
||||
0xE2 0x00AE+0xF87F # REGISTERED SIGN, alternate: sans serif
|
||||
0xE3 0x00A9+0xF87F # COPYRIGHT SIGN, alternate: sans serif
|
||||
0xE4 0x2122+0xF87F # TRADE MARK SIGN, alternate: sans serif
|
||||
0xE5 0x2211 # N-ARY SUMMATION
|
||||
0xE6 0x239B # LEFT PARENTHESIS UPPER HOOK # for Unicode 3.2 and later
|
||||
0xE7 0x239C # LEFT PARENTHESIS EXTENSION # for Unicode 3.2 and later
|
||||
0xE8 0x239D # LEFT PARENTHESIS LOWER HOOK # for Unicode 3.2 and later
|
||||
0xE9 0x23A1 # LEFT SQUARE BRACKET UPPER CORNER # for Unicode 3.2 and later
|
||||
0xEA 0x23A2 # LEFT SQUARE BRACKET EXTENSION # for Unicode 3.2 and later
|
||||
0xEB 0x23A3 # LEFT SQUARE BRACKET LOWER CORNER # for Unicode 3.2 and later
|
||||
0xEC 0x23A7 # LEFT CURLY BRACKET UPPER HOOK # for Unicode 3.2 and later
|
||||
0xED 0x23A8 # LEFT CURLY BRACKET MIDDLE PIECE # for Unicode 3.2 and later
|
||||
0xEE 0x23A9 # LEFT CURLY BRACKET LOWER HOOK # for Unicode 3.2 and later
|
||||
0xEF 0x23AA # CURLY BRACKET EXTENSION # for Unicode 3.2 and later
|
||||
0xF0 0xF8FF # Apple logo
|
||||
0xF1 0x3009 # RIGHT ANGLE BRACKET
|
||||
0xF2 0x222B # INTEGRAL
|
||||
0xF3 0x2320 # TOP HALF INTEGRAL
|
||||
0xF4 0x23AE # INTEGRAL EXTENSION # for Unicode 3.2 and later
|
||||
0xF5 0x2321 # BOTTOM HALF INTEGRAL
|
||||
0xF6 0x239E # RIGHT PARENTHESIS UPPER HOOK # for Unicode 3.2 and later
|
||||
0xF7 0x239F # RIGHT PARENTHESIS EXTENSION # for Unicode 3.2 and later
|
||||
0xF8 0x23A0 # RIGHT PARENTHESIS LOWER HOOK # for Unicode 3.2 and later
|
||||
0xF9 0x23A4 # RIGHT SQUARE BRACKET UPPER CORNER # for Unicode 3.2 and later
|
||||
0xFA 0x23A5 # RIGHT SQUARE BRACKET EXTENSION # for Unicode 3.2 and later
|
||||
0xFB 0x23A6 # RIGHT SQUARE BRACKET LOWER CORNER # for Unicode 3.2 and later
|
||||
0xFC 0x23AB # RIGHT CURLY BRACKET UPPER HOOK # for Unicode 3.2 and later
|
||||
0xFD 0x23AC # RIGHT CURLY BRACKET MIDDLE PIECE # for Unicode 3.2 and later
|
||||
0xFE 0x23AD # RIGHT CURLY BRACKET LOWER HOOK # for Unicode 3.2 and later
|
384
charmap/THAI.TXT
Normal file
|
@ -0,0 +1,384 @@
|
|||
#=======================================================================
|
||||
# File name: THAI.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Thai
|
||||
# character set to Unicode 3.2 and later.
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update mapping for 0xDB to use new Unicode
|
||||
# 3.2 WORD JOINER instead of ZWNBSP (BOM).
|
||||
# Update URLs. Matches internal utom<b3>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b2>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n07 1998-Feb-05 Update to match internal utom<n5>, ufrm<n13>
|
||||
# and Text Encoding Converter version 1.3:
|
||||
# Use standard Unicodes plus transcoding hints
|
||||
# instead of single corporate characters; see
|
||||
# details below. Also update header comments
|
||||
# to new format.
|
||||
# n04 1995-Nov-17 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n6>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Thai code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode or Unicode sequence
|
||||
# (in hex as 0xNNNN or 0xNNNN+0xNNNN).
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Thai code order.
|
||||
#
|
||||
# Some of these mappings require the use of corporate characters.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Thai character set uses the standard control characters at
|
||||
# 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Thai:
|
||||
# ---------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# Codes 0xA1-0xDA and 0xDF-0xFB are the character set from Thai
|
||||
# standard TIS 620-2533, except that the following changes are
|
||||
# made:
|
||||
# 0xEE is TRADE MARK SIGN (instead of THAI CHARACTER YAMAKKAN)
|
||||
# 0xFA is REGISTERED SIGN (instead of THAI CHARACTER ANGKHANKHU)
|
||||
# 0xFB is COPYRIGHT SIGN (instead of THAI CHARACTER KHOMUT)
|
||||
#
|
||||
# Codes 0x80-0x82, 0x8D-0x8E, 0x91, 0x9D-0x9E, 0xA0, and 0xDB-0xDE are
|
||||
# various additional punctuation marks (e.g. curly quotes,
|
||||
# ellipsis), no-break space, and two special characters "word join"
|
||||
# and "word break".
|
||||
#
|
||||
# Codes 0x83-0x8C, 0x8F, and 0x92-0x9C are for positional variants
|
||||
# of the upper vowels, tone marks, and other signs at 0xD1,
|
||||
# 0xD4-0xD7, and 0xE7-0xED. The positional variants would normally
|
||||
# be considered presentation forms only and not characters. In most
|
||||
# cases they are not typed directly; they are selected automatically
|
||||
# at display time by the WorldScript software. However, using the
|
||||
# Thai-DTP keyboard, the presentation forms can in fact be typed
|
||||
# directly using dead keys. Thus they must be treated as real
|
||||
# characters in the Mac OS Thai encoding. They are mapped using
|
||||
# variant tags; see below.
|
||||
#
|
||||
# Several code points are undefined and unused (they cannot be
|
||||
# typed using any of the Mac OS Thai keyboard layouts): 0x90, 0x9F,
|
||||
# 0xFC-0xFE. These are not shown in the table below.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# The goals in the Apple mappings provided here are:
|
||||
# - Ensure roundtrip mapping from every character in the Mac OS Thai
|
||||
# character set to Unicode and back
|
||||
# - Use standard Unicode characters as much as possible, to maximize
|
||||
# interchangeability of the resulting Unicode text. Whenever possible,
|
||||
# avoid having content carried by private-use characters.
|
||||
#
|
||||
# To satisfy both goals, we use private use characters to mark variants
|
||||
# that are similar to a sequence of one or more standard Unicode
|
||||
# characters.
|
||||
#
|
||||
# Apple has defined a block of 32 corporate characters as "transcoding
|
||||
# hints." These are used in combination with standard Unicode characters
|
||||
# to force them to be treated in a special way for mapping to other
|
||||
# encodings; they have no other effect. Sixteen of these transcoding
|
||||
# hints are "grouping hints" - they indicate that the next 2-4 Unicode
|
||||
# characters should be treated as a single entity for transcoding. The
|
||||
# other sixteen transcoding hints are "variant tags" - they are like
|
||||
# combining characters, and can follow a standard Unicode (or a sequence
|
||||
# consisting of a base character and other combining characters) to
|
||||
# cause it to be treated in a special way for transcoding. These always
|
||||
# terminate a combining-character sequence.
|
||||
#
|
||||
# The transcoding hints used in this mapping table are three
|
||||
# variant tags in the range 0xF873-75. Since these are combined with
|
||||
# standard Unicode characters, some characters in the Mac OS Thai
|
||||
# character set map to a sequence of two Unicodes instead of a single
|
||||
# Unicode character. For example, the Mac OS Thai character at 0x83 is a
|
||||
# low-left positional variant of THAI CHARACTER MAI EK (the standard
|
||||
# mapping is for the abstract character at 0xE8). So 0x83 is mapped to
|
||||
# 0x0E48 (THAI CHARACTER MAI EK) + 0xF875 (a variant tag).
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version b02 to version b03/c01:
|
||||
#
|
||||
# - Update mapping for 0xDB to use new Unicode 3.2 character U+2060
|
||||
# WORD JOINER instead of U+FEFF ZERO WIDTH NO-BREAK SPACE (BOM)
|
||||
#
|
||||
# Changes from version n04 to version n07:
|
||||
#
|
||||
# - Changed mappings of the positional variants to use standard
|
||||
# Unicodes + transcoding hint, instead of using single corporate
|
||||
# zone characters. This affected the mappings for the following:
|
||||
# 0x83-0x8C, 0x8F, 0x92-0x9C
|
||||
#
|
||||
# - Just comment out unused code points in the table, instead
|
||||
# of mapping them to U+FFFD.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x81 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x82 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0x83 0x0E48+0xF875 # THAI CHARACTER MAI EK, low left position
|
||||
0x84 0x0E49+0xF875 # THAI CHARACTER MAI THO, low left position
|
||||
0x85 0x0E4A+0xF875 # THAI CHARACTER MAI TRI, low left position
|
||||
0x86 0x0E4B+0xF875 # THAI CHARACTER MAI CHATTAWA, low left position
|
||||
0x87 0x0E4C+0xF875 # THAI CHARACTER THANTHAKHAT, low left position
|
||||
0x88 0x0E48+0xF873 # THAI CHARACTER MAI EK, low position
|
||||
0x89 0x0E49+0xF873 # THAI CHARACTER MAI THO, low position
|
||||
0x8A 0x0E4A+0xF873 # THAI CHARACTER MAI TRI, low position
|
||||
0x8B 0x0E4B+0xF873 # THAI CHARACTER MAI CHATTAWA, low position
|
||||
0x8C 0x0E4C+0xF873 # THAI CHARACTER THANTHAKHAT, low position
|
||||
0x8D 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0x8E 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0x8F 0x0E4D+0xF874 # THAI CHARACTER NIKHAHIT, left position
|
||||
#
|
||||
0x91 0x2022 # BULLET
|
||||
0x92 0x0E31+0xF874 # THAI CHARACTER MAI HAN-AKAT, left position
|
||||
0x93 0x0E47+0xF874 # THAI CHARACTER MAITAIKHU, left position
|
||||
0x94 0x0E34+0xF874 # THAI CHARACTER SARA I, left position
|
||||
0x95 0x0E35+0xF874 # THAI CHARACTER SARA II, left position
|
||||
0x96 0x0E36+0xF874 # THAI CHARACTER SARA UE, left position
|
||||
0x97 0x0E37+0xF874 # THAI CHARACTER SARA UEE, left position
|
||||
0x98 0x0E48+0xF874 # THAI CHARACTER MAI EK, left position
|
||||
0x99 0x0E49+0xF874 # THAI CHARACTER MAI THO, left position
|
||||
0x9A 0x0E4A+0xF874 # THAI CHARACTER MAI TRI, left position
|
||||
0x9B 0x0E4B+0xF874 # THAI CHARACTER MAI CHATTAWA, left position
|
||||
0x9C 0x0E4C+0xF874 # THAI CHARACTER THANTHAKHAT, left position
|
||||
0x9D 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0x9E 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
#
|
||||
0xA0 0x00A0 # NO-BREAK SPACE
|
||||
0xA1 0x0E01 # THAI CHARACTER KO KAI
|
||||
0xA2 0x0E02 # THAI CHARACTER KHO KHAI
|
||||
0xA3 0x0E03 # THAI CHARACTER KHO KHUAT
|
||||
0xA4 0x0E04 # THAI CHARACTER KHO KHWAI
|
||||
0xA5 0x0E05 # THAI CHARACTER KHO KHON
|
||||
0xA6 0x0E06 # THAI CHARACTER KHO RAKHANG
|
||||
0xA7 0x0E07 # THAI CHARACTER NGO NGU
|
||||
0xA8 0x0E08 # THAI CHARACTER CHO CHAN
|
||||
0xA9 0x0E09 # THAI CHARACTER CHO CHING
|
||||
0xAA 0x0E0A # THAI CHARACTER CHO CHANG
|
||||
0xAB 0x0E0B # THAI CHARACTER SO SO
|
||||
0xAC 0x0E0C # THAI CHARACTER CHO CHOE
|
||||
0xAD 0x0E0D # THAI CHARACTER YO YING
|
||||
0xAE 0x0E0E # THAI CHARACTER DO CHADA
|
||||
0xAF 0x0E0F # THAI CHARACTER TO PATAK
|
||||
0xB0 0x0E10 # THAI CHARACTER THO THAN
|
||||
0xB1 0x0E11 # THAI CHARACTER THO NANGMONTHO
|
||||
0xB2 0x0E12 # THAI CHARACTER THO PHUTHAO
|
||||
0xB3 0x0E13 # THAI CHARACTER NO NEN
|
||||
0xB4 0x0E14 # THAI CHARACTER DO DEK
|
||||
0xB5 0x0E15 # THAI CHARACTER TO TAO
|
||||
0xB6 0x0E16 # THAI CHARACTER THO THUNG
|
||||
0xB7 0x0E17 # THAI CHARACTER THO THAHAN
|
||||
0xB8 0x0E18 # THAI CHARACTER THO THONG
|
||||
0xB9 0x0E19 # THAI CHARACTER NO NU
|
||||
0xBA 0x0E1A # THAI CHARACTER BO BAIMAI
|
||||
0xBB 0x0E1B # THAI CHARACTER PO PLA
|
||||
0xBC 0x0E1C # THAI CHARACTER PHO PHUNG
|
||||
0xBD 0x0E1D # THAI CHARACTER FO FA
|
||||
0xBE 0x0E1E # THAI CHARACTER PHO PHAN
|
||||
0xBF 0x0E1F # THAI CHARACTER FO FAN
|
||||
0xC0 0x0E20 # THAI CHARACTER PHO SAMPHAO
|
||||
0xC1 0x0E21 # THAI CHARACTER MO MA
|
||||
0xC2 0x0E22 # THAI CHARACTER YO YAK
|
||||
0xC3 0x0E23 # THAI CHARACTER RO RUA
|
||||
0xC4 0x0E24 # THAI CHARACTER RU
|
||||
0xC5 0x0E25 # THAI CHARACTER LO LING
|
||||
0xC6 0x0E26 # THAI CHARACTER LU
|
||||
0xC7 0x0E27 # THAI CHARACTER WO WAEN
|
||||
0xC8 0x0E28 # THAI CHARACTER SO SALA
|
||||
0xC9 0x0E29 # THAI CHARACTER SO RUSI
|
||||
0xCA 0x0E2A # THAI CHARACTER SO SUA
|
||||
0xCB 0x0E2B # THAI CHARACTER HO HIP
|
||||
0xCC 0x0E2C # THAI CHARACTER LO CHULA
|
||||
0xCD 0x0E2D # THAI CHARACTER O ANG
|
||||
0xCE 0x0E2E # THAI CHARACTER HO NOKHUK
|
||||
0xCF 0x0E2F # THAI CHARACTER PAIYANNOI
|
||||
0xD0 0x0E30 # THAI CHARACTER SARA A
|
||||
0xD1 0x0E31 # THAI CHARACTER MAI HAN-AKAT
|
||||
0xD2 0x0E32 # THAI CHARACTER SARA AA
|
||||
0xD3 0x0E33 # THAI CHARACTER SARA AM
|
||||
0xD4 0x0E34 # THAI CHARACTER SARA I
|
||||
0xD5 0x0E35 # THAI CHARACTER SARA II
|
||||
0xD6 0x0E36 # THAI CHARACTER SARA UE
|
||||
0xD7 0x0E37 # THAI CHARACTER SARA UEE
|
||||
0xD8 0x0E38 # THAI CHARACTER SARA U
|
||||
0xD9 0x0E39 # THAI CHARACTER SARA UU
|
||||
0xDA 0x0E3A # THAI CHARACTER PHINTHU
|
||||
0xDB 0x2060 # WORD JOINER # for Unicode 3.2 and later
|
||||
0xDC 0x200B # ZERO WIDTH SPACE
|
||||
0xDD 0x2013 # EN DASH
|
||||
0xDE 0x2014 # EM DASH
|
||||
0xDF 0x0E3F # THAI CURRENCY SYMBOL BAHT
|
||||
0xE0 0x0E40 # THAI CHARACTER SARA E
|
||||
0xE1 0x0E41 # THAI CHARACTER SARA AE
|
||||
0xE2 0x0E42 # THAI CHARACTER SARA O
|
||||
0xE3 0x0E43 # THAI CHARACTER SARA AI MAIMUAN
|
||||
0xE4 0x0E44 # THAI CHARACTER SARA AI MAIMALAI
|
||||
0xE5 0x0E45 # THAI CHARACTER LAKKHANGYAO
|
||||
0xE6 0x0E46 # THAI CHARACTER MAIYAMOK
|
||||
0xE7 0x0E47 # THAI CHARACTER MAITAIKHU
|
||||
0xE8 0x0E48 # THAI CHARACTER MAI EK
|
||||
0xE9 0x0E49 # THAI CHARACTER MAI THO
|
||||
0xEA 0x0E4A # THAI CHARACTER MAI TRI
|
||||
0xEB 0x0E4B # THAI CHARACTER MAI CHATTAWA
|
||||
0xEC 0x0E4C # THAI CHARACTER THANTHAKHAT
|
||||
0xED 0x0E4D # THAI CHARACTER NIKHAHIT
|
||||
0xEE 0x2122 # TRADE MARK SIGN
|
||||
0xEF 0x0E4F # THAI CHARACTER FONGMAN
|
||||
0xF0 0x0E50 # THAI DIGIT ZERO
|
||||
0xF1 0x0E51 # THAI DIGIT ONE
|
||||
0xF2 0x0E52 # THAI DIGIT TWO
|
||||
0xF3 0x0E53 # THAI DIGIT THREE
|
||||
0xF4 0x0E54 # THAI DIGIT FOUR
|
||||
0xF5 0x0E55 # THAI DIGIT FIVE
|
||||
0xF6 0x0E56 # THAI DIGIT SIX
|
||||
0xF7 0x0E57 # THAI DIGIT SEVEN
|
||||
0xF8 0x0E58 # THAI DIGIT EIGHT
|
||||
0xF9 0x0E59 # THAI DIGIT NINE
|
||||
0xFA 0x00AE # REGISTERED SIGN
|
||||
0xFB 0x00A9 # COPYRIGHT SIGN
|
341
charmap/TURKISH.TXT
Normal file
|
@ -0,0 +1,341 @@
|
|||
#=======================================================================
|
||||
# File name: TURKISH.TXT
|
||||
#
|
||||
# Contents: Map (external version) from Mac OS Turkish
|
||||
# character set to Unicode 2.1 and later.
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments. Matches internal xml
|
||||
# <c1.1> and Text Encoding Converter 2.0.
|
||||
# b3,c1 2002-Dec-19 Update URLs, notes. Matches internal
|
||||
# utom<b1>.
|
||||
# b02 1999-Sep-22 Update contact e-mail address. Matches
|
||||
# internal utom<b1>, ufrm<b1>, and Text
|
||||
# Encoding Converter version 1.5.
|
||||
# n05 1998-Feb-05 Minor update to header comments
|
||||
# n03 1997-Dec-14 Update to match internal utom<n5>, ufrm<n15>:
|
||||
# Change standard mapping for 0xBD from U+2126
|
||||
# to its canonical decomposition, U+03A9.
|
||||
# n02 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<n4>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Format:
|
||||
# -------
|
||||
#
|
||||
# Three tab-separated columns;
|
||||
# '#' begins a comment which continues to the end of the line.
|
||||
# Column #1 is the Mac OS Turkish code (in hex as 0xNN)
|
||||
# Column #2 is the corresponding Unicode (in hex as 0xNNNN)
|
||||
# Column #3 is a comment containing the Unicode name
|
||||
#
|
||||
# The entries are in Mac OS Turkish code order.
|
||||
#
|
||||
# Two of these mappings require the use of corporate characters.
|
||||
# See the file "CORPCHAR.TXT" and notes below.
|
||||
#
|
||||
# Control character mappings are not shown in this table, following
|
||||
# the conventions of the standard UTC mapping tables. However, the
|
||||
# Mac OS Turkish character set uses the standard control characters at
|
||||
# 0x00-0x1F and 0x7F.
|
||||
#
|
||||
# Notes on Mac OS Turkish:
|
||||
# ------------------------
|
||||
#
|
||||
# This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
|
||||
# environments, it is only supported via transcoding to and from
|
||||
# Unicode.
|
||||
#
|
||||
# Mac OS Turkish is used for Turkish.
|
||||
#
|
||||
# The Mac OS Turkish encoding shares the script code smRoman
|
||||
# (0) with the Mac OS Roman encoding. To determine if the Turkish
|
||||
# encoding is being used, you must also check if the system region
|
||||
# code is 24, verTurkey.
|
||||
#
|
||||
# This character set is a variant of standard Mac OS Roman. It adds
|
||||
# upper & lower G with breve, upper & lower S with cedilla, upper I
|
||||
# with dot, and moves the dotless lower i from its position at 0xF5
|
||||
# in standard Mac OS Roman to a position at 0xDD here (leaving the
|
||||
# 0xF5 code point undefined in Mac OS Turkish). This gives a total
|
||||
# of 7 code point differences from standard Mac OS Roman.
|
||||
#
|
||||
# Unicode mapping issues and notes:
|
||||
# ---------------------------------
|
||||
#
|
||||
# The following corporate zone Unicode characters are used in this
|
||||
# mapping:
|
||||
#
|
||||
# 0xF8A0 undefined1, used to map the single undefined code point
|
||||
# in Mac OS Turkish (to obtain roundtrip fidelity for all
|
||||
# code points).
|
||||
# 0xF8FF Apple logo
|
||||
#
|
||||
# NOTE: The graphic image associated with the Apple logo character
|
||||
# is not authorized for use without permission of Apple, and
|
||||
# unauthorized use might constitute trademark infringement.
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version n02 to version n03:
|
||||
#
|
||||
# - Change mapping of 0xBD from U+2126 to its canonical
|
||||
# decomposition, U+03A9.
|
||||
#
|
||||
##################
|
||||
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x005C # REVERSE SOLIDUS
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x007E # TILDE
|
||||
#
|
||||
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x83 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x84 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x85 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x86 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x87 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x88 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x89 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x8A 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x8B 0x00E3 # LATIN SMALL LETTER A WITH TILDE
|
||||
0x8C 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x8D 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x8E 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x8F 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x90 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x91 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x92 0x00ED # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x93 0x00EC # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x94 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x95 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x96 0x00F1 # LATIN SMALL LETTER N WITH TILDE
|
||||
0x97 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x98 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x99 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x9A 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x9B 0x00F5 # LATIN SMALL LETTER O WITH TILDE
|
||||
0x9C 0x00FA # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x9D 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x9E 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x9F 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0xA0 0x2020 # DAGGER
|
||||
0xA1 0x00B0 # DEGREE SIGN
|
||||
0xA2 0x00A2 # CENT SIGN
|
||||
0xA3 0x00A3 # POUND SIGN
|
||||
0xA4 0x00A7 # SECTION SIGN
|
||||
0xA5 0x2022 # BULLET
|
||||
0xA6 0x00B6 # PILCROW SIGN
|
||||
0xA7 0x00DF # LATIN SMALL LETTER SHARP S
|
||||
0xA8 0x00AE # REGISTERED SIGN
|
||||
0xA9 0x00A9 # COPYRIGHT SIGN
|
||||
0xAA 0x2122 # TRADE MARK SIGN
|
||||
0xAB 0x00B4 # ACUTE ACCENT
|
||||
0xAC 0x00A8 # DIAERESIS
|
||||
0xAD 0x2260 # NOT EQUAL TO
|
||||
0xAE 0x00C6 # LATIN CAPITAL LETTER AE
|
||||
0xAF 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0xB0 0x221E # INFINITY
|
||||
0xB1 0x00B1 # PLUS-MINUS SIGN
|
||||
0xB2 0x2264 # LESS-THAN OR EQUAL TO
|
||||
0xB3 0x2265 # GREATER-THAN OR EQUAL TO
|
||||
0xB4 0x00A5 # YEN SIGN
|
||||
0xB5 0x00B5 # MICRO SIGN
|
||||
0xB6 0x2202 # PARTIAL DIFFERENTIAL
|
||||
0xB7 0x2211 # N-ARY SUMMATION
|
||||
0xB8 0x220F # N-ARY PRODUCT
|
||||
0xB9 0x03C0 # GREEK SMALL LETTER PI
|
||||
0xBA 0x222B # INTEGRAL
|
||||
0xBB 0x00AA # FEMININE ORDINAL INDICATOR
|
||||
0xBC 0x00BA # MASCULINE ORDINAL INDICATOR
|
||||
0xBD 0x03A9 # GREEK CAPITAL LETTER OMEGA
|
||||
0xBE 0x00E6 # LATIN SMALL LETTER AE
|
||||
0xBF 0x00F8 # LATIN SMALL LETTER O WITH STROKE
|
||||
0xC0 0x00BF # INVERTED QUESTION MARK
|
||||
0xC1 0x00A1 # INVERTED EXCLAMATION MARK
|
||||
0xC2 0x00AC # NOT SIGN
|
||||
0xC3 0x221A # SQUARE ROOT
|
||||
0xC4 0x0192 # LATIN SMALL LETTER F WITH HOOK
|
||||
0xC5 0x2248 # ALMOST EQUAL TO
|
||||
0xC6 0x2206 # INCREMENT
|
||||
0xC7 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC8 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0xC9 0x2026 # HORIZONTAL ELLIPSIS
|
||||
0xCA 0x00A0 # NO-BREAK SPACE
|
||||
0xCB 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0xCC 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0xCD 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0xCE 0x0152 # LATIN CAPITAL LIGATURE OE
|
||||
0xCF 0x0153 # LATIN SMALL LIGATURE OE
|
||||
0xD0 0x2013 # EN DASH
|
||||
0xD1 0x2014 # EM DASH
|
||||
0xD2 0x201C # LEFT DOUBLE QUOTATION MARK
|
||||
0xD3 0x201D # RIGHT DOUBLE QUOTATION MARK
|
||||
0xD4 0x2018 # LEFT SINGLE QUOTATION MARK
|
||||
0xD5 0x2019 # RIGHT SINGLE QUOTATION MARK
|
||||
0xD6 0x00F7 # DIVISION SIGN
|
||||
0xD7 0x25CA # LOZENGE
|
||||
0xD8 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0xD9 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0xDA 0x011E # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0xDB 0x011F # LATIN SMALL LETTER G WITH BREVE
|
||||
0xDC 0x0130 # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0xDD 0x0131 # LATIN SMALL LETTER DOTLESS I
|
||||
0xDE 0x015E # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0xDF 0x015F # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0xE0 0x2021 # DOUBLE DAGGER
|
||||
0xE1 0x00B7 # MIDDLE DOT
|
||||
0xE2 0x201A # SINGLE LOW-9 QUOTATION MARK
|
||||
0xE3 0x201E # DOUBLE LOW-9 QUOTATION MARK
|
||||
0xE4 0x2030 # PER MILLE SIGN
|
||||
0xE5 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0xE6 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0xE7 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0xE8 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0xE9 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0xEA 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0xEB 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0xEC 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0xED 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0xEE 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0xEF 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0xF0 0xF8FF # Apple logo
|
||||
0xF1 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0xF2 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0xF3 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0xF4 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0xF5 0xF8A0 # undefined1
|
||||
0xF6 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0xF7 0x02DC # SMALL TILDE
|
||||
0xF8 0x00AF # MACRON
|
||||
0xF9 0x02D8 # BREVE
|
||||
0xFA 0x02D9 # DOT ABOVE
|
||||
0xFB 0x02DA # RING ABOVE
|
||||
0xFC 0x00B8 # CEDILLA
|
||||
0xFD 0x02DD # DOUBLE ACUTE ACCENT
|
||||
0xFE 0x02DB # OGONEK
|
||||
0xFF 0x02C7 # CARON
|
106
charmap/UKRAINE.TXT
Normal file
|
@ -0,0 +1,106 @@
|
|||
#=======================================================================
|
||||
# File name: UKRAINE.TXT
|
||||
#
|
||||
# Contents: Notes on Mac OS Ukrainian character set
|
||||
#
|
||||
# Copyright: (c) 1995-2002, 2005 by Apple Computer, Inc., all rights
|
||||
# reserved.
|
||||
#
|
||||
# Contact: charsets@apple.com
|
||||
#
|
||||
# Changes:
|
||||
#
|
||||
# c02 2005-Apr-05 Update header comments.
|
||||
# b3,c1 2002-Dec-19 Update URLs. Matches internal utom<b1>.
|
||||
# b02 1999-Sep-22 Encoding changed for Mac OS 9.0 to merge
|
||||
# with Mac OS Cyrillic and support EURO SIGN;
|
||||
# change mappings for 0xFF. For Mac OS 9.0
|
||||
# there is no longer a separate Mac OS
|
||||
# Ukrainian character set; the mappings are
|
||||
# in CYRILLIC.TXT. Update contact e-mail
|
||||
# address. Matches internal utom<b1>, ufrm<b1>,
|
||||
# and Text Encoding Converter version 1.5.
|
||||
# n04 1998-Feb-05 Update header comments to new format; no
|
||||
# mapping changes. Matches internal utom<2>,
|
||||
# ufrm<13>, and Text Encoding Converter
|
||||
# version 1.3.
|
||||
# n02 1995-Apr-15 First version (after fixing some typos).
|
||||
# Matches internal ufrm<4>.
|
||||
#
|
||||
# Standard header:
|
||||
# ----------------
|
||||
#
|
||||
# Apple, the Apple logo, and Macintosh are trademarks of Apple
|
||||
# Computer, Inc., registered in the United States and other countries.
|
||||
# Unicode is a trademark of Unicode Inc. For the sake of brevity,
|
||||
# throughout this document, "Macintosh" can be used to refer to
|
||||
# Macintosh computers and "Unicode" can be used to refer to the
|
||||
# Unicode standard.
|
||||
#
|
||||
# Apple Computer, Inc. ("Apple") makes no warranty or representation,
|
||||
# either express or implied, with respect to this document and the
|
||||
# included data, its quality, accuracy, or fitness for a particular
|
||||
# purpose. In no event will Apple be liable for direct, indirect,
|
||||
# special, incidental, or consequential damages resulting from any
|
||||
# defect or inaccuracy in this document or the included data.
|
||||
#
|
||||
# These mapping tables and character lists are subject to change.
|
||||
# The latest tables should be available from the following:
|
||||
#
|
||||
# <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
|
||||
#
|
||||
# For general information about Mac OS encodings and these mapping
|
||||
# tables, see the file "README.TXT".
|
||||
#
|
||||
# Notes on Mac OS Ukrainian and Mac OS Cyrillic:
|
||||
# ----------------------------------------------
|
||||
#
|
||||
# Before Mac OS 9.0, there were two separate Slavic Cyrillic
|
||||
# encodings for the Mac OS:
|
||||
#
|
||||
# 1. The Cyrillic currency sign variant (used for localized Russian
|
||||
# and Bulgarian systems), which had the following:
|
||||
# 0xA2 U+00A2 CENT SIGN
|
||||
# 0xB6 U+2202 PARTIAL DIFFERENTIAL
|
||||
# 0xFF U+00A4 CURRENCY SIGN
|
||||
#
|
||||
# 2. The Ukrainian currency sign variant (used for localized Ukrainian
|
||||
# systems and the pre-9.0 Cyrillic Language Kit), which had the
|
||||
# following:
|
||||
# 0xA2 U+0490 CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
# 0xB6 U+0491 CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
# 0xFF U+00A4 CURRENCY SIGN
|
||||
#
|
||||
# Before Mac OS 9.0, The Ukrainian currency sign variant shared the
|
||||
# script code smCyrillic (7) with the Cyrillic currency sign variant.
|
||||
# The Ukrainian currency sign variant was being used if one of the
|
||||
# following was true:
|
||||
# - The system region code was 62, verUkraine (indicates Ukrainian
|
||||
# localized system), or
|
||||
# - The system script was not 7, smCyrillic (indicates Cyrillic
|
||||
# Language Kit instead of localized system).
|
||||
#
|
||||
# For Mac OS 9.0 and later, both currency sign variants were replaced
|
||||
# with a new Euro sign version of Mac OS Cyrillic, which is similar to
|
||||
# the old Ukrainian currency sign variant but changes 0xFF to EURO
|
||||
# SIGN. Mappings for this are in CYRILLIC.TXT.
|
||||
#
|
||||
# Note: There is a common glyph variation in Ukrainian, in which the
|
||||
# glyph for CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I may or
|
||||
# may not have a dot above.
|
||||
#
|
||||
# Details of mapping changes in each version:
|
||||
# -------------------------------------------
|
||||
#
|
||||
# Changes from version n04 to version b02:
|
||||
#
|
||||
# - Encoding changed for Mac OS 9.0 to merge with Mac OS Cyrillic and
|
||||
# support EURO SIGN; 0xFF changed from U+00A4 to U+20AC. For Mac OS
|
||||
# 9.0 there is no longer a separate Mac OS Ukrainian character set, so
|
||||
# the mappings here are deleted; see the mappings in CYRILLIC.TXT.
|
||||
#
|
||||
##################
|
||||
|
||||
##################
|
||||
# For mappings, see CYRILLIC.TXT
|
||||
##################
|
59
convert.c
|
@ -1,59 +0,0 @@
|
|||
// convert.c - Conversion helper functions.
|
||||
#include "convert.h"
|
||||
|
||||
#include "defs.h"
|
||||
#include "mac_from_unix_data.h"
|
||||
|
||||
#include <CursorCtl.h>
|
||||
#include <Files.h>
|
||||
#include <MacErrors.h>
|
||||
#include <Quickdraw.h>
|
||||
|
||||
int convert_read(short ref, long *count, void *data) {
|
||||
OSErr err;
|
||||
|
||||
SpinCursor(1);
|
||||
err = FSRead(ref, count, data);
|
||||
switch (err) {
|
||||
case noErr:
|
||||
return kConvertOK;
|
||||
case eofErr:
|
||||
return kConvertEOF;
|
||||
default:
|
||||
print_errcode(err, "could not read source file");
|
||||
return kConvertError;
|
||||
}
|
||||
}
|
||||
|
||||
int convert_write(short ref, long count, const void *data) {
|
||||
OSErr err;
|
||||
|
||||
SpinCursor(1);
|
||||
err = FSWrite(ref, &count, data);
|
||||
if (err == noErr) {
|
||||
return kConvertOK;
|
||||
}
|
||||
print_errcode(err, "could not write temp file");
|
||||
return kConvertError;
|
||||
}
|
||||
|
||||
static unsigned short *gFromUnixData;
|
||||
|
||||
// Get the table for converting from Unix to Macintosh.
|
||||
unsigned short *mac_from_unix_data(void) {
|
||||
Ptr ptr, src, dest;
|
||||
|
||||
if (gFromUnixData != NULL) {
|
||||
return gFromUnixData;
|
||||
}
|
||||
ptr = NewPtr(FROM_UNIX_DATALEN);
|
||||
if (ptr == NULL) {
|
||||
print_memerr(FROM_UNIX_DATALEN);
|
||||
return NULL;
|
||||
}
|
||||
src = (void *)kFromUnixData;
|
||||
dest = ptr;
|
||||
UnpackBits(&src, &dest, FROM_UNIX_DATALEN);
|
||||
gFromUnixData = (void *)ptr;
|
||||
return gFromUnixData;
|
||||
}
|
53
convert.h
|
@ -1,53 +0,0 @@
|
|||
// These helper functions are written so the conversion functions can be written
|
||||
// for a standard C environment without using Macintosh Toolbox functions.
|
||||
|
||||
enum {
|
||||
// Base size of temporary buffer for converting files, not counting the
|
||||
// "extra".
|
||||
kBufferBaseSize = 16 * 1024,
|
||||
|
||||
// Extra space past the end of the buffer for converting files.
|
||||
kBufferExtraSize = 16,
|
||||
|
||||
// Total size of a buffer.
|
||||
kBufferTotalSize = kBufferBaseSize + kBufferExtraSize,
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Helper functions
|
||||
// =============================================================================
|
||||
|
||||
// Result codes for convert_read and convert_write.
|
||||
enum {
|
||||
kConvertOK,
|
||||
kConvertError,
|
||||
kConvertEOF,
|
||||
};
|
||||
|
||||
// Read data from a file.
|
||||
int convert_read(short ref, long *count, void *data);
|
||||
|
||||
// Write data to a file.
|
||||
int convert_write(short ref, long count, const void *data);
|
||||
|
||||
// Get the table for converting from Unix to Macintosh.
|
||||
unsigned short *mac_from_unix_data(void);
|
||||
|
||||
// =============================================================================
|
||||
// Conversion functions
|
||||
// =============================================================================
|
||||
|
||||
// Convert Macintosh encoding with CR line endings to UTF-8 with LF. The source
|
||||
// and destinations are file handles. The buffers have size kBufferTotalSize.
|
||||
int mac_to_unix(short srcRef, short destRef, void *srcBuf, void *destBuf);
|
||||
|
||||
// Convert UTF-8 with LF line endings to Macintosh encoding with CR. The source
|
||||
// and destinations are file handles. The buffers have size kBufferTotalSize.
|
||||
int mac_from_unix(short srcRef, short destRef, void *srcBuf, void *destBuf);
|
||||
|
||||
// Raw data copy.
|
||||
int copy_data(short srcRef, short destRef, void *buf);
|
||||
|
||||
// Convert line endings but don't change encoding.
|
||||
int convert_line_endings(short srcRef, short destRef, void *buf,
|
||||
unsigned char srcEnding, unsigned char destEnding);
|
1
convert/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
charmap*
|
54
convert/BUILD.bazel
Normal file
|
@ -0,0 +1,54 @@
|
|||
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
|
||||
load("//bazel:copts.bzl", "COPTS")
|
||||
|
||||
genrule(
|
||||
name = "data",
|
||||
srcs = [
|
||||
"//charmap:data",
|
||||
"//scripts:data",
|
||||
],
|
||||
outs = [
|
||||
"charmap_data.c",
|
||||
"charmap_info.c",
|
||||
"charmap_region.c",
|
||||
"charmap.r",
|
||||
],
|
||||
cmd = "$(execpath //gen:macscript) -dest=$(RULEDIR) -src=. -quiet -format=false",
|
||||
tools = [
|
||||
"//gen:macscript",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "convert",
|
||||
srcs = [
|
||||
"charmap_data.c",
|
||||
"charmap_info.c",
|
||||
"charmap_region.c",
|
||||
"convert.c",
|
||||
"convert_1f.c",
|
||||
"convert_1r.c",
|
||||
],
|
||||
hdrs = [
|
||||
"convert.h",
|
||||
"data.h",
|
||||
],
|
||||
copts = COPTS,
|
||||
deps = [
|
||||
"//lib",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "convert_test",
|
||||
size = "small",
|
||||
srcs = [
|
||||
"convert_test.c",
|
||||
],
|
||||
copts = COPTS,
|
||||
deps = [
|
||||
":convert",
|
||||
"//lib",
|
||||
"//lib:test",
|
||||
],
|
||||
)
|
10
convert/README.md
Normal file
|
@ -0,0 +1,10 @@
|
|||
# Converter
|
||||
|
||||
## Debugging
|
||||
|
||||
Tests can be debugged with GDB:
|
||||
|
||||
```shell
|
||||
bazel build -c dbg //convert:convert_test
|
||||
gdb -ex 'dir .' -ex 'cd bazel-bin' bazel-bin/convert/convert_test
|
||||
```
|
42
convert/convert.c
Normal file
|
@ -0,0 +1,42 @@
|
|||
// Copyright 2022 Dietrich Epp.
|
||||
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the
|
||||
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
|
||||
#include "convert/convert.h"
|
||||
|
||||
struct ConvertEngine {
|
||||
ConvertBuildf build;
|
||||
ConvertRunf run;
|
||||
};
|
||||
|
||||
const struct ConvertEngine kEngines[][2] = {
|
||||
{{Convert1fBuild, Convert1fRun}, {Convert1rBuild, Convert1rRun}}};
|
||||
|
||||
int ConverterBuild(struct Converter *c, Handle data, Size datasz,
|
||||
ConvertDirection direction)
|
||||
{
|
||||
int engine;
|
||||
const struct ConvertEngine *funcs;
|
||||
Handle out;
|
||||
ErrorCode err;
|
||||
|
||||
if (datasz == 0) {
|
||||
return kErrorBadData;
|
||||
}
|
||||
engine = (UInt8)(**data) - 1;
|
||||
if (engine < 0 || (int)(sizeof(kEngines) / sizeof(*kEngines)) <= engine) {
|
||||
// Invalid engine.
|
||||
return kErrorBadData;
|
||||
}
|
||||
funcs = &kEngines[engine][direction];
|
||||
if (funcs->build == NULL || funcs->run == NULL) {
|
||||
// Invalid engine.
|
||||
return kErrorBadData;
|
||||
}
|
||||
err = funcs->build(&out, data, datasz);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
c->data = out;
|
||||
c->run = funcs->run;
|
||||
return 0;
|
||||
}
|
82
convert/convert.h
Normal file
|
@ -0,0 +1,82 @@
|
|||
// Copyright 2022 Dietrich Epp.
|
||||
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the
|
||||
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
|
||||
#ifndef CONVERT_CONVERT_H
|
||||
#define CONVERT_CONVERT_H
|
||||
// convert.h - character set conversion routines.
|
||||
|
||||
#include "lib/defs.h"
|
||||
#include "lib/error.h"
|
||||
|
||||
enum {
|
||||
// Constants for CR and LF. Note that we should not use '\n' or '\r'
|
||||
// anywhere, because these character constants may have unexpected values on
|
||||
// certain old Mac OS compilers, depending on the compiler settings. In
|
||||
// particular, the values of '\n' and '\r' will be swapped.
|
||||
kCharLF = 10,
|
||||
kCharCR = 13,
|
||||
|
||||
// Constant for substitution character: '?'.
|
||||
kCharSubstitute = 63
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
// Don't translate line breaks.
|
||||
kLineBreakKeep,
|
||||
|
||||
// Convert line breaks to LF.
|
||||
kLineBreakLF,
|
||||
|
||||
// Convert line breaks to CR.
|
||||
kLineBreakCR,
|
||||
|
||||
// Convert line breaks to CR LF.
|
||||
kLineBreakCRLF
|
||||
} LineBreakConversion;
|
||||
|
||||
// Directions that the converter runs in.
|
||||
typedef enum {
    // Convert to UTF-8. Must be 0: it is used as an index into the
    // per-engine table of conversion functions.
    kToUTF8,

    // Convert from UTF-8. Must be 1, for the same reason.
    kFromUTF8
} ConvertDirection;
|
||||
|
||||
// Get the character map used for the given Mac OS script and region codes.
|
||||
// Return -1 if no known character map exists.
|
||||
int GetCharmap(int script, int region);
|
||||
|
||||
// The state of a converter. Must be zeroed prior to first conversion.
|
||||
struct ConverterState {
|
||||
UInt32 data;
|
||||
};
|
||||
|
||||
// Implementation function for building a converter.
|
||||
typedef ErrorCode (*ConvertBuildf)(Handle *out, Handle data, Size datasz);
|
||||
|
||||
// Implementation function for running a converter.
|
||||
typedef void (*ConvertRunf)(const void *cvtptr, LineBreakConversion lc,
|
||||
struct ConverterState *stateptr, UInt8 **optr,
|
||||
UInt8 *oend, const UInt8 **iptr, const UInt8 *iend);
|
||||
|
||||
// A converter. The converter can be freed by disposing the handle.
|
||||
struct Converter {
|
||||
Handle data;
|
||||
ConvertRunf run;
|
||||
};
|
||||
|
||||
// Build a converter from the given conversion table data.
|
||||
int ConverterBuild(struct Converter *c, Handle data, Size datasz,
|
||||
ConvertDirection direction);
|
||||
|
||||
// Engine 1: extended ASCII.
|
||||
|
||||
ErrorCode Convert1fBuild(Handle *out, Handle data, Size datasz);
|
||||
void Convert1fRun(const void *cvtptr, LineBreakConversion lc,
|
||||
struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend,
|
||||
const UInt8 **iptr, const UInt8 *iend);
|
||||
|
||||
ErrorCode Convert1rBuild(Handle *out, Handle data, Size datasz);
|
||||
void Convert1rRun(const void *cvtptr, LineBreakConversion lc,
|
||||
struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend,
|
||||
const UInt8 **iptr, const UInt8 *iend);
|
||||
|
||||
#endif
|
130
convert/convert_1f.c
Normal file
|
@ -0,0 +1,130 @@
|
|||
// Copyright 2022 Dietrich Epp.
|
||||
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the
|
||||
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
|
||||
|
||||
// convert_1f.c - Forward conversion from extended ASCII to UTF-8.
|
||||
#include "convert/convert.h"
|
||||
#include "lib/defs.h"
|
||||
|
||||
struct Convert1fData {
|
||||
// Unicode characters, encoded in UTF-8, and packed MSB first. Always either
|
||||
// 2 bytes or 3 bytes.
|
||||
UInt32 chars[128];
|
||||
};
|
||||
|
||||
struct Convert1fState {
|
||||
UInt8 lastch;
|
||||
};
|
||||
|
||||
// Build the forward (extended ASCII to UTF-8) conversion table.
//
// data holds datasz bytes: one engine byte, followed by 128 records, one for
// each high character 128..255. Each record is a length byte (2 or 3) plus
// that many bytes of UTF-8, then a second length byte plus that many bytes
// which are skipped here. On success, *out receives a new handle containing a
// struct Convert1fData and 0 is returned. Returns kErrorNoMemory if the handle
// cannot be allocated and kErrorBadData if the table is truncated or
// malformed.
ErrorCode Convert1fBuild(Handle *out, Handle data, Size datasz)
{
    Handle h;
    struct Convert1fData *cvt;
    int i, n;
    UInt32 uch;
    const UInt8 *dptr, *dend;

    h = NewHandle(sizeof(struct Convert1fData));
    if (h == NULL) {
        return kErrorNoMemory;
    }
    if (datasz < 1) {
        // Not even an engine byte to skip.
        goto bad_table;
    }
    cvt = (void *)*h;
    dptr = (void *)*data;
    // datasz counts the leading engine byte, so the end pointer must be
    // computed from the start of the data, before that byte is skipped.
    dend = dptr + datasz;
    dptr++;
    for (i = 0; i < 128; i++) {
        // Primary encoding: stored packed, MSB first.
        if (dptr == dend) {
            goto bad_table;
        }
        n = *dptr++;
        if (n < 2 || 3 < n) {
            goto bad_table;
        }
        if (dend - dptr < n) {
            goto bad_table;
        }
        uch = 0;
        while (n-- > 0) {
            uch = (uch << 8) | *dptr++;
        }
        cvt->chars[i] = uch;

        // Second encoding: not needed for forward conversion, skip it.
        if (dptr == dend) {
            goto bad_table;
        }
        n = *dptr++;
        if (dend - dptr < n) {
            goto bad_table;
        }
        dptr += n;
    }
    *out = h;
    return 0;

bad_table:
    DisposeHandle(h);
    return kErrorBadData;
}
|
||||
|
||||
// Convert extended ASCII to UTF-8, translating line breaks according to lc.
//
// Reads from *iptr up to iend and writes from *optr up to oend, then stores
// the advanced positions back through iptr and optr. The last byte read is
// kept in the converter state so that a CR LF pair split across two calls is
// still recognized.
void Convert1fRun(const void *cvtptr, LineBreakConversion lc,
                  struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend,
                  const UInt8 **iptr, const UInt8 *iend)
{
    const struct Convert1fData *table = cvtptr;
    struct Convert1fState *st = (struct Convert1fState *)stateptr;
    UInt8 *dst = *optr;
    const UInt8 *src = *iptr;
    unsigned cur, prev;
    UInt32 packed;

    cur = st->lastch;
    // One input byte produces at most three output bytes, so stop once fewer
    // than three bytes of output space remain.
    while (src < iend && oend - dst >= 3) {
        prev = cur;
        cur = *src++;

        if (cur >= 128) {
            // High character: emit its packed 2- or 3-byte UTF-8 sequence.
            packed = table->chars[cur - 128];
            if (packed > 0xffff) {
                *dst++ = packed >> 16;
            }
            *dst++ = packed >> 8;
            *dst++ = packed;
            continue;
        }

        if (cur != kCharLF && cur != kCharCR) {
            // Plain ASCII passes through unchanged.
            *dst++ = cur;
            continue;
        }

        if (cur == kCharLF && prev == kCharCR) {
            // Second half of a CR LF pair. The CR already produced the
            // converted line break, so only copy the LF when keeping breaks.
            if (lc == kLineBreakKeep) {
                *dst++ = cur;
            }
            continue;
        }

        // Start of a line break.
        switch (lc) {
        case kLineBreakKeep:
            *dst++ = cur;
            break;
        case kLineBreakLF:
            *dst++ = kCharLF;
            break;
        case kLineBreakCR:
            *dst++ = kCharCR;
            break;
        case kLineBreakCRLF:
            *dst++ = kCharCR;
            *dst++ = kCharLF;
            break;
        }
    }
    st->lastch = cur;

    *optr = dst;
    *iptr = src;
}
|
362
convert/convert_1r.c
Normal file
|
@ -0,0 +1,362 @@
|
|||
// Copyright 2022 Dietrich Epp.
|
||||
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the
|
||||
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
|
||||
|
||||
// convert_1r.c - Reverse conversion from UTF-8 to extended ASCII.
|
||||
#include "convert/convert.h"
|
||||
#include "lib/defs.h"
|
||||
|
||||
enum {
|
||||
// Maximum length of encoded character.
|
||||
kMaxEncodedLength = 8,
|
||||
|
||||
// Initial number of nodes to allocate when building the tree.
|
||||
kInitialTableAlloc = 8
|
||||
};
|
||||
|
||||
struct TEntry {
|
||||
// The output character, or zero if no output.
|
||||
UInt8 output;
|
||||
// The next node, or zero if no next node.
|
||||
UInt8 next;
|
||||
};
|
||||
|
||||
// A node for building the converter.
|
||||
struct TNode {
|
||||
struct TEntry entries[256];
|
||||
};
|
||||
|
||||
struct TTree {
|
||||
struct TNode **nodes;
|
||||
int count;
|
||||
};
|
||||
|
||||
// Build the uncompressed decoding tree from conversion table data.
//
// data holds datasz bytes: one engine byte, then for each high character
// 128..255 two encodings, each a length byte followed by that many bytes. A
// length of zero means the encoding is absent. Every byte of an encoding but
// the last selects a child node; the last byte selects the entry whose output
// is the decoded character.
//
// On success, fills in tree (the caller owns tree->nodes) and returns 0.
// Returns kErrorNoMemory if allocation fails, or kErrorBadData if the table
// is truncated, contains an invalid length, maps the same byte sequence
// twice, or needs more nodes than a TEntry can index.
static ErrorCode CreateTree(struct TTree *tree, Handle data, Size datasz)
{
    struct TNode **nodes, *node;
    int i, j, dpos, enclen, encend, state, cur, nodecount, nodealloc;
    unsigned ch;

    // Create a tree with a root node mapping all the ASCII characters except
    // NUL, CR, and LF. NUL won't map because an output of 0 is interpreted as
    // no output. CR and LF are removed so they can be handled specially by the
    // decoder.
    nodes =
        (struct TNode **)NewHandle(kInitialTableAlloc * sizeof(struct TNode));
    if (nodes == NULL) {
        return kErrorNoMemory;
    }
    nodecount = 1;
    nodealloc = kInitialTableAlloc;
    node = *nodes;
    MemClear(node, sizeof(struct TNode));
    for (i = 0; i < 128; i++) {
        node->entries[i].output = i;
    }
    node->entries[kCharLF].output = 0;
    node->entries[kCharCR].output = 0;

    // Parse the table data and build up a tree of TNode.
    dpos = 1;
    // For each high character (128..255).
    for (i = 0; i < 128; i++) {
        // For each encoding of that character.
        for (j = 0; j < 2; j++) {
            if (dpos >= datasz) {
                goto bad_table;
            }
            enclen = (UInt8)(*data)[dpos++];
            if (enclen != 0) {
                if (enclen < 2 || enclen > datasz - dpos ||
                    enclen > kMaxEncodedLength) {
                    goto bad_table;
                }
                // Iterate over all but last byte in encoding, to find the node
                // which will produce the decoded byte as output.
                state = 0;
                node = *nodes;
                for (encend = dpos + enclen - 1; dpos < encend; dpos++) {
                    ch = (UInt8)(*data)[dpos];
                    cur = state;
                    state = node->entries[ch].next;
                    if (state == 0) {
                        // Node indexes are stored in a UInt8, so a tree that
                        // needs more than 256 nodes cannot be represented.
                        // Without this check, index 256 would be truncated to
                        // 0, which means "no next node".
                        if (nodecount > 255) {
                            goto bad_table;
                        }
                        if (nodecount >= nodealloc) {
                            nodealloc *= 2;
                            if (!ResizeHandle(
                                    (Handle)nodes,
                                    nodealloc * sizeof(struct TNode))) {
                                DisposeHandle((Handle)nodes);
                                return kErrorNoMemory;
                            }
                            // The block may have moved; recompute the pointer
                            // to the current node.
                            node = *nodes + cur;
                        }
                        state = nodecount++;
                        node->entries[ch].next = state;
                        node = (*nodes) + state;
                        MemClear(node, sizeof(*node));
                    } else {
                        node = *nodes + state;
                    }
                }
                ch = (UInt8)(*data)[dpos++];
                if (node->entries[ch].output != 0) {
                    // Two characters share the same encoding.
                    goto bad_table;
                }
                node->entries[ch].output = i | 0x80;
            }
        }
    }
    // Trim the allocation down to the nodes actually used.
    if (!ResizeHandle((Handle)nodes, nodecount * sizeof(struct TNode))) {
        DisposeHandle((Handle)nodes);
        return kErrorNoMemory;
    }
    tree->nodes = nodes;
    tree->count = nodecount;
    return 0;

bad_table:
    DisposeHandle((Handle)nodes);
    return kErrorBadData;
}
|
||||
|
||||
struct NodeInfo {
|
||||
UInt8 min;
|
||||
UInt8 max;
|
||||
UInt16 offset;
|
||||
};
|
||||
|
||||
struct CEntry {
|
||||
UInt16 output;
|
||||
UInt16 next;
|
||||
};
|
||||
|
||||
// A compressed table node. Followed by an array of CEntry.
|
||||
struct CNode {
|
||||
// First byte in table.
|
||||
UInt8 base;
|
||||
// Number of entries in table, minus one.
|
||||
UInt8 span;
|
||||
};
|
||||
|
||||
// Convert the uncompressed tree into its compact, position-independent form.
//
// Each TNode becomes a CNode header followed by CEntry records covering only
// the span between the node's first and last non-empty entries. Links between
// nodes become byte offsets from the start of the compacted data; offset 0 is
// the root and also means "no next node". Every node is expected to contain
// at least one non-empty entry.
//
// On success, *out receives a new handle with the compacted tree and 0 is
// returned. Returns kErrorNoMemory if allocation fails, or kErrorBadData if a
// node would start beyond the range of the 16-bit offset fields. The caller
// keeps ownership of nodes.
static ErrorCode CompactTree(Handle *out, struct TNode **nodes, int nodecount)
{
    Handle ctree;
    struct TNode *node;
    struct NodeInfo **infos, *info;
    struct CNode *cnode;
    struct CEntry *centry;
    int i, j, min, max, count, next;
    UInt32 offset;

    // Figure out where each compacted node will go.
    infos = (struct NodeInfo **)NewHandle(sizeof(struct NodeInfo) * nodecount);
    if (infos == NULL) {
        return kErrorNoMemory;
    }
    offset = 0;
    for (i = 0; i < nodecount; i++) {
        node = *nodes + i;
        min = 0;
        while (node->entries[min].output == 0 && node->entries[min].next == 0) {
            min++;
        }
        max = 255;
        while (node->entries[max].output == 0 && node->entries[max].next == 0) {
            max--;
        }
        // Offsets are stored in UInt16 fields. Refuse to build a table whose
        // node offsets would be silently truncated.
        if (offset > 0xffff) {
            DisposeHandle((Handle)infos);
            return kErrorBadData;
        }
        info = *infos + i;
        info->min = min;
        info->max = max;
        info->offset = offset;
        count = max - min + 1;
        offset += sizeof(struct CNode) + count * sizeof(struct CEntry);
    }

    // Create the compacted tree.
    ctree = NewHandle(offset);
    if (ctree == NULL) {
        DisposeHandle((Handle)infos);
        return kErrorNoMemory;
    }
    for (i = 0; i < nodecount; i++) {
        node = *nodes + i;
        info = *infos + i;
        min = info->min;
        max = info->max;
        offset = info->offset;
        cnode = (void *)(*ctree + offset);
        cnode->base = min;
        cnode->span = max - min;
        centry = (void *)(*ctree + offset + sizeof(struct CNode));
        for (j = min; j <= max; j++) {
            centry->output = node->entries[j].output;
            next = node->entries[j].next;
            if (next != 0) {
                // Replace the node index with the node's byte offset.
                next = (*infos)[next].offset;
            }
            centry->next = next;
            centry++;
        }
    }

    DisposeHandle((Handle)infos);
    *out = ctree;
    return 0;
}
|
||||
|
||||
// Build the reverse (UTF-8 to extended ASCII) conversion table: parse the
// table data into a tree, then compact the tree into *out. The intermediate
// tree is always released. Returns 0 on success or the error from whichever
// step failed.
ErrorCode Convert1rBuild(Handle *out, Handle data, Size datasz)
{
    struct TTree tree;
    ErrorCode result;

    result = CreateTree(&tree, data, datasz);
    if (result == 0) {
        result = CompactTree(out, tree.nodes, tree.count);
        DisposeHandle((Handle)tree.nodes);
    }
    return result;
}
|
||||
|
||||
struct Convert1rState {
|
||||
UInt8 lastch;
|
||||
UInt8 output;
|
||||
UInt16 tableoffset;
|
||||
};
|
||||
|
||||
// Convert UTF-8 to extended ASCII, translating line breaks according to lc.
//
// cvtptr points to the compacted tree built by Convert1rBuild. Input is read
// from *iptr up to iend and output is written from *optr up to oend; the
// advanced positions are stored back through iptr and optr. Input belonging
// to a character that is not yet complete is left unconsumed, so the caller
// must present it again, followed by more data, on the next call.
//
// NUL, CR, and LF have no entry in the tree and are handled in the fallback
// path, which also replaces unmappable UTF-8 characters with kCharSubstitute.
void Convert1rRun(const void *cvtptr, LineBreakConversion lc,
                  struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend,
                  const UInt8 **iptr, const UInt8 *iend)
{
    struct Convert1rState *state = (struct Convert1rState *)stateptr;
    const struct CNode *node;
    const struct CEntry *entry;
    UInt8 *opos = *optr;
    const UInt8 *ipos = *iptr, *savein;
    unsigned ch, lastch, chlen, output, saveout, toffset, savetoffset, index;

    ch = state->lastch;
    savein = ipos;
    saveout = state->output;
    toffset = state->tableoffset;
    savetoffset = toffset;
    if (oend - opos < 2) {
        goto done;
    }
    goto resume;

next_out:
    // A character has just been fully handled. Commit the input consumed so
    // far and reset the state machine BEFORE checking for output space, so
    // that running out of space does not rewind the input over a character
    // that has already been emitted.
    savein = ipos;
    saveout = 0;
    toffset = 0;
    savetoffset = 0;
    if (oend - opos < 2) {
        goto done;
    }

    // Follow state machine to the end.
resume:
    for (;;) {
        if (ipos >= iend) {
            goto done;
        }
        lastch = ch;
        ch = *ipos++;

        node = (const void *)((const UInt8 *)cvtptr + toffset);
        // Keep ch as the raw input byte, since it becomes lastch for CR LF
        // detection; use a separate variable for the index into the node.
        index = ch - node->base;
        if (index > node->span) {
            toffset = 0;
            goto bad_char;
        }
        entry =
            (const void *)((const UInt8 *)cvtptr + toffset +
                           sizeof(struct CNode) + index * sizeof(struct CEntry));
        output = entry->output;
        toffset = entry->next;
        if (toffset == 0) {
            // Reached end of tree.
            if (output == 0) {
                goto bad_char;
            }
            *opos++ = output;
            goto next_out;
        }
        if (output != 0) {
            // Can produce output here, or can consume more input. We try
            // consuming more input, but save the state to rewind if that fails.
            savein = ipos;
            saveout = output;
            savetoffset = toffset;
        }
    }

bad_char:
    // Bad character. Back up and try again.
    ipos = savein;
    if (saveout != 0) {
        // Produce saved output.
        *opos++ = saveout;
        ch = 0;
    } else {
        // No saved output, this really is a bad character. Consume one UTF-8
        // character, emit it as a fallback, and continue.
        ch = *ipos++;
        if ((ch & 0x80) == 0) {
            // ASCII character: either NUL, CR, or LF, because only these
            // characters will result in a transition to state 0.
            if (ch == 0) {
                *opos++ = ch;
            } else if (ch == kCharLF && lastch == kCharCR) {
                // Second half of CR LF; the CR already produced the break.
                if (lc == kLineBreakKeep) {
                    *opos++ = ch;
                }
            } else {
                switch (lc) {
                case kLineBreakKeep:
                    *opos++ = ch;
                    break;
                case kLineBreakLF:
                    *opos++ = kCharLF;
                    break;
                case kLineBreakCR:
                    *opos++ = kCharCR;
                    break;
                case kLineBreakCRLF:
                    *opos++ = kCharCR;
                    *opos++ = kCharLF;
                    break;
                }
            }
        } else {
            // Number of continuation bytes implied by the lead byte.
            if ((ch & 0xe0) == 0xc0) {
                chlen = 1;
            } else if ((ch & 0xf0) == 0xe0) {
                chlen = 2;
            } else if ((ch & 0xf8) == 0xf0) {
                chlen = 3;
            } else {
                chlen = 0;
            }
            for (; chlen > 0; chlen--) {
                if (ipos == iend) {
                    // Character is incomplete; wait for more input.
                    goto done;
                }
                ch = *ipos;
                if ((ch & 0xc0) != 0x80) {
                    break;
                }
                ipos++;
            }
            *opos++ = kCharSubstitute;
        }
    }
    goto next_out;

done:
    state->lastch = ch;
    state->output = saveout;
    state->tableoffset = savetoffset;
    *optr = opos;
    *iptr = savein;
}
|
253
convert/convert_test.c
Normal file
|
@ -0,0 +1,253 @@
|
|||
// Copyright 2022 Dietrich Epp.
|
||||
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the
|
||||
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
|
||||
#include "convert/convert.h"
|
||||
#include "convert/data.h"
|
||||
#include "lib/test.h"
|
||||
#include "lib/util.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
enum {
|
||||
kInitialBufSize = 4 * 1024,
|
||||
kConvertBufferSize = 1024,
|
||||
};
|
||||
|
||||
static UInt8 *gBuffer[3];
|
||||
|
||||
// Print len bytes of buf to stderr as a double-quoted string. Printable ASCII
// is written as-is, with backslash and double quote escaped by a backslash;
// every other byte is written as a \xNN hex escape.
static void PrintQuotedString(const UInt8 *buf, int len)
{
    int pos, byte;

    fputc('"', stderr);
    for (pos = 0; pos < len; pos++) {
        byte = buf[pos];
        if (byte < 32 || byte > 126) {
            fprintf(stderr, "\\x%02x", byte);
            continue;
        }
        if (byte == '\\' || byte == '"') {
            fputc('\\', stderr);
        }
        fputc(byte, stderr);
    }
    fputc('"', stderr);
}
|
||||
|
||||
static void Check(const void *exbuf, int exlen, const void *inbuf, int inlen,
|
||||
const void *outbuf, int outlen)
|
||||
{
|
||||
int i, n, col, diffcol, c1, c2;
|
||||
|
||||
if (exlen == outlen && memcmp(exbuf, outbuf, outlen) == 0) {
|
||||
return;
|
||||
}
|
||||
Failf("incorrect output");
|
||||
n = exlen;
|
||||
if (n > outlen) {
|
||||
n = outlen;
|
||||
}
|
||||
diffcol = -1;
|
||||
col = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
c1 = ((const UInt8 *)exbuf)[i];
|
||||
c2 = ((const UInt8 *)outbuf)[i];
|
||||
if (c1 != c2) {
|
||||
diffcol = col;
|
||||
break;
|
||||
}
|
||||
if (32 <= c1 && c1 <= 126) {
|
||||
col++;
|
||||
if (c1 == '\\' || c1 == '"') {
|
||||
col++;
|
||||
}
|
||||
} else {
|
||||
col += 4;
|
||||
}
|
||||
}
|
||||
fputs("Input: ", stderr);
|
||||
PrintQuotedString(inbuf, inlen);
|
||||
fputc('\n', stderr);
|
||||
fputs("Expect: ", stderr);
|
||||
PrintQuotedString(exbuf, exlen);
|
||||
fputc('\n', stderr);
|
||||
fputs("Output: ", stderr);
|
||||
PrintQuotedString(outbuf, outlen);
|
||||
fputc('\n', stderr);
|
||||
if (diffcol >= 0) {
|
||||
for (i = 0; i < diffcol + 9; i++) {
|
||||
fputc(' ', stderr);
|
||||
}
|
||||
fputc('^', stderr);
|
||||
}
|
||||
fputc('\n', stderr);
|
||||
}
|
||||
|
||||
static const char *const kLineBreakData[4] = {
|
||||
"Line Break\nA\n\nB\rC\r\rD\r\nE\r\n\r\n",
|
||||
"Line Break\nA\n\nB\nC\n\nD\nE\n\n",
|
||||
"Line Break\rA\r\rB\rC\r\rD\rE\r\r",
|
||||
"Line Break\r\nA\r\n\r\nB\r\nC\r\n\r\nD\r\nE\r\n\r\n",
|
||||
};
|
||||
|
||||
static const char *const kLineBreakName[4] = {"keep", "LF", "CR", "CRLF"};
|
||||
|
||||
static void TestConverter(const char *name, struct CharmapData data)
|
||||
{
|
||||
Ptr datap;
|
||||
Handle datah;
|
||||
struct Converter cf, cr, cc;
|
||||
struct ConverterState st;
|
||||
int i, j, k, jmax, len0, len1, len2;
|
||||
UInt8 *ptr;
|
||||
const UInt8 *iptr, *iend, *istart;
|
||||
UInt8 *optr, *oend;
|
||||
int lblen[4];
|
||||
ErrorCode err;
|
||||
|
||||
cf.data = NULL;
|
||||
cr.data = NULL;
|
||||
|
||||
SetTestName(name);
|
||||
|
||||
// Load the converter into memory and build the conversion table.
|
||||
datap = (void *)data.ptr;
|
||||
datah = &datap;
|
||||
err = ConverterBuild(&cf, datah, data.size, kToUTF8);
|
||||
if (err != 0) {
|
||||
Failf("ConverterBuild: to UTF-8: %s", ErrorDescriptionOrDie(err));
|
||||
goto done;
|
||||
}
|
||||
err = ConverterBuild(&cr, datah, data.size, kFromUTF8);
|
||||
if (err != 0) {
|
||||
Failf("ConverterBuild: from UTF-8: %s", ErrorDescriptionOrDie(err));
|
||||
goto done;
|
||||
}
|
||||
|
||||
// Create sample data to convert: 0-255, followed by 0.
|
||||
len0 = 257;
|
||||
ptr = gBuffer[0];
|
||||
for (i = 0; i < 256; i++) {
|
||||
ptr[i] = i;
|
||||
}
|
||||
ptr[256] = 0;
|
||||
|
||||
// Convert sample data.
|
||||
iptr = gBuffer[0];
|
||||
iend = iptr + 257;
|
||||
optr = gBuffer[1];
|
||||
oend = optr + kConvertBufferSize;
|
||||
st.data = 0;
|
||||
cf.run(*cf.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend);
|
||||
if (iptr != iend) {
|
||||
Failf("some data failed to convert");
|
||||
goto done;
|
||||
}
|
||||
len1 = optr - gBuffer[1];
|
||||
|
||||
// Convert back, in three calls. The middle call will be to a 1-4 byte slice
|
||||
// in the middle.
|
||||
for (i = 1; i < len1 - 2; i++) {
|
||||
jmax = len1 - i;
|
||||
if (jmax > 4) {
|
||||
jmax = 4;
|
||||
}
|
||||
for (j = 1; j <= jmax; j++) {
|
||||
SetTestNamef("%s reverse i=%d j=%d", name, i, j);
|
||||
st.data = 0;
|
||||
iptr = gBuffer[1];
|
||||
optr = gBuffer[2];
|
||||
oend = optr + kConvertBufferSize;
|
||||
iend = gBuffer[1] + i;
|
||||
cr.run(*cr.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend);
|
||||
iend = gBuffer[1] + i + j;
|
||||
cr.run(*cr.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend);
|
||||
iend = gBuffer[1] + len1;
|
||||
cr.run(*cr.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend);
|
||||
if (iptr != iend) {
|
||||
Failf("some data failed to convert");
|
||||
} else {
|
||||
len2 = optr - gBuffer[2];
|
||||
Check(gBuffer[0], len0, gBuffer[1], len1, gBuffer[2], len2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
lblen[i] = strlen(kLineBreakData[i]) + 1;
|
||||
}
|
||||
istart = (const UInt8 *)kLineBreakData[0];
|
||||
for (k = 0; k < 2; k++) {
|
||||
cc = k == 0 ? cf : cr;
|
||||
for (i = 0; i < 4; i++) {
|
||||
len1 = lblen[0]; // Input data
|
||||
len0 = lblen[i]; // Expected output
|
||||
for (j = 1; j < len1; j++) {
|
||||
SetTestNamef("%s %s linebreak %s split=%d", name,
|
||||
k == 0 ? "forward" : "backward", kLineBreakName[i],
|
||||
j);
|
||||
st.data = 0;
|
||||
iptr = istart;
|
||||
optr = gBuffer[0];
|
||||
oend = optr + kConvertBufferSize;
|
||||
iend = istart + j;
|
||||
cc.run(*cc.data, i, &st, &optr, oend, &iptr, iend);
|
||||
iend = istart + len1;
|
||||
cc.run(*cc.data, i, &st, &optr, oend, &iptr, iend);
|
||||
if (iptr != iend) {
|
||||
Failf("some data failed to convert");
|
||||
} else {
|
||||
len2 = optr - gBuffer[0];
|
||||
Check(kLineBreakData[i], len0, kLineBreakData[0], len1,
|
||||
gBuffer[0], len2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
if (cf.data != NULL) {
|
||||
DisposeHandle(cf.data);
|
||||
}
|
||||
if (cr.data != NULL) {
|
||||
DisposeHandle(cr.data);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
void *buf;
|
||||
struct CharmapData data;
|
||||
const char *name;
|
||||
int i;
|
||||
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
buf = malloc(kConvertBufferSize);
|
||||
if (buf == NULL) {
|
||||
Fatalf("malloc failed");
|
||||
}
|
||||
gBuffer[i] = buf;
|
||||
}
|
||||
|
||||
for (i = 0;; i++) {
|
||||
name = CharmapName(i);
|
||||
if (name == NULL) {
|
||||
break;
|
||||
}
|
||||
data = CharmapData(i);
|
||||
if (data.ptr != NULL) {
|
||||
TestConverter(name, data);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
free(gBuffer[i]);
|
||||
}
|
||||
|
||||
return TestsDone();
|
||||
}
|
28
convert/data.h
Normal file
|
@ -0,0 +1,28 @@
|
|||
// Copyright 2022 Dietrich Epp.
|
||||
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the
|
||||
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
|
||||
#ifndef CONVERT_DATA_H
|
||||
#define CONVERT_DATA_H
|
||||
// data.h - charmap data, not used for classic Mac OS builds
|
||||
#include "lib/defs.h"
|
||||
|
||||
// Get the ID of the given character map. Return NULL if no such character map
|
||||
// exists.
|
||||
const char *CharmapID(int cmap);
|
||||
|
||||
// Get the human-readable name of the given character map. Return NULL if no
|
||||
// such character map exists.
|
||||
const char *CharmapName(int cmap);
|
||||
|
||||
// Conversion table data.
|
||||
struct CharmapData {
|
||||
const UInt8 *ptr;
|
||||
Size size;
|
||||
};
|
||||
|
||||
// Get the conversion table data for the given charmap. Returns an empty buffer
|
||||
// with a NULL pointer if the character map does not exist or if no conversion
|
||||
// table exists for that character map.
|
||||
struct CharmapData CharmapData(int cmap);
|
||||
|
||||
#endif
|
11
convert/resources.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
// Copyright 2022 Dietrich Epp.
|
||||
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the
|
||||
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
|
||||
#ifndef CONVERT_RESOURCES_H
|
||||
#define CONVERT_RESOURCES_H
|
||||
|
||||
#define rSTRS_Charmaps 128
|
||||
#define rSTRS_Scripts 129
|
||||
#define rSTRS_Regions 130
|
||||
|
||||
#endif
|
|
@ -1,26 +0,0 @@
|
|||
#include "convert.h"
|
||||
|
||||
int convert_line_endings(short srcRef, short destRef, void *buf,
|
||||
unsigned char srcEnding, unsigned char destEnding) {
|
||||
unsigned char *ptr, *end;
|
||||
long count;
|
||||
int r, r2;
|
||||
|
||||
do {
|
||||
count = kBufferBaseSize;
|
||||
r = convert_read(srcRef, &count, buf);
|
||||
if (r == kConvertError) {
|
||||
return 1;
|
||||
}
|
||||
for (ptr = buf, end = ptr + count; ptr != end; ptr++) {
|
||||
if (*ptr == srcEnding) {
|
||||
*ptr = destEnding;
|
||||
}
|
||||
}
|
||||
r2 = convert_write(destRef, count, buf);
|
||||
if (r2 != kConvertOK) {
|
||||
return 1;
|
||||
}
|
||||
} while (r != kConvertEOF);
|
||||
return 0;
|
||||
}
|
235
convert_test.c
|
@ -1,235 +0,0 @@
|
|||
#include "convert.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdnoreturn.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "mac_from_unix_data.h"
|
||||
|
||||
static noreturn void malloc_fail(size_t sz) {
|
||||
fprintf(stderr, "Error: malloc(%zu) failed\n", sz);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static void *xmalloc(size_t sz) {
|
||||
void *ptr = malloc(sz);
|
||||
if (ptr == NULL) {
|
||||
malloc_fail(sz);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
struct buf {
|
||||
char *data;
|
||||
size_t size;
|
||||
size_t alloc;
|
||||
};
|
||||
|
||||
static void buf_put(struct buf *buf, const void *data, size_t length) {
|
||||
if (length > buf->alloc - buf->size) {
|
||||
size_t nalloc = buf->alloc;
|
||||
if (nalloc == 0) {
|
||||
nalloc = 1024;
|
||||
}
|
||||
while (length > nalloc - buf->size) {
|
||||
nalloc <<= 1;
|
||||
}
|
||||
void *narr = realloc(buf->data, nalloc);
|
||||
if (narr == NULL) {
|
||||
malloc_fail(nalloc);
|
||||
}
|
||||
buf->data = narr;
|
||||
buf->alloc = nalloc;
|
||||
}
|
||||
memcpy(buf->data + buf->size, data, length);
|
||||
buf->size += length;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
|
||||
static unsigned short *gMacFromUnixData;
|
||||
|
||||
static noreturn void bad_unpackbits(void) {
|
||||
fputs("Error: invalid unpackbits data\n", stderr);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static void unpackbits(void *dest, size_t destsz, const void *src,
|
||||
size_t srcsz) {
|
||||
const unsigned char *ip = src, *ie = ip + srcsz;
|
||||
unsigned char *op = dest, *oe = op + destsz;
|
||||
while (op < oe) {
|
||||
if (ip >= ie) {
|
||||
bad_unpackbits();
|
||||
}
|
||||
int c = (signed char)*ip++;
|
||||
if (c >= 0) {
|
||||
int len = c + 1;
|
||||
if (len > ie - ip || len > oe - op) {
|
||||
bad_unpackbits();
|
||||
}
|
||||
memcpy(op, ip, len);
|
||||
op += len;
|
||||
ip += len;
|
||||
} else {
|
||||
int len = -c + 1;
|
||||
if (ip >= ie || len > oe - op) {
|
||||
bad_unpackbits();
|
||||
}
|
||||
memset(op, *ip, len);
|
||||
op += len;
|
||||
ip += 1;
|
||||
}
|
||||
}
|
||||
if (ip != ie) {
|
||||
bad_unpackbits();
|
||||
}
|
||||
}
|
||||
|
||||
unsigned short *mac_from_unix_data(void) {
|
||||
unsigned short *ptr = gMacFromUnixData;
|
||||
if (ptr == NULL) {
|
||||
unsigned char *bytes = xmalloc(FROM_UNIX_DATALEN);
|
||||
unpackbits(bytes, FROM_UNIX_DATALEN, kFromUnixData,
|
||||
sizeof(kFromUnixData));
|
||||
ptr = xmalloc(FROM_UNIX_DATALEN);
|
||||
for (int i = 0; i < FROM_UNIX_DATALEN / 2; i++) {
|
||||
ptr[i] = (bytes[i * 2] << 8) | bytes[i * 2 + 1];
|
||||
}
|
||||
free(bytes);
|
||||
gMacFromUnixData = ptr;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
|
||||
enum {
|
||||
kSrcRef = 1234,
|
||||
kDestRef = 5678,
|
||||
};
|
||||
|
||||
static const char *gReadBuf;
|
||||
static size_t gReadSize;
|
||||
static size_t gReadPos;
|
||||
static size_t gReadChunk;
|
||||
static struct buf gWriteBuf;
|
||||
|
||||
int convert_read(short ref, long *count, void *data) {
|
||||
if (ref != kSrcRef) {
|
||||
fputs("Wrong ref\n", stderr);
|
||||
exit(1);
|
||||
}
|
||||
size_t amt = *count;
|
||||
size_t rem = gReadSize - gReadPos;
|
||||
if (amt > rem) {
|
||||
amt = rem;
|
||||
}
|
||||
if (gReadChunk != 0 && amt > gReadChunk) {
|
||||
amt = gReadChunk;
|
||||
}
|
||||
*count = amt;
|
||||
memcpy(data, gReadBuf + gReadPos, amt);
|
||||
gReadPos += amt;
|
||||
if (gReadPos == gReadSize) {
|
||||
return kConvertEOF;
|
||||
}
|
||||
return kConvertOK;
|
||||
}
|
||||
|
||||
int convert_write(short ref, long count, const void *data) {
|
||||
if (ref != kDestRef) {
|
||||
fputs("Wrong ref\n", stderr);
|
||||
exit(1);
|
||||
}
|
||||
buf_put(&gWriteBuf, data, count);
|
||||
return kConvertOK;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
|
||||
enum {
|
||||
kInputSize = 64 * 1024 - 2,
|
||||
};
|
||||
|
||||
static char *gen_input(void) {
|
||||
char *ptr = xmalloc(kInputSize);
|
||||
unsigned state = 0x12345678;
|
||||
for (int i = 0; i < kInputSize; i++) {
|
||||
// Relatively common LCG.
|
||||
state = (state * 1103515245 + 12345) & 0x7fffffff;
|
||||
ptr[i] = state >> 23;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
|
||||
int r;
|
||||
|
||||
void *sbuf = xmalloc(kBufferTotalSize);
|
||||
void *dbuf = xmalloc(kBufferTotalSize);
|
||||
|
||||
// Generate input.
|
||||
char *input = gen_input();
|
||||
|
||||
// Convert Macintosh -> UTF-8.
|
||||
gReadBuf = input;
|
||||
gReadSize = kInputSize;
|
||||
gReadPos = 0;
|
||||
r = mac_to_unix(kSrcRef, kDestRef, sbuf, dbuf);
|
||||
if (r != 0) {
|
||||
fputs("mac_to_unix failed\n", stderr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Check that we have no CR.
|
||||
{
|
||||
const char *data = gWriteBuf.data;
|
||||
size_t size = gWriteBuf.size;
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
if (data[i] == 0x0d) {
|
||||
fprintf(stderr, "Error: CR at offset %zu\n", i);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert back.
|
||||
gReadBuf = gWriteBuf.data;
|
||||
gReadSize = gWriteBuf.size;
|
||||
gReadPos = 0;
|
||||
gWriteBuf = (struct buf){NULL, 0, 0};
|
||||
r = mac_from_unix(kSrcRef, kDestRef, sbuf, dbuf);
|
||||
if (r != 0) {
|
||||
fputs("mac_from_unix failed\n", stderr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Check that this is equal to original, except with LF changed to CR.
|
||||
{
|
||||
const char *data = gWriteBuf.data;
|
||||
size_t size = gWriteBuf.size;
|
||||
if (kInputSize != size) {
|
||||
fprintf(stderr, "Error: size = %zu, expect %d\n", size, kInputSize);
|
||||
return 1;
|
||||
}
|
||||
for (size_t i = 0; i < kInputSize; i++) {
|
||||
unsigned char x = input[i];
|
||||
if (x == 0x0a) {
|
||||
x = 0x0d;
|
||||
}
|
||||
unsigned char y = data[i];
|
||||
if (x != y) {
|
||||
fprintf(stderr, "Error: data[%zu] = 0x%02x, expect 0x%02x\n", i,
|
||||
y, x);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
19
copy.c
|
@ -1,19 +0,0 @@
|
|||
#include "convert.h"
|
||||
|
||||
int copy_data(short srcRef, short destRef, void *buf) {
|
||||
long count;
|
||||
int r, r2;
|
||||
|
||||
do {
|
||||
count = kBufferBaseSize;
|
||||
r = convert_read(srcRef, &count, buf);
|
||||
if (r == kConvertError) {
|
||||
return 1;
|
||||
}
|
||||
r2 = convert_write(destRef, count, buf);
|
||||
if (r2 != kConvertOK) {
|
||||
return 1;
|
||||
}
|
||||
} while (r != kConvertEOF);
|
||||
return 0;
|
||||
}
|
114
copymac.py
Normal file
|
@ -0,0 +1,114 @@
|
|||
"""copymac.py -- Copy source files to/from Basilisk, with conversions."""
|
||||
from abc import ABCMeta, abstractmethod
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List
|
||||
|
||||
import xattr # type: ignore
|
||||
|
||||
SUBDIRS = ". macos sync lib".split()
|
||||
TEXT_SUFFIXES = ".c .h .r".split()
|
||||
TEXT_CREATOR = b"R*ch" # BBEdit
|
||||
|
||||
|
||||
class Converter(metaclass=ABCMeta):
    """Abstract base for the strategies that copy one file to another.

    Subclasses provide ``name`` (a short label for the kind of conversion)
    and implement ``convert``.
    """

    name: str

    @abstractmethod
    def convert(self, src: pathlib.Path, dest: pathlib.Path) -> None:
        """Read ``src`` and write its converted contents to ``dest``."""
|
||||
|
||||
|
||||
class TextConverter(Converter):
    """Converter for text files: re-encodes and rewrites line endings.

    Attributes:
        enc_in: Encoding used to decode the source file.
        enc_out: Encoding used to encode the destination file.
        newline: Terminator written at the end of every output line.
    """

    enc_in: str
    enc_out: str
    newline: str

    name = "Text"

    def __init__(self, enc_in: str, enc_out: str, newline: str) -> None:
        self.enc_in = enc_in
        self.enc_out = enc_out
        self.newline = newline

    def convert(self, src: pathlib.Path, dest: pathlib.Path) -> None:
        """Decode ``src``, end every line with ``self.newline``, write ``dest``.

        Non-empty output always ends with a line terminator, even when the
        input's last line has none. An empty input produces an empty output.
        """
        # read_text() opens the file in text mode with universal newlines, so
        # CR, LF and CRLF line endings all arrive here as "\n".
        text = src.read_text(encoding=self.enc_in)
        # Split on "\n" only. str.splitlines() would additionally break lines
        # at form feeds, vertical tabs and other separator characters, and the
        # join below would then replace them with line terminators.
        lines = text.split("\n")
        if lines[-1] != "":
            # The final line is unterminated; the extra empty element makes
            # the join emit one trailing terminator.
            lines.append("")
        dest.write_text(self.newline.join(lines), encoding=self.enc_out)
|
||||
|
||||
|
||||
class DataConverter(Converter):
    """Converter that copies a file's bytes without modification."""

    name = "Data"

    def convert(self, src: pathlib.Path, dest: pathlib.Path) -> None:
        """Write the exact contents of ``src`` to ``dest``."""
        dest.write_bytes(src.read_bytes())
|
||||
|
||||
|
||||
@dataclass
class FileType:
    """Pairs a converter with the four-byte type and creator codes of a file."""

    converter: Converter
    filetype: bytes
    creator: bytes

    def finder_info(self) -> bytes:
        """Return the 32-byte value stored in the com.apple.FinderInfo attribute.

        Layout: type code, creator code, one 0x01 byte, then 23 zero bytes.
        """
        for code in (self.filetype, self.creator):
            assert len(code) == 4
        padding = b"\x00" * 23
        return self.filetype + self.creator + b"\x01" + padding
|
||||
|
||||
|
||||
def main(argv: List[str]) -> None:
    """Copy source files between the Unix checkout and the Macintosh folder.

    Args:
        argv: Command-line arguments without the program name. The first
            element selects the direction: "pull" copies from the Macintosh
            folder (~/SyncFiles) into the directory containing this script,
            "push" copies the other way.

    Raises:
        SystemExit: With status 2 when no command or an unknown command is
            given.
    """
    if not argv:
        print("Usage: python copymac.py (push|pull)", file=sys.stderr)
        # Pulling brings files from the guest into the host, so the
        # destination named here is the host.
        print(" pull: pull FROM guest (macintosh), into host (unix)", file=sys.stderr)
        print(" push: push TO guest (macintosh), from host (unix)", file=sys.stderr)
        raise SystemExit(2)
    unix_dir = pathlib.Path(__file__).resolve().parent
    mac_dir = pathlib.Path(pathlib.Path.home(), "SyncFiles")
    cmd = argv[0]

    suffixes: Dict[str, FileType] = {}
    text: Converter
    data = DataConverter()
    if cmd == "pull":
        text = TextConverter("macintosh", "utf-8", "\n")
        src_dir, dest_dir = mac_dir, unix_dir
    elif cmd == "push":
        text = TextConverter("utf-8", "macintosh", "\r")
        src_dir, dest_dir = unix_dir, mac_dir
    else:
        print("Error: unknown command", file=sys.stderr)
        raise SystemExit(2)

    # Map each managed file suffix to its converter and type/creator codes.
    text_type = FileType(text, b"TEXT", TEXT_CREATOR)
    for suffix in TEXT_SUFFIXES:
        suffixes[suffix] = text_type
    suffixes[".mcp"] = FileType(data, b"MMPr", b"CWIE")

    for subdir in SUBDIRS:
        src_subdir = src_dir / subdir
        dest_subdir = dest_dir / subdir
        dest_subdir.mkdir(exist_ok=True)
        # Names of the managed files found in the source directory.
        fset = set()
        for src in src_subdir.iterdir():
            file_type = suffixes.get(src.suffix)
            if file_type is not None:
                fset.add(src.name)
                print(file_type.converter.name, pathlib.Path(subdir, src.name))
                dest = dest_subdir / src.name
                file_type.converter.convert(src, dest)
                if cmd == "push":
                    # Tag the copy with its type and creator codes.
                    info = file_type.finder_info()
                    attr = xattr.xattr(dest)
                    attr["com.apple.FinderInfo"] = info
        # Remove managed files in the destination that have no counterpart in
        # the source. Files with other suffixes are left alone.
        for dest in dest_subdir.iterdir():
            if dest.suffix in suffixes and dest.name not in fset:
                print("Delete", pathlib.Path(subdir, dest.name))
                dest.unlink()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv[1:])
|
106
defs.h
|
@ -1,106 +0,0 @@
|
|||
#include <MacTypes.h>
|
||||
|
||||
typedef unsigned char bool;
|
||||
|
||||
#define ARRAY_COUNT(x) (sizeof(x) / sizeof(*x))
|
||||
|
||||
// =============================================================================
|
||||
// util.c
|
||||
// =============================================================================
|
||||
|
||||
// Message log level.
|
||||
typedef enum {
|
||||
kLogWarn,
|
||||
kLogInfo,
|
||||
kLogVerbose,
|
||||
} log_level;
|
||||
|
||||
// Global log verbosity.
|
||||
extern log_level gLogLevel;
|
||||
|
||||
// -f / -force flag: if true, all destination files are replaced, regardless of
|
||||
// timestamp.
|
||||
extern bool gFlagForce;
|
||||
|
||||
// -n / -dry-run flag: if true, no actions are taken, but the actions are
|
||||
// printed out.
|
||||
extern bool gFlagDryRun;
|
||||
|
||||
// -d / -delete flag: if true, destination files are deleted if there is no
|
||||
// corresponding source file.
|
||||
extern bool gFlagDelete;
|
||||
|
||||
// Print an error message.
|
||||
void print_err(const char *msg, ...);
|
||||
|
||||
// Print an error message with a Macintosh toolbox error code.
|
||||
void print_errcode(OSErr err, const char *msg, ...);
|
||||
|
||||
// Print an out-of-memory error.
|
||||
void print_memerr(unsigned long size);
|
||||
|
||||
// Log the error result of a function call.
|
||||
void log_call(OSErr err, const char *function);
|
||||
|
||||
// Convert a C string to Pascal string. Returns nonzero on failure.
|
||||
int c2pstr(unsigned char *ostr, const char *istr);
|
||||
|
||||
// Convert a Pascal string (maximum 31 characters) to a C string.
|
||||
void p2cstr(char *ostr, const unsigned char *istr);
|
||||
|
||||
// Global operation mode
|
||||
typedef enum {
|
||||
kModeUnknown,
|
||||
kModePush,
|
||||
kModePull,
|
||||
} operation_mode;
|
||||
|
||||
// =============================================================================
|
||||
// file.c
|
||||
// =============================================================================
|
||||
|
||||
enum {
|
||||
kSrcDir,
|
||||
kDestDir,
|
||||
};
|
||||
|
||||
// Metadata for a file in the source or destination.
|
||||
struct file_meta {
|
||||
Boolean exists;
|
||||
long modTime;
|
||||
};
|
||||
|
||||
// An action to take for a particular file.
|
||||
typedef enum {
|
||||
kActionNone, // Leave file alone.
|
||||
kActionNew, // Copy src to dest, dest does not exist.
|
||||
kActionReplace, // Replace existing file in dest.
|
||||
kActionDelete, // Delete dest file.
|
||||
} file_action;
|
||||
|
||||
// A general type of file. Affects the type code and conversions applied.
|
||||
typedef enum {
|
||||
kTypeUnknown,
|
||||
kTypeText, // Text file: convert CR/LF and encoding.
|
||||
kTypeTextUTF8, // Text file: convert CR/LF only.
|
||||
kTypeResource, // Resource file: copy resource fork to data fork.
|
||||
} file_type;
|
||||
|
||||
// Information about a file present in the source or destination directory (or
|
||||
// both).
|
||||
struct file_info {
|
||||
// Filename, Pascal string.
|
||||
Str31 name;
|
||||
// Metadata indexed by kSrcDir or kDestDir.
|
||||
struct file_meta meta[2];
|
||||
// The action to apply to this file.
|
||||
file_action action;
|
||||
// The type of file. Used to select type codes and converters.
|
||||
file_type type;
|
||||
};
|
||||
|
||||
// Synchronize a file according to the action in the action field. The temporary
|
||||
// directory must be a valid directory on the destination volume.
|
||||
int sync_file(struct file_info *file, operation_mode mode, short srcVol,
|
||||
long srcDir, short destVol, long destDir, short tempVol,
|
||||
long tempDir);
|
5
docs/.gitignore
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
_site
|
||||
.sass-cache
|
||||
.jekyll-cache
|
||||
.jekyll-metadata
|
||||
vendor
|
25
docs/404.html
Normal file
|
@ -0,0 +1,25 @@
|
|||
---
|
||||
permalink: /404.html
|
||||
layout: default
|
||||
---
|
||||
|
||||
<style type="text/css" media="screen">
|
||||
.container {
|
||||
margin: 10px auto;
|
||||
max-width: 600px;
|
||||
text-align: center;
|
||||
}
|
||||
h1 {
|
||||
margin: 30px 0;
|
||||
font-size: 4em;
|
||||
line-height: 1;
|
||||
letter-spacing: -1px;
|
||||
}
|
||||
</style>
|
||||
|
||||
<div class="container">
|
||||
<h1>404</h1>
|
||||
|
||||
<p><strong>Page not found :(</strong></p>
|
||||
<p>The requested page could not be found.</p>
|
||||
</div>
|
36
docs/Gemfile
Normal file
|
@ -0,0 +1,36 @@
|
|||
source "https://rubygems.org"
|
||||
# Hello! This is where you manage which Jekyll version is used to run.
|
||||
# When you want to use a different version, change it below, save the
|
||||
# file and run `bundle install`. Run Jekyll with `bundle exec`, like so:
|
||||
#
|
||||
# bundle exec jekyll serve
|
||||
#
|
||||
# This will help ensure the proper Jekyll version is running.
|
||||
# Happy Jekylling!
|
||||
gem "jekyll", "~> 4.2.2"
|
||||
# This is the default theme for new Jekyll sites. You may change this to anything you like.
|
||||
# If you want to use GitHub Pages, remove the "gem "jekyll"" above and
|
||||
# uncomment the line below. To upgrade, run `bundle update github-pages`.
|
||||
# gem "github-pages", group: :jekyll_plugins
|
||||
# If you have any plugins, put them here!
|
||||
group :jekyll_plugins do
|
||||
gem "jekyll-feed", "~> 0.12"
|
||||
end
|
||||
|
||||
# Windows and JRuby do not include zoneinfo files, so bundle the tzinfo-data gem
|
||||
# and associated library.
|
||||
platforms :mingw, :x64_mingw, :mswin, :jruby do
|
||||
gem "tzinfo", "~> 1.2"
|
||||
gem "tzinfo-data"
|
||||
end
|
||||
|
||||
# Performance-booster for watching directories on Windows
|
||||
gem "wdm", "~> 0.1.1", :platforms => [:mingw, :x64_mingw, :mswin]
|
||||
|
||||
# Lock `http_parser.rb` gem to `v0.6.x` on JRuby builds since newer versions of the gem
|
||||
# do not have a Java counterpart.
|
||||
gem "http_parser.rb", "~> 0.6.0", :platforms => [:jruby]
|
||||
|
||||
gem "webrick", "~> 1.7"
|
||||
|
||||
gem "just-the-docs", "~> 0.3.3"
|
89
docs/Gemfile.lock
Normal file
|
@ -0,0 +1,89 @@
|
|||
GEM
|
||||
remote: https://rubygems.org/
|
||||
specs:
|
||||
addressable (2.8.0)
|
||||
public_suffix (>= 2.0.2, < 5.0)
|
||||
colorator (1.1.0)
|
||||
concurrent-ruby (1.1.10)
|
||||
em-websocket (0.5.3)
|
||||
eventmachine (>= 0.12.9)
|
||||
http_parser.rb (~> 0)
|
||||
eventmachine (1.2.7)
|
||||
ffi (1.15.5)
|
||||
forwardable-extended (2.6.0)
|
||||
http_parser.rb (0.8.0)
|
||||
i18n (1.10.0)
|
||||
concurrent-ruby (~> 1.0)
|
||||
jekyll (4.2.2)
|
||||
addressable (~> 2.4)
|
||||
colorator (~> 1.0)
|
||||
em-websocket (~> 0.5)
|
||||
i18n (~> 1.0)
|
||||
jekyll-sass-converter (~> 2.0)
|
||||
jekyll-watch (~> 2.0)
|
||||
kramdown (~> 2.3)
|
||||
kramdown-parser-gfm (~> 1.0)
|
||||
liquid (~> 4.0)
|
||||
mercenary (~> 0.4.0)
|
||||
pathutil (~> 0.9)
|
||||
rouge (~> 3.0)
|
||||
safe_yaml (~> 1.0)
|
||||
terminal-table (~> 2.0)
|
||||
jekyll-feed (0.16.0)
|
||||
jekyll (>= 3.7, < 5.0)
|
||||
jekyll-sass-converter (2.2.0)
|
||||
sassc (> 2.0.1, < 3.0)
|
||||
jekyll-seo-tag (2.8.0)
|
||||
jekyll (>= 3.8, < 5.0)
|
||||
jekyll-watch (2.2.1)
|
||||
listen (~> 3.0)
|
||||
just-the-docs (0.3.3)
|
||||
jekyll (>= 3.8.5)
|
||||
jekyll-seo-tag (~> 2.0)
|
||||
rake (>= 12.3.1, < 13.1.0)
|
||||
kramdown (2.3.2)
|
||||
rexml
|
||||
kramdown-parser-gfm (1.1.0)
|
||||
kramdown (~> 2.0)
|
||||
liquid (4.0.3)
|
||||
listen (3.7.1)
|
||||
rb-fsevent (~> 0.10, >= 0.10.3)
|
||||
rb-inotify (~> 0.9, >= 0.9.10)
|
||||
mercenary (0.4.0)
|
||||
minima (2.5.1)
|
||||
jekyll (>= 3.5, < 5.0)
|
||||
jekyll-feed (~> 0.9)
|
||||
jekyll-seo-tag (~> 2.1)
|
||||
pathutil (0.16.2)
|
||||
forwardable-extended (~> 2.6)
|
||||
public_suffix (4.0.6)
|
||||
rake (13.0.6)
|
||||
rb-fsevent (0.11.1)
|
||||
rb-inotify (0.10.1)
|
||||
ffi (~> 1.0)
|
||||
rexml (3.2.5)
|
||||
rouge (3.28.0)
|
||||
safe_yaml (1.0.5)
|
||||
sassc (2.4.0)
|
||||
ffi (~> 1.9)
|
||||
terminal-table (2.0.0)
|
||||
unicode-display_width (~> 1.1, >= 1.1.1)
|
||||
unicode-display_width (1.8.0)
|
||||
webrick (1.7.0)
|
||||
|
||||
PLATFORMS
|
||||
x86_64-linux
|
||||
|
||||
DEPENDENCIES
|
||||
http_parser.rb (~> 0.6.0)
|
||||
jekyll (~> 4.2.2)
|
||||
jekyll-feed (~> 0.12)
|
||||
just-the-docs (~> 0.3.3)
|
||||
minima (~> 2.5)
|
||||
tzinfo (~> 1.2)
|
||||
tzinfo-data
|
||||
wdm (~> 0.1.1)
|
||||
webrick (~> 1.7)
|
||||
|
||||
BUNDLED WITH
|
||||
2.3.11
|
17
docs/_config.yml
Normal file
|
@ -0,0 +1,17 @@
|
|||
title: SyncFiles Documentation
|
||||
email: depp@zdome.net
|
||||
description: >-
|
||||
Write an awesome description for your new site here. You can edit this
|
||||
line in _config.yml. It will appear in your document head meta (for
|
||||
Google search results) and in your feed.xml site description.
|
||||
url: https://depp.github.io
|
||||
baseurl: /syncfiles/
|
||||
twitter_username: DietrichEpp
|
||||
github_username: depp
|
||||
|
||||
theme: just-the-docs
|
||||
plugins:
|
||||
- jekyll-feed
|
||||
|
||||
exclude:
|
||||
- push.sh
|
9
docs/index.md
Normal file
|
@ -0,0 +1,9 @@
|
|||
---
|
||||
layout: page
|
||||
title: Home
|
||||
nav_order: 1
|
||||
has_children: true
|
||||
permalink: /
|
||||
---
|
||||
|
||||
# Transfer files to and from old Macintosh systems
|
49
docs/push.sh
Executable file
|
@ -0,0 +1,49 @@
|
|||
#!/bin/sh
|
||||
# Build the site and push it to GitHub pages.
|
||||
set -e
|
||||
|
||||
root=$(git rev-parse --show-toplevel)
|
||||
cd "$root"
|
||||
|
||||
if ! command -v frum >/dev/null ; then
|
||||
echo >&2 "Error: frum is not installed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! git diff-index --quiet --cached HEAD -- docs ; then
|
||||
echo >&2 "Error: uncommitted changes"
|
||||
exit 1
|
||||
fi
|
||||
if ! git diff-files --quiet docs ; then
|
||||
echo >&2 "Error: uncommitted changes"
|
||||
exit 1
|
||||
fi
|
||||
branch=$(git symbolic-ref HEAD)
|
||||
if test "$branch" != refs/heads/main ; then
|
||||
echo >&2 "Error: branch is not main"
|
||||
exit 1
|
||||
fi
|
||||
commit="$(git rev-parse HEAD)"
|
||||
|
||||
echo >&2 "Checking out gh-pages..."
|
||||
if test -d gh-pages ; then
|
||||
branch=$(git -C gh-pages symbolic-ref HEAD)
|
||||
if test "$branch" != refs/heads/gh-pages ; then
|
||||
echo >&2 "Error: gh-pages dir does not have gh-pages branch"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
git worktree add gh-pages
|
||||
fi
|
||||
|
||||
echo >&2 "Building site..."
|
||||
(
|
||||
cd docs
|
||||
eval "$(sh -c 'frum init')"
|
||||
bundle exec jekyll build --destination ../gh-pages
|
||||
)
|
||||
|
||||
echo >&2 "Committing..."
|
||||
cd gh-pages
|
||||
git add .
|
||||
git commit -m "Generated from commit ${commit}"
|
216
docs/tech/apis.md
Normal file
|
@ -0,0 +1,216 @@
|
|||
---
|
||||
layout: page
|
||||
title: File APIs
|
||||
nav_order: 4
|
||||
permalink: /tech/apis
|
||||
parent: Technical Guide
|
||||
---
|
||||
|
||||
# File APIs
|
||||
|
||||
Mac OS filesystem APIs evolved as the underlying filesystem semantics changed.
|
||||
|
||||
## Pascal Strings
|
||||
|
||||
Prior to Mac OS X, Macintosh APIs used _Pascal strings_ to represent strings. Pascal strings are passed by pointer. The first byte pointed to stores the string length, and the string contents follow. Pascal strings are not null-terminated and not compatible with C-style strings. Note that since the string length is stored in a single byte, these strings cannot be longer than 255 bytes.
|
||||
|
||||
Compilers for Mac OS support Pascal strings by putting the `\p` sequence at the beginning of a string. For example,
|
||||
|
||||
```c
|
||||
const unsigned char kFilename[] = "\pMy File";
|
||||
```
|
||||
|
||||
This is equivalent to:
|
||||
|
||||
```c
|
||||
const unsigned char kFilename[8] = {
|
||||
7, 'M', 'y', ' ', 'F', 'i', 'l', 'e'
|
||||
};
|
||||
```
|
||||
|
||||
Pascal strings are encoded using one of the old Macintosh character encodings, such as Mac OS Roman. In some cases, the encoding is assumed to be the system’s encoding, whatever that is. In other cases, the encoding is explicitly specified using a `ScriptCode` (although this value is somewhat ambiguous). In other cases still, the actual encoding is ignored and the string is treated as if it were encoded using the Mac OS Roman encoding.
|
||||
|
||||
## Early Mac OS
|
||||
|
||||
The filesystem on the very first Macintosh, later called the Mac 128K, did not support folders or directories. Each file was identified by volume ID and name. For example, `OpenDF` opens the data fork of a file (presumably, `DF` stands for “data fork”—there is a corresponding `OpenRF` for the resource fork), and `Create` creates a new file.
|
||||
|
||||
```c
|
||||
OSErr OpenDF(
|
||||
const unsigned char * fileName,
|
||||
short vRefNum,
|
||||
short * refNum);
|
||||
|
||||
OSErr Create(
|
||||
const unsigned char * fileName,
|
||||
short vRefNum,
|
||||
OSType creator,
|
||||
OSType fileType);
|
||||
```
|
||||
|
||||
Filenames have a maximum length of 63 characters and are case insensitive. Systems from this era did not support multiple character encodings, so the encoding did not need to be specified. Note that the 63-character limit is an API limit, and common filesystems have a lower, 31-character limit.
|
||||
|
||||
You should not be using this API unless you are targeting _extremely_ old Macintosh systems, like the Mac 128K. This API became obsolete with the introduction of 128K ROMs with the Mac Plus in 1986.
|
||||
|
||||
This API is not part of Carbon and cannot be used on Mac OS X.
|
||||
|
||||
### Swapping Floppy Disks
|
||||
|
||||
During this era, it was common to swap floppy disks while a program was running. You can run a program from one disk and save files on another disk. When a program tries to access a file that’s on a different disk, the operating system ejects the current disk and prompts the user to insert the correct disk. This happens automatically; programs do not need to include any code to make this possible.
|
||||
|
||||
### Compatibility with HFS
|
||||
|
||||
Old applications written to use this API continue to work with files after the introduction of the hierarchical filesystem. The way this happens is through something called _working directories_.
|
||||
|
||||
A working directory is the combination of a volume ID and a directory ID, and it can be used in place of a volume ID in the filesystem API. The intent is that old code which only uses volume IDs can be used to save files in different locations on the filesystem. For example, if an old application creates a “save file” dialog box, instead of a volume ID, the dialog box returns a working directory pointing to the directory where the user chose to save the file.
|
||||
|
||||
## Hierarchical API
|
||||
|
||||
Alongside Apple’s first hard disk for the Macintosh, the operating system introduced a new filesystem (HFS) which supported directories, and this required a new API. Instead of `OpenDF`, programs now call `HOpenDF` to open the data fork of a file, and call `HCreate` instead of `Create`. Presumably, “H” stands for “hierarchical”.
|
||||
|
||||
```c
|
||||
OSErr HOpenDF(
|
||||
short vRefNum,
|
||||
long dirID,
|
||||
const unsigned char * fileName,
|
||||
SInt8 permission,
|
||||
short * refNum);
|
||||
|
||||
OSErr HCreate(
|
||||
short vRefNum,
|
||||
long dirID,
|
||||
const unsigned char * fileName,
|
||||
OSType creator,
|
||||
OSType fileType);
|
||||
```
|
||||
|
||||
In this API, files are identified by volume ID, the directory ID within that volume, and the filename within that directory. The encoding is not specified, and presumably, files will be created using the system’s default character encoding.
|
||||
|
||||
## FSSpec API
|
||||
|
||||
Mac System 7 introduces the FSSpec API. It does not change semantics, but provides a simple data structure which is used to store the volume ID, directory ID, and filename. This structure is called `FSSpec`.
|
||||
|
||||
```c
|
||||
struct FSSpec {
|
||||
short vRefNum;
|
||||
long parID;
|
||||
unsigned char name[64];
|
||||
};
|
||||
```
|
||||
|
||||
Functions which use an `FSSpec` are named with the `FSp` prefix. These functions are preferred over the previous versions for their simplicity. For example, `FSpOpenDF`, which opens a file’s data fork, and `FSpCreate`, which creates a new file:
|
||||
|
||||
```c
|
||||
OSErr FSpOpenDF(
|
||||
const FSSpec * spec,
|
||||
SInt8 permission,
|
||||
short * refNum);
|
||||
|
||||
OSErr FSpCreate(
|
||||
const FSSpec * spec,
|
||||
OSType creator,
|
||||
OSType fileType,
|
||||
ScriptCode scriptTag);
|
||||
```
|
||||
|
||||
Note that the character encoding is specified when creating a file, using the `scriptTag` parameter (although technically, this does not completely specify an encoding). The character encoding is not specified when opening a file; instead, the encoding is ignored and the filename is treated as if it were in the Mac OS Roman encoding.
|
||||
|
||||
This API _preserves_ the encoding used for filenames, but you only need the correct bytestring to refer to existing files.
|
||||
|
||||
Starting in Mac OS X 10.4, this API and other APIs before it are marked as deprecated.
|
||||
|
||||
## FSRef API
|
||||
|
||||
Mac OS 9 introduces a new opaque alternative to `FSSpec` called `FSRef`. You can use an `FSRef` to refer to an existing file, but there is no way to get any information from an `FSRef` without invoking the filesystem API. The `FSRef` is not a drop-in replacement for `FSSpec`, because an `FSRef` must refer to an existing file, and therefore, cannot be used to create a new file.
|
||||
|
||||
```c
|
||||
struct FSRef {
|
||||
UInt8 hidden[80];
|
||||
};
|
||||
```
|
||||
|
||||
The `FSRef` structure is private to an application and cannot be assumed to be valid if the file is moved/renamed, if the volume is unmounted and remounted, or if the structure is passed to another process. We can speculate that the `FSRef` structure contains information about a file like its filesystem inode, which can be used to look up the file name.
|
||||
|
||||
You can convert an `FSSpec` to an `FSRef`, although this will fail if the file does not exist:
|
||||
|
||||
```c
|
||||
OSErr FSpMakeFSRef(
|
||||
const FSSpec * source,
|
||||
FSRef * newRef);
|
||||
```
|
||||
|
||||
The `FSRef` API has some other differences. For example, when you open a file, you specify the name of the fork you want to open. Previous APIs used different functions for opening the data fork and the resource fork. File names are now specified as Unicode strings encoded with UTF-16.
|
||||
|
||||
```c
|
||||
OSErr FSOpenFork(
|
||||
const FSRef * ref,
|
||||
UniCharCount forkNameLength,
|
||||
const UniChar * forkName,
|
||||
SInt8 permissions,
|
||||
SInt16 * forkRefNum);
|
||||
|
||||
OSErr FSCreateFileUnicode(
|
||||
const FSRef * parentRef,
|
||||
UniCharCount nameLength,
|
||||
const UniChar * name,
|
||||
FSCatalogInfoBitmap whichInfo,
|
||||
const FSCatalogInfo * catalogInfo,
|
||||
FSRef * newRef,
|
||||
FSSpec * newSpec);
|
||||
```
|
||||
|
||||
This API is a part of Carbon. Carbon was never ported to 64-bit architectures, and was deprecated in Mac OS X 10.8. The last version that supports this API is macOS 10.14, which is the last version of macOS that supports 32-bit programs.
|
||||
|
||||
## Unix API
|
||||
|
||||
Mac OS X brought us the Unix APIs:
|
||||
|
||||
```c
|
||||
int open(
|
||||
const char * pathname,
|
||||
int flags,
|
||||
...);
|
||||
|
||||
int openat(
|
||||
int fd,
|
||||
const char * pathname,
|
||||
int flags,
|
||||
...);
|
||||
```
|
||||
|
||||
The `openat` call is available from Mac OS 10.10 onwards.
|
||||
|
||||
Strings are now null-terminated C strings, which are interpreted as UTF-8. Mac OS filesystems do not support arbitrary bytestrings as filenames. If you try to create a file with `open` with a filename that is not supported by the filesystem, the system call will fail and set `errno` to `EILSEQ` (92). This is not documented in the man page for `open`.
|
||||
|
||||
### Forks on Unix
|
||||
|
||||
The resource fork can be accessed as if it were a separate file. To access the resource fork, append `/rsrc` or `/..namedfork/rsrc` to the file path. This allows you to view resource fork data through ordinary Unix APIs or with Unix command-line utilities like `hexdump` and `ls`.
|
||||
|
||||
Linux also allows you to access the resource fork by appending `/rsrc` to the file path, for filesystems that support resource forks.
|
||||
|
||||
### Extended Attributes
|
||||
|
||||
Starting in version 10.4, Mac OS X provides an interface for accessing extended attributes on a file.
|
||||
|
||||
```c
|
||||
ssize_t getxattr(
|
||||
const char * path,
|
||||
const char * name,
|
||||
void * value,
|
||||
size_t size,
|
||||
u_int32_t position,
|
||||
int options);
|
||||
```
|
||||
|
||||
The resource fork is presented as an extended attribute with the name `com.apple.ResourceFork`. Since resource forks can be as much as 16 MiB in size, the `getxattr` function provides a way to read portions of the resource fork without having to read the entire fork.
|
||||
|
||||
Finder info is contained in an attribute named `com.apple.FinderInfo`.
|
||||
|
||||
## Aliases and Bookmarks
|
||||
|
||||
Mac OS also provides facilities to store a reference to a file. These references are designed to be durable, and work even if the file is moved and renamed. These references work by containing various pieces of metadata about the file. If the file is moved, the metadata can be used to find it again.
|
||||
|
||||
On older Mac OS systems, these records are called _aliases_ and you can create them using the alias manager APIs, which are available starting in System 7.
|
||||
|
||||
In Cocoa, these records are called _bookmarks_.
|
||||
|
||||
The main use of aliases and bookmarks is to store references to files in the “recently used files” menu option in applications.
|
125
docs/tech/archive-formats.md
Normal file
|
@ -0,0 +1,125 @@
|
|||
---
|
||||
layout: page
|
||||
title: Archive Formats
|
||||
nav_order: 1
|
||||
permalink: /tech/archive-formats
|
||||
parent: Technical Guide
|
||||
---
|
||||
|
||||
# Archive Formats
|
||||
|
||||
Files on old Macintosh systems often have metadata and extra data streams that they need in order to work correctly. If you copy a file from an old Macintosh computer to a Windows or Linux computer, and back, the extra data will be lost and the file may not work correctly.
|
||||
|
||||
The extra data that Macintosh archive formats must preserve are the file type code and creator code (see [Finder Info]({%link tech/finder-info.md %})) and the resource fork (see [Resource Forks]({%link tech/resource-forks.md %})).
|
||||
|
||||
Files don’t always need special treatment. Plain text files and common image formats like PNG and JPEG are pure data. With software like PC Exchange or File Exchange, the system can automatically set the file type code and creator code from the file’s extension. This is all you need for plain text, PNG, JPEG, and various other formats.
|
||||
|
||||
However, applications and most types of files will not work correctly if the extra data is lost. Archive formats provide ways to preserve this data.
|
||||
|
||||
## Which Format Should I Use?
|
||||
|
||||
This is a matter of opinion! However, some choices are better than others.
|
||||
|
||||
1. **Raw disk image** (extension `.img`, or sometimes `.dsk`) is the first choice you should consider. Raw disk images are easy to create, preserve all data, and can be used almost anywhere. Note that not all `.img` files are raw disk images.
|
||||
|
||||
1. **UDIF disk images** (extension `.dmg`) are preferred for archiving and distributing software for Mac OS X.
|
||||
|
||||
1. **NDIF disk images with MacBinary** (extension `.img.bin`) are useful for uploading and sharing software and files for Mac OS 9 and earlier.
|
||||
|
||||
## What Should I Avoid?
|
||||
|
||||
- Don’t transfer NDIF disk images without preserving the image’s resource fork.
|
||||
|
||||
- Don’t double-encode files. Stuffit archives do not need any further encoding. Rather than using `.sit.bin` or `.sit.hqx`, just use `.sit`.
|
||||
|
||||
## Disk Images
|
||||
|
||||
A disk image contains a complete copy of an HFS volume or other filesystem, stored as a file.
|
||||
|
||||
### Raw Disk Image
|
||||
|
||||
A raw disk image is a complete HFS (or other) filesystem, stored as a file. Raw disk images are easy to create and can be used by emulators, vintage computers, and tools on modern computers. Raw disk images can be safely transferred to other systems without losing data. Note that files which have been deleted can sometimes be restored from raw disk images! DiskCopy lets you avoid this by choosing the “zero blocks” option, which is enabled by default.
|
||||
|
||||
Raw disk images often use the file extension `.img` or `.dsk`. You may find it useful to name your raw images with the `.dsk` extension to distinguish raw disk images from NDIF disk images.
|
||||
|
||||
You can create an empty raw disk image using DiskCopy, or create one from the contents of a folder. DiskCopy creates raw disk images when you choose a “read/write” image format.
|
||||
|
||||
The [HFS Utilities][hfsutils] package provides tools for moving data to and from raw disk images.
|
||||
|
||||
[hfsutils]: https://www.mars.org/home/rob/proj/hfs/
|
||||
|
||||
### DiskCopy 4.2 Disk Image
|
||||
|
||||
A DiskCopy 4.2 Disk image contains an image of a floppy disk, plus some metadata like data checksums. DiskCopy 4.2 images are supported by Mini vMac. They contain resource forks, but the image can be used even if the resource fork is deleted.
|
||||
|
||||
### NDIF Disk Image
|
||||
|
||||
![DiskCopy and a DiskCopy image](diskcopy.png)
|
||||
|
||||
An NDIF image contains a modified version of an HFS filesystem with additional metadata, such as checksums, stored in the file’s resource fork. This additional data appears to be stored in a 200-byte `'bcem'` resource with ID 128, but NDIF is not publicly documented and it is a matter of guesswork. If you delete the `'bcem'` resource, the disk image will not mount.
|
||||
|
||||
NDIF images can be compressed. If you want to transfer NDIF images to other systems, encode them with MacBinary (or another encoding that preserves the resource fork).
|
||||
|
||||
### UDIF Disk Image
|
||||
|
||||
A UDIF disk image is like an NDIF disk image, but contains all of its data in the data fork. UDIF appeared on Mac OS X, although it is reportedly possible to use them on Mac OS 9. You can also mount a UDIF image in Mac OS X and access the mounted image from the Classic environment.
|
||||
|
||||
UDIF disk images use the file extension `.dmg`. UDIF images are compressed by default.
|
||||
|
||||
### Self-Mounting Image
|
||||
|
||||
Self-mounting images typically use the extension `.smi`. Self-mounting images are applications. Running the application mounts the image.
|
||||
|
||||
### ISO, Toast
|
||||
|
||||
ISO and Toast images are disk images created for burning optical disks.
|
||||
|
||||
## Single-File Encodings
|
||||
|
||||
If you just need to preserve the Macintosh metadata for a single file, there are three common options.
|
||||
|
||||
### MacBinary
|
||||
|
||||
![MacBinary and a MacBinary encoded file](macbinary.png)
|
||||
|
||||
MacBinary combines a file’s data fork, resource fork, and metadata into a single file. It is the preferred encoding for transferring individual Macintosh files between systems or for working with Macintosh files on non-Macintosh systems.
|
||||
|
||||
MacBinary uses the `.bin` extension.
|
||||
|
||||
Note that if a file format has a well-known extension and does not need its resource fork, MacBinary is unnecessary. There is no point in creating a `.sit.bin` file, for example. This increases the file size but does not provide any benefits.
|
||||
|
||||
### BinHex
|
||||
|
||||
BinHex combines a file’s data fork, resource fork, and metadata into a single file. The result is encoded as pure ASCII text, which increases the file size, but allows BinHex files to be transferred over old email systems or newsgroups without being damaged. Old email systems are not _8-bit clean_ and would only transmit the low 7 bits of each byte. This worked for pure ASCII text files, but would mangle most other types of files. BinHex also uses a simple run-length encoding and stores CRC checksums of the data.
|
||||
|
||||
BinHex uses the `.hqx` extension.
|
||||
|
||||
It is common to see BinHex files on old websites, but the format has been superseded by MacBinary.
|
||||
|
||||
Note that it is not necessary or useful to encode Stuffit archives with BinHex, unless you are transferring data across a 7-bit connection (which is unlikely). If you see a `.sit.hqx` file, it can be safely decoded to a `.sit` file, even on non-Macintosh systems.
|
||||
|
||||
### AppleDouble, AppleSingle
|
||||
|
||||
AppleDouble is likely the most ubiquitous format for preserving Macintosh metadata, because it’s still used by modern versions of macOS to preserve metadata in zip files, on network shares, and on disk volumes formatted with non-Macintosh filesystems like FAT. AppleDouble is also used by A/UX.
|
||||
|
||||
AppleDouble is called “AppleDouble” because the data and metadata are stored in separate files. AppleSingle uses the same format, but stores the data and metadata in one file. AppleDouble is convenient because it lets you use the same files from both a Macintosh system and another system.
|
||||
|
||||
There is no standard file extension for AppleDouble files, but there is a standard prefix, which is `._`. If you save a file named `MyFile` on a FAT filesystem or Samba network share, Mac OS X will also create an AppleDouble file named `._MyFile`, if necessary. If you have used flash drives or network shares to share files between Mac OS X and other systems like Linux or Windows, you have probably seen these AppleDouble files (and the related `.DS_Store` files).
|
||||
|
||||
## Compression Formats
|
||||
|
||||
### Stuffit
|
||||
|
||||
![Stuffit Expander and a Stuffit archive](stuffit.png)
|
||||
|
||||
Stuffit was the most popular compression program for the Macintosh during the late 1980s and the 1990s. Stuffit archives use the `.sit` extension. Stuffit fell out of use after Apple released Mac OS X, and was replaced by `.zip` and `.dmg`.
|
||||
|
||||
Most Mac files shared on websites are compressed using Stuffit.
|
||||
|
||||
### Compact Pro
|
||||
|
||||
Compact Pro is a compression program which was somewhat less popular than Stuffit. Compact Pro archives use the `.cpt` extension.
|
||||
|
||||
### PackIt
|
||||
|
||||
PackIt is an early compression program for Macintosh. Files are compressed using simple Huffman coding. PackIt archives use the `.pit` extension. PackIt archives are rare.
|
BIN
docs/tech/diskcopy.png
Normal file
After Width: | Height: | Size: 1.4 KiB |
BIN
docs/tech/file-exchange.png
Normal file
After Width: | Height: | Size: 7.3 KiB |
53
docs/tech/filesystems.md
Normal file
|
@ -0,0 +1,53 @@
|
|||
---
|
||||
layout: page
|
||||
title: Filesystems
|
||||
nav_order: 5
|
||||
permalink: /tech/filesystems
|
||||
parent: Technical Guide
|
||||
---
|
||||
|
||||
# Filesystems
|
||||
|
||||
This document focuses on the different filesystems used by Mac OS over the years and how filenames work on these filesystems.
|
||||
|
||||
## MFS
|
||||
|
||||
Macintosh file system (MFS) is Apple’s filesystem for the first Macintosh. MFS does not support directories and has a maximum filename length of 63 characters. According to Wikipedia, the final OS versions that supported MFS were 7.6 for read-write access and 8.0 for read-only access.
|
||||
|
||||
It is unusual to see this filesystem in practice, since it was replaced by HFS shortly after it appeared.
|
||||
|
||||
## HFS
|
||||
|
||||
Hierarchical file system (HFS) was introduced shortly after MFS and replaced MFS. It first appeared alongside Apple’s first Macintosh hard disk, the “Hard Disk 20” in 1985, and afterwards appeared in the 128K ROM on the Mac Plus. HFS introduces directories and a new set of file APIs.
|
||||
|
||||
The final versions of Mac OS which support HFS are Mac OS X 10.5 Leopard for read-write access and macOS 10.14 Mojave for read-only access.
|
||||
|
||||
HFS has a maximum filename length of 31 characters. The script used for the filename is recorded, but filenames are compared as if they were encoded using the Macintosh Roman encoding. Filenames are case insensitive, and the sort order is described on page A-20 of _Inside Macintosh: Text_ (1993).
|
||||
|
||||
## HFS Plus
|
||||
|
||||
HFS Plus was introduced with Mac OS 8.1 and added support for filesystem journaling, Unicode filenames, up to 255 characters per filename, and case-sensitive filenames if enabled. Mac OS 8.1 was released in January 1998.
|
||||
|
||||
To provide backwards compatibility with older APIs, HFS Plus records the encoding that filenames should be encoded with when listing files in the older APIs. The volume header tracks a list of all encodings used for all filenames on the volume, so the appropriate conversion tables can be loaded when the volume is mounted. When an application using an older API lists the files in a directory, it will see backwards-compatible filenames substituted for filenames that use unsupported characters or filenames that are too long.
|
||||
|
||||
Filenames are stored in UTF-16, decomposed using the rules from Unicode 2.1 (up to Mac OS X 10.2) or Unicode 3.2 (for Mac OS X 10.3 and later).
|
||||
|
||||
See [Apple Technical Note TN1150: HFS Plus Volume Format][tn1150].
|
||||
|
||||
[tn1150]: https://developer.apple.com/library/archive/technotes/tn/tn1150.html
|
||||
|
||||
There is a variant of HFS Plus called HFSX. The major difference between normal HFS Plus and HFSX is that HFSX does not carry an HFS wrapper for backwards compatibility with systems that do not support HFS Plus.
|
||||
|
||||
## APFS
|
||||
|
||||
APFS is introduced in macOS 10.12.4. Filenames are encoded using UTF-8. Only code points assigned in Unicode 9.0 are permitted in filenames. APFS does not normalize filenames, but does store files by using the hash of the normalized version of the filename.
|
||||
|
||||
## UFS
|
||||
|
||||
UFS stands for _Unix file system_. It is a case-sensitive filesystem which is only supported by Mac OS X versions 10.0 through 10.5.
|
||||
|
||||
UFS is not seen often.
|
||||
|
||||
## Disk Images
|
||||
|
||||
There are various formats for disk images: DiskCopy 4.2 images, NDIF images, and UDIF images.
|
65
docs/tech/finder-info.md
Normal file
|
@ -0,0 +1,65 @@
|
|||
---
|
||||
layout: page
|
||||
title: Finder Info
|
||||
nav_order: 3
|
||||
permalink: /tech/finder-info
|
||||
parent: Technical Guide
|
||||
---
|
||||
|
||||
# Finder Info
|
||||
|
||||
Mac OS also stores a small piece of metadata for each file called _Finder Info_. As the name implies, this contains information which is primarily used by the Finder (Finder is the Mac OS shell, which shows the desktop and filesystem). For example, this includes the file’s type and its location on-screen.
|
||||
|
||||
There are different versions of the Finder info structure. This is what one of the older versions looks like, for a file:
|
||||
|
||||
```c
|
||||
struct FileInfo {
|
||||
OSType fileType;
|
||||
OSType fileCreator;
|
||||
UInt16 finderFlags;
|
||||
Point location;
|
||||
UInt16 reservedField;
|
||||
};
|
||||
```
|
||||
|
||||
## Preservation
|
||||
|
||||
When transferring files to old Macintosh systems, it’s usually necessary to assign a correct type code for every file. It can be frustrating to work with files that do not have the correct type code, and you may not be able to open these files at all.
|
||||
|
||||
![Screenshot of error dialog box, saying that a file could not be opened because the application program that created it could not be found](no-type-code.png)
|
||||
|
||||
There are various tools which can fix this problem. ResEdit can fix this problem, and there are some more specialized tools designed specifically to deal with this problem.
|
||||
|
||||
## Type and Creator Codes
|
||||
|
||||
Older versions of Mac OS do not use filename suffixes to associate files with applications. You can use any name you like for a file, and the file type is given by the file’s type code. The application to open it is given by the file’s creator code.
|
||||
|
||||
The type code is a four-character code used to describe the type of the file. For example, `TEXT` is used for text files, `JPEG` is used for JPEG images, and `APPL` is for application programs. File types are used to figure out which applications can open a specific file. For example, SimpleText can open text files, but it refuses to open application programs.
|
||||
|
||||
The creator code is a second four-character code used to associate the file with a specific application. For example, SimpleText has the creator code `ttxt`, and PictureViewer uses creator code `ogle`. When you double-click on a file in the Finder, the Finder launches the application with the corresponding creator code, if it exists. The creator code for a file also determines what icon it uses in the Finder.
|
||||
|
||||
For example, this screenshot shows three text files in a folder. Each file has the same filename suffix, `.c`, but that suffix is irrelevant here. Since the files were created with three different programs, they have three different icons—the text file icon for SimpleText files, MPW files, and BBEdit files.
|
||||
|
||||
![Screenshot depicting three text files in a Macintosh System 7 folder, created by MPW, BBEdit, and SimpleText](text-files.png)
|
||||
|
||||
## Location and Color
|
||||
|
||||
You can see that the Finder lets you freely place your files in different locations within a window. You can also assign one of eight different labels to a file or folder—each label corresponds to a specific name and color. The location and color are stored in the Finder info.
|
||||
|
||||
Here is what this looks like in System 7. The “Utilities” folder below is given a label which makes the icon red.
|
||||
|
||||
![Screenshot of a volume in the System 7 Finder, depicting a few folders, where one of the folders is red.](finder.png)
|
||||
|
||||
## Rebuilding the Desktop
|
||||
|
||||
Volumes on an old Macintosh system contain an invisible file named “Desktop DB”. This file contains a record of all the applications on the system and the file types that they can open.
|
||||
|
||||
Unfortunately, this database can easily become outdated. When it’s outdated, some applications and files on your computer will use the generic application and file icons, rather than the correct icons. You can rebuild the database by holding down the command and option keys while the computer starts—hold these keys down until you can see the files and folders on your desktop.
|
||||
|
||||
## File Extensions
|
||||
|
||||
Some systems have a control panel called _PC Exchange_ or _File Exchange_, which lets you assign a default type code and creator code to files based on their extension. This was primarily used so that you could read disks formatted for DOS or Windows on a Macintosh, and more easily exchange files with people using DOS or Windows.
|
||||
|
||||
![Screenshot of the File Exchange control panel on Mac OS 9.2](file-exchange.png)
|
||||
|
||||
Starting with Mac OS X, file extensions became the primary way to identify file types, and type codes became unnecessary.
|
BIN
docs/tech/finder.png
Normal file
After Width: | Height: | Size: 1.8 KiB |
11
docs/tech/index.md
Normal file
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
layout: page
|
||||
title: Technical Guide
|
||||
nav_order: 1
|
||||
permalink: /tech/
|
||||
has_children: true
|
||||
---
|
||||
|
||||
# Technical Guide
|
||||
|
||||
This guide explains how files and filesystems work on different versions of Mac OS. There are some important differences—you can’t just write code once and have it work well across a wide variety of Mac OS systems.
|
BIN
docs/tech/macbinary.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
docs/tech/no-type-code.png
Normal file
After Width: | Height: | Size: 1.4 KiB |
61
docs/tech/resource-forks.md
Normal file
|
@ -0,0 +1,61 @@
|
|||
---
|
||||
layout: page
|
||||
title: Resource Forks
|
||||
nav_order: 2
|
||||
permalink: /tech/resource-forks
|
||||
parent: Technical Guide
|
||||
---
|
||||
|
||||
# Resource Forks
|
||||
|
||||
Mac OS traditionally provided two _forks_ for a file: the data fork and the resource fork. A fork is a data stream within a file which can be independently manipulated. You can open one fork and write data to it, change the length of data, or delete the fork entirely without affecting the file’s other fork. Think of the forks as two separate files, bundled up as one file.
|
||||
|
||||
When you copy a file from a non-Mac OS system to a Mac, what you get is a file that only contains a data fork. The resource fork is used to store Mac-specific data. It’s almost always organized into chunks called _resources_, which are discrete pieces of data such as icons, images, sounds, strings, or 68K code segments. Each resource is identified by a four-character type code and 16-bit ID number.
|
||||
|
||||
The resource fork format has a maximum size of about 16 MiB because it encodes file offsets using 24 bits.
|
||||
|
||||
## Historical Use
|
||||
|
||||
Prior to Mac OS X, programs on the Mac make heavy use of the resource fork. For example, an application’s resource fork contains 68K code, icons, dialog box layouts, version information, text data, and sometimes various custom data types. PowerPC code is stored in the data fork, although various resources are still necessary for PowerPC applications to run correctly.
|
||||
|
||||
Applications also use resource forks in other files to store data. Some text editors use the resource fork of a text file to remember the state of the text editor when editing that file. Games often use files with resource forks to store images, sound effects, or level data.
|
||||
|
||||
Starting with Mac OS X, data that was previously stored as resources in the resource fork is stored in separate files instead. For example, application and file icons prior to Mac OS X are stored in the resource fork, but in Mac OS X, each icon is stored as a separate file. It is unusual to find a Mac OS X program that uses the resource fork at all.
|
||||
|
||||
## Are Resource Forks Really Structured?
|
||||
|
||||
Note the key words above: a resource fork is **almost always** organized into chunks called resources.
|
||||
|
||||
A file’s resource fork is really just an alternate stream of data, like the data fork. You can put whatever data you like in the resource fork. In practice, the resource fork almost always uses a specific format.
|
||||
|
||||
Note that this goes both ways. Just like you can store arbitrary data in the resource fork, you can also use the data fork to store resources. Using the data fork to store resources has disadvantages, because you can’t edit those resources with ResEdit or use the Macintosh resource manager API to read those resources, so it is rarely done.
|
||||
|
||||
Some applications store application preferences or other data in the resource fork of a file, and because resource forks can get corrupted, you occasionally see a backup copy of the resource fork stored in the data fork of the same file. This is not common, however.
|
||||
|
||||
## Preserving Resource Forks
|
||||
|
||||
Resource forks do not always need to be preserved when synchronizing files between systems. It depends on the file and what is being stored in the resource fork. For example, when you save a text file in BBEdit or MPW, the editor state is recorded as a resource in the text file. Deleting this resource fork doesn’t affect your ability to use the file. On the other hand, if you delete the resource fork of an application, it won’t work at all.
|
||||
|
||||
## Examining the Resource Fork
|
||||
|
||||
You can use _ResEdit_ or _Resorcerer_ to view and edit the resources in a resource fork, or the MPW tools _Rez_ and _DeRez_.
|
||||
|
||||
ResEdit is the most common tool to use, because it’s free (unlike Resorcerer, which costs $256) and has a nice user interface (unlike Rez and DeRez, which convert resource files to and from text files). ResEdit is available from Apple and the latest version is 2.1.3. Here’s what the resource fork of the MacBinary application looks like in ResEdit:
|
||||
|
||||
![Screenshot of ResEdit, depicting MacBinary II](rsrc-macbinary.png)
|
||||
|
||||
When you open a file in ResEdit, ResEdit shows you an overview of the different types of resources in the file. MacBinary contains 18 different types of resources. When you open one of the types, ResEdit shows you a list of resources for that type. For example, we can look at the `CODE` resources in MacBinary, which contain segments of executable 68K code which can be independently loaded. Each individual resource has an ID, which is a signed 16-bit number, and optionally has a name.
|
||||
|
||||
![Screenshot of ResEdit, depicting a list of CODE resources](rsrc-code.png)
|
||||
|
||||
ResEdit provides simple editors for certain types of resources, like icons. This makes it easy to create your own custom icons for Macintosh applications. You didn’t have to be a programmer to take advantage of this—if you wanted to create your own custom folder icons for certain folders, you could do that too. You could find custom icon collections online or on CDs bundled with magazines.
|
||||
|
||||
Application and folder icons are made from several types of icons with the same ID number, creating an icon family. An icon family allows you to create custom icons for different sizes and color depths.
|
||||
|
||||
![Screenshot of ResEdit, depicting the MacBinary application icon being edited](rsrc-icon.png)
|
||||
|
||||
Another common type of resource is string resources. Applications sometimes store strings used by the application in these resources. This might be done to reduce the memory footprint of the application, or it might be done to make it easier to translate an application into another language.
|
||||
|
||||
It’s not necessary to use string resources at all, and MacBinary doesn’t contain any string resources. There are string resources in _SimpleText_, however, containing help messages, error messages, and various other strings.
|
||||
|
||||
![Screenshot of ResEdit, depicting a string resource in SimpleText](rsrc-strings.png)
|
BIN
docs/tech/rsrc-code.png
Normal file
After Width: | Height: | Size: 1.0 KiB |
BIN
docs/tech/rsrc-icon.png
Normal file
After Width: | Height: | Size: 3.3 KiB |
BIN
docs/tech/rsrc-macbinary.png
Normal file
After Width: | Height: | Size: 3.0 KiB |
BIN
docs/tech/rsrc-strings.png
Normal file
After Width: | Height: | Size: 2.1 KiB |
71
docs/tech/safe-saving.md
Normal file
|
@ -0,0 +1,71 @@
|
|||
---
|
||||
layout: page
|
||||
title: Safe Saving
|
||||
nav_order: 6
|
||||
permalink: /tech/safe-saving
|
||||
parent: Technical Guide
|
||||
---
|
||||
|
||||
# Safe Saving
|
||||
|
||||
There are a number of different goals for when your program saves a file:
|
||||
|
||||
- I/O errors should be reported. If the data does not make it to disk, then tell the user that the operation failed.
|
||||
|
||||
- Saves are atomic. After saving, you either get the old version of the file or the complete new version of the file. If your program crashes, it’s okay if the old file is untouched, but it’s not okay if it’s been partially overwritten.
|
||||
|
||||
- Saves preserve file references. Any references to a document (aliases or bookmarks) remain valid after modifying the document.
|
||||
|
||||
- Saves do not change the creation date, or other metadata associated with the file.
|
||||
|
||||
If your first thought is, “that sounds like it could be complicated”, then you’re in good company. Theodore Ts’o wrote an article in 2009, [Don’t fear the fsync!][tso-fsync] which covers some of these issues on Linux in detail.
|
||||
|
||||
[tso-fsync]: https://thunk.org/tytso/blog/2009/03/15/dont-fear-the-fsync/
|
||||
|
||||
## Classic Mac OS
|
||||
|
||||
HFS and HFS+ support an operation which exchanges the contents of files. The high-level API call looks like this:
|
||||
|
||||
```c
|
||||
OSErr FSpExchangeFiles(
|
||||
const FSSpec * source,
|
||||
const FSSpec * dest);
|
||||
```
|
||||
|
||||
This function exchanges the _contents_ of the two files (both forks), and exchanges the modification dates, but leaves the other metadata alone.
|
||||
|
||||
The recipe for safe saving on HFS volumes is:
|
||||
|
||||
1. Save the document to a temporary file on the same volume.
|
||||
|
||||
1. Exchange the contents of the original file and the temporary file with `FSpExchangeFiles`.
|
||||
|
||||
You can test that `FSpExchangeFiles` is supported by a volume by getting the volume parameters. Not all filesystems support this operation.
|
||||
|
||||
## Mac OS X
|
||||
|
||||
Mac OS X provides a Unix system call that provides the same functionality as `FSpExchangeFiles`, but with a Unix API.
|
||||
|
||||
```c
|
||||
int exchangedata(
|
||||
const char * path1,
|
||||
const char * path2,
|
||||
unsigned int options);
|
||||
```
|
||||
|
||||
However, this function does not work on APFS.
|
||||
|
||||
## Mac OS X 10.6+
|
||||
|
||||
Starting with Mac OS X 10.6, the Foundation framework provides a method for safely replacing an item on the filesystem with a new item. This method is present on `NSFileManager`:
|
||||
|
||||
```objc
|
||||
- (BOOL)replaceItemAtURL:(NSURL *)originalItemURL
|
||||
withItemAtURL:(NSURL *)newItemURL
|
||||
backupItemName:(NSString *)backupItemName
|
||||
options:(NSFileManagerItemReplacementOptions)options
|
||||
resultingItemURL:(NSURL * _Nullable *)resultingURL
|
||||
error:(NSError * _Nullable *)error;
|
||||
```
|
||||
|
||||
This method should be preferred for Mac OS X 10.6 and newer. Unlike `exchangedata()`, this method works on APFS.
|
BIN
docs/tech/stuffit.png
Normal file
After Width: | Height: | Size: 1.6 KiB |
BIN
docs/tech/text-files.png
Normal file
After Width: | Height: | Size: 1.3 KiB |
345
file.c
|
@ -1,345 +0,0 @@
|
|||
#include "defs.h"
|
||||
|
||||
#include "convert.h"
|
||||
|
||||
#include <Files.h>
|
||||
#include <Script.h>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
// Make an FSSpec for a temporary file.
|
||||
static int make_temp(FSSpec *temp, short vRefNum, long dirID,
|
||||
const unsigned char *name) {
|
||||
Str31 tname;
|
||||
unsigned pfxlen, maxpfx = 31 - 4;
|
||||
OSErr err;
|
||||
|
||||
pfxlen = name[0];
|
||||
if (pfxlen > maxpfx) {
|
||||
pfxlen = maxpfx;
|
||||
}
|
||||
memcpy(tname + 1, name + 1, pfxlen);
|
||||
memcpy(tname + 1 + pfxlen, ".tmp", 4);
|
||||
tname[0] = pfxlen + 4;
|
||||
err = FSMakeFSSpec(vRefNum, dirID, tname, temp);
|
||||
if (err != 0 && err != fnfErr) {
|
||||
print_errcode(err, "could not create temp file spec");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Set the modification time for a file.
|
||||
static int set_modtime(FSSpec *spec, long modTime) {
|
||||
CInfoPBRec ci;
|
||||
Str31 name;
|
||||
OSErr err;
|
||||
|
||||
memset(&ci, 0, sizeof(ci));
|
||||
memcpy(name, spec->name, spec->name[0] + 1);
|
||||
ci.hFileInfo.ioNamePtr = name;
|
||||
ci.hFileInfo.ioVRefNum = spec->vRefNum;
|
||||
ci.hFileInfo.ioDirID = spec->parID;
|
||||
err = PBGetCatInfoSync(&ci);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not get temp file info");
|
||||
return 1;
|
||||
}
|
||||
memcpy(name, spec->name, spec->name[0] + 1);
|
||||
ci.hFileInfo.ioNamePtr = name;
|
||||
ci.hFileInfo.ioVRefNum = spec->vRefNum;
|
||||
ci.hFileInfo.ioDirID = spec->parID;
|
||||
ci.hFileInfo.ioFlMdDat = modTime;
|
||||
err = PBSetCatInfoSync(&ci);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not set temp file info");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Move a temp file over a destination file. This may modify the temp file spec
|
||||
// if it moves in multiple stages.
|
||||
static int replace_file(FSSpec *temp, FSSpec *dest, file_action action) {
|
||||
HParamBlockRec pb;
|
||||
CMovePBRec cm;
|
||||
OSErr err;
|
||||
bool mustMove, mustRename;
|
||||
|
||||
// First, try to exchange files if destination exists.
|
||||
if (action == kActionReplace) {
|
||||
err = FSpExchangeFiles(temp, dest);
|
||||
if (gLogLevel >= kLogVerbose) {
|
||||
log_call(err, "FSpExchangeFiles");
|
||||
}
|
||||
if (err == 0) {
|
||||
err = FSpDelete(temp);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not remove temporary file");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
// paramErr: function not supported by volume.
|
||||
if (err != paramErr) {
|
||||
print_errcode(err, "could not exchange files");
|
||||
return 1;
|
||||
}
|
||||
// Otherwise, delete destination and move temp file over.
|
||||
err = FSpDelete(dest);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not remove destination file");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
mustMove = dest->parID != temp->parID;
|
||||
mustRename = memcmp(dest->name, temp->name, dest->name[0] + 1) != 0;
|
||||
|
||||
// Next, try MoveRename.
|
||||
if (mustMove && mustRename) {
|
||||
memset(&pb, 0, sizeof(pb));
|
||||
pb.copyParam.ioNamePtr = temp->name;
|
||||
pb.copyParam.ioVRefNum = temp->vRefNum;
|
||||
pb.copyParam.ioNewName = dest->name;
|
||||
pb.copyParam.ioNewDirID = dest->parID;
|
||||
pb.copyParam.ioDirID = temp->parID;
|
||||
err = PBHMoveRenameSync(&pb);
|
||||
if (gLogLevel >= kLogVerbose) {
|
||||
log_call(err, "PBHMoveRename");
|
||||
}
|
||||
if (err == 0) {
|
||||
return 0;
|
||||
}
|
||||
// paramErr: function not supported by volume.
|
||||
if (err != paramErr) {
|
||||
print_errcode(err, "could not rename temporary file");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, try move and then rename.
|
||||
if (mustMove) {
|
||||
memset(&cm, 0, sizeof(cm));
|
||||
cm.ioNamePtr = temp->name;
|
||||
cm.ioVRefNum = temp->vRefNum;
|
||||
cm.ioNewDirID = dest->parID;
|
||||
cm.ioDirID = temp->parID;
|
||||
err = PBCatMoveSync(&cm);
|
||||
if (gLogLevel >= kLogVerbose) {
|
||||
log_call(err, "PBCatMove");
|
||||
}
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not move temporary file");
|
||||
return 1;
|
||||
}
|
||||
temp->parID = dest->parID;
|
||||
}
|
||||
if (mustRename) {
|
||||
err = FSpRename(temp, dest->name);
|
||||
if (gLogLevel >= kLogVerbose) {
|
||||
log_call(err, "FSpRename");
|
||||
}
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not rename temporary file");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static Ptr gSrcBuffer;
|
||||
static Ptr gDestBuffer;
|
||||
|
||||
int sync_file(struct file_info *file, operation_mode mode, short srcVol,
|
||||
long srcDir, short destVol, long destDir, short tempVol,
|
||||
long tempDir) {
|
||||
OSType creator, fileType;
|
||||
FSSpec src, dest, temp;
|
||||
short srcRef = 0, destRef = 0;
|
||||
bool has_temp = false;
|
||||
int r;
|
||||
OSErr err;
|
||||
|
||||
// Handle actions which don't involve conversion.
|
||||
if (file->action == kActionNone) {
|
||||
return 0;
|
||||
}
|
||||
if (file->action == kActionDelete) {
|
||||
err = FSMakeFSSpec(destVol, destDir, file->name, &dest);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not create destination spec");
|
||||
return 1;
|
||||
}
|
||||
err = FSpDelete(&dest);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not delete destination file");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Create file specs.
|
||||
err = FSMakeFSSpec(srcVol, srcDir, file->name, &src);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not create source spec");
|
||||
return 1;
|
||||
}
|
||||
err = FSMakeFSSpec(destVol, destDir, file->name, &dest);
|
||||
if (err != 0 && err != fnfErr) {
|
||||
print_errcode(err, "could not create destination spec");
|
||||
return 1;
|
||||
}
|
||||
r = make_temp(&temp, tempVol, tempDir, dest.name);
|
||||
if (r != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Create the temporary file.
|
||||
switch (file->type) {
|
||||
case kTypeText:
|
||||
case kTypeTextUTF8:
|
||||
creator = 'MPS ';
|
||||
fileType = 'TEXT';
|
||||
break;
|
||||
case kTypeResource:
|
||||
creator = 'RSED';
|
||||
fileType = 'rsrc';
|
||||
break;
|
||||
default:
|
||||
print_err("invalid type");
|
||||
return 1;
|
||||
}
|
||||
err = FSpCreate(&temp, creator, fileType, smSystemScript);
|
||||
if (err == dupFNErr) {
|
||||
err = FSpDelete(&temp);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not delete existing temp file");
|
||||
goto error;
|
||||
}
|
||||
err = FSpCreate(&temp, creator, fileType, smSystemScript);
|
||||
}
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not create file");
|
||||
goto error;
|
||||
}
|
||||
has_temp = true;
|
||||
|
||||
// Get buffers for conversion.
|
||||
if (gSrcBuffer == NULL) {
|
||||
gSrcBuffer = NewPtr(kBufferTotalSize);
|
||||
if (gSrcBuffer == NULL) {
|
||||
print_memerr(kBufferTotalSize);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
if (gDestBuffer == NULL) {
|
||||
gDestBuffer = NewPtr(kBufferTotalSize);
|
||||
if (gDestBuffer == NULL) {
|
||||
print_memerr(kBufferTotalSize);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
// Open the source file for reading.
|
||||
if (file->type == kTypeResource && mode == kModePush) {
|
||||
err = FSpOpenRF(&src, fsRdPerm, &srcRef);
|
||||
} else {
|
||||
err = FSpOpenDF(&src, fsRdPerm, &srcRef);
|
||||
}
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not open file");
|
||||
goto error;
|
||||
}
|
||||
if (file->type == kTypeResource && mode == kModePull) {
|
||||
err = FSpOpenRF(&temp, fsRdWrPerm, &destRef);
|
||||
} else {
|
||||
err = FSpOpenDF(&temp, fsRdWrPerm, &destRef);
|
||||
}
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not open temp file");
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Convert data.
|
||||
switch (file->type) {
|
||||
case kTypeText:
|
||||
if (mode == kModePush) {
|
||||
r = mac_to_unix(srcRef, destRef, gSrcBuffer, gDestBuffer);
|
||||
} else {
|
||||
r = mac_from_unix(srcRef, destRef, gSrcBuffer, gDestBuffer);
|
||||
}
|
||||
break;
|
||||
case kTypeTextUTF8: {
|
||||
unsigned char srcEnding, destEnding;
|
||||
if (mode == kModePush) {
|
||||
srcEnding = 0x0d;
|
||||
destEnding = 0x0a;
|
||||
} else {
|
||||
srcEnding = 0x0a;
|
||||
destEnding = 0x0d;
|
||||
}
|
||||
r = convert_line_endings(srcRef, destRef, gSrcBuffer, srcEnding,
|
||||
destEnding);
|
||||
} break;
|
||||
case kTypeResource:
|
||||
r = copy_data(srcRef, destRef, gSrcBuffer);
|
||||
break;
|
||||
default:
|
||||
print_err("invalid type");
|
||||
goto error;
|
||||
}
|
||||
if (r != 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Close files.
|
||||
err = FSClose(srcRef);
|
||||
srcRef = 0;
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not close source file");
|
||||
goto error;
|
||||
}
|
||||
err = FSClose(destRef);
|
||||
destRef = 0;
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not close temp file");
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Set modification time.
|
||||
r = set_modtime(&temp, file->meta[kSrcDir].modTime);
|
||||
if (r != 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Overwrite destination.
|
||||
r = replace_file(&temp, &dest, file->action);
|
||||
if (r != 0) {
|
||||
goto error;
|
||||
}
|
||||
return 0;
|
||||
|
||||
error:
|
||||
// Clean up.
|
||||
if (srcRef != 0) {
|
||||
err = FSClose(srcRef);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not close source file");
|
||||
}
|
||||
}
|
||||
if (destRef != 0) {
|
||||
err = FSClose(destRef);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not close destination file");
|
||||
}
|
||||
}
|
||||
if (has_temp) {
|
||||
err = FSpDelete(&temp);
|
||||
if (err != 0) {
|
||||
print_errcode(err, "could not delete temp file");
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
1
gen/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
/macscript
|
18
gen/BUILD.bazel
Normal file
|
@ -0,0 +1,18 @@
|
|||
load("@io_bazel_rules_go//go:def.bzl", "go_binary")
|
||||
|
||||
go_binary(
|
||||
name = "macscript",
|
||||
srcs = [
|
||||
"cdata.go",
|
||||
"data.go",
|
||||
"main.go",
|
||||
"rez.go",
|
||||
"scriptmap.go",
|
||||
"source.go",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//gen/charmap",
|
||||
"//gen/table",
|
||||
],
|
||||
)
|
|
@ -1,11 +1,7 @@
|
|||
# Character Conversion Tables
|
||||
# Character Conversion Data Generator
|
||||
|
||||
Used by SyncFiles.
|
||||
|
||||
This program generates the tables necessary to convert from UTF-8 to Mac OS Roman.
|
||||
This program generates the tables necessary to convert between UTF-8 and various historical Mac OS character sets. Not all character sets are currently supported.
|
||||
|
||||
The conversion process is entirely table-driven. The table maps a (state, input) pair to a (state, output) pair. The initial state is 0. A transition to state 0 is considered invalid.
|
||||
|
||||
A transition may have _both_ a state and output. This means that the input may be translated in different ways depending on the bytes that follow. The translation code prefers the longest path through the state table that results in an output.
|
||||
|
||||
The table is compressed with PackBits to reduce its size by a factor of 22x.
|
||||
Right now, the table format is not quite finalized, so the output is not checked in to version control. This means that you have to run this program in order to generate the data and compile SyncFiles. It is planned to check the data files in, making this step unnecessary.
|
||||
|
|
117
gen/cdata.go
Normal file
|
@ -0,0 +1,117 @@
|
|||
package main
|
||||
|
||||
import "fmt"
|
||||
|
||||
const strlookup = `const char *%s(int cmap)
|
||||
{
|
||||
if (cmap < 0 || CHARMAP_COUNT <= cmap) {
|
||||
return 0;
|
||||
}
|
||||
return kCharmapText + %s[cmap];
|
||||
}
|
||||
`
|
||||
|
||||
const datalookup = `struct CharmapData CharmapData(int cmap) {
|
||||
struct CharmapData data;
|
||||
UInt32 off0, off1;
|
||||
data.ptr = 0;
|
||||
data.size = 0;
|
||||
if (cmap < 0 || CHARMAP_COUNT <= cmap) {
|
||||
return data;
|
||||
}
|
||||
off0 = kCharmapOffset[cmap];
|
||||
off1 = kCharmapOffset[cmap+1];
|
||||
if (off0 == off1) {
|
||||
return data;
|
||||
}
|
||||
data.ptr = kCharmapData + off0;
|
||||
data.size = off1 - off0;
|
||||
return data;
|
||||
}
|
||||
`
|
||||
|
||||
func writeInfo(d *scriptdata, filename string) error {
|
||||
strs := newStringtable()
|
||||
ids := make([]int, len(d.charmaps))
|
||||
names := make([]int, len(d.charmaps))
|
||||
for i, cm := range d.charmaps {
|
||||
ids[i] = strs.add(cm.id)
|
||||
names[i] = strs.add(cm.name)
|
||||
}
|
||||
|
||||
s, err := createCSource(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
w := s.writer
|
||||
s.include("data.h")
|
||||
|
||||
w.WriteString(formatOff)
|
||||
|
||||
fmt.Fprintf(w, "#define CHARMAP_COUNT %d\n", len(d.charmaps))
|
||||
|
||||
fmt.Fprintf(w, "static const char kCharmapText[] =")
|
||||
s.strings(strs.data)
|
||||
w.WriteString(";\n")
|
||||
|
||||
fmt.Fprintf(w, "static const %s kCharmapIDs[CHARMAP_COUNT] = {", arrayIntType(ids))
|
||||
s.ints(ids)
|
||||
w.WriteString("\n};\n")
|
||||
|
||||
fmt.Fprintf(w, "static const %s kCharmapNames[CHARMAP_COUNT] = {", arrayIntType(ids))
|
||||
s.ints(ids)
|
||||
w.WriteString("\n};\n")
|
||||
|
||||
w.WriteString(formatOn)
|
||||
|
||||
fmt.Fprintf(w, strlookup, "CharmapID", "kCharmapIDs")
|
||||
fmt.Fprintf(w, strlookup, "CharmapName", "kCharmapNames")
|
||||
|
||||
return s.flush()
|
||||
}
|
||||
|
||||
func writeData(d *scriptdata, filename string) error {
|
||||
offsets := make([]int, len(d.charmaps)+1)
|
||||
var offset, last int
|
||||
for i, cm := range d.charmaps {
|
||||
offsets[i] = offset
|
||||
offset += len(cm.data)
|
||||
if len(cm.data) != 0 {
|
||||
last = i
|
||||
}
|
||||
}
|
||||
offsets[len(offsets)-1] = offset
|
||||
|
||||
s, err := createCSource(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
w := s.writer
|
||||
w.WriteString(formatOff)
|
||||
s.include("data.h")
|
||||
fmt.Fprintf(w, "#define CHARMAP_COUNT %d\n", len(d.charmaps))
|
||||
|
||||
fmt.Fprintf(w, "static const %s kCharmapOffset[CHARMAP_COUNT + 1] = {", arrayIntType(offsets))
|
||||
s.ints(offsets)
|
||||
w.WriteString("\n};\n")
|
||||
|
||||
w.WriteString("static const UInt8 kCharmapData[] = {")
|
||||
for i, cm := range d.charmaps {
|
||||
if len(cm.data) != 0 {
|
||||
fmt.Fprintf(w, "\n\t/* %s */", cm.name)
|
||||
s.bytes(cm.data, i == last)
|
||||
if i != last {
|
||||
w.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
}
|
||||
w.WriteString("\n};\n")
|
||||
|
||||
w.WriteString(formatOn)
|
||||
|
||||
w.WriteString(datalookup)
|
||||
|
||||
return s.flush()
|
||||
}
|
8
gen/charmap/BUILD.bazel
Normal file
|
@ -0,0 +1,8 @@
|
|||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "charmap",
|
||||
srcs = ["charmap.go"],
|
||||
importpath = "moria.us/macscript/charmap",
|
||||
visibility = ["//gen:__subpackages__"],
|
||||
)
|
231
gen/charmap/charmap.go
Normal file
|
@ -0,0 +1,231 @@
|
|||
// Package charmap provides a way to read character maps.
|
||||
package charmap
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// An Error describes a failure to parse a character mapping file, together
// with the location at which it occurred.
type Error struct {
	File string // File name; omitted from the message if empty.
	Line int    // Line number; omitted from the message if zero.
	Err  error  // The underlying error.
}

// Error formats the error as "file:line: message", leaving out whichever
// parts of the location are unset.
func (e *Error) Error() string {
	msg := e.Err.Error()
	var loc string
	switch {
	case e.File != "" && e.Line != 0:
		loc = e.File + ":" + strconv.Itoa(e.Line) + ":"
	case e.File != "":
		loc = e.File + ":"
	case e.Line != 0:
		loc = strconv.Itoa(e.Line) + ":"
	default:
		return msg
	}
	return loc + " " + msg
}
|
||||
|
||||
// Errors reported for malformed lines in a character mapping file.
var (
	errBadType        = errors.New("unknown entry type")
	errColumns        = errors.New("expected 2 columns")
	errUnicode        = errors.New("invalid unicode sequence")
	errCodePointRange = errors.New("code point out of range")
	errDuplicate      = errors.New("duplicate entry")
)

// A Direction is a direction context for Unicode characters.
type Direction uint32

const (
	// DirectionAny is the zero value, used for entries that carry no
	// direction context.
	DirectionAny Direction = iota
	// DirectionLR indicates that the character requires left-to-right context.
	DirectionLR
	// DirectionRL indicates that the character requires right-to-left context.
	DirectionRL
)

// An Entry is a single entry in a character map: a Unicode sequence and the
// direction context it requires.
type Entry struct {
	Direction Direction
	Unicode   []rune
}

// A Charmap is a character map, mapping characters from a platform encoding to
// Unicode. The maps are created lazily, so a map is nil if the file contained
// no entries of that kind.
type Charmap struct {
	// Pairs of valid one-byte characters that have an alternate Unicode
	// representation.
	Digraph map[[2]byte]Entry

	// Valid single-byte characters.
	OneByte map[byte]Entry

	// Valid two-byte characters.
	TwoByte map[[2]byte]Entry
}

// parseLine parses one line of a character mapping file and records the entry
// it contains, if any.
//
// Everything from '#' onwards is a comment. The remaining text consists of
// tab-separated columns: the platform-encoded value, then the Unicode
// sequence. Any further columns are ignored.
func (m *Charmap) parseLine(line []byte) error {
	if hash := bytes.IndexByte(line, '#'); hash >= 0 {
		line = line[:hash]
	}
	if len(line) == 0 {
		return nil
	}

	tab := bytes.IndexByte(line, '\t')
	if tab <= 0 {
		return errColumns
	}
	platform, uni := line[:tab], line[tab+1:]
	if end := bytes.IndexByte(uni, '\t'); end >= 0 {
		uni = uni[:end]
	}
	if len(uni) == 0 {
		return errColumns
	}

	e, err := parseEntry(uni)
	if err != nil {
		return err
	}
	return m.store(platform, e)
}

// parseEntry parses the Unicode column: an optional "<LR>+" or "<RL>+" context
// followed by one to eight "0x"-prefixed hexadecimal code points joined by
// '+'. The text must not be empty.
func parseEntry(text []byte) (Entry, error) {
	var e Entry
	if text[0] == '<' {
		plus := bytes.IndexByte(text, '+')
		if plus == -1 {
			return e, errors.New("invalid Unicode string")
		}
		ctx := text[:plus]
		text = text[plus+1:]
		switch string(ctx) {
		case "<LR>":
			e.Direction = DirectionLR
		case "<RL>":
			e.Direction = DirectionRL
		default:
			return e, fmt.Errorf("unknown context: %q", ctx)
		}
	}

	var seq [8]rune
	n := 0
	for more := true; more; {
		// Take the next '+'-separated field; the last field ends the loop.
		field := text
		if plus := bytes.IndexByte(text, '+'); plus != -1 {
			field, text = text[:plus], text[plus+1:]
		} else {
			more = false
		}
		if !bytes.HasPrefix(field, []byte("0x")) {
			return e, errUnicode
		}
		x, err := strconv.ParseUint(string(field[2:]), 16, 32)
		if err != nil {
			return e, err
		}
		if x > unicode.MaxRune {
			return e, errCodePointRange
		}
		if n >= len(seq) {
			return e, errors.New("Unicode sequence too long")
		}
		seq[n] = rune(x)
		n++
	}
	e.Unicode = make([]rune, n)
	copy(e.Unicode, seq[:n])
	return e, nil
}

// store records e under the platform-encoded value. The value's length
// selects the kind of entry: "0xXX" is a one-byte character, "0xXXXX" a
// two-byte character, and "0xXX+0xXX" a digraph. Duplicates are rejected.
func (m *Charmap) store(platform []byte, e Entry) error {
	hexPrefix := []byte("0x")
	switch len(platform) {
	case 4:
		if !bytes.HasPrefix(platform, hexPrefix) {
			return errBadType
		}
		var key [1]byte
		if _, err := hex.Decode(key[:], platform[2:]); err != nil {
			return err
		}
		if m.OneByte == nil {
			m.OneByte = make(map[byte]Entry)
		}
		if _, dup := m.OneByte[key[0]]; dup {
			return errDuplicate
		}
		m.OneByte[key[0]] = e
	case 6:
		if !bytes.HasPrefix(platform, hexPrefix) {
			return errBadType
		}
		var key [2]byte
		if _, err := hex.Decode(key[:], platform[2:]); err != nil {
			return err
		}
		if m.TwoByte == nil {
			m.TwoByte = make(map[[2]byte]Entry)
		}
		if _, dup := m.TwoByte[key]; dup {
			return errDuplicate
		}
		m.TwoByte[key] = e
	case 9:
		if !bytes.HasPrefix(platform, hexPrefix) || string(platform[4:7]) != "+0x" {
			return errBadType
		}
		var key [2]byte
		if _, err := hex.Decode(key[:1], platform[2:4]); err != nil {
			return err
		}
		if _, err := hex.Decode(key[1:], platform[7:9]); err != nil {
			return err
		}
		if m.Digraph == nil {
			m.Digraph = make(map[[2]byte]Entry)
		}
		if _, dup := m.Digraph[key]; dup {
			return errDuplicate
		}
		m.Digraph[key] = e
	default:
		return errBadType
	}
	return nil
}
|
||||
|
||||
// Read reads a charmap from a stream.
|
||||
func Read(r io.Reader, name string) (*Charmap, error) {
|
||||
sc := bufio.NewScanner(r)
|
||||
var m Charmap
|
||||
for lineno := 1; sc.Scan(); lineno++ {
|
||||
if err := m.parseLine(sc.Bytes()); err != nil {
|
||||
return nil, &Error{name, lineno, err}
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
// ReadFile reads a charmap from a file on disk.
|
||||
func ReadFile(name string) (*Charmap, error) {
|
||||
fp, err := os.Open(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer fp.Close()
|
||||
return Read(fp, name)
|
||||
}
|
243
gen/data.go
Normal file
|
@ -0,0 +1,243 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"moria.us/macscript/charmap"
|
||||
"moria.us/macscript/table"
|
||||
)
|
||||
|
||||
var (
	// isIdent matches names made of a letter followed by letters, digits,
	// and underscores.
	isIdent = regexp.MustCompile(`^[a-zA-Z][_a-zA-Z0-9]*$`)
	// nonIdentPart matches runs of characters other than ASCII letters and
	// digits.
	nonIdentPart = regexp.MustCompile(`[^a-zA-Z0-9]+`)
)

// makeID derives an identifier from a display name by deleting every
// character that is not an ASCII letter or digit.
func makeID(name string) string {
	id := nonIdentPart.ReplaceAllString(name, "")
	return id
}
|
||||
|
||||
// A dataError reports a problem with the contents of one of the data files,
// along with the position at which it was found.
type dataError struct {
	filename string
	line     int // Omitted from the message if zero.
	column   int // Omitted from the message if zero, or if line is zero.
	err      error
}

// Error formats the error as "filename[:line[:column]]: message".
func (e *dataError) Error() string {
	loc := e.filename
	if e.line != 0 {
		loc += ":" + strconv.Itoa(e.line)
		if e.column != 0 {
			loc += ":" + strconv.Itoa(e.column)
		}
	}
	return loc + ": " + e.err.Error()
}
|
||||
|
||||
// readHeader reads the header row of a CSV file and checks that columns exist with the given names.
|
||||
func readHeader(filename string, r *csv.Reader, names ...string) error {
|
||||
row, err := r.Read()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for i, name := range names {
|
||||
if len(row) <= i {
|
||||
line, _ := r.FieldPos(0)
|
||||
return &dataError{filename, line, 0, fmt.Errorf("missing column: %q", name)}
|
||||
}
|
||||
cname := row[i]
|
||||
if !strings.EqualFold(name, cname) {
|
||||
line, col := r.FieldPos(i)
|
||||
return &dataError{filename, line, col, fmt.Errorf("column name is %q, expected %q", cname, name)}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// A constmap is a map between names and integer values.
|
||||
type constmap struct {
|
||||
names map[string]int
|
||||
values map[int]string
|
||||
}
|
||||
|
||||
// readConsts reads a CSV file containing a map between names and integer values.
|
||||
func readConsts(filename string) (m constmap, err error) {
|
||||
fp, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return m, err
|
||||
}
|
||||
defer fp.Close()
|
||||
r := csv.NewReader(fp)
|
||||
r.ReuseRecord = true
|
||||
if err := readHeader(filename, r, "name", "value"); err != nil {
|
||||
return m, err
|
||||
}
|
||||
m.names = make(map[string]int)
|
||||
m.values = make(map[int]string)
|
||||
for {
|
||||
row, err := r.Read()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return m, err
|
||||
}
|
||||
if len(row) < 2 {
|
||||
line, _ := r.FieldPos(0)
|
||||
return m, &dataError{filename, line, 0, errors.New("expected at least two columns")}
|
||||
}
|
||||
name := row[0]
|
||||
if !isIdent.MatchString(name) {
|
||||
line, col := r.FieldPos(0)
|
||||
return m, &dataError{filename, line, col, fmt.Errorf("invalid name: %q", name)}
|
||||
}
|
||||
if _, e := m.names[name]; e {
|
||||
line, col := r.FieldPos(0)
|
||||
return m, &dataError{filename, line, col, fmt.Errorf("duplicate name: %q", name)}
|
||||
}
|
||||
value, err := strconv.Atoi(row[1])
|
||||
if err != nil {
|
||||
line, col := r.FieldPos(1)
|
||||
return m, &dataError{filename, line, col, fmt.Errorf("invalid value: %v", err)}
|
||||
}
|
||||
m.names[name] = value
|
||||
if _, e := m.values[value]; !e {
|
||||
m.values[value] = name
|
||||
}
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
type charmapinfo struct {
|
||||
name string
|
||||
filename string
|
||||
id string
|
||||
script int
|
||||
regions []int
|
||||
data []byte
|
||||
}
|
||||
|
||||
// readCharmaps reads and parses the charmaps.csv file.
|
||||
func readCharmaps(srcdir, filename string, scripts, regions map[string]int) ([]charmapinfo, error) {
|
||||
fp, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer fp.Close()
|
||||
r := csv.NewReader(fp)
|
||||
r.ReuseRecord = true
|
||||
if err := readHeader(filename, r, "name", "file", "script", "regions"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var arr []charmapinfo
|
||||
gcharmaps := make(map[int]int)
|
||||
type key struct {
|
||||
script int
|
||||
region int
|
||||
}
|
||||
rcharmaps := make(map[key]int)
|
||||
for {
|
||||
row, err := r.Read()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
if len(row) < 3 {
|
||||
line, _ := r.FieldPos(0)
|
||||
return nil, &dataError{filename, line, 0, errors.New("expected at least three columns")}
|
||||
}
|
||||
index := len(arr)
|
||||
ifo := charmapinfo{
|
||||
name: row[0],
|
||||
filename: strings.ToLower(strings.TrimSuffix(row[1], ".TXT")),
|
||||
id: makeID(row[0]),
|
||||
}
|
||||
file := row[1]
|
||||
sname := row[2]
|
||||
var e bool
|
||||
ifo.script, e = scripts[sname]
|
||||
if !e {
|
||||
line, col := r.FieldPos(2)
|
||||
return nil, &dataError{filename, line, col, fmt.Errorf("unknown script: %q", sname)}
|
||||
}
|
||||
if len(row) >= 4 && row[3] != "" {
|
||||
sregions := strings.Split(row[3], ";")
|
||||
ifo.regions = make([]int, 0, len(sregions))
|
||||
for _, s := range sregions {
|
||||
rg, e := regions[s]
|
||||
if !e {
|
||||
line, col := r.FieldPos(3)
|
||||
return nil, &dataError{filename, line, col, fmt.Errorf("unknown region: %q", s)}
|
||||
}
|
||||
k := key{ifo.script, rg}
|
||||
switch omap, e := rcharmaps[k]; {
|
||||
case !e:
|
||||
rcharmaps[k] = index
|
||||
ifo.regions = append(ifo.regions, rg)
|
||||
case omap != index:
|
||||
line, _ := r.FieldPos(0)
|
||||
return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previous charmaps: %q", arr[omap].name)}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if omap, e := gcharmaps[ifo.script]; e {
|
||||
line, _ := r.FieldPos(0)
|
||||
return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previous charmaps: %q", arr[omap].name)}
|
||||
}
|
||||
}
|
||||
if file != "" {
|
||||
cm, err := charmap.ReadFile(filepath.Join(srcdir, "charmap", file))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t, err := table.Create(cm)
|
||||
if err != nil {
|
||||
if e, ok := err.(*table.UnsupportedError); ok {
|
||||
if !flagQuiet {
|
||||
fmt.Fprintf(os.Stderr, "Warning: unsupported charmap %q: %s\n", file, e.Message)
|
||||
}
|
||||
continue
|
||||
}
|
||||
return nil, fmt.Errorf("%s: %v", file, err)
|
||||
}
|
||||
ifo.data = t.Data()
|
||||
}
|
||||
arr = append(arr, ifo)
|
||||
}
|
||||
return arr, nil
|
||||
}
|
||||
|
||||
type scriptdata struct {
|
||||
scripts constmap
|
||||
regions constmap
|
||||
charmaps []charmapinfo
|
||||
}
|
||||
|
||||
func readData(srcdir string) (d scriptdata, err error) {
|
||||
d.scripts, err = readConsts(filepath.Join(srcdir, "scripts/script.csv"))
|
||||
if err != nil {
|
||||
return d, err
|
||||
}
|
||||
d.regions, err = readConsts(filepath.Join(srcdir, "scripts/region.csv"))
|
||||
if err != nil {
|
||||
return d, err
|
||||
}
|
||||
d.charmaps, err = readCharmaps(srcdir, filepath.Join(srcdir, "scripts/charmap.csv"), d.scripts.names, d.regions.names)
|
||||
return
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
module moria.us/macroman
|
||||
module moria.us/macscript
|
||||
|
||||
go 1.16
|
||||
|
||||
|
|
258
gen/macroman.go
|
@ -1,258 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// Command-line flags that enable diagnostic output.
var (
	flagDumpSequences   bool
	flagDumpTransitions bool
)

// init registers the diagnostic flags; both default to false.
func init() {
	boolFlags := []struct {
		dest  *bool
		name  string
		usage string
	}{
		{&flagDumpSequences, "dump-sequences", "dump Unicode sequences"},
		{&flagDumpTransitions, "dump-transitions", "dump state machine state transition tables"},
	}
	for _, f := range boolFlags {
		flag.BoolVar(f.dest, f.name, false, f.usage)
	}
}
|
||||
|
||||
// characters gives, for each byte value of the Macintosh encoding, the
// corresponding 16-bit Unicode code point. It is filled in by init.
var characters [256]uint16

// init fills in the characters table. The lower half maps each byte to
// itself, the upper half comes from the literal table below, and finally the
// entry for '\n' is replaced with '\r'.
func init() {
	for i := range characters[:128] {
		characters[i] = uint16(i)
	}
	copy(characters[128:], []uint16{
		0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
		0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
		0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
		0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
		0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
		0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
		0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
		0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
		0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
		0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
		0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
		0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
		0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
		0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
		0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
		0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
	})
	characters['\n'] = '\r'
}
|
||||
|
||||
type state struct {
|
||||
chars [256]uint8
|
||||
states [256]*state
|
||||
}
|
||||
|
||||
func genStates() *state {
|
||||
root := new(state)
|
||||
// Iterate over each Unicode normalization form.
|
||||
// Omit norm.NFKC, norm.NFKD
|
||||
for _, form := range []norm.Form{norm.NFC, norm.NFD} {
|
||||
// Iterate over Macintosh, Unicode characters.
|
||||
for m, u := range characters {
|
||||
st := root
|
||||
bytes := []byte(form.String(string(rune(u))))
|
||||
for _, b := range bytes[:len(bytes)-1] {
|
||||
ost := st
|
||||
st = st.states[b]
|
||||
if st == nil {
|
||||
st = new(state)
|
||||
ost.states[b] = st
|
||||
}
|
||||
}
|
||||
b := bytes[len(bytes)-1]
|
||||
if st.chars[b] == 0 {
|
||||
st.chars[b] = uint8(m)
|
||||
if flagDumpSequences {
|
||||
fmt.Fprintf(os.Stderr, "%02x: %x\n", m, bytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return root
|
||||
}
|
||||
|
||||
func (s *state) count() int {
|
||||
n := 1
|
||||
for _, s := range s.states {
|
||||
if s != nil {
|
||||
n += s.count()
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (s *state) writeTable(table []uint16, pos int) int {
|
||||
data := table[pos*256 : pos*256+256 : pos*256+256]
|
||||
pos++
|
||||
for i, c := range s.chars {
|
||||
data[i] = uint16(c)
|
||||
}
|
||||
for i, c := range s.states {
|
||||
if c != nil {
|
||||
data[i] |= uint16(pos << 8)
|
||||
pos = c.writeTable(table, pos)
|
||||
}
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
func (s *state) genTable() []uint16 {
|
||||
n := s.count()
|
||||
table := make([]uint16, 256*n)
|
||||
pos := s.writeTable(table, 0)
|
||||
if pos != n {
|
||||
panic("bad table")
|
||||
}
|
||||
return table
|
||||
}
|
||||
|
||||
// dumpTransitions prints every non-zero entry of the transition table to
// standard error, grouped by state (256 entries per state). For each entry it
// shows the target state (high byte) and output character (low byte) when
// they are non-zero.
func dumpTransitions(table []uint16) {
	for i, n := 0, len(table)/256; i < n; i++ {
		row := table[i*256 : i*256+256]
		fmt.Fprintf(os.Stderr, "State $%02x\n", i)
		for m, v := range row {
			if v == 0 {
				continue
			}
			fmt.Fprintf(os.Stderr, " $%02x ->", m)
			if next := v >> 8; next != 0 {
				fmt.Fprintf(os.Stderr, " state $%02x", next)
			}
			if chr := v & 0xff; chr != 0 {
				fmt.Fprintf(os.Stderr, " char $%02x", chr)
			}
			fmt.Fprintln(os.Stderr)
		}
		fmt.Fprintln(os.Stderr)
	}
}
|
||||
|
||||
// tableToBytes serializes t as a byte slice, writing each entry as two bytes
// with the high byte first.
func tableToBytes(t []uint16) []byte {
	out := make([]byte, 0, 2*len(t))
	for _, word := range t {
		out = append(out, byte(word>>8), byte(word))
	}
	return out
}
|
||||
|
||||
// getRun returns the run that starts at the beginning of bytes.
//
// If the first two or more bytes are equal, repeat is true and run covers all
// of the leading equal bytes. Otherwise repeat is false and run is the longest
// prefix that stops just before the next pair of equal adjacent bytes (or the
// whole input if there is no such pair). Empty input yields (false, nil).
func getRun(bytes []byte) (repeat bool, run []byte) {
	if len(bytes) == 0 {
		return false, nil
	}
	same := 1
	for same < len(bytes) && bytes[same] == bytes[0] {
		same++
	}
	if same > 1 {
		return true, bytes[:same]
	}
	// bytes[0] != bytes[1] here, so the earliest possible pair starts at index 1.
	for i := 2; i < len(bytes); i++ {
		if bytes[i] == bytes[i-1] {
			return false, bytes[:i-1]
		}
	}
	return false, bytes
}

// packBits run-length encodes bytes. Runs are capped at 128 bytes. A repeated
// run is written as the header byte(1-len) followed by the repeated value; a
// literal run is written as the header byte(len-1) followed by its bytes.
func packBits(bytes []byte) []byte {
	var packed []byte
	for rest := bytes; len(rest) > 0; {
		repeat, run := getRun(rest)
		if len(run) > 128 {
			run = run[:128]
		}
		switch {
		case repeat:
			packed = append(packed, byte(1-len(run)), run[0])
		default:
			packed = append(packed, byte(len(run)-1))
			packed = append(packed, run...)
		}
		rest = rest[len(run):]
	}
	return packed
}
|
||||
|
||||
// printTable writes table to standard output as the C array kFromUnixTable,
// sixteen decimal entries per line. It returns the first write error.
func printTable(table []uint16) error {
	_, err := fmt.Print("static const unsigned short kFromUnixTable[] = {")
	if err != nil {
		return err
	}
	for i, n := range table {
		// Start a new line before every sixteenth entry.
		if i%16 == 0 {
			if _, err = fmt.Println(); err != nil {
				return err
			}
		}
		if _, err = fmt.Printf("%d,", n); err != nil {
			return err
		}
	}
	_, err = fmt.Print("\n};\n")
	return err
}
|
||||
|
||||
// printData writes the packed table to f as a C source fragment: a
// generated-file banner, the FROM_UNIX_DATALEN macro set to ulen, and the
// kFromUnixData array holding data as comma-separated decimal values.
//
// Array lines are wrapped so that none is longer than 80 characters. The
// first write error is returned.
func printData(f *os.File, ulen int, data []byte) error {
	if _, err := fmt.Fprint(f, "/* This file is automatically generated. */\n"+
		"// clang-format off\n"); err != nil {
		return err
	}
	if _, err := fmt.Fprintf(f, "#define FROM_UNIX_DATALEN %d\n", ulen); err != nil {
		return err
	}
	if _, err := fmt.Fprintf(f, "static const unsigned char kFromUnixData[%d] = {\n", len(data)); err != nil {
		return err
	}
	var line []byte
	for _, n := range data {
		// sv is the line length before this value, so the value can be
		// backed out again if it pushes the line past the limit.
		sv := len(line)
		line = strconv.AppendUint(line, uint64(n), 10)
		line = append(line, ',')
		if len(line) > 80 {
			line = append(line[:sv], '\n')
			if _, err := f.Write(line); err != nil {
				return err
			}
			// Start the next line with the value that did not fit.
			line = strconv.AppendUint(line[:0], uint64(n), 10)
			line = append(line, ',')
		}
	}
	line = append(line, '\n')
	if _, err := f.Write(line); err != nil {
		return err
	}
	// The terminator must go to f like everything else, not to stdout.
	_, err := fmt.Fprint(f, "};\n")
	return err
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
root := genStates()
|
||||
table := root.genTable()
|
||||
if flagDumpTransitions {
|
||||
dumpTransitions(table)
|
||||
}
|
||||
bytes := tableToBytes(table)
|
||||
// printTable(table)
|
||||
bits := packBits(bytes)
|
||||
if err := printData(os.Stdout, len(bytes), bits); err != nil {
|
||||
fmt.Fprintln(os.Stderr, "Error:", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
105
gen/main.go
Normal file
|
@ -0,0 +1,105 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
const (
	// header is the banner placed at the top of generated files.
	header = "/* This file is automatically generated. */\n"
	// srcdirname is the directory, relative to the source root, that is used
	// as the default output location.
	srcdirname = "convert"
)

// Values of the command-line flags registered in main.
var (
	flagDest   string
	flagSrc    string
	flagQuiet  bool
	flagFormat bool
)

// getSrcdir locates the source directory. It prefers the -src flag, then the
// BUILD_WORKSPACE_DIRECTORY environment variable, and finally falls back to
// the parent of the directory containing the running executable.
func getSrcdir() (string, error) {
	if flagSrc != "" {
		return flagSrc, nil
	}
	if ws := os.Getenv("BUILD_WORKSPACE_DIRECTORY"); ws != "" {
		return ws, nil
	}
	exe, err := os.Executable()
	if err != nil {
		return "", err
	}
	binDir := filepath.Dir(exe)
	return filepath.Dir(binDir), nil
}
|
||||
|
||||
func mainE() error {
|
||||
srcdir, err := getSrcdir()
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not find source dir: %v", err)
|
||||
}
|
||||
destdir := flagDest
|
||||
if destdir == "" {
|
||||
destdir = filepath.Join(srcdir, srcdirname)
|
||||
}
|
||||
|
||||
// Read metadata.
|
||||
d, err := readData(srcdir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Compile and emit charmap data.
|
||||
var hascmap bool
|
||||
for _, c := range d.charmaps {
|
||||
if len(c.data) != 0 {
|
||||
name := "charmap_" + c.filename + ".dat"
|
||||
fpath := filepath.Join(destdir, name)
|
||||
if !flagQuiet {
|
||||
fmt.Fprintln(os.Stderr, "Writing:", fpath)
|
||||
}
|
||||
if err := ioutil.WriteFile(fpath, c.data, 0666); err != nil {
|
||||
return err
|
||||
}
|
||||
hascmap = true
|
||||
}
|
||||
}
|
||||
if !hascmap {
|
||||
return errors.New("could not compile any character map")
|
||||
}
|
||||
|
||||
// Write generated output.
|
||||
m := genMap(&d)
|
||||
if err := writeMap(&d, m, filepath.Join(destdir, "charmap_region.c")); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeInfo(&d, filepath.Join(destdir, "charmap_info.c")); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeData(&d, filepath.Join(destdir, "charmap_data.c")); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeRez(&d, filepath.Join(destdir, "charmap.r")); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.StringVar(&flagDest, "dest", "", "output directory")
|
||||
flag.StringVar(&flagSrc, "src", "", "source directory")
|
||||
flag.BoolVar(&flagQuiet, "quiet", false, "only output error messages")
|
||||
flag.BoolVar(&flagFormat, "format", true, "run clang-format on C output")
|
||||
flag.Parse()
|
||||
if args := flag.Args(); len(args) != 0 {
|
||||
fmt.Fprintf(os.Stderr, "Error: unexpected argument: %q\n", args[0])
|
||||
os.Exit(2)
|
||||
}
|
||||
if err := mainE(); err != nil {
|
||||
fmt.Fprintln(os.Stderr, "Error:", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
72
gen/rez.go
Normal file
|
@ -0,0 +1,72 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
// writeStrings emits a 'STR#' resource definition in Rez syntax to w. rsrc is
// inserted verbatim between the parentheses of the resource header, and each
// element of strs becomes one quoted, comma-separated entry on its own line.
// Write errors are left to surface from w's Flush.
func writeStrings(w *bufio.Writer, rsrc string, strs []string) {
	fmt.Fprintf(w, "resource 'STR#' (%s) {{\n", rsrc)
	last := len(strs) - 1
	for i, s := range strs {
		// Every entry except the final one is followed by a comma.
		sep := ",\n"
		if i == last {
			sep = "\n"
		}
		fmt.Fprintf(w, "\t%q%s", s, sep)
	}
	w.WriteString("}};\n")
}
|
||||
|
||||
func charmapNames(d *scriptdata) []string {
|
||||
r := make([]string, len(d.charmaps))
|
||||
for i, c := range d.charmaps {
|
||||
r[i] = c.name
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func constStrings(c *constmap) []string {
|
||||
var n int
|
||||
for i := range c.values {
|
||||
if i >= n {
|
||||
n = i + 1
|
||||
}
|
||||
}
|
||||
r := make([]string, n)
|
||||
for i, name := range c.values {
|
||||
r[i] = name
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func writeRez(d *scriptdata, filename string) error {
|
||||
if !flagQuiet {
|
||||
fmt.Fprintln(os.Stderr, "Writing:", filename)
|
||||
}
|
||||
fp, err := os.Create(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fp.Close()
|
||||
w := bufio.NewWriter(fp)
|
||||
|
||||
w.WriteString(header)
|
||||
w.WriteString(
|
||||
"#include \"resources.h\"\n" +
|
||||
"#include \"MacTypes.r\"\n")
|
||||
writeStrings(w, `rSTRS_Charmaps, "Character Maps"`, charmapNames(d))
|
||||
writeStrings(w, `rSTRS_Scripts, "Scripts"`, constStrings(&d.scripts))
|
||||
writeStrings(w, `rSTRS_Regions, "Regions"`, constStrings(&d.regions))
|
||||
for i, cm := range d.charmaps {
|
||||
if cm.filename != "" {
|
||||
fmt.Fprintf(w, "read 'cmap' (%d, %q) %q;\n", 128+i, cm.name, "charmap_"+cm.filename+".dat")
|
||||
}
|
||||
}
|
||||
|
||||
if err := w.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
return fp.Close()
|
||||
}
|
117
gen/scriptmap.go
Normal file
|
@ -0,0 +1,117 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// regionmap associates a set of region codes with the index of a charmap. An
// empty regions list is treated by the sort order below as the catch-all
// entry.
type regionmap struct {
	regions []int
	charmap int
}

// regionmaps implements sort.Interface. Entries are ordered by their first
// region code; entries with no regions sort after all others.
type regionmaps []regionmap

func (s regionmaps) Len() int      { return len(s) }
func (s regionmaps) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// Less reports whether s[i] must sort before s[j]. The empty case for s[i] is
// tested first so that two entries without regions compare as equal; testing
// s[j] first would report each as less than the other, which sort.Sort does
// not permit.
func (s regionmaps) Less(i, j int) bool {
	x, y := s[i].regions, s[j].regions
	switch {
	case len(x) == 0:
		return false
	case len(y) == 0:
		return true
	default:
		return x[0] < y[0]
	}
}

// scriptmap groups the region mappings that belong to one script code.
type scriptmap struct {
	script  int
	regions []regionmap
}

// scriptmaps implements sort.Interface, ordering by ascending script code.
type scriptmaps []*scriptmap

func (s scriptmaps) Len() int           { return len(s) }
func (s scriptmaps) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s scriptmaps) Less(i, j int) bool { return s[i].script < s[j].script }
|
||||
|
||||
// genMap generates a map from scripts and regions to charmaps.
|
||||
func genMap(d *scriptdata) []*scriptmap {
|
||||
m := make(map[int]*scriptmap)
|
||||
var r []*scriptmap
|
||||
for i, cm := range d.charmaps {
|
||||
s := m[cm.script]
|
||||
if s == nil {
|
||||
s = &scriptmap{script: cm.script}
|
||||
m[cm.script] = s
|
||||
r = append(r, s)
|
||||
}
|
||||
var rgs []int
|
||||
if len(cm.regions) != 0 {
|
||||
rgs = make([]int, len(cm.regions))
|
||||
copy(rgs, cm.regions)
|
||||
sort.Ints(rgs)
|
||||
}
|
||||
s.regions = append(s.regions, regionmap{
|
||||
regions: cm.regions,
|
||||
charmap: i,
|
||||
})
|
||||
}
|
||||
for _, s := range r {
|
||||
sort.Sort(regionmaps(s.regions))
|
||||
}
|
||||
sort.Sort(scriptmaps(r))
|
||||
return r
|
||||
}
|
||||
|
||||
// writeMap writes out a C function that returns the correct character map for a
|
||||
// given script and region.
|
||||
func writeMap(d *scriptdata, m []*scriptmap, filename string) error {
|
||||
s, err := createCSource(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer s.close()
|
||||
|
||||
w := s.writer
|
||||
w.WriteString(header)
|
||||
s.include("convert.h")
|
||||
w.WriteString(
|
||||
"int GetCharmap(int script, int region) {\n" +
|
||||
"switch (script) {\n")
|
||||
for _, s := range m {
|
||||
fmt.Fprintf(w, "case %d: /* %s */\n", s.script, d.scripts.values[s.script])
|
||||
if len(s.regions) == 1 && len(s.regions[0].regions) == 0 {
|
||||
r := s.regions[0]
|
||||
fmt.Fprintf(w, "return %d; /* %s */\n", r.charmap, d.charmaps[r.charmap].name)
|
||||
} else {
|
||||
w.WriteString("switch (region) {\n")
|
||||
var hasdefault bool
|
||||
for _, r := range s.regions {
|
||||
if len(r.regions) == 0 {
|
||||
w.WriteString("default:\n")
|
||||
hasdefault = true
|
||||
} else {
|
||||
for _, rg := range r.regions {
|
||||
fmt.Fprintf(w, "case %d: /* %s */\n", rg, d.regions.values[rg])
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(w, "return %d; /* %s */\n", r.charmap, d.charmaps[r.charmap].name)
|
||||
}
|
||||
if !hasdefault {
|
||||
w.WriteString("default:\nreturn -1;\n")
|
||||
}
|
||||
w.WriteString("}\n")
|
||||
}
|
||||
}
|
||||
w.WriteString(
|
||||
"default:\n" +
|
||||
"return -1;\n" +
|
||||
"}\n" +
|
||||
"}\n")
|
||||
|
||||
return s.flush()
|
||||
}
|
206
gen/source.go
Normal file
|
@ -0,0 +1,206 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
const (
|
||||
width = 80
|
||||
formatOff = "/* clang-format off */\n"
|
||||
formatOn = "/* clang-format on */\n"
|
||||
)
|
||||
|
||||
type csource struct {
|
||||
filename string
|
||||
file *os.File
|
||||
writer *bufio.Writer
|
||||
}
|
||||
|
||||
func createCSource(filename string) (s csource, err error) {
|
||||
if !flagQuiet {
|
||||
fmt.Fprintln(os.Stderr, "Writing:", filename)
|
||||
}
|
||||
|
||||
fp, err := os.Create(filename)
|
||||
if err != nil {
|
||||
return s, err
|
||||
}
|
||||
w := bufio.NewWriter(fp)
|
||||
w.WriteString(header)
|
||||
return csource{
|
||||
filename: filename,
|
||||
file: fp,
|
||||
writer: w,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *csource) close() {
|
||||
if s.file != nil {
|
||||
s.file.Close()
|
||||
s.file = nil
|
||||
}
|
||||
if s.filename != "" {
|
||||
os.Remove(s.filename)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *csource) flush() error {
|
||||
if s.file == nil {
|
||||
panic("already closed")
|
||||
}
|
||||
err := s.writer.Flush()
|
||||
s.writer = nil
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = s.file.Close()
|
||||
s.file = nil
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if flagFormat {
|
||||
cmd := exec.Command("clang-format", "-i", s.filename)
|
||||
if err := cmd.Run(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
s.filename = ""
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *csource) include(name string) {
|
||||
fmt.Fprintf(s.writer, "#include \"%s\"\n", path.Join(srcdirname, name))
|
||||
}
|
||||
|
||||
func (s *csource) bytes(data []byte, final bool) {
|
||||
if len(data) == 0 {
|
||||
return
|
||||
}
|
||||
line := make([]byte, 0, width+8)
|
||||
for i, x := range data {
|
||||
cur := line
|
||||
line = strconv.AppendUint(line, uint64(x), 10)
|
||||
if i < len(data)-1 || !final {
|
||||
line = append(line, ',')
|
||||
}
|
||||
if len(line) > width-4 {
|
||||
s.writer.WriteString("\n\t")
|
||||
s.writer.Write(cur)
|
||||
nline := line[len(cur):]
|
||||
copy(line, nline)
|
||||
line = line[:len(nline)]
|
||||
}
|
||||
}
|
||||
s.writer.WriteString("\n\t")
|
||||
s.writer.Write(line)
|
||||
}
|
||||
|
||||
func (s *csource) ints(data []int) {
|
||||
if len(data) == 0 {
|
||||
return
|
||||
}
|
||||
line := make([]byte, 0, width+16)
|
||||
for i, x := range data {
|
||||
cur := line
|
||||
line = strconv.AppendInt(line, int64(x), 10)
|
||||
if i < len(data)-1 {
|
||||
line = append(line, ',')
|
||||
}
|
||||
if len(line) > width-4 {
|
||||
s.writer.WriteString("\n\t")
|
||||
s.writer.Write(cur)
|
||||
nline := line[len(cur):]
|
||||
copy(line, nline)
|
||||
line = line[:len(nline)]
|
||||
}
|
||||
}
|
||||
s.writer.WriteString("\n\t")
|
||||
s.writer.Write(line)
|
||||
}
|
||||
|
||||
func (s *csource) strings(data []string) {
|
||||
for i, x := range data {
|
||||
s.writer.WriteString("\n\t\"")
|
||||
var last byte
|
||||
for _, c := range []byte(x) {
|
||||
if 32 <= c && c <= 126 {
|
||||
if c == '\\' || c == '"' {
|
||||
s.writer.WriteByte('\\')
|
||||
} else if '0' <= c && c <= '9' && last == 0 && i == 0 {
|
||||
s.writer.WriteString("00")
|
||||
}
|
||||
s.writer.WriteByte(c)
|
||||
} else {
|
||||
var e string
|
||||
switch c {
|
||||
case 0:
|
||||
e = `\0`
|
||||
case '\t':
|
||||
e = `\t`
|
||||
case '\n':
|
||||
e = `\n`
|
||||
case '\r':
|
||||
e = `\r`
|
||||
}
|
||||
if e == "" {
|
||||
fmt.Fprintf(s.writer, "\\x%02x", c)
|
||||
} else {
|
||||
s.writer.WriteString(e)
|
||||
}
|
||||
}
|
||||
last = c
|
||||
}
|
||||
if i < len(data)-1 {
|
||||
s.writer.WriteString(`\0`)
|
||||
}
|
||||
s.writer.WriteByte('"')
|
||||
}
|
||||
}
|
||||
|
||||
// intType returns the name of the narrowest of the UInt8, UInt16 and UInt32
// types whose maximum is at least maxval.
func intType(maxval int) string {
	switch {
	case maxval <= math.MaxUint8:
		return "UInt8"
	case maxval <= math.MaxUint16:
		return "UInt16"
	default:
		return "UInt32"
	}
}

// arrayIntType returns the type name intType selects for the largest element
// of arr. The maximum starts at zero, so an empty array yields "UInt8".
func arrayIntType(arr []int) string {
	largest := 0
	for i := range arr {
		if arr[i] > largest {
			largest = arr[i]
		}
	}
	return intType(largest)
}
|
||||
|
||||
// stringtable collects distinct strings and assigns each the byte offset it
// would have if all strings were laid out one after another, each followed by
// one extra byte.
type stringtable struct {
	data    []string       // distinct strings in insertion order
	offset  int            // offset that the next new string will receive
	offsets map[string]int // offset assigned to each string seen so far
}

// newStringtable returns an empty stringtable that is ready for use.
func newStringtable() (s stringtable) {
	return stringtable{offsets: make(map[string]int)}
}

// add returns the offset of s in the table. A string that has not been seen
// before is appended first, advancing the running offset by len(s)+1.
func (t *stringtable) add(s string) int {
	offset, seen := t.offsets[s]
	if !seen {
		offset = t.offset
		t.offsets[s] = offset
		t.data = append(t.data, s)
		t.offset = offset + len(s) + 1
	}
	return offset
}
|
12
gen/table/BUILD.bazel
Normal file
|
@ -0,0 +1,12 @@
|
|||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "table",
|
||||
srcs = ["table.go"],
|
||||
importpath = "moria.us/macscript/table",
|
||||
visibility = ["//gen:__subpackages__"],
|
||||
deps = [
|
||||
"//gen/charmap",
|
||||
"@org_golang_x_text//unicode/norm:go_default_library",
|
||||
],
|
||||
)
|