mirror of
https://github.com/depp/syncfiles.git
synced 2024-11-24 17:31:40 +00:00
Take Mac OS text encoding constants from headers
Among other uses, these constants are used on HFS+ volumes to choose which encoding to use when converting Unicode filenames back to an 8-bit string for old Mac OS APIs. This is documented in TN 1150 under "Text Encodings". https://developer.apple.com/library/archive/technotes/tn/tn1150.html
This commit is contained in:
parent
3dcda5579e
commit
4b20b30330
41
scripts/encoding.csv
Normal file
41
scripts/encoding.csv
Normal file
@ -0,0 +1,41 @@
|
||||
Name,Value
|
||||
kTextEncodingMacRoman,0
|
||||
kTextEncodingMacJapanese,1
|
||||
kTextEncodingMacChineseTrad,2
|
||||
kTextEncodingMacKorean,3
|
||||
kTextEncodingMacArabic,4
|
||||
kTextEncodingMacHebrew,5
|
||||
kTextEncodingMacGreek,6
|
||||
kTextEncodingMacCyrillic,7
|
||||
kTextEncodingMacDevanagari,9
|
||||
kTextEncodingMacGurmukhi,10
|
||||
kTextEncodingMacGujarati,11
|
||||
kTextEncodingMacOriya,12
|
||||
kTextEncodingMacBengali,13
|
||||
kTextEncodingMacTamil,14
|
||||
kTextEncodingMacTelugu,15
|
||||
kTextEncodingMacKannada,16
|
||||
kTextEncodingMacMalayalam,17
|
||||
kTextEncodingMacSinhalese,18
|
||||
kTextEncodingMacBurmese,19
|
||||
kTextEncodingMacKhmer,20
|
||||
kTextEncodingMacThai,21
|
||||
kTextEncodingMacLaotian,22
|
||||
kTextEncodingMacGeorgian,23
|
||||
kTextEncodingMacArmenian,24
|
||||
kTextEncodingMacChineseSimp,25
|
||||
kTextEncodingMacTibetan,26
|
||||
kTextEncodingMacMongolian,27
|
||||
kTextEncodingMacEthiopic,28
|
||||
kTextEncodingMacCentralEurRoman,29
|
||||
kTextEncodingMacVietnamese,30
|
||||
kTextEncodingMacExtArabic,31
|
||||
kTextEncodingMacSymbol,33
|
||||
kTextEncodingMacDingbats,34
|
||||
kTextEncodingMacTurkish,35
|
||||
kTextEncodingMacCroatian,36
|
||||
kTextEncodingMacIcelandic,37
|
||||
kTextEncodingMacRomanian,38
|
||||
kTextEncodingMacCeltic,39
|
||||
kTextEncodingMacGaelic,40
|
||||
kTextEncodingMacKeyboardGlyphs,41
|
|
@ -1,12 +1,21 @@
|
||||
"""Extract script and region constants from Script.h."""
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from typing import List, Tuple
|
||||
from typing import Iterator, List, Tuple
|
||||
|
||||
Item = Tuple[str, int]
|
||||
|
||||
def list_enums(filename: str) -> Iterator[Item]:
    """List enum definitions in a file.

    The file is read as bytes and scanned for lines of the form
    ``name = <integer>`` (leading whitespace allowed). One ``(name, value)``
    pair is yielded per match, in the order the matches appear in the file.

    Both decimal literals (``12``) and C hexadecimal literals (``0x7E``) are
    recognized. Anything else after the ``=`` (negative numbers, casts,
    expressions) is not matched and the line is skipped.

    Args:
        filename: Path of the header file to scan.

    Yields:
        Tuples of the enum name (decoded as ASCII) and its integer value.
    """
    with open(filename, 'rb') as fp:
        data = fp.read()
    # The hexadecimal alternative must come first: with a bare ``\d+`` a value
    # such as ``0x7E`` would match only its leading ``0`` and be reported as 0.
    pattern = rb'^\s*(\w+)\s*=\s*(0[xX][0-9A-Fa-f]+|\d+)'
    for match in re.finditer(pattern, data, re.MULTILINE):
        name, value = match.groups()
        # Decimal literals keep base 10 so a leading zero is not read as octal.
        base = 16 if value[:2].lower() == b'0x' else 10
        yield name.decode('ASCII'), int(value, base)
|
||||
|
||||
def index_of(data: List[Item], key: str) -> int:
|
||||
for i, (name, _) in enumerate(data):
|
||||
if name == key:
|
||||
@ -17,28 +26,51 @@ def slice(data: List[Item], first: str, last: str) -> List[Item]:
|
||||
return data[index_of(data, first):index_of(data, last)+1]
|
||||
|
||||
def write_csv(fname: str, data: List[Item]) -> None:
    """Write name/value pairs to a CSV file with a ``Name,Value`` header.

    A progress line naming the output file is printed to standard error
    before the file is written. An existing file at ``fname`` is overwritten.

    Args:
        fname: Path of the CSV file to create.
        data: Rows to write, one ``(name, value)`` pair per row.
    """
    print('Writing', fname, file=sys.stderr)
    # The csv module requires files to be opened with newline=''; otherwise
    # platforms that translate newlines turn the writer's '\r\n' line
    # terminator into '\r\r\n'.
    with open(fname, 'w', newline='') as fp:
        w = csv.writer(fp)
        w.writerow(['Name', 'Value'])
        w.writerows(data)
|
||||
|
||||
def main(argv: List[str]) -> None:
|
||||
if len(argv) != 1:
|
||||
print('usage: script_gen.py <Script.h>', file=sys.stderr)
|
||||
raise SystemExit(2)
|
||||
with open(argv[0], 'rb') as fp:
|
||||
data = fp.read()
|
||||
def process_script(filename: str) -> None:
|
||||
scripts: List[Item] = []
|
||||
regions: List[Item] = []
|
||||
for item in re.finditer(rb'^\s*(\w+)\s*=\s*(\d+)', data, re.MULTILINE):
|
||||
name, value = item.groups()
|
||||
if name.startswith(b'sm'):
|
||||
scripts.append((name.decode('ASCII'), int(value)))
|
||||
elif name.startswith(b'ver'):
|
||||
regions.append((name.decode('ASCII'), int(value)))
|
||||
for name, value in list_enums(filename):
|
||||
if name.startswith('sm'):
|
||||
scripts.append((name, value))
|
||||
elif name.startswith('ver'):
|
||||
regions.append((name, value))
|
||||
write_csv('script.csv', slice(scripts, 'smRoman', 'smUninterp'))
|
||||
write_csv('region.csv', slice(regions, 'verUS', 'verGreenland'))
|
||||
|
||||
def process_textcommon(filename: str) -> None:
    """Generate ``encoding.csv`` from the enums found in a header file.

    Only enums whose names start with ``kTextEncoding`` are considered, and
    of those only the run from ``kTextEncodingMacRoman`` through
    ``kTextEncodingMacKeyboardGlyphs`` (both included) is written out.
    """
    encodings: List[Item] = [
        entry for entry in list_enums(filename)
        if entry[0].startswith('kTextEncoding')
    ]
    selected = slice(encodings, 'kTextEncodingMacRoman',
                     'kTextEncodingMacKeyboardGlyphs')
    write_csv('encoding.csv', selected)
|
||||
|
||||
def process(filename: str) -> None:
    """Dispatch a header file to its processor based on its base name.

    The base name is compared case-insensitively: ``script.h`` goes to
    ``process_script`` and ``textcommon.h`` to ``process_textcommon``. Any
    other name prints an error to standard error and exits with status 1.
    """
    header = os.path.basename(filename).lower()
    if header == 'script.h':
        process_script(filename)
        return
    if header == 'textcommon.h':
        process_textcommon(filename)
        return
    print('Error: unknown header file:', repr(filename), file=sys.stderr)
    raise SystemExit(1)
|
||||
|
||||
def main(argv: List[str]) -> None:
    """Process every header file named on the command line.

    Args:
        argv: Command-line arguments, without the program name.

    Raises:
        SystemExit: With status 2, after printing usage to standard error,
            when no arguments are given.
    """
    if argv:
        for path in argv:
            process(path)
        return
    usage = (
        'Usage: script_gen.py [<file.h>...]\n'
        'This will read Script.h and TextCommon.h\n'
    )
    sys.stderr.write(usage)
    raise SystemExit(2)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(sys.argv[1:])
|
||||
|
Loading…
Reference in New Issue
Block a user