mirror of
https://github.com/depp/syncfiles.git
synced 2024-11-24 17:31:40 +00:00
Take Mac OS text encoding constants from headers
Among other uses, these constants are used on HFS+ volumes to choose which encoding to use when converting Unicode filenames back to an 8-bit string for old Mac OS APIs. This is documented in Technical Note TN1150 ("HFS Plus Volume Format") under "Text Encodings": https://developer.apple.com/library/archive/technotes/tn/tn1150.html
This commit is contained in:
parent
3dcda5579e
commit
4b20b30330
41
scripts/encoding.csv
Normal file
41
scripts/encoding.csv
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
Name,Value
|
||||||
|
kTextEncodingMacRoman,0
|
||||||
|
kTextEncodingMacJapanese,1
|
||||||
|
kTextEncodingMacChineseTrad,2
|
||||||
|
kTextEncodingMacKorean,3
|
||||||
|
kTextEncodingMacArabic,4
|
||||||
|
kTextEncodingMacHebrew,5
|
||||||
|
kTextEncodingMacGreek,6
|
||||||
|
kTextEncodingMacCyrillic,7
|
||||||
|
kTextEncodingMacDevanagari,9
|
||||||
|
kTextEncodingMacGurmukhi,10
|
||||||
|
kTextEncodingMacGujarati,11
|
||||||
|
kTextEncodingMacOriya,12
|
||||||
|
kTextEncodingMacBengali,13
|
||||||
|
kTextEncodingMacTamil,14
|
||||||
|
kTextEncodingMacTelugu,15
|
||||||
|
kTextEncodingMacKannada,16
|
||||||
|
kTextEncodingMacMalayalam,17
|
||||||
|
kTextEncodingMacSinhalese,18
|
||||||
|
kTextEncodingMacBurmese,19
|
||||||
|
kTextEncodingMacKhmer,20
|
||||||
|
kTextEncodingMacThai,21
|
||||||
|
kTextEncodingMacLaotian,22
|
||||||
|
kTextEncodingMacGeorgian,23
|
||||||
|
kTextEncodingMacArmenian,24
|
||||||
|
kTextEncodingMacChineseSimp,25
|
||||||
|
kTextEncodingMacTibetan,26
|
||||||
|
kTextEncodingMacMongolian,27
|
||||||
|
kTextEncodingMacEthiopic,28
|
||||||
|
kTextEncodingMacCentralEurRoman,29
|
||||||
|
kTextEncodingMacVietnamese,30
|
||||||
|
kTextEncodingMacExtArabic,31
|
||||||
|
kTextEncodingMacSymbol,33
|
||||||
|
kTextEncodingMacDingbats,34
|
||||||
|
kTextEncodingMacTurkish,35
|
||||||
|
kTextEncodingMacCroatian,36
|
||||||
|
kTextEncodingMacIcelandic,37
|
||||||
|
kTextEncodingMacRomanian,38
|
||||||
|
kTextEncodingMacCeltic,39
|
||||||
|
kTextEncodingMacGaelic,40
|
||||||
|
kTextEncodingMacKeyboardGlyphs,41
|
|
@ -1,12 +1,21 @@
|
|||||||
"""Extract script and region constants from Script.h."""
|
"""Extract script and region constants from Script.h."""
|
||||||
import csv
|
import csv
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from typing import List, Tuple
|
from typing import Iterator, List, Tuple
|
||||||
|
|
||||||
Item = Tuple[str, int]
|
Item = Tuple[str, int]
|
||||||
|
|
||||||
|
def list_enums(filename: str) -> Iterator[Item]:
    """List enum definitions in a file.

    Scans the file for lines of the form ``NAME = VALUE`` and yields one
    ``(name, value)`` pair per match, in file order. VALUE may be a decimal
    literal or a C hexadecimal literal such as ``0x7E``.

    Arguments:
      filename: Path of the header file to scan.

    Yields:
      Tuples of the enumerator name (decoded as ASCII) and its integer value.
    """
    with open(filename, 'rb') as fp:
        data = fp.read()
    pattern = rb'^\s*(\w+)\s*=\s*(0[xX][0-9a-fA-F]+|\d+)'
    for match in re.finditer(pattern, data, re.MULTILINE):
        name, value = match.groups()
        # A bare \d+ would read "0x7E" as 0, so hexadecimal literals are
        # matched whole and converted with base 16. Plain digit runs are
        # still read as decimal.
        base = 16 if value[:2].lower() == b'0x' else 10
        yield name.decode('ASCII'), int(value, base)
|
||||||
|
|
||||||
def index_of(data: List[Item], key: str) -> int:
|
def index_of(data: List[Item], key: str) -> int:
|
||||||
for i, (name, _) in enumerate(data):
|
for i, (name, _) in enumerate(data):
|
||||||
if name == key:
|
if name == key:
|
||||||
@ -17,28 +26,51 @@ def slice(data: List[Item], first: str, last: str) -> List[Item]:
|
|||||||
return data[index_of(data, first):index_of(data, last)+1]
|
return data[index_of(data, first):index_of(data, last)+1]
|
||||||
|
|
||||||
def write_csv(fname: str, data: List[Item]) -> None:
    """Write items to a CSV file under a ``Name,Value`` header row.

    A progress message naming the file is printed to standard error before
    the file is written.

    Arguments:
      fname: Path of the CSV file to create or overwrite.
      data: The (name, value) pairs to write, one per row.
    """
    print('Writing', fname, file=sys.stderr)
    # newline='' hands line-ending control to the csv module; without it,
    # platforms that translate '\n' on write would emit '\r\r\n'.
    with open(fname, 'w', newline='') as fp:
        w = csv.writer(fp)
        w.writerow(['Name', 'Value'])
        w.writerows(data)
|
||||||
|
|
||||||
def process_script(filename: str) -> None:
    """Generate script.csv and region.csv from a Script.h header.

    Enumerators whose names start with ``sm`` are collected as scripts and
    those starting with ``ver`` as regions. Each list is then cut down to
    the inclusive range between its first and last expected constant and
    written to a CSV file in the current directory.

    Arguments:
      filename: Path of the Script.h header to read.
    """
    enums = list(list_enums(filename))
    scripts: List[Item] = [
        (name, value) for name, value in enums if name.startswith('sm')
    ]
    regions: List[Item] = [
        (name, value) for name, value in enums if name.startswith('ver')
    ]
    write_csv('script.csv', slice(scripts, 'smRoman', 'smUninterp'))
    write_csv('region.csv', slice(regions, 'verUS', 'verGreenland'))
|
||||||
|
|
||||||
|
def process_textcommon(filename: str) -> None:
    """Generate encoding.csv from a TextCommon.h header.

    Keeps the enumerators whose names start with ``kTextEncoding``, cuts
    that list down to the inclusive range from kTextEncodingMacRoman to
    kTextEncodingMacKeyboardGlyphs, and writes it to encoding.csv in the
    current directory.

    Arguments:
      filename: Path of the TextCommon.h header to read.
    """
    encodings: List[Item] = [
        (name, value)
        for name, value in list_enums(filename)
        if name.startswith('kTextEncoding')
    ]
    selected = slice(encodings, 'kTextEncodingMacRoman',
                     'kTextEncodingMacKeyboardGlyphs')
    write_csv('encoding.csv', selected)
|
||||||
|
|
||||||
|
def process(filename: str) -> None:
    """Process one header file, chosen by its base name.

    The base name is compared case-insensitively: ``script.h`` is handled
    by process_script and ``textcommon.h`` by process_textcommon.

    Arguments:
      filename: Path of the header file to process.

    Raises:
      SystemExit: With status 1, after printing an error to standard error,
        if the base name is not one of the recognized headers.
    """
    basename = os.path.basename(filename).lower()
    if basename == 'script.h':
        process_script(filename)
        return
    if basename == 'textcommon.h':
        process_textcommon(filename)
        return
    print('Error: unknown header file:', repr(filename), file=sys.stderr)
    raise SystemExit(1)
|
||||||
|
|
||||||
|
def main(argv: List[str]) -> None:
    """Run the generator over each header file named on the command line.

    Arguments:
      argv: Command-line arguments, excluding the program name.

    Raises:
      SystemExit: With status 2, after writing a usage message to standard
        error, if no arguments are given.
    """
    if argv:
        for path in argv:
            process(path)
        return
    usage = ('Usage: script_gen.py [<file.h>...]\n'
             'This will read Script.h and TextCommon.h\n')
    sys.stderr.write(usage)
    raise SystemExit(2)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(sys.argv[1:])
|
main(sys.argv[1:])
|
||||||
|
Loading…
Reference in New Issue
Block a user