diff --git a/scripts/encoding.csv b/scripts/encoding.csv new file mode 100644 index 0000000..b3a77a9 --- /dev/null +++ b/scripts/encoding.csv @@ -0,0 +1,41 @@ +Name,Value +kTextEncodingMacRoman,0 +kTextEncodingMacJapanese,1 +kTextEncodingMacChineseTrad,2 +kTextEncodingMacKorean,3 +kTextEncodingMacArabic,4 +kTextEncodingMacHebrew,5 +kTextEncodingMacGreek,6 +kTextEncodingMacCyrillic,7 +kTextEncodingMacDevanagari,9 +kTextEncodingMacGurmukhi,10 +kTextEncodingMacGujarati,11 +kTextEncodingMacOriya,12 +kTextEncodingMacBengali,13 +kTextEncodingMacTamil,14 +kTextEncodingMacTelugu,15 +kTextEncodingMacKannada,16 +kTextEncodingMacMalayalam,17 +kTextEncodingMacSinhalese,18 +kTextEncodingMacBurmese,19 +kTextEncodingMacKhmer,20 +kTextEncodingMacThai,21 +kTextEncodingMacLaotian,22 +kTextEncodingMacGeorgian,23 +kTextEncodingMacArmenian,24 +kTextEncodingMacChineseSimp,25 +kTextEncodingMacTibetan,26 +kTextEncodingMacMongolian,27 +kTextEncodingMacEthiopic,28 +kTextEncodingMacCentralEurRoman,29 +kTextEncodingMacVietnamese,30 +kTextEncodingMacExtArabic,31 +kTextEncodingMacSymbol,33 +kTextEncodingMacDingbats,34 +kTextEncodingMacTurkish,35 +kTextEncodingMacCroatian,36 +kTextEncodingMacIcelandic,37 +kTextEncodingMacRomanian,38 +kTextEncodingMacCeltic,39 +kTextEncodingMacGaelic,40 +kTextEncodingMacKeyboardGlyphs,41 diff --git a/scripts/extract.py b/scripts/extract.py index 7ba8560..58f9633 100644 --- a/scripts/extract.py +++ b/scripts/extract.py @@ -1,12 +1,21 @@ """Extract script and region constants from Script.h.""" import csv +import os import re import sys -from typing import List, Tuple +from typing import Iterator, List, Tuple Item = Tuple[str, int] +def list_enums(filename: str) -> Iterator[Item]: + """List enum definitions in a file.""" + with open(filename, 'rb') as fp: + data = fp.read() + for item in re.finditer(rb'^\s*(\w+)\s*=\s*(\d+)', data, re.MULTILINE): + name, value = item.groups() + yield name.decode('ASCII'), int(value) + def index_of(data: List[Item], key: str) -> int: for i, (name, _) in enumerate(data): if name == key: @@ -17,28 +26,51 @@ def slice(data: List[Item], first: str, last: str) -> List[Item]: return data[index_of(data, first):index_of(data, last)+1] def write_csv(fname: str, data: List[Item]) -> None: + print('Writing', fname, file=sys.stderr) with open(fname, 'w') as fp: w = csv.writer(fp) w.writerow(['Name', 'Value']) for item in data: w.writerow(item) -def main(argv: List[str]) -> None: - if len(argv) != 1: - print('usage: script_gen.py ', file=sys.stderr) - raise SystemExit(2) - with open(argv[0], 'rb') as fp: - data = fp.read() +def process_script(filename: str) -> None: scripts: List[Item] = [] regions: List[Item] = [] - for item in re.finditer(rb'^\s*(\w+)\s*=\s*(\d+)', data, re.MULTILINE): - name, value = item.groups() - if name.startswith(b'sm'): - scripts.append((name.decode('ASCII'), int(value))) - elif name.startswith(b'ver'): - regions.append((name.decode('ASCII'), int(value))) + for name, value in list_enums(filename): + if name.startswith('sm'): + scripts.append((name, value)) + elif name.startswith('ver'): + regions.append((name, value)) write_csv('script.csv', slice(scripts, 'smRoman', 'smUninterp')) write_csv('region.csv', slice(regions, 'verUS', 'verGreenland')) +def process_textcommon(filename: str) -> None: + encodings: List[Item] = [] + for name, value in list_enums(filename): + if name.startswith('kTextEncoding'): + encodings.append((name, value)) + write_csv('encoding.csv', + slice(encodings, 'kTextEncodingMacRoman', + 'kTextEncodingMacKeyboardGlyphs')) + +def process(filename: str) -> None: + name = os.path.basename(filename).lower() + if name == 'script.h': + process_script(filename) + elif name == 'textcommon.h': + process_textcommon(filename) + else: + print('Error: unknown header file:', repr(filename), file=sys.stderr) + raise SystemExit(1) + +def main(argv: List[str]) -> None: + if not argv: + sys.stderr.write( + 'Usage: script_gen.py [...]\n' + 'This will read Script.h and TextCommon.h\n') + raise SystemExit(2) + for arg in argv: + process(arg) + if __name__ == '__main__': main(sys.argv[1:])