From 39ebe329758f47efe7696c77db838d8882430c92 Mon Sep 17 00:00:00 2001 From: Dietrich Epp Date: Fri, 1 Apr 2022 12:17:43 -0400 Subject: [PATCH] Extract hexadecimal constants from enums --- scripts/encoding.csv | 77 ++++++++++++++++++++++++++++++++++++++++++++ scripts/extract.py | 10 +++--- 2 files changed, 82 insertions(+), 5 deletions(-) diff --git a/scripts/encoding.csv b/scripts/encoding.csv index b3a77a9..1727191 100644 --- a/scripts/encoding.csv +++ b/scripts/encoding.csv @@ -39,3 +39,80 @@ kTextEncodingMacRomanian,38 kTextEncodingMacCeltic,39 kTextEncodingMacGaelic,40 kTextEncodingMacKeyboardGlyphs,41 +kTextEncodingMacRSymbol,8 +kTextEncodingMacUninterp,32 +kTextEncodingMacUkrainian,152 +kTextEncodingUnicodeDefault,256 +kTextEncodingUnicodeV1_1,257 +kTextEncodingISO10646_1993,257 +kTextEncodingUnicodeV2_0,259 +kTextEncodingUnicodeV2_1,259 +kTextEncodingUnicodeV3_0,260 +kTextEncodingUnicodeV3_1,261 +kTextEncodingUnicodeV3_2,262 +kTextEncodingISOLatin1,513 +kTextEncodingISOLatin2,514 +kTextEncodingISOLatin3,515 +kTextEncodingISOLatin4,516 +kTextEncodingISOLatinCyrillic,517 +kTextEncodingISOLatinArabic,518 +kTextEncodingISOLatinGreek,519 +kTextEncodingISOLatinHebrew,520 +kTextEncodingISOLatin5,521 +kTextEncodingDOSLatinUS,1024 +kTextEncodingDOSGreek,1029 +kTextEncodingDOSBalticRim,1030 +kTextEncodingDOSLatin1,1040 +kTextEncodingDOSGreek1,1041 +kTextEncodingDOSLatin2,1042 +kTextEncodingDOSCyrillic,1043 +kTextEncodingDOSTurkish,1044 +kTextEncodingDOSPortuguese,1045 +kTextEncodingDOSIcelandic,1046 +kTextEncodingDOSHebrew,1047 +kTextEncodingDOSCanadianFrench,1048 +kTextEncodingDOSArabic,1049 +kTextEncodingDOSJapanese,1056 +kTextEncodingDOSChineseSimplif,1057 +kTextEncodingDOSKorean,1058 +kTextEncodingDOSChineseTrad,1059 +kTextEncodingWindowsLatin1,1280 +kTextEncodingWindowsANSI,1280 +kTextEncodingWindowsLatin2,1281 +kTextEncodingWindowsCyrillic,1282 +kTextEncodingWindowsGreek,1283 +kTextEncodingWindowsLatin5,1284 +kTextEncodingWindowsHebrew,1285 +kTextEncodingWindowsArabic,1286 +kTextEncodingWindowsBalticRim,1287 +kTextEncodingWindowsVietnamese,1288 +kTextEncodingWindowsKoreanJohab,1296 +kTextEncodingUS_ASCII,1536 +kTextEncodingJIS_X0201_76,1568 +kTextEncodingJIS_X0208_83,1569 +kTextEncodingJIS_X0208_90,1570 +kTextEncodingJIS_X0212_90,1571 +kTextEncodingJIS_C6226_78,1572 +kTextEncodingGB_2312_80,1584 +kTextEncodingGBK_95,1585 +kTextEncodingKSC_5601_87,1600 +kTextEncodingKSC_5601_92_Johab,1601 +kTextEncodingCNS_11643_92_P1,1617 +kTextEncodingCNS_11643_92_P2,1618 +kTextEncodingCNS_11643_92_P3,1619 +kTextEncodingISO_2022_JP,2080 +kTextEncodingISO_2022_JP_2,2081 +kTextEncodingISO_2022_JP_1,2082 +kTextEncodingISO_2022_CN,2096 +kTextEncodingISO_2022_CN_EXT,2097 +kTextEncodingISO_2022_KR,2112 +kTextEncodingEUC_JP,2336 +kTextEncodingEUC_CN,2352 +kTextEncodingEUC_TW,2353 +kTextEncodingEUC_KR,2368 +kTextEncodingDefaultVariant,0 +kTextEncodingDefaultFormat,0 +kTextEncodingFullName,0 +kTextEncodingBaseName,1 +kTextEncodingVariantName,2 +kTextEncodingFormatName,3 diff --git a/scripts/extract.py b/scripts/extract.py index 58f9633..efc1e0d 100644 --- a/scripts/extract.py +++ b/scripts/extract.py @@ -12,9 +12,11 @@ def list_enums(filename: str) -> Iterator[Item]: """List enum definitions in a file.""" with open(filename, 'rb') as fp: data = fp.read() - for item in re.finditer(rb'^\s*(\w+)\s*=\s*(\d+)', data, re.MULTILINE): + for item in re.finditer( + rb'^\s*(\w+)\s*=\s*((?:0x)?\d+)u?l?\b', + data, re.MULTILINE | re.IGNORECASE): name, value = item.groups() - yield name.decode('ASCII'), int(value) + yield name.decode('ASCII'), int(value, 0) def index_of(data: List[Item], key: str) -> int: for i, (name, _) in enumerate(data): @@ -49,9 +51,7 @@ def process_textcommon(filename: str) -> None: for name, value in list_enums(filename): if name.startswith('kTextEncoding'): encodings.append((name, value)) - write_csv('encoding.csv', - slice(encodings, 'kTextEncodingMacRoman', - 'kTextEncodingMacKeyboardGlyphs')) + write_csv('encoding.csv', encodings) def process(filename: str) -> None: name = os.path.basename(filename).lower()