ToolboxToolbox/MacRomanExploration.py

82 lines
3.6 KiB
Python
Raw Permalink Normal View History

2021-06-07 08:46:42 +00:00
#!/usr/bin/env python3
import unicodedata
2021-09-19 11:34:25 +00:00
# Every normalized string that came out longer than one code point while the
# table below was being built; filled as a side effect of the loop.
combining = set()

# Printed width of each of the three columns; the last column is
# right-stripped after padding, so its width only matters when it is not last.
col_widths = [7, 54, 20]

# One header row followed by one row per byte value 0x00-0xFF.  Each data row
# is [hex byte, description of the NFC form, description of the NFD form].
rows = [['MacRom', 'UTF-8 NFC', 'UTF-8 NFD']]
for byte_value in range(256):
    row = ['[%02X]' % byte_value]
    # The decoded character is the same for both forms, so decode once.
    decoded = bytes([byte_value]).decode('mac_roman')
    for form in ('NFC', 'NFD'):
        normalized = unicodedata.normalize(form, decoded)
        if len(normalized) > 1:
            combining.add(normalized)
        codepoints = []
        for cp in normalized:
            utf8hex = cp.encode('utf-8').hex().upper()
            # Code points without a Unicode name are shown as U+XXXX instead.
            name = unicodedata.name(cp, 'U+%04X' % ord(cp))
            codepoints.append(f'[{utf8hex}] {name}')
        row.append(' + '.join(codepoints))
    rows.append(row)

# Print the table: pad each cell (plus one separating space) to its column
# width, then drop trailing blanks from the finished line.
for row in rows:
    cells = ((col + ' ').ljust(wid) for wid, col in zip(col_widths, row))
    print(''.join(cells).rstrip())
2021-09-19 11:34:25 +00:00
# Group the collected multi-code-point sequences by their second code point
# (the combining mark), giving {mark: [first code point, ...]}.
# The set is sorted first (by mark, then by base) so the emitted text is the
# same on every run; iterating the set directly follows string-hash order,
# which differs between interpreter invocations.
thelist = {}
for pair in sorted(combining, key=lambda seq: (seq[1], seq[0])):
    thelist.setdefault(pair[1], []).append(pair[0])

# Emit one `case` per combining mark containing a nested `switch` over the
# base characters.  The loop variable is deliberately not called `combining`
# so the set of that name is not overwritten.
for mark, bases in thelist.items():
    print(f'case {hex(ord(mark))}: // {unicodedata.name(mark)}')
    print(' switch mac[-1] {')
    for base in sorted(bases, key=ord):
        # Recompose base + mark and take the single Mac Roman byte it encodes to.
        better = unicodedata.normalize('NFC', base + mark).encode('mac_roman')[0]
        print(f' case \'{base}\':')
        print(f' mac [-1] = {hex(better)}')
    print(' default:')
    print(' goto fail')
    print(' }')
    print(' continue')
# 256 sixteen-bit values, one per byte value 0x00-0xFF (eight per line).
# NOTE(review): looks like a per-character sort-key table (high byte = primary
# key, low byte = tie-breaker) -- confirm where the data comes from.
transtable = [
    0x0000, 0x0100, 0x0200, 0x0300, 0x0400, 0x0500, 0x0600, 0x0700,
    0x0800, 0x0900, 0x0a00, 0x0b00, 0x0c00, 0x0d00, 0x0e00, 0x0f00,
    0x1000, 0x1100, 0x1200, 0x1300, 0x1400, 0x1500, 0x1600, 0x1700,
    0x1800, 0x1900, 0x1a00, 0x1b00, 0x1c00, 0x1d00, 0x1e00, 0x1f00,
    0x2000, 0x2100, 0x2200, 0x2300, 0x2400, 0x2500, 0x2600, 0x2700,
    0x2800, 0x2900, 0x2a00, 0x2b00, 0x2c00, 0x2d00, 0x2e00, 0x2f00,
    0x3000, 0x3100, 0x3200, 0x3300, 0x3400, 0x3500, 0x3600, 0x3700,
    0x3800, 0x3900, 0x3a00, 0x3b00, 0x3c00, 0x3d00, 0x3e00, 0x3f00,
    0x4000, 0x4100, 0x4200, 0x4300, 0x4400, 0x4500, 0x4600, 0x4700,
    0x4800, 0x4900, 0x4a00, 0x4b00, 0x4c00, 0x4d00, 0x4e00, 0x4f00,
    0x5000, 0x5100, 0x5200, 0x5300, 0x5400, 0x5500, 0x5600, 0x5700,
    0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00, 0x5e00, 0x5f00,
    0x6100, 0x4180, 0x4280, 0x4380, 0x4480, 0x4580, 0x4680, 0x4780,
    0x4880, 0x4980, 0x4a80, 0x4b80, 0x4c80, 0x4d80, 0x4e80, 0x4f80,
    0x5080, 0x5180, 0x5280, 0x5380, 0x5480, 0x5580, 0x5680, 0x5780,
    0x5880, 0x5980, 0x5a80, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00,
    0x4108, 0x410c, 0x4310, 0x4502, 0x4e0a, 0x4f08, 0x5508, 0x4182,
    0x4184, 0x4186, 0x4188, 0x418a, 0x418c, 0x4390, 0x4582, 0x4584,
    0x4586, 0x4588, 0x4982, 0x4984, 0x4986, 0x4988, 0x4e8a, 0x4f82,
    0x4f84, 0x4f86, 0x4f88, 0x4f8a, 0x5582, 0x5584, 0x5586, 0x5588,
    0xa000, 0xa100, 0xa200, 0xa300, 0xa400, 0xa500, 0xa600, 0x5382,
    0xa800, 0xa900, 0xaa00, 0xab00, 0xac00, 0xad00, 0x4114, 0x4f0e,
    0xb000, 0xb100, 0xb200, 0xb300, 0xb400, 0xb500, 0xb600, 0xb700,
    0xb800, 0xb900, 0xba00, 0x4192, 0x4f92, 0xbd00, 0x4194, 0x4f8e,
    0xc000, 0xc100, 0xc200, 0xc300, 0xc400, 0xc500, 0xc600, 0x2206,
    0x2208, 0xc900, 0x2000, 0x4104, 0x410a, 0x4f0a, 0x4f14, 0x4f94,
    0xd000, 0xd100, 0x2202, 0x2204, 0x2702, 0x2704, 0xd600, 0xd700,
    0x5988, 0xd900, 0xda00, 0xdb00, 0xdc00, 0xdd00, 0xde00, 0xdf00,
    0xe000, 0xe100, 0xe200, 0xe300, 0xe400, 0xe500, 0xe600, 0xe700,
    0xe800, 0xe900, 0xea00, 0xeb00, 0xec00, 0xed00, 0xee00, 0xef00,
    0xf000, 0xf100, 0xf200, 0xf300, 0xf400, 0xf500, 0xf600, 0xf700,
    0xf800, 0xf900, 0xfa00, 0xfb00, 0xfc00, 0xfd00, 0xfe00, 0xff00,
]

# The table's values in ascending order.
idxlist = sorted(transtable)

# Map each distinct value to the position of its first occurrence in the
# sorted list, so equal values share one rank, exactly as list.index() would
# report.
first_position = {}
for position, value in enumerate(idxlist):
    first_position.setdefault(value, position)

# Print, in table order, each entry's rank as a two-digit hex string.
print(['0x%02x' % first_position[entry] for entry in transtable])