Add generator for UTF-8 to Macintosh tables

GitOrigin-RevId: 96f2909330b94c895688bfa2f5b50c3e4408fe54
This commit is contained in:
Dietrich Epp 2021-03-11 01:47:59 -05:00
parent fbe60364fe
commit 9945fc0383
3 changed files with 224 additions and 0 deletions

5
gen/go.mod Normal file
View File

@ -0,0 +1,5 @@
module moria.us/macroman
go 1.16
require golang.org/x/text v0.3.5

3
gen/go.sum Normal file
View File

@ -0,0 +1,3 @@
golang.org/x/text v0.3.5 h1:i6eZZ+zk0SOf0xgBpEpPD18qWcJda6q1sxt3S0kzyUQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

216
gen/macroman.go Normal file
View File

@ -0,0 +1,216 @@
package main
import (
"fmt"
"os"
"strconv"
"golang.org/x/text/unicode/norm"
)
// characters maps each Macintosh (Mac OS Roman) byte value, used as the index,
// to the Unicode code point it is paired with when the conversion tables are
// generated.
var characters [256]uint16

// init populates characters: bytes 0x00-0x7F map to themselves, bytes
// 0x80-0xFF take their code points from the table below, and the two line
// ending characters are exchanged.
func init() {
	// Unicode code points for Macintosh bytes 0x80-0xFF, in byte order.
	hichars := [128]uint16{
		0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
		0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
		0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
		0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
		0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
		0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
		0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
		0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
		0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
		0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
		0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
		0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
		0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
		0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
		0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
		0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
	}
	// The low half is ASCII and maps to itself.
	for i := 0; i < 128; i++ {
		characters[i] = uint16(i)
	}
	for i, c := range hichars {
		characters[i+128] = c
	}
	// Exchange the line ending characters so that a Unix LF (U+000A) becomes
	// a Macintosh CR (0x0D). Both directions must be swapped: genStates keeps
	// only the first Macintosh byte recorded for a Unicode sequence, so if two
	// bytes shared one code point, the other code point would be left with no
	// Macintosh equivalent at all.
	characters['\n'] = '\r'
	characters['\r'] = '\n'
}
// state is one node in the tree of decoder states that genStates builds from
// the UTF-8 encodings of the supported characters. Both arrays are indexed by
// the next input byte.
type state struct {
// chars[b] is the Macintosh character recorded for a sequence that ends with
// byte b in this state. Zero means nothing has been recorded for b.
chars [256]uint8
// states[b] is the state to continue in after byte b when b is not the last
// byte of a sequence, or nil if no sequence continues through b.
states [256]*state
}
// genStates builds the tree of decoder states for converting UTF-8 into
// Macintosh characters and returns its root.
//
// Every entry of characters is encoded as UTF-8 in both NFC and NFD form. All
// bytes except the last select (and create, when missing) a child state; the
// last byte records the Macintosh character in the state that was reached.
func genStates() *state {
	root := new(state)
	// Only the canonical forms are used; norm.NFKC and norm.NFKD are omitted.
	forms := []norm.Form{norm.NFC, norm.NFD}
	for _, form := range forms {
		// mac is the Macintosh byte, uni is the Unicode code point.
		for mac, uni := range characters {
			encoded := []byte(form.String(string(rune(uni))))
			last := len(encoded) - 1
			cur := root
			for _, b := range encoded[:last] {
				next := cur.states[b]
				if next == nil {
					next = new(state)
					cur.states[b] = next
				}
				cur = next
			}
			// The first character recorded for a sequence wins. Zero doubles
			// as the "empty" marker, so a zero entry may be overwritten.
			if final := encoded[last]; cur.chars[final] == 0 {
				cur.chars[final] = uint8(mac)
			}
		}
	}
	return root
}
// count returns the number of states in the tree rooted at s, counting s
// itself.
func (s *state) count() int {
	total := 1
	for i := range s.states {
		if child := s.states[i]; child != nil {
			total += child.count()
		}
	}
	return total
}
// writeTable serializes s and all states below it into table, which is laid
// out as consecutive rows of 256 entries. s is written to row pos; each child
// takes the next unused row, depth first. The return value is the index of
// the first row that is still unused.
//
// An entry holds the recorded character in its low byte and the row index of
// the child state in its high byte (zero when there is no child). The row
// index is truncated by the uint16 conversion if it does not fit in 8 bits.
func (s *state) writeTable(table []uint16, pos int) int {
	base := pos * 256
	// Cap the row so that writes cannot spill into the following row.
	row := table[base : base+256 : base+256]
	next := pos + 1
	for b := range row {
		row[b] = uint16(s.chars[b])
		child := s.states[b]
		if child == nil {
			continue
		}
		row[b] |= uint16(next << 8)
		// The child only writes to rows at index next and above.
		next = child.writeTable(table, next)
	}
	return next
}
// genTable flattens the tree rooted at s into a single table with one row of
// 256 entries per state, with s in row 0. It panics if the number of rows
// written does not match the number of states counted.
func (s *state) genTable() []uint16 {
	total := s.count()
	out := make([]uint16, total*256)
	if written := s.writeTable(out, 0); written != total {
		panic("bad table")
	}
	return out
}
// tableToBytes serializes t as a byte slice, writing each entry as two bytes
// with the high byte first.
func tableToBytes(t []uint16) []byte {
	out := make([]byte, 0, 2*len(t))
	for _, v := range t {
		out = append(out, byte(v>>8), byte(v))
	}
	return out
}
// getRun returns the next run at the front of bytes for run-length encoding.
//
// If bytes starts with two or more equal bytes, repeat is true and run is the
// whole stretch of that byte. Otherwise repeat is false and run is the
// literal prefix that stops just before the next pair of adjacent equal
// bytes, or all of bytes if there is no such pair. For empty input the result
// is false with a nil run. run always aliases bytes.
func getRun(bytes []byte) (repeat bool, run []byte) {
	if len(bytes) == 0 {
		return false, nil
	}
	// Measure the stretch of bytes equal to the first one.
	same := 1
	for same < len(bytes) && bytes[same] == bytes[0] {
		same++
	}
	if same > 1 {
		return true, bytes[:same]
	}
	// Literal run: bytes[0] differs from bytes[1], so start looking for an
	// adjacent equal pair at index 1.
	for i := 1; i+1 < len(bytes); i++ {
		if bytes[i] == bytes[i+1] {
			return false, bytes[:i]
		}
	}
	return false, bytes
}
// packBits compresses bytes with PackBits-style run-length encoding and
// returns the packed data.
//
// Each run is limited to 128 bytes. A repeated run of n bytes is written as
// the header byte 1-n followed by the repeated byte; a literal run of n bytes
// is written as the header byte n-1 followed by the n bytes themselves.
func packBits(bytes []byte) []byte {
	var packed []byte
	rest := bytes
	for len(rest) > 0 {
		repeat, run := getRun(rest)
		if len(run) > 128 {
			// Longer runs are split; the remainder is picked up next time.
			run = run[:128]
		}
		n := len(run)
		if repeat {
			packed = append(packed, byte(1-n), run[0])
		} else {
			packed = append(packed, byte(n-1))
			packed = append(packed, run...)
		}
		rest = rest[n:]
	}
	return packed
}
// printTable writes table to standard output as the C array kFromUnixTable,
// 16 entries per line. It returns the first write error, if any.
func printTable(table []uint16) error {
	_, err := fmt.Print("static const unsigned short kFromUnixTable[] = {")
	if err != nil {
		return err
	}
	for idx, v := range table {
		// Start a new line before every group of 16 entries.
		if idx%16 == 0 {
			if _, err = fmt.Println(); err != nil {
				return err
			}
		}
		if _, err = fmt.Printf("%d,", v); err != nil {
			return err
		}
	}
	_, err = fmt.Print("\n};\n")
	return err
}
// printData writes the generated C source to f: a header comment, the
// FROM_UNIX_DATALEN macro set to ulen, and the array kFromUnixData holding
// data as decimal values, wrapped so that no line is longer than 80
// characters. It returns the first write error, if any.
func printData(f *os.File, ulen int, data []byte) error {
	if _, err := fmt.Fprint(f, "/* This file is automatically generated. */\n"+
		"// clang-format off\n"); err != nil {
		return err
	}
	if _, err := fmt.Fprintf(f, "#define FROM_UNIX_DATALEN %d\n", ulen); err != nil {
		return err
	}
	if _, err := fmt.Fprintf(f, "static const unsigned char kFromUnixData[%d] = {\n", len(data)); err != nil {
		return err
	}
	var line []byte
	for _, n := range data {
		// Remember where this value starts so it can be taken back off the
		// line if it does not fit.
		sv := len(line)
		line = strconv.AppendUint(line, uint64(n), 10)
		line = append(line, ',')
		if len(line) > 80 {
			// Flush the line without the new value, then start the next
			// line with it.
			line = append(line[:sv], '\n')
			if _, err := f.Write(line); err != nil {
				return err
			}
			line = strconv.AppendUint(line[:0], uint64(n), 10)
			line = append(line, ',')
		}
	}
	line = append(line, '\n')
	if _, err := f.Write(line); err != nil {
		return err
	}
	// The closing brace belongs to the same file as the rest of the array,
	// not to standard output.
	_, err := fmt.Fprint(f, "};\n")
	return err
}
// main generates the conversion table, compresses it, and writes the result
// to standard output as C source. On a write error it reports the error on
// standard error and exits with status 1.
func main() {
	table := genStates().genTable()
	// printTable(table) dumps the uncompressed table; it is left disabled.
	raw := tableToBytes(table)
	err := printData(os.Stdout, len(raw), packBits(raw))
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, "Error:", err)
	os.Exit(1)
}