diff --git a/gen/go.mod b/gen/go.mod
new file mode 100644
index 0000000..e3c75ad
--- /dev/null
+++ b/gen/go.mod
@@ -0,0 +1,5 @@
+module moria.us/macroman
+
+go 1.16
+
+require golang.org/x/text v0.3.5
diff --git a/gen/go.sum b/gen/go.sum
new file mode 100644
index 0000000..bbd33e8
--- /dev/null
+++ b/gen/go.sum
@@ -0,0 +1,3 @@
+golang.org/x/text v0.3.5 h1:i6eZZ+zk0SOf0xgBpEpPD18qWcJda6q1sxt3S0kzyUQ=
+golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
diff --git a/gen/macroman.go b/gen/macroman.go
new file mode 100644
index 0000000..dd8b29a
--- /dev/null
+++ b/gen/macroman.go
@@ -0,0 +1,276 @@
+// Command macroman generates C source for a PackBits-compressed table that
+// converts UTF-8 text to the Mac OS Roman encoding.
+package main
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+
+	"golang.org/x/text/unicode/norm"
+)
+
+// characters maps each Macintosh character code, used as the index, to the
+// Unicode code point it is paired with. It is filled in by init.
+var characters [256]uint16
+
+// init fills in characters: codes 0-127 map to themselves, codes 128-255 are
+// taken from hichars, and the entry for '\n' is then overridden.
+func init() {
+	hichars := [128]uint16{
+		0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
+		0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
+		0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
+		0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
+		0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
+		0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
+		0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
+		0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
+		0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
+		0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
+		0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
+		0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
+		0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
+		0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
+		0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
+		0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
+	}
+	for i := 0; i < 128; i++ {
+		characters[i] = uint16(i)
+	}
+	for i, c := range hichars {
+		characters[i+128] = c
+	}
+	// Pair Macintosh code 0x0A with U+000D.
+	// NOTE(review): after this override no code is paired with U+000A, and
+	// genStates assigns U+000D to code 0x0A because it reaches 0x0A before
+	// 0x0D. Confirm that this is the intended line-break handling.
+	characters['\n'] = '\r'
+}
+
+// state is one node in the tree of states, keyed by input byte, that
+// genStates builds.
+type state struct {
+	// chars[b] is the Macintosh code recorded for a sequence whose final byte
+	// is b. Zero means that no code has been recorded.
+	chars [256]uint8
+
+	// states[b] is the state that follows a non-final byte b, or nil.
+	states [256]*state
+}
+
+// genStates builds the state tree used to convert UTF-8 to Macintosh codes.
+// For each entry in characters it inserts the UTF-8 bytes of the NFC and NFD
+// forms of the code point and records the Macintosh code at the final byte.
+// An entry that is already nonzero is never overwritten.
+func genStates() *state {
+	root := new(state)
+	// Iterate over each Unicode normalization form.
+	// Omit norm.NFKC, norm.NFKD
+	for _, form := range []norm.Form{norm.NFC, norm.NFD} {
+		// Iterate over Macintosh, Unicode characters.
+		for m, u := range characters {
+			st := root
+			bytes := []byte(form.String(string(rune(u))))
+			// Follow every byte but the last, creating states as needed.
+			for _, b := range bytes[:len(bytes)-1] {
+				ost := st
+				st = st.states[b]
+				if st == nil {
+					st = new(state)
+					ost.states[b] = st
+				}
+			}
+			// Zero means unset, so the first nonzero code recorded wins.
+			b := bytes[len(bytes)-1]
+			if st.chars[b] == 0 {
+				st.chars[b] = uint8(m)
+			}
+		}
+	}
+	return root
+}
+
+// count returns the number of states in the tree rooted at s, including s.
+func (s *state) count() int {
+	n := 1
+	for _, c := range s.states {
+		if c != nil {
+			n += c.count()
+		}
+	}
+	return n
+}
+
+// writeTable serializes the tree rooted at s into table, one block of 256
+// entries per state, storing s in block pos and its descendants in the
+// blocks that follow, in depth-first order. The low byte of each entry is
+// the Macintosh code from chars; the high byte is the block index of the
+// child state, or zero if there is none. It returns the index of the first
+// block that is still unused.
+func (s *state) writeTable(table []uint16, pos int) int {
+	data := table[pos*256 : pos*256+256 : pos*256+256]
+	pos++
+	for i, c := range s.chars {
+		data[i] = uint16(c)
+	}
+	for i, c := range s.states {
+		if c != nil {
+			data[i] |= uint16(pos << 8)
+			pos = c.writeTable(table, pos)
+		}
+	}
+	return pos
+}
+
+// genTable flattens the tree rooted at s into a table of 256 entries per
+// state. It panics if the tree has more states than the one-byte block index
+// used by writeTable can address, or if writeTable does not fill the table
+// exactly.
+func (s *state) genTable() []uint16 {
+	n := s.count()
+	if n > 256 {
+		panic("too many states")
+	}
+	table := make([]uint16, 256*n)
+	pos := s.writeTable(table, 0)
+	if pos != n {
+		panic("bad table")
+	}
+	return table
+}
+
+// tableToBytes encodes t as bytes, two per entry, high byte first.
+func tableToBytes(t []uint16) []byte {
+	b := make([]byte, len(t)*2)
+	for i, x := range t {
+		b[i*2] = byte(x >> 8)
+		b[i*2+1] = byte(x)
+	}
+	return b
+}
+
+// getRun returns the run at the start of bytes. If bytes begins with two or
+// more equal bytes, repeat is true and run is that whole sequence of equal
+// bytes. Otherwise repeat is false and run stops just before the next pair
+// of equal adjacent bytes, or at the end of bytes. The length of run is not
+// limited. For empty input it returns false and a nil run.
+func getRun(bytes []byte) (repeat bool, run []byte) {
+	if len(bytes) == 0 {
+		return false, nil
+	}
+	ref := bytes[0]
+	n := 1
+	for n < len(bytes) && bytes[n] == ref {
+		n++
+	}
+	if n >= 2 {
+		return true, bytes[:n]
+	}
+	// bytes[i+1] is b and bytes[i] is ref, so a match means that a repeated
+	// run starts at index i and the literal run ends just before it.
+	for i, b := range bytes[1:] {
+		if b == ref {
+			return false, bytes[:i]
+		}
+		ref = b
+	}
+	return false, bytes
+}
+
+// packBits compresses bytes with the PackBits run-length scheme. Runs are
+// limited to 128 bytes. A repeated run is written as the byte 1-len(run),
+// which wraps to the range 129-255, followed by the repeated value. A
+// literal run is written as the byte len(run)-1 followed by the run itself.
+func packBits(bytes []byte) []byte {
+	var result []byte
+	for len(bytes) > 0 {
+		repeat, run := getRun(bytes)
+		if len(run) > 128 {
+			run = run[:128]
+		}
+		if repeat {
+			result = append(result, byte(1-len(run)), run[0])
+		} else {
+			result = append(result, byte(len(run)-1))
+			result = append(result, run...)
+		}
+		bytes = bytes[len(run):]
+	}
+	return result
+}
+
+// printTable writes table to standard output as the C array kFromUnixTable,
+// 16 entries per line. The only call to it, in main, is commented out.
+func printTable(table []uint16) error {
+	if _, err := fmt.Print("static const unsigned short kFromUnixTable[] = {"); err != nil {
+		return err
+	}
+	for i, n := range table {
+		if i&15 == 0 {
+			if _, err := fmt.Println(); err != nil {
+				return err
+			}
+		}
+		if _, err := fmt.Printf("%d,", n); err != nil {
+			return err
+		}
+	}
+	_, err := fmt.Print("\n};\n")
+	return err
+}
+
+// printData writes the generated C source to f: a header comment, the macro
+// FROM_UNIX_DATALEN defined as ulen, and the array kFromUnixData containing
+// data as decimal values on lines of at most 80 characters. It returns the
+// first write error, if any.
+func printData(f *os.File, ulen int, data []byte) error {
+	if _, err := fmt.Fprint(f, "/* This file is automatically generated. */\n"+
+		"// clang-format off\n"); err != nil {
+		return err
+	}
+	if _, err := fmt.Fprintf(f, "#define FROM_UNIX_DATALEN %d\n", ulen); err != nil {
+		return err
+	}
+	if _, err := fmt.Fprintf(f, "static const unsigned char kFromUnixData[%d] = {\n", len(data)); err != nil {
+		return err
+	}
+	var line []byte
+	for _, n := range data {
+		sv := len(line)
+		line = strconv.AppendUint(line, uint64(n), 10)
+		line = append(line, ',')
+		if len(line) > 80 {
+			// The value does not fit: write the line without it, then start
+			// the next line with it.
+			line = append(line[:sv], '\n')
+			if _, err := f.Write(line); err != nil {
+				return err
+			}
+			line = strconv.AppendUint(line[:0], uint64(n), 10)
+			line = append(line, ',')
+		}
+	}
+	line = append(line, '\n')
+	if _, err := f.Write(line); err != nil {
+		return err
+	}
+	// Close the array in f, like everything else, not on standard output.
+	_, err := fmt.Fprint(f, "};\n")
+	return err
+}
+
+// main generates the table and writes it as C source to standard output. If
+// writing fails, it reports the error on standard error and exits with
+// status 1.
+func main() {
+	root := genStates()
+	table := root.genTable()
+	bytes := tableToBytes(table)
+	// printTable(table)
+	bits := packBits(bytes)
+	if err := printData(os.Stdout, len(bytes), bits); err != nil {
+		fmt.Fprintln(os.Stderr, "Error:", err)
+		os.Exit(1)
+	}
+}