diff --git a/gen/macroman.go b/gen/macroman.go index 213396a..1f7343c 100644 --- a/gen/macroman.go +++ b/gen/macroman.go @@ -1,6 +1,7 @@ package main import ( + "bufio" "flag" "fmt" "os" @@ -20,7 +21,7 @@ func init() { flag.BoolVar(&flagDumpTransitions, "dump-transitions", false, "dump state machine state transition tables") } -var characters [256]uint16 +var characters [256]rune func init() { hichars := [128]uint16{ @@ -42,131 +43,42 @@ func init() { 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7, } for i := 0; i < 128; i++ { - characters[i] = uint16(i) + characters[i] = rune(i) } for i, c := range hichars { - characters[i+128] = c + characters[i+128] = rune(c) } - characters['\n'] = '\r' } -type state struct { - chars [256]uint8 - states [256]*state -} +var ( + // lineBreaks is the set of all sequences recognized as line breaks. + lineBreaks = [][]byte{{'\n'}, {'\r'}, {'\r', '\n'}} + // normForms is the set of Unicode normalization forms recognized. + normForms = []norm.Form{norm.NFC, norm.NFD} +) -func genStates() *state { - root := new(state) - // Iterate over each Unicode normalization form. - // Omit norm.NFKC, norm.NFKD - for _, form := range []norm.Form{norm.NFC, norm.NFD} { - // Iterate over Macintosh, Unicode characters. - for m, u := range characters { - st := root - bytes := []byte(form.String(string(rune(u)))) - for _, b := range bytes[:len(bytes)-1] { - ost := st - st = st.states[b] - if st == nil { - st = new(state) - ost.states[b] = st - } - } - b := bytes[len(bytes)-1] - if st.chars[b] == 0 { - st.chars[b] = uint8(m) - if flagDumpSequences { - fmt.Fprintf(os.Stderr, "%02x: %x\n", m, bytes) - } +func makeConverter(cmap *[256]rune) (*node, error) { + var n node + // Special case for CR and LF. + for _, b := range lineBreaks { + if err := n.add('\r', b); err != nil { + return nil, err + } + } + for m, u := range *cmap { + if m == '\r' || m == '\n' { + continue + } + us := string(u) + for _, form := range normForms { + bytes := []byte(form.String(us)) + fmt.Fprintf(os.Stderr, "%d -> %v\n", u, bytes) + if err := n.add(byte(m), bytes); err != nil { + return nil, err } } } - return root -} - -func (s *state) count() int { - n := 1 - for _, s := range s.states { - if s != nil { - n += s.count() - } - } - return n -} - -func (s *state) writeTable(table []uint16, pos int) int { - data := table[pos*256 : pos*256+256 : pos*256+256] - pos++ - for i, c := range s.chars { - data[i] = uint16(c) - } - for i, c := range s.states { - if c != nil { - data[i] |= uint16(pos << 8) - pos = c.writeTable(table, pos) - } - } - return pos -} - -func (s *state) genTable() []uint16 { - n := s.count() - table := make([]uint16, 256*n) - pos := s.writeTable(table, 0) - if pos != n { - panic("bad table") - } - return table -} - -func dumpTransitions(table []uint16) { - n := len(table) >> 8 - for i := 0; i < n; i++ { - t := table[i<<8 : (i+1)<<8] - fmt.Fprintf(os.Stderr, "State $%02x\n", i) - for m, v := range t { - if v != 0 { - fmt.Fprintf(os.Stderr, " $%02x ->", m) - st := v >> 8 - chr := v & 255 - if st != 0 { - fmt.Fprintf(os.Stderr, " state $%02x", st) - } - if chr != 0 { - fmt.Fprintf(os.Stderr, " char $%02x", chr) - } - fmt.Fprintln(os.Stderr) - } - } - fmt.Fprintln(os.Stderr) - } -} - -func tableToBytes(t []uint16) []byte { - b := make([]byte, len(t)*2) - for i, x := range t { - b[i*2] = byte(x >> 8) - b[i*2+1] = byte(x) - } - return b -} - -func printTable(table []uint16) error { - if _, err := fmt.Print("static const unsigned short kFromUnixTable[] = {"); err != nil { - return err - } - for i, n := range table { - if i&15 == 0 { - if _, err := fmt.Println(); err != nil { - return err - } - } - if _, err := fmt.Printf("%d,", n); err != nil { - return err - } - } - _, err := fmt.Print("\n};\n") - return err + return &n, nil } func printData(f *os.File, ulen int, data []byte) error { @@ -202,17 +114,25 @@ func printData(f *os.File, ulen int, data []byte) error { return err } +func mainE() error { + n, err := makeConverter(&characters) + if err != nil { + return err + } + table := n.genTable() + if flagDumpTransitions { + w := bufio.NewWriter(os.Stderr) + table.dumpTransitions(w) + w.Flush() + } + bytes := table.toBytes() + bits := packbits.Pack(bytes) + return printData(os.Stdout, len(bytes), bits) +} + func main() { flag.Parse() - - root := genStates() - table := root.genTable() - if flagDumpTransitions { - dumpTransitions(table) - } - bytes := tableToBytes(table) - bits := packbits.Pack(bytes) - if err := printData(os.Stdout, len(bytes), bits); err != nil { + if err := mainE(); err != nil { fmt.Fprintln(os.Stderr, "Error:", err) os.Exit(1) } diff --git a/gen/table.go b/gen/table.go new file mode 100644 index 0000000..888db1d --- /dev/null +++ b/gen/table.go @@ -0,0 +1,136 @@ +package main + +import ( + "bufio" + "errors" + "fmt" +) + +var ( + errEmptyString = errors.New("empty input") + errZeroInput = errors.New("zero byte input") + errZeroOutput = errors.New("zero byte output") +) + +type inputConflictErr struct { + input []byte + out1 byte + out2 byte +} + +func (e *inputConflictErr) Error() string { + return fmt.Sprintf("table conflict: %d maps to both %d and %d", e.input, e.out1, e.out2) +} + +// A node is an element in a Unicode decoding graph. +type node struct { + chars [256]uint8 + children [256]*node +} + +// add adds the mapping from "in" to "out", creating additional nodes as +// necessary. +func (n *node) add(out byte, in []byte) error { + if len(in) == 0 { + return errEmptyString + } + if in[0] == 0 { + if out == 0 { + return nil + } + } + if out == 0 { + return errZeroOutput + } + for _, b := range in[:len(in)-1] { + old := n + n = n.children[b] + if n == nil { + n = new(node) + old.children[b] = n + } + } + b := in[len(in)-1] + x := n.chars[b] + if x == 0 { + n.chars[b] = out + return nil + } + if x == out { + return nil + } + return &inputConflictErr{ + input: in, + out1: x, + out2: out, + } +} + +func (n *node) size() int { + sz := 1 + for _, c := range n.children { + if c != nil { + sz += c.size() + } + } + return sz +} + +func (n *node) writeTable(table decoderTable, pos int) int { + data := table[pos*256 : pos*256+256 : pos*256+256] + pos++ + for i, c := range n.chars { + data[i] = uint16(c) + } + for i, c := range n.children { + if c != nil { + data[i] |= uint16(pos << 8) + pos = c.writeTable(table, pos) + } + } + return pos +} + +func (n *node) genTable() decoderTable { + sz := n.size() + table := make(decoderTable, 256*sz) + pos := n.writeTable(table, 0) + if pos != sz { + panic("bad table") + } + return table +} + +type decoderTable []uint16 + +func (t decoderTable) dumpTransitions(w *bufio.Writer) { + n := len(t) >> 8 + for i := 0; i < n; i++ { + t := t[i<<8 : (i+1)<<8] + fmt.Fprintf(w, "State $%02x\n", i) + for m, v := range t { + if v != 0 { + fmt.Fprintf(w, " $%02x ->", m) + st := v >> 8 + chr := v & 255 + if st != 0 { + fmt.Fprintf(w, " state $%02x", st) + } + if chr != 0 { + fmt.Fprintf(w, " char $%02x", chr) + } + w.WriteByte('\n') + } + } + w.WriteByte('\n') + } +} + +func (t decoderTable) toBytes() []byte { + b := make([]byte, len(t)*2) + for i, x := range t { + b[i*2] = byte(x >> 8) + b[i*2+1] = byte(x) + } + return b +} diff --git a/mac_from_unix_data.h b/mac_from_unix_data.h index 6ab6eb7..3d7281d 100644 --- a/mac_from_unix_data.h +++ b/mac_from_unix_data.h @@ -1,52 +1,52 @@ /* This file is automatically generated. */ // clang-format off -#define FROM_UNIX_DATALEN 27648 -static const unsigned char kFromUnixData[1256] = { -254,0,16,1,0,2,0,3,0,4,0,5,0,6,0,7,0,8,0,9,254,0,127,11,0,12,0,10,0,14,0,15,0, -16,0,17,0,18,0,19,0,20,0,21,0,22,0,23,0,24,0,25,0,26,0,27,0,28,0,29,0,30,0,31,0, -32,0,33,0,34,0,35,0,36,0,37,0,38,0,39,0,40,0,41,0,42,0,43,0,44,0,45,0,46,0,47,0, -48,0,49,0,50,0,51,0,52,0,53,0,54,0,55,0,56,0,57,0,58,0,59,0,60,1,61,0,62,0,63,0, -64,3,65,0,66,5,67,0,68,7,69,0,70,0,71,0,72,9,73,0,74,0,104,75,0,76,0,77,11,78, -13,79,0,80,0,81,0,82,0,83,0,84,15,85,0,86,0,87,0,88,17,89,0,90,0,91,0,92,0,93,0, -94,0,95,0,96,19,97,0,98,21,99,0,100,23,101,0,102,0,103,0,104,25,105,0,106,0,107, -0,108,0,109,27,110,29,111,0,112,0,113,0,114,0,115,0,116,31,117,0,118,0,119,0, -120,33,121,0,122,0,123,0,124,0,125,0,126,0,127,129,0,253,0,8,35,0,36,0,37,0,38, -0,39,248,0,0,40,252,0,2,41,0,42,220,0,0,43,232,0,0,51,129,0,129,0,129,0,200,0,0, -2,129,0,129,0,129,0,169,0,0,173,129,0,129,0,129,0,129,0,219,0,0,4,129,0,129,0, -153,0,6,203,0,231,0,229,0,204,248,0,0,128,254,0,0,129,129,0,129,0,129,0,129,0, -129,0,255,0,0,6,129,0,129,0,129,0,203,0,0,130,129,0,129,0,129,0,129,0,185,0,0,8, -129,0,129,0,153,0,4,233,0,131,0,230,246,0,0,232,129,0,129,0,129,0,129,0,129,0, -251,0,0,10,129,0,129,0,153,0,4,237,0,234,0,235,246,0,0,236,129,0,129,0,129,0, -129,0,129,0,251,0,0,12,129,0,129,0,147,0,0,132,129,0,129,0,129,0,129,0,129,0, -241,0,0,14,129,0,129,0,153,0,6,241,0,238,0,239,0,205,248,0,0,133,129,0,129,0, -129,0,129,0,129,0,251,0,0,16,129,0,129,0,153,0,4,244,0,242,0,243,246,0,0,134, -129,0,129,0,129,0,129,0,129,0,251,0,0,18,129,0,129,0,137,0,0,217,129,0,129,0, -129,0,129,0,129,0,251,0,0,20,129,0,129,0,153,0,6,136,0,135,0,137,0,139,248,0,0, -138,254,0,0,140,129,0,129,0,129,0,129,0,129,0,255,0,0,22,129,0,129,0,129,0,203, -0,0,141,129,0,129,0,129,0,129,0,185,0,0,24,129,0,129,0,153,0,4,143,0,142,0,144, -246,0,0,145,129,0,129,0,129,0,129,0,129,0,251,0,0,26,129,0,129,0,153,0,4,147,0, -146,0,148,246,0,0,149,129,0,129,0,129,0,129,0,129,0,251,0,0,28,129,0,129,0,147, -0,0,150,129,0,129,0,129,0,129,0,129,0,241,0,0,30,129,0,129,0,153,0,6,152,0,151, -0,153,0,155,248,0,0,154,129,0,129,0,129,0,129,0,129,0,251,0,0,32,129,0,129,0, -153,0,4,157,0,156,0,158,246,0,0,159,129,0,129,0,129,0,129,0,129,0,251,0,0,34, -129,0,129,0,137,0,0,216,129,0,129,0,129,0,129,0,210,0,6,202,0,193,0,162,0,163, -254,0,0,180,254,0,10,164,0,172,0,169,0,187,0,199,0,194,254,0,6,168,0,248,0,161, -0,177,252,0,8,171,0,181,0,166,0,225,0,252,254,0,2,188,0,200,250,0,1,192,0,129,0, -129,0,129,0,30,203,0,231,0,229,0,204,0,128,0,129,0,174,0,130,0,233,0,131,0,230, -0,232,0,237,0,234,0,235,0,236,254,0,10,132,0,241,0,238,0,239,0,205,0,133,254,0, -8,175,0,244,0,242,0,243,0,134,252,0,32,167,0,136,0,135,0,137,0,139,0,138,0,140, -0,190,0,141,0,143,0,142,0,144,0,145,0,147,0,146,0,148,0,149,254,0,22,150,0,152, -0,151,0,153,0,155,0,154,0,214,0,191,0,157,0,156,0,158,0,159,252,0,0,216,129,0, -129,0,129,0,158,0,0,245,129,0,129,0,129,0,192,0,2,206,0,207,184,0,0,217,129,0, -129,0,129,0,206,0,0,196,129,0,129,0,129,0,154,0,2,246,0,255,224,0,10,249,0,250, -0,251,0,254,0,247,0,253,129,0,129,0,129,0,129,0,234,0,0,189,129,0,129,0,129,0, -212,0,0,185,129,0,129,0,129,0,131,0,4,44,0,45,0,46,254,0,0,47,250,0,2,48,0,49, -230,0,0,50,129,0,129,0,129,0,137,0,2,208,0,209,250,0,4,212,0,213,0,226,254,0,4, -210,0,211,0,227,254,0,4,160,0,224,0,165,250,0,0,201,238,0,0,228,240,0,2,220,0, -221,129,0,129,0,129,0,238,0,0,218,129,0,129,0,129,0,129,0,178,0,0,219,129,0,129, -0,129,0,150,0,0,170,129,0,129,0,129,0,194,0,0,182,250,0,0,198,240,0,0,184,254,0, -0,183,240,0,0,195,250,0,0,176,232,0,0,186,129,0,129,0,129,0,200,0,0,197,210,0,0, -173,250,0,2,178,0,179,129,0,129,0,129,0,184,0,0,215,129,0,129,0,129,0,129,0,209, -0,0,52,240,0,0,53,129,0,129,0,129,0,129,0,219,0,0,240,129,0,129,0,129,0,254,0,2, -222,0,223,129,0,135,0, +#define FROM_UNIX_DATALEN 28160 +static const unsigned char kFromUnixData[1268] = { +254,0,127,1,0,2,0,3,0,4,0,5,0,6,0,7,0,8,0,9,0,13,0,11,0,12,1,13,0,14,0,15,0,16, +0,17,0,18,0,19,0,20,0,21,0,22,0,23,0,24,0,25,0,26,0,27,0,28,0,29,0,30,0,31,0,32, +0,33,0,34,0,35,0,36,0,37,0,38,0,39,0,40,0,41,0,42,0,43,0,44,0,45,0,46,0,47,0,48, +0,49,0,50,0,51,0,52,0,53,0,54,0,55,0,56,0,57,0,58,0,59,0,60,2,61,0,62,0,63,0,64, +4,124,65,0,66,6,67,0,68,8,69,0,70,0,71,0,72,10,73,0,74,0,75,0,76,0,77,12,78,14, +79,0,80,0,81,0,82,0,83,0,84,16,85,0,86,0,87,0,88,18,89,0,90,0,91,0,92,0,93,0,94, +0,95,0,96,20,97,0,98,22,99,0,100,24,101,0,102,0,103,0,104,26,105,0,106,0,107,0, +108,0,109,28,110,30,111,0,112,0,113,0,114,0,115,0,116,32,117,0,118,0,119,0,120, +34,121,0,122,0,123,0,124,0,125,0,126,0,127,129,0,253,0,8,36,0,37,0,38,0,39,0,40, +248,0,0,41,252,0,2,42,0,43,220,0,0,44,232,0,0,52,203,0,0,13,129,0,129,0,129,0, +129,0,129,0,129,0,129,0,255,0,0,3,129,0,129,0,129,0,169,0,0,173,129,0,129,0,129, +0,129,0,219,0,0,5,129,0,129,0,153,0,6,203,0,231,0,229,0,204,248,0,0,128,254,0,0, +129,129,0,129,0,129,0,129,0,129,0,255,0,0,7,129,0,129,0,129,0,203,0,0,130,129,0, +129,0,129,0,129,0,185,0,0,9,129,0,129,0,153,0,4,233,0,131,0,230,246,0,0,232,129, +0,129,0,129,0,129,0,129,0,251,0,0,11,129,0,129,0,153,0,4,237,0,234,0,235,246,0, +0,236,129,0,129,0,129,0,129,0,129,0,251,0,0,13,129,0,129,0,147,0,0,132,129,0, +129,0,129,0,129,0,129,0,241,0,0,15,129,0,129,0,153,0,6,241,0,238,0,239,0,205, +248,0,0,133,129,0,129,0,129,0,129,0,129,0,251,0,0,17,129,0,129,0,153,0,4,244,0, +242,0,243,246,0,0,134,129,0,129,0,129,0,129,0,129,0,251,0,0,19,129,0,129,0,137, +0,0,217,129,0,129,0,129,0,129,0,129,0,251,0,0,21,129,0,129,0,153,0,6,136,0,135, +0,137,0,139,248,0,0,138,254,0,0,140,129,0,129,0,129,0,129,0,129,0,255,0,0,23, +129,0,129,0,129,0,203,0,0,141,129,0,129,0,129,0,129,0,185,0,0,25,129,0,129,0, +153,0,4,143,0,142,0,144,246,0,0,145,129,0,129,0,129,0,129,0,129,0,251,0,0,27, +129,0,129,0,153,0,4,147,0,146,0,148,246,0,0,149,129,0,129,0,129,0,129,0,129,0, +251,0,0,29,129,0,129,0,147,0,0,150,129,0,129,0,129,0,129,0,129,0,241,0,0,31,129, +0,129,0,153,0,6,152,0,151,0,153,0,155,248,0,0,154,129,0,129,0,129,0,129,0,129,0, +251,0,0,33,129,0,129,0,153,0,4,157,0,156,0,158,246,0,0,159,129,0,129,0,129,0, +129,0,129,0,251,0,0,35,129,0,129,0,137,0,0,216,129,0,129,0,129,0,129,0,210,0,6, +202,0,193,0,162,0,163,254,0,0,180,254,0,10,164,0,172,0,169,0,187,0,199,0,194, +254,0,6,168,0,248,0,161,0,177,252,0,8,171,0,181,0,166,0,225,0,252,254,0,2,188,0, +200,250,0,1,192,0,129,0,129,0,129,0,30,203,0,231,0,229,0,204,0,128,0,129,0,174, +0,130,0,233,0,131,0,230,0,232,0,237,0,234,0,235,0,236,254,0,10,132,0,241,0,238, +0,239,0,205,0,133,254,0,8,175,0,244,0,242,0,243,0,134,252,0,32,167,0,136,0,135, +0,137,0,139,0,138,0,140,0,190,0,141,0,143,0,142,0,144,0,145,0,147,0,146,0,148,0, +149,254,0,22,150,0,152,0,151,0,153,0,155,0,154,0,214,0,191,0,157,0,156,0,158,0, +159,252,0,0,216,129,0,129,0,129,0,158,0,0,245,129,0,129,0,129,0,192,0,2,206,0, +207,184,0,0,217,129,0,129,0,129,0,206,0,0,196,129,0,129,0,129,0,154,0,2,246,0, +255,224,0,10,249,0,250,0,251,0,254,0,247,0,253,129,0,129,0,129,0,129,0,234,0,0, +189,129,0,129,0,129,0,212,0,0,185,129,0,129,0,129,0,131,0,4,45,0,46,0,47,254,0, +0,48,250,0,2,49,0,50,230,0,0,51,129,0,129,0,129,0,137,0,2,208,0,209,250,0,4,212, +0,213,0,226,254,0,4,210,0,211,0,227,254,0,4,160,0,224,0,165,250,0,0,201,238,0,0, +228,240,0,2,220,0,221,129,0,129,0,129,0,238,0,0,218,129,0,129,0,129,0,129,0,178, +0,0,219,129,0,129,0,129,0,150,0,0,170,129,0,129,0,129,0,194,0,0,182,250,0,0,198, +240,0,0,184,254,0,0,183,240,0,0,195,250,0,0,176,232,0,0,186,129,0,129,0,129,0, +200,0,0,197,210,0,0,173,250,0,2,178,0,179,129,0,129,0,129,0,184,0,0,215,129,0, +129,0,129,0,129,0,209,0,0,53,240,0,0,54,129,0,129,0,129,0,129,0,219,0,0,240,129, +0,129,0,129,0,254,0,2,222,0,223,129,0,135,0, };