syncfiles/gen/macroman.go

140 lines
3.6 KiB
Go

package main
import (
"bufio"
"flag"
"fmt"
"os"
"strconv"
"github.com/depp/packbits"
"golang.org/x/text/unicode/norm"
)
// Command-line switches that enable optional diagnostic dumps.
var (
	// flagDumpSequences is set by -dump-sequences.
	flagDumpSequences bool
	// flagDumpTransitions is set by -dump-transitions.
	flagDumpTransitions bool
)

// init registers the diagnostic switches on the default flag set. Both
// default to false.
func init() {
	switches := []struct {
		target *bool
		name   string
		usage  string
	}{
		{&flagDumpSequences, "dump-sequences", "dump Unicode sequences"},
		{&flagDumpTransitions, "dump-transitions", "dump state machine state transition tables"},
	}
	for _, sw := range switches {
		flag.BoolVar(sw.target, sw.name, false, sw.usage)
	}
}
// characters maps each 8-bit character code to a Unicode code point. It is
// filled in by init below.
var characters [256]rune

// init populates characters: codes below 0x80 map to the identical code
// point, and codes 0x80 through 0xFF are taken from the table in this
// function.
func init() {
	const asciiLimit = 0x80
	upper := [256 - asciiLimit]uint16{
		// 0x80
		0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
		0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
		// 0x90
		0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
		0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
		// 0xA0
		0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
		0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
		// 0xB0
		0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
		0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
		// 0xC0
		0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
		0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
		// 0xD0
		0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
		0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
		// 0xE0
		0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
		0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
		// 0xF0
		0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
		0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
	}
	for code := range characters {
		if code < asciiLimit {
			characters[code] = rune(code)
			continue
		}
		characters[code] = rune(upper[code-asciiLimit])
	}
}
// lineBreaks holds every byte sequence that is recognized as a line break:
// LF, CR, and CR followed by LF.
var lineBreaks = [][]byte{
	[]byte("\n"),
	[]byte("\r"),
	[]byte("\r\n"),
}

// normForms holds the Unicode normalization forms that are recognized.
var normForms = []norm.Form{
	norm.NFC,
	norm.NFD,
}
// makeConverter builds the root node of the converter from cmap, where
// cmap[m] is the Unicode code point assigned to byte value m.
//
// Every sequence in lineBreaks is added for the byte '\r', and the cmap
// entries for CR and LF are skipped so they do not conflict with that special
// case. For every other byte, the UTF-8 encoding of its code point in each
// form listed in normForms is added for that byte.
// NOTE(review): node.add is defined elsewhere; presumably it registers
// "this input sequence converts to this output byte" — confirm there.
//
// When the -dump-sequences flag is set, each code point and its encoded
// sequence are printed to standard error.
//
// It returns the first error reported by node.add, or the populated root node.
func makeConverter(cmap *[256]rune) (*node, error) {
	var n node
	// Special case for CR and LF.
	for _, b := range lineBreaks {
		if err := n.add('\r', b); err != nil {
			return nil, err
		}
	}
	for m, u := range *cmap {
		if m == '\r' || m == '\n' {
			continue
		}
		us := string(u)
		for _, form := range normForms {
			seq := []byte(form.String(us))
			// Diagnostic output is opt-in so a normal run keeps stderr clean.
			if flagDumpSequences {
				fmt.Fprintf(os.Stderr, "%d -> %v\n", u, seq)
			}
			if err := n.add(byte(m), seq); err != nil {
				return nil, err
			}
		}
	}
	return &n, nil
}
// printData writes the generated C source for the conversion table to f.
//
// The output is a "generated file" banner, a FROM_UNIX_DATALEN macro defined
// as ulen, and a kFromUnixData array of len(data) elements holding data as
// decimal byte values. Array rows are wrapped so that no row exceeds 80
// characters. The caller in this file passes the unpacked table length as
// ulen and the packed table as data.
//
// It returns the first write error encountered, or nil.
func printData(f *os.File, ulen int, data []byte) error {
	if _, err := fmt.Fprint(f, "/* This file is automatically generated. */\n"+
		"// clang-format off\n"); err != nil {
		return err
	}
	if _, err := fmt.Fprintf(f, "#define FROM_UNIX_DATALEN %d\n", ulen); err != nil {
		return err
	}
	if _, err := fmt.Fprintf(f, "static const unsigned char kFromUnixData[%d] = {\n", len(data)); err != nil {
		return err
	}
	const maxWidth = 80
	var line []byte
	for _, n := range data {
		sv := len(line)
		line = strconv.AppendUint(line, uint64(n), 10)
		line = append(line, ',')
		if len(line) > maxWidth {
			// This value overflowed the row: emit everything before it, then
			// start the next row with the value that did not fit.
			line = append(line[:sv], '\n')
			if _, err := f.Write(line); err != nil {
				return err
			}
			line = strconv.AppendUint(line[:0], uint64(n), 10)
			line = append(line, ',')
		}
	}
	line = append(line, '\n')
	if _, err := f.Write(line); err != nil {
		return err
	}
	// The closing brace belongs to the same file as the rest of the array,
	// so it must go to f rather than to standard output.
	_, err := fmt.Fprint(f, "};\n")
	return err
}
// mainE runs the generator and returns any error instead of exiting.
//
// It builds the converter from the characters table, generates its table,
// optionally dumps the transition tables to standard error when the
// -dump-transitions flag is set, and finally writes the PackBits-packed table
// bytes to standard output via printData, along with the unpacked length.
func mainE() error {
	root, err := makeConverter(&characters)
	if err != nil {
		return err
	}
	table := root.genTable()
	if flagDumpTransitions {
		w := bufio.NewWriter(os.Stderr)
		table.dumpTransitions(w)
		// A buffered writer only reports write failures on Flush; surface
		// them so a truncated dump does not go unnoticed.
		if err := w.Flush(); err != nil {
			return fmt.Errorf("dumping transitions: %w", err)
		}
	}
	raw := table.toBytes()
	packed := packbits.Pack(raw)
	return printData(os.Stdout, len(raw), packed)
}
// main parses the command-line flags and runs the generator. If the generator
// fails, the error is printed to standard error and the process exits with
// status 1.
func main() {
	flag.Parse()
	err := mainE()
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, "Error:", err)
	os.Exit(1)
}