Add flags for debugging charset conversion tables

GitOrigin-RevId: d993358c037d8edd00d5819cac852c7822a89d3f
This commit is contained in:
Dietrich Epp 2021-03-16 13:07:56 -04:00
parent da5a06e76e
commit 1315f49c1e
1 changed file with 42 additions and 0 deletions

View File

@ -1,6 +1,7 @@
package main
import (
"flag"
"fmt"
"os"
"strconv"
@ -8,6 +9,16 @@ import (
"golang.org/x/text/unicode/norm"
)
// Debugging switches for the table generator; both default to off.
var (
	// flagDumpSequences is bound to -dump-sequences.
	flagDumpSequences bool
	// flagDumpTransitions is bound to -dump-transitions.
	flagDumpTransitions bool
)

// init registers the debugging switches on the default command-line flag
// set so that they are populated when the flags are parsed.
func init() {
	debugFlags := []struct {
		dest  *bool
		name  string
		usage string
	}{
		{&flagDumpSequences, "dump-sequences", "dump Unicode sequences"},
		{&flagDumpTransitions, "dump-transitions", "dump state machine state transition tables"},
	}
	for _, f := range debugFlags {
		flag.BoolVar(f.dest, f.name, false, f.usage)
	}
}
// characters is a fixed table of 256 16-bit values.
// NOTE(review): presumably indexed by character-set byte and filled in by
// the init function that follows — confirm against the full file.
var characters [256]uint16
func init() {
@ -63,6 +74,9 @@ func genStates() *state {
b := bytes[len(bytes)-1]
if st.chars[b] == 0 {
st.chars[b] = uint8(m)
if flagDumpSequences {
fmt.Fprintf(os.Stderr, "%02x: %x\n", m, bytes)
}
}
}
}
@ -104,6 +118,29 @@ func (s *state) genTable() []uint16 {
return table
}
// dumpTransitions writes a human-readable listing of a state transition
// table to standard error, for debugging.
//
// The table is read as consecutive rows of 256 entries, one row per state;
// entries left over after the last complete row are not printed. For every
// nonzero entry the index within its row is printed, followed by the
// entry's high byte labelled "state" and its low byte labelled "char",
// each only when nonzero. A blank line follows each state.
func dumpTransitions(table []uint16) {
	const rowLen = 1 << 8
	out := os.Stderr
	for state := 0; len(table) >= rowLen; state++ {
		// Peel the next full row off the front of the table.
		row := table[:rowLen]
		table = table[rowLen:]

		fmt.Fprintf(out, "State $%02x\n", state)
		for input, entry := range row {
			if entry == 0 {
				continue
			}
			line := fmt.Sprintf(" $%02x ->", input)
			if next := entry >> 8; next != 0 {
				line += fmt.Sprintf(" state $%02x", next)
			}
			if chr := entry & 0xff; chr != 0 {
				line += fmt.Sprintf(" char $%02x", chr)
			}
			fmt.Fprintln(out, line)
		}
		fmt.Fprintln(out)
	}
}
func tableToBytes(t []uint16) []byte {
b := make([]byte, len(t)*2)
for i, x := range t {
@ -204,8 +241,13 @@ func printData(f *os.File, ulen int, data []byte) error {
}
func main() {
flag.Parse()
root := genStates()
table := root.genTable()
if flagDumpTransitions {
dumpTransitions(table)
}
bytes := tableToBytes(table)
// printTable(table)
bits := packBits(bytes)