mirror of
https://github.com/autc04/Retro68.git
synced 2025-03-03 02:30:58 +00:00
315 lines
7.5 KiB
Go
315 lines
7.5 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// +build ignore
|
|
|
|
// Trie table generator.
|
|
// Used by make*tables tools to generate a go file with trie data structures
|
|
// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
|
|
// sequence are used to lookup offsets in the index table to be used for the
|
|
// next byte. The last byte is used to index into a table with 16-bit values.
|
|
|
|
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"hash/crc32"
|
|
"log"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
const blockSize = 64
|
|
const maxSparseEntries = 16
|
|
|
|
// Intermediate trie structure
|
|
type trieNode struct {
|
|
table [256]*trieNode
|
|
value int
|
|
b byte
|
|
leaf bool
|
|
}
|
|
|
|
func newNode() *trieNode {
|
|
return new(trieNode)
|
|
}
|
|
|
|
func (n trieNode) String() string {
|
|
s := fmt.Sprint("trieNode{table: { non-nil at index: ")
|
|
for i, v := range n.table {
|
|
if v != nil {
|
|
s += fmt.Sprintf("%d, ", i)
|
|
}
|
|
}
|
|
s += fmt.Sprintf("}, value:%#x, b:%#x leaf:%v}", n.value, n.b, n.leaf)
|
|
return s
|
|
}
|
|
|
|
func (n trieNode) isInternal() bool {
|
|
internal := true
|
|
for i := 0; i < 256; i++ {
|
|
if nn := n.table[i]; nn != nil {
|
|
if !internal && !nn.leaf {
|
|
log.Fatalf("triegen: isInternal: node contains both leaf and non-leaf children (%v)", n)
|
|
}
|
|
internal = internal && !nn.leaf
|
|
}
|
|
}
|
|
return internal
|
|
}
|
|
|
|
func (n trieNode) mostFrequentStride() int {
|
|
counts := make(map[int]int)
|
|
v := 0
|
|
for _, t := range n.table[0x80 : 0x80+blockSize] {
|
|
if t != nil {
|
|
if stride := t.value - v; v != 0 && stride >= 0 {
|
|
counts[stride]++
|
|
}
|
|
v = t.value
|
|
} else {
|
|
v = 0
|
|
}
|
|
}
|
|
var maxs, maxc int
|
|
for stride, cnt := range counts {
|
|
if cnt > maxc || (cnt == maxc && stride < maxs) {
|
|
maxs, maxc = stride, cnt
|
|
}
|
|
}
|
|
return maxs
|
|
}
|
|
|
|
func (n trieNode) countSparseEntries() int {
|
|
stride := n.mostFrequentStride()
|
|
var count, v int
|
|
for _, t := range n.table[0x80 : 0x80+blockSize] {
|
|
tv := 0
|
|
if t != nil {
|
|
tv = t.value
|
|
}
|
|
if tv-v != stride {
|
|
if tv != 0 {
|
|
count++
|
|
}
|
|
}
|
|
v = tv
|
|
}
|
|
return count
|
|
}
|
|
|
|
func (n *trieNode) insert(r rune, value uint16) {
|
|
var p [utf8.UTFMax]byte
|
|
sz := utf8.EncodeRune(p[:], r)
|
|
|
|
for i := 0; i < sz; i++ {
|
|
if n.leaf {
|
|
log.Fatalf("triegen: insert: node (%#v) should not be a leaf", n)
|
|
}
|
|
nn := n.table[p[i]]
|
|
if nn == nil {
|
|
nn = newNode()
|
|
nn.b = p[i]
|
|
n.table[p[i]] = nn
|
|
}
|
|
n = nn
|
|
}
|
|
n.value = int(value)
|
|
n.leaf = true
|
|
}
|
|
|
|
type nodeIndex struct {
|
|
lookupBlocks []*trieNode
|
|
valueBlocks []*trieNode
|
|
sparseBlocks []*trieNode
|
|
sparseOffset []uint16
|
|
sparseCount int
|
|
|
|
lookupBlockIdx map[uint32]int
|
|
valueBlockIdx map[uint32]int
|
|
}
|
|
|
|
func newIndex() *nodeIndex {
|
|
index := &nodeIndex{}
|
|
index.lookupBlocks = make([]*trieNode, 0)
|
|
index.valueBlocks = make([]*trieNode, 0)
|
|
index.sparseBlocks = make([]*trieNode, 0)
|
|
index.sparseOffset = make([]uint16, 1)
|
|
index.lookupBlockIdx = make(map[uint32]int)
|
|
index.valueBlockIdx = make(map[uint32]int)
|
|
return index
|
|
}
|
|
|
|
func computeOffsets(index *nodeIndex, n *trieNode) int {
|
|
if n.leaf {
|
|
return n.value
|
|
}
|
|
hasher := crc32.New(crc32.MakeTable(crc32.IEEE))
|
|
// We only index continuation bytes.
|
|
for i := 0; i < blockSize; i++ {
|
|
v := 0
|
|
if nn := n.table[0x80+i]; nn != nil {
|
|
v = computeOffsets(index, nn)
|
|
}
|
|
hasher.Write([]byte{uint8(v >> 8), uint8(v)})
|
|
}
|
|
h := hasher.Sum32()
|
|
if n.isInternal() {
|
|
v, ok := index.lookupBlockIdx[h]
|
|
if !ok {
|
|
v = len(index.lookupBlocks)
|
|
index.lookupBlocks = append(index.lookupBlocks, n)
|
|
index.lookupBlockIdx[h] = v
|
|
}
|
|
n.value = v
|
|
} else {
|
|
v, ok := index.valueBlockIdx[h]
|
|
if !ok {
|
|
if c := n.countSparseEntries(); c > maxSparseEntries {
|
|
v = len(index.valueBlocks)
|
|
index.valueBlocks = append(index.valueBlocks, n)
|
|
index.valueBlockIdx[h] = v
|
|
} else {
|
|
v = -len(index.sparseOffset)
|
|
index.sparseBlocks = append(index.sparseBlocks, n)
|
|
index.sparseOffset = append(index.sparseOffset, uint16(index.sparseCount))
|
|
index.sparseCount += c + 1
|
|
index.valueBlockIdx[h] = v
|
|
}
|
|
}
|
|
n.value = v
|
|
}
|
|
return n.value
|
|
}
|
|
|
|
func printValueBlock(nr int, n *trieNode, offset int) {
|
|
boff := nr * blockSize
|
|
fmt.Printf("\n// Block %#x, offset %#x", nr, boff)
|
|
var printnewline bool
|
|
for i := 0; i < blockSize; i++ {
|
|
if i%6 == 0 {
|
|
printnewline = true
|
|
}
|
|
v := 0
|
|
if nn := n.table[i+offset]; nn != nil {
|
|
v = nn.value
|
|
}
|
|
if v != 0 {
|
|
if printnewline {
|
|
fmt.Printf("\n")
|
|
printnewline = false
|
|
}
|
|
fmt.Printf("%#04x:%#04x, ", boff+i, v)
|
|
}
|
|
}
|
|
}
|
|
|
|
func printSparseBlock(nr int, n *trieNode) {
|
|
boff := -n.value
|
|
fmt.Printf("\n// Block %#x, offset %#x", nr, boff)
|
|
v := 0
|
|
//stride := f(n)
|
|
stride := n.mostFrequentStride()
|
|
c := n.countSparseEntries()
|
|
fmt.Printf("\n{value:%#04x,lo:%#02x},", stride, uint8(c))
|
|
for i, nn := range n.table[0x80 : 0x80+blockSize] {
|
|
nv := 0
|
|
if nn != nil {
|
|
nv = nn.value
|
|
}
|
|
if nv-v != stride {
|
|
if v != 0 {
|
|
fmt.Printf(",hi:%#02x},", 0x80+i-1)
|
|
}
|
|
if nv != 0 {
|
|
fmt.Printf("\n{value:%#04x,lo:%#02x", nv, nn.b)
|
|
}
|
|
}
|
|
v = nv
|
|
}
|
|
if v != 0 {
|
|
fmt.Printf(",hi:%#02x},", 0x80+blockSize-1)
|
|
}
|
|
}
|
|
|
|
func printLookupBlock(nr int, n *trieNode, offset, cutoff int) {
|
|
boff := nr * blockSize
|
|
fmt.Printf("\n// Block %#x, offset %#x", nr, boff)
|
|
var printnewline bool
|
|
for i := 0; i < blockSize; i++ {
|
|
if i%8 == 0 {
|
|
printnewline = true
|
|
}
|
|
v := 0
|
|
if nn := n.table[i+offset]; nn != nil {
|
|
v = nn.value
|
|
}
|
|
if v != 0 {
|
|
if v < 0 {
|
|
v = -v - 1 + cutoff
|
|
}
|
|
if printnewline {
|
|
fmt.Printf("\n")
|
|
printnewline = false
|
|
}
|
|
fmt.Printf("%#03x:%#02x, ", boff+i, v)
|
|
}
|
|
}
|
|
}
|
|
|
|
// printTables returns the size in bytes of the generated tables.
|
|
func (t *trieNode) printTables(name string) int {
|
|
index := newIndex()
|
|
// Values for 7-bit ASCII are stored in first two block, followed by nil block.
|
|
index.valueBlocks = append(index.valueBlocks, nil, nil, nil)
|
|
// First byte of multi-byte UTF-8 codepoints are indexed in 4th block.
|
|
index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil, nil)
|
|
// Index starter bytes of multi-byte UTF-8.
|
|
for i := 0xC0; i < 0x100; i++ {
|
|
if t.table[i] != nil {
|
|
computeOffsets(index, t.table[i])
|
|
}
|
|
}
|
|
|
|
nv := len(index.valueBlocks) * blockSize
|
|
fmt.Printf("// %sValues: %d entries, %d bytes\n", name, nv, nv*2)
|
|
fmt.Printf("// Block 2 is the null block.\n")
|
|
fmt.Printf("var %sValues = [%d]uint16 {", name, nv)
|
|
printValueBlock(0, t, 0)
|
|
printValueBlock(1, t, 64)
|
|
printValueBlock(2, newNode(), 0)
|
|
for i := 3; i < len(index.valueBlocks); i++ {
|
|
printValueBlock(i, index.valueBlocks[i], 0x80)
|
|
}
|
|
fmt.Print("\n}\n\n")
|
|
|
|
ls := len(index.sparseBlocks)
|
|
fmt.Printf("// %sSparseOffset: %d entries, %d bytes\n", name, ls, ls*2)
|
|
fmt.Printf("var %sSparseOffset = %#v\n\n", name, index.sparseOffset[1:])
|
|
|
|
ns := index.sparseCount
|
|
fmt.Printf("// %sSparseValues: %d entries, %d bytes\n", name, ns, ns*4)
|
|
fmt.Printf("var %sSparseValues = [%d]valueRange {", name, ns)
|
|
for i, n := range index.sparseBlocks {
|
|
printSparseBlock(i, n)
|
|
}
|
|
fmt.Print("\n}\n\n")
|
|
|
|
cutoff := len(index.valueBlocks)
|
|
ni := len(index.lookupBlocks) * blockSize
|
|
fmt.Printf("// %sLookup: %d bytes\n", name, ni)
|
|
fmt.Printf("// Block 0 is the null block.\n")
|
|
fmt.Printf("var %sLookup = [%d]uint8 {", name, ni)
|
|
printLookupBlock(0, newNode(), 0, cutoff)
|
|
printLookupBlock(1, newNode(), 0, cutoff)
|
|
printLookupBlock(2, newNode(), 0, cutoff)
|
|
printLookupBlock(3, t, 0xC0, cutoff)
|
|
for i := 4; i < len(index.lookupBlocks); i++ {
|
|
printLookupBlock(i, index.lookupBlocks[i], 0x80, cutoff)
|
|
}
|
|
fmt.Print("\n}\n\n")
|
|
fmt.Printf("var %sTrie = trie{ %sLookup[:], %sValues[:], %sSparseValues[:], %sSparseOffset[:], %d}\n\n",
|
|
name, name, name, name, name, cutoff)
|
|
return nv*2 + ns*4 + ni + ls*2
|
|
}
|