Embed character map tables in executable

This simplifies the conversion test, since we don't need to be careful
about which data we run the conversion test in. It will also simplify
the command-line conversion tool and its distribution. The classic Mac
OS version of this program will continue to embed conversion tables in
the resource fork.
This commit is contained in:
Dietrich Epp 2022-03-24 22:34:32 -04:00
parent 6267606293
commit 5ad207f785
11 changed files with 380 additions and 183 deletions

View File

@ -1,20 +1,6 @@
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load("//bazel:copts.bzl", "COPTS")
_data = [
"charmap_roman.dat",
"charmap_turkish.dat",
"charmap_croatian.dat",
"charmap_iceland.dat",
"charmap_romanian.dat",
"charmap_celtic.dat",
"charmap_gaelic.dat",
"charmap_greek.dat",
"charmap_cyrillic.dat",
"charmap_inuit.dat",
"charmap_centeuro.dat",
]
genrule(
name = "data",
srcs = [
@ -22,11 +8,12 @@ genrule(
"//scripts:data",
],
outs = [
"charmap.c",
"charmap_data.c",
"charmap_info.c",
"charmap_region.c",
"charmap.r",
"charmap_name.c",
] + _data,
cmd = "$(execpath //gen:macscript) -dest=$(RULEDIR) -src=. -quiet",
],
cmd = "$(execpath //gen:macscript) -dest=$(RULEDIR) -src=. -quiet -format=false",
tools = [
"//gen:macscript",
],
@ -35,14 +22,19 @@ genrule(
cc_library(
name = "convert",
srcs = [
"charmap.c",
"charmap_data.c",
"charmap_info.c",
"charmap_region.c",
"convert.c",
"convert.h",
"convert_1f.c",
"convert_1r.c",
"toolbox.c",
],
hdrs = [
"convert.h",
"data.h",
"defs.h",
"test.h",
"toolbox.c",
],
copts = COPTS,
)
@ -50,11 +42,9 @@ cc_library(
cc_test(
name = "convert_test",
srcs = [
"charmap_name.c",
"convert_test.c",
],
copts = COPTS,
data = _data,
deps = [
":convert",
],

View File

@ -2,6 +2,7 @@
#define _XOPEN_SOURCE 500
#include "convert/convert.h"
#include "convert/data.h"
#include "convert/test.h"
#include <errno.h>
@ -65,65 +66,6 @@ static void StringPrintf(char *dest, size_t destsz, const char *fmt, ...)
}
}
/* Read the whole contents of convert/<filename> into a freshly allocated
   buffer. On success the buffer and its length are stored in *datap and
   *sizep. On any failure the process is terminated through DieErrorf. */
static void ReadFile(const char *filename, void **datap, size_t *sizep)
{
	char path[128];
	FILE *in = NULL;
	char *contents = NULL;
	char *grown;
	size_t used, cap, nread;
	int err;

	StringPrintf(path, sizeof(path), "convert/%s", filename);
	in = fopen(path, "rb");
	if (in == NULL) {
		err = errno;
		goto error;
	}
	cap = kInitialBufSize;
	contents = malloc(cap);
	if (contents == NULL) {
		err = errno;
		goto error;
	}
	used = 0;
	for (;;) {
		/* Double the buffer whenever it is completely full. */
		if (used >= cap) {
			grown = realloc(contents, cap * 2);
			if (grown == NULL) {
				err = errno;
				goto error;
			}
			contents = grown;
			cap *= 2;
		}
		nread = fread(contents + used, 1, cap - used, in);
		if (nread != 0) {
			used += nread;
			continue;
		}
		/* A zero-length read is either end-of-file or an error. */
		if (feof(in)) {
			break;
		}
		err = errno;
		goto error;
	}
	fclose(in);
	*datap = contents;
	*sizep = used;
	return;

error:
	if (in != NULL) {
		fclose(in);
	}
	/* free(NULL) is a no-op, so no guard is required here. */
	free(contents);
	DieErrorf(err, "read %s", filename);
}
static UInt8 *gBuffer[3];
static void PrintQuotedString(const UInt8 *buf, int len)
@ -203,10 +145,8 @@ static const char *const kLineBreakData[4] = {
static const char *const kLineBreakName[4] = {"keep", "LF", "CR", "CRLF"};
static void TestConverter(const char *filename)
static void TestConverter(const char *name, struct CharmapData data)
{
void *data;
size_t datasz;
Ptr datap;
Handle datah;
struct Converter cf, cr, cc;
@ -218,24 +158,22 @@ static void TestConverter(const char *filename)
UInt8 *optr, *oend;
int lblen[4];
data = NULL;
cf.data = NULL;
cr.data = NULL;
StringPrintf(gTestName, sizeof(gTestName), "%s", filename);
StringPrintf(gTestName, sizeof(gTestName), "%s", name);
/* Load the converter into memory and build the conversion table. */
ReadFile(filename, &data, &datasz);
datap = data;
datap = (void *)data.ptr;
datah = &datap;
r = ConverterBuild(&cf, datah, datasz, kToUTF8, &err);
r = ConverterBuild(&cf, datah, data.size, kToUTF8, &err);
if (r != 0) {
Failf("ConverterBuild: %s (to UTF-8): %s", filename, ErrorName(r));
Failf("ConverterBuild: to UTF-8: %s", ErrorName(r));
goto done;
}
r = ConverterBuild(&cr, datah, datasz, kFromUTF8, &err);
r = ConverterBuild(&cr, datah, data.size, kFromUTF8, &err);
if (r != 0) {
Failf("ConverterBuild: %s (from UTF-8): %s", filename, ErrorName(r));
Failf("ConverterBuild: from UTF-8: %s", ErrorName(r));
goto done;
}
@ -269,7 +207,7 @@ static void TestConverter(const char *filename)
}
for (j = 1; j <= jmax; j++) {
StringPrintf(gTestName, sizeof(gTestName), "%s reverse i=%d j=%d",
filename, i, j);
name, i, j);
st.data = 0;
iptr = gBuffer[1];
optr = gBuffer[2];
@ -300,7 +238,7 @@ static void TestConverter(const char *filename)
len0 = lblen[i]; /* Expected output */
for (j = 1; j < len1; j++) {
StringPrintf(gTestName, sizeof(gTestName),
"%s %s linebreak %s split=%d", filename,
"%s %s linebreak %s split=%d", name,
k == 0 ? "forward" : "backward", kLineBreakName[i],
j);
st.data = 0;
@ -323,7 +261,6 @@ static void TestConverter(const char *filename)
}
done:
free(data);
if (cf.data != NULL) {
DisposeHandle(cf.data);
}
@ -335,7 +272,8 @@ done:
int main(int argc, char **argv)
{
void *buf;
const char *filename;
struct CharmapData data;
const char *name;
int i;
(void)argc;
@ -350,16 +288,24 @@ int main(int argc, char **argv)
}
for (i = 0;; i++) {
filename = kCharsetFilename[i];
if (filename == NULL) {
name = CharmapName(i);
if (name == NULL) {
break;
}
TestConverter(filename);
data = CharmapData(i);
if (data.ptr != NULL) {
TestConverter(name, data);
}
}
for (i = 0; i < 3; i++) {
free(gBuffer[i]);
}
return gFailCount == 0 ? 0 : 1;
if (gFailCount > 0) {
fputs("failed\n", stderr);
return 1;
}
fputs("ok\n", stderr);
return 0;
}

25
convert/data.h Normal file
View File

@ -0,0 +1,25 @@
#ifndef data_h
#define data_h
/* data.h - charmap data, not used for classic Mac OS builds */
#include "convert/defs.h"
/* Get the ID of the given character map. Return NULL if no such character map
exists. */
const char *CharmapID(int cmap);
/* Get the human-readable name of the given character map. Return NULL if no
such character map exists. */
const char *CharmapName(int cmap);
/* Conversion table data. A value with a NULL ptr and a size of zero means that
   no conversion table is available. */
struct CharmapData {
/* First byte of the conversion table, or NULL if there is no table. */
const UInt8 *ptr;
/* Length of the conversion table, in bytes. */
Size size;
};
/* Get the conversion table data for the given charmap. Returns an empty buffer
with a NULL pointer if the character map does not exist or if no conversion
table exists for that character map. */
struct CharmapData CharmapData(int cmap);
#endif

View File

@ -4,9 +4,6 @@
#include "convert/defs.h"
/* List of all data files, terminated by NULL. */
extern const char *const kCharsetFilename[];
/* Print an error message and exit. */
void Dief(const char *msg, ...) __attribute__((noreturn, format(printf, 1, 2)));

View File

@ -3,8 +3,8 @@ load("@io_bazel_rules_go//go:def.bzl", "go_binary")
go_binary(
name = "macscript",
srcs = [
"cdata.go",
"data.go",
"filenames.go",
"main.go",
"rez.go",
"scriptmap.go",

117
gen/cdata.go Normal file
View File

@ -0,0 +1,117 @@
package main
import "fmt"
// strlookup is the C source template for a function that maps a charmap index
// to a string stored in kCharmapText. It takes two %s arguments: the name of
// the generated C function, then the name of the offset array to index. The
// generated function returns 0 when the index is outside [0, CHARMAP_COUNT).
const strlookup = `const char *%s(int cmap)
{
if (cmap < 0 || CHARMAP_COUNT <= cmap) {
return 0;
}
return kCharmapText + %s[cmap];
}
`
// datalookup is the C source for the CharmapData function. The generated
// function looks up the byte range [kCharmapOffset[cmap],
// kCharmapOffset[cmap+1]) inside kCharmapData. It returns a zeroed result
// (ptr 0, size 0) when the index is out of range or when the range is empty.
// It is written to the output verbatim (no format arguments).
const datalookup = `struct CharmapData CharmapData(int cmap) {
struct CharmapData data;
UInt32 off0, off1;
data.ptr = 0;
data.size = 0;
if (cmap < 0 || CHARMAP_COUNT <= cmap) {
return data;
}
off0 = kCharmapOffset[cmap];
off1 = kCharmapOffset[cmap+1];
if (off0 == off1) {
return data;
}
data.ptr = kCharmapData + off0;
data.size = off1 - off0;
return data;
}
`
// writeInfo generates the C source file that implements CharmapID and
// CharmapName.
//
// All charmap IDs and names are stored in one shared string table
// (kCharmapText). Two parallel arrays, kCharmapIDs and kCharmapNames, hold the
// offset of each charmap's ID and name within that table. The two lookup
// functions are then instantiated from the strlookup template.
//
// It returns any error from creating or flushing the output file.
func writeInfo(d *scriptdata, filename string) error {
	// Collect the string table and the per-charmap offsets into it.
	strs := newStringtable()
	ids := make([]int, len(d.charmaps))
	names := make([]int, len(d.charmaps))
	for i, cm := range d.charmaps {
		ids[i] = strs.add(cm.id)
		names[i] = strs.add(cm.name)
	}

	s, err := createCSource(filename)
	if err != nil {
		return err
	}
	w := s.writer

	s.include("data.h")
	w.WriteString(formatOff)
	fmt.Fprintf(w, "#define CHARMAP_COUNT %d\n", len(d.charmaps))
	fmt.Fprintf(w, "static const char kCharmapText[] =")
	s.strings(strs.data)
	w.WriteString(";\n")
	fmt.Fprintf(w, "static const %s kCharmapIDs[CHARMAP_COUNT] = {", arrayIntType(ids))
	s.ints(ids)
	w.WriteString("\n};\n")
	// The name table must be sized and filled from the name offsets, not the
	// ID offsets; otherwise CharmapName would return IDs.
	fmt.Fprintf(w, "static const %s kCharmapNames[CHARMAP_COUNT] = {", arrayIntType(names))
	s.ints(names)
	w.WriteString("\n};\n")
	w.WriteString(formatOn)

	fmt.Fprintf(w, strlookup, "CharmapID", "kCharmapIDs")
	fmt.Fprintf(w, strlookup, "CharmapName", "kCharmapNames")
	return s.flush()
}
// writeData generates the C source file that embeds every compiled conversion
// table and implements the CharmapData lookup function.
//
// The tables are concatenated into a single kCharmapData byte array.
// kCharmapOffset has one entry per charmap plus a final entry holding the
// total size, so the table for charmap i spans offsets [i] to [i+1]. Charmaps
// without data contribute an empty range.
func writeData(d *scriptdata, filename string) error {
	count := len(d.charmaps)

	// Compute the start offset of each table, and remember which charmap is
	// the last one that actually has data (it gets no trailing comma).
	offsets := make([]int, count+1)
	total, lastWithData := 0, 0
	for i := range d.charmaps {
		size := len(d.charmaps[i].data)
		offsets[i] = total
		total += size
		if size != 0 {
			lastWithData = i
		}
	}
	offsets[count] = total

	s, err := createCSource(filename)
	if err != nil {
		return err
	}
	out := s.writer

	out.WriteString(formatOff)
	s.include("data.h")
	fmt.Fprintf(out, "#define CHARMAP_COUNT %d\n", count)
	fmt.Fprintf(out, "static const %s kCharmapOffset[CHARMAP_COUNT + 1] = {", arrayIntType(offsets))
	s.ints(offsets)
	out.WriteString("\n};\n")

	out.WriteString("static const UInt8 kCharmapData[] = {")
	for i, cm := range d.charmaps {
		if len(cm.data) == 0 {
			continue
		}
		isLast := i == lastWithData
		fmt.Fprintf(out, "\n\t/* %s */", cm.name)
		s.bytes(cm.data, isLast)
		if !isLast {
			out.WriteByte('\n')
		}
	}
	out.WriteString("\n};\n")
	out.WriteString(formatOn)

	out.WriteString(datalookup)
	return s.flush()
}

View File

@ -10,9 +10,19 @@ import (
"regexp"
"strconv"
"strings"
"moria.us/macscript/charmap"
"moria.us/macscript/table"
)
var isIdent = regexp.MustCompile("^[a-zA-Z][_a-zA-Z0-9]*$")
var (
isIdent = regexp.MustCompile("^[a-zA-Z][_a-zA-Z0-9]*$")
nonIdentPart = regexp.MustCompile("[^a-zA-Z0-9]+")
)
// makeID derives an identifier from a display name by deleting every run of
// characters matched by nonIdentPart.
func makeID(name string) string {
	id := nonIdentPart.ReplaceAllLiteralString(name, "")
	return id
}
// A dataError indicates an error in the contents of one of the data files.
type dataError struct {
@ -113,14 +123,16 @@ func readConsts(filename string) (m constmap, err error) {
}
type charmapinfo struct {
name string
file string
script int
regions []int
name string
filename string
id string
script int
regions []int
data []byte
}
// readCharmaps reads and parses the charmaps.csv file.
func readCharmaps(filename string, scripts, regions map[string]int) ([]charmapinfo, error) {
func readCharmaps(srcdir, filename string, scripts, regions map[string]int) ([]charmapinfo, error) {
fp, err := os.Open(filename)
if err != nil {
return nil, err
@ -152,9 +164,11 @@ func readCharmaps(filename string, scripts, regions map[string]int) ([]charmapin
}
index := len(arr)
ifo := charmapinfo{
name: row[0],
file: row[1],
name: row[0],
filename: strings.ToLower(strings.TrimSuffix(row[1], ".TXT")),
id: makeID(row[0]),
}
file := row[1]
sname := row[2]
var e bool
ifo.script, e = scripts[sname]
@ -178,15 +192,32 @@ func readCharmaps(filename string, scripts, regions map[string]int) ([]charmapin
ifo.regions = append(ifo.regions, rg)
case omap != index:
line, _ := r.FieldPos(0)
return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previou charmaps: %q", arr[omap].name)}
return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previous charmaps: %q", arr[omap].name)}
}
}
} else {
if omap, e := gcharmaps[ifo.script]; e {
line, _ := r.FieldPos(0)
return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previou charmaps: %q", arr[omap].name)}
return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previous charmaps: %q", arr[omap].name)}
}
}
if file != "" {
cm, err := charmap.ReadFile(filepath.Join(srcdir, "charmap", file))
if err != nil {
return nil, err
}
t, err := table.Create(cm)
if err != nil {
if e, ok := err.(*table.UnsupportedError); ok {
if !flagQuiet {
fmt.Fprintf(os.Stderr, "Warning: unsupported charmap %q: %s\n", file, e.Message)
}
continue
}
return nil, fmt.Errorf("%s: %v", file, err)
}
ifo.data = t.Data()
}
arr = append(arr, ifo)
}
return arr, nil
@ -207,6 +238,6 @@ func readData(srcdir string) (d scriptdata, err error) {
if err != nil {
return d, err
}
d.charmaps, err = readCharmaps(filepath.Join(srcdir, "scripts/charmap.csv"), d.scripts.names, d.regions.names)
d.charmaps, err = readCharmaps(srcdir, filepath.Join(srcdir, "scripts/charmap.csv"), d.scripts.names, d.regions.names)
return
}

View File

@ -1,27 +0,0 @@
package main
import (
"strconv"
)
// writeFilenames generates a C source file defining kCharsetFilename, a
// NULL-terminated array of the non-empty entries of charmaps, each written as
// a quoted string.
func writeFilenames(charmaps []string, filename string) error {
	src, err := createCSource(filename)
	if err != nil {
		return err
	}
	out := src.writer
	out.WriteString(header)
	src.include("test.h")
	out.WriteString("const char *const kCharsetFilename[] = {\n")
	for i := range charmaps {
		name := charmaps[i]
		if name == "" {
			// Charmaps without a data file have no entry.
			continue
		}
		out.WriteString("\t" + strconv.Quote(name) + ",\n")
	}
	out.WriteString("\tNULL\n};\n")
	return src.flush()
}

View File

@ -7,10 +7,6 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strings"
"moria.us/macscript/charmap"
"moria.us/macscript/table"
)
const (
@ -57,37 +53,19 @@ func mainE() error {
}
// Compile and emit charmap data.
cms := make([]string, len(d.charmaps))
var hascmap bool
for i, c := range d.charmaps {
if c.file == "" {
continue
}
cm, err := charmap.ReadFile(filepath.Join(srcdir, "charmap", c.file))
if err != nil {
return err
}
t, err := table.Create(cm)
if err != nil {
if e, ok := err.(*table.UnsupportedError); ok {
if !flagQuiet {
fmt.Fprintf(os.Stderr, "Warning: unsupported charmap %q: %s\n", c.file, e.Message)
}
continue
for _, c := range d.charmaps {
if len(c.data) != 0 {
name := "charmap_" + c.filename + ".dat"
fpath := filepath.Join(destdir, name)
if !flagQuiet {
fmt.Fprintln(os.Stderr, "Writing:", fpath)
}
return fmt.Errorf("%s: %v", c.file, err)
if err := ioutil.WriteFile(fpath, c.data, 0666); err != nil {
return err
}
hascmap = true
}
data := t.Data()
name := "charmap_" + strings.ToLower(strings.TrimSuffix(c.file, ".TXT")) + ".dat"
fpath := filepath.Join(destdir, name)
if !flagQuiet {
fmt.Fprintln(os.Stderr, "Writing:", fpath)
}
if err := ioutil.WriteFile(fpath, data, 0666); err != nil {
return err
}
cms[i] = name
hascmap = true
}
if !hascmap {
return errors.New("could not compile any character map")
@ -95,13 +73,16 @@ func mainE() error {
// Write generated output.
m := genMap(&d)
if err := writeMap(&d, m, filepath.Join(destdir, "charmap.c")); err != nil {
if err := writeMap(&d, m, filepath.Join(destdir, "charmap_region.c")); err != nil {
return err
}
if err := writeFilenames(cms, filepath.Join(destdir, "charmap_name.c")); err != nil {
if err := writeInfo(&d, filepath.Join(destdir, "charmap_info.c")); err != nil {
return err
}
if err := writeRez(&d, cms, filepath.Join(destdir, "charmap.r")); err != nil {
if err := writeData(&d, filepath.Join(destdir, "charmap_data.c")); err != nil {
return err
}
if err := writeRez(&d, filepath.Join(destdir, "charmap.r")); err != nil {
return err
}
return nil

View File

@ -41,7 +41,7 @@ func constStrings(c *constmap) []string {
return r
}
func writeRez(d *scriptdata, charmaps []string, filename string) error {
func writeRez(d *scriptdata, filename string) error {
if !flagQuiet {
fmt.Fprintln(os.Stderr, "Writing:", filename)
}
@ -59,9 +59,9 @@ func writeRez(d *scriptdata, charmaps []string, filename string) error {
writeStrings(w, `rSTRS_Charmaps, "Character Maps"`, charmapNames(d))
writeStrings(w, `rSTRS_Scripts, "Scripts"`, constStrings(&d.scripts))
writeStrings(w, `rSTRS_Regions, "Regions"`, constStrings(&d.regions))
for i, cm := range charmaps {
if cm != "" {
fmt.Fprintf(w, "read 'cmap' (%d, %q) %q;\n", 128+i, d.charmaps[i].name, cm)
for i, cm := range d.charmaps {
if cm.filename != "" {
fmt.Fprintf(w, "read 'cmap' (%d, %q) %q;\n", 128+i, cm.name, cm.filename)
}
}

View File

@ -3,9 +3,17 @@ package main
import (
"bufio"
"fmt"
"math"
"os"
"os/exec"
"path"
"strconv"
)
const (
// width is the line width used when wrapping generated array data; the
// bytes and ints writers start a new line once a line grows past width-4.
width = 80
// formatOff and formatOn are the comment markers written around generated
// tables to switch clang-format off and back on.
formatOff = "/* clang-format off */\n"
formatOn = "/* clang-format on */\n"
)
type csource struct {
@ -23,10 +31,12 @@ func createCSource(filename string) (s csource, err error) {
if err != nil {
return s, err
}
w := bufio.NewWriter(fp)
w.WriteString(header)
return csource{
filename: filename,
file: fp,
writer: bufio.NewWriter(fp),
writer: w,
}, nil
}
@ -67,3 +77,130 @@ func (s *csource) flush() error {
// include writes an #include directive for the named header, with the path
// formed by joining srcdirname and name.
func (s *csource) include(name string) {
	headerPath := path.Join(srcdirname, name)
	fmt.Fprintf(s.writer, "#include \"%s\"\n", headerPath)
}
// bytes writes data as comma-separated decimal values, wrapped onto
// tab-indented lines. Every value is followed by a comma, except that the
// very last value has no comma when final is true. Nothing is written for
// empty data.
func (s *csource) bytes(data []byte, final bool) {
	if len(data) == 0 {
		return
	}
	const limit = width - 4
	out := s.writer
	lastIdx := len(data) - 1
	pending := make([]byte, 0, width+8)
	for i, b := range data {
		// Remember where this item starts so it can be carried over to the
		// next line if appending it overflows the current one.
		start := len(pending)
		pending = strconv.AppendUint(pending, uint64(b), 10)
		if !final || i != lastIdx {
			pending = append(pending, ',')
		}
		if len(pending) <= limit {
			continue
		}
		// Flush everything before the current item, then move the current
		// item to the front of the buffer.
		out.WriteString("\n\t")
		out.Write(pending[:start])
		kept := copy(pending, pending[start:])
		pending = pending[:kept]
	}
	out.WriteString("\n\t")
	out.Write(pending)
}
// ints writes data as comma-separated decimal integers, wrapped onto
// tab-indented lines. No comma follows the final value. Nothing is written
// for empty data.
func (s *csource) ints(data []int) {
	n := len(data)
	if n == 0 {
		return
	}
	buf := make([]byte, 0, width+16)
	for i := 0; i < n; i++ {
		// mark is where the text of the current value begins.
		mark := len(buf)
		buf = strconv.AppendInt(buf, int64(data[i]), 10)
		if i+1 < n {
			buf = append(buf, ',')
		}
		if len(buf) > width-4 {
			// Line is too long: emit what came before this value and keep
			// only this value for the next line.
			s.writer.WriteString("\n\t")
			s.writer.Write(buf[:mark])
			rest := copy(buf, buf[mark:])
			buf = buf[:rest]
		}
	}
	s.writer.WriteString("\n\t")
	s.writer.Write(buf)
}
// strings writes data as a sequence of adjacent C string literals, one per
// tab-indented line. Every string except the last is followed by an explicit
// \0 so that the concatenated literal is a table of NUL-terminated strings
// (the last string is terminated by the literal's implicit NUL).
//
// Printable ASCII is written as-is, with backslash and double quote escaped.
// NUL, tab, newline and carriage return use their short escapes. Any other
// byte is written as a three-digit octal escape: C octal escapes consume at
// most three digits, so unlike \x escapes they can never absorb a following
// character.
func (s *csource) strings(data []string) {
	w := s.writer
	for i, x := range data {
		w.WriteString("\n\t\"")
		// prevNUL is true only when the previous byte of this same literal
		// was written as the short escape \0.
		prevNUL := false
		for _, c := range []byte(x) {
			afterNUL := prevNUL
			prevNUL = false
			switch {
			case c == '\\' || c == '"':
				w.WriteByte('\\')
				w.WriteByte(c)
			case 32 <= c && c <= 126:
				if afterNUL && '0' <= c && c <= '7' {
					// Pad \0 to \000 so the digit is not parsed as part of
					// the octal escape.
					w.WriteString("00")
				}
				w.WriteByte(c)
			case c == 0:
				w.WriteString(`\0`)
				prevNUL = true
			case c == '\t':
				w.WriteString(`\t`)
			case c == '\n':
				w.WriteString(`\n`)
			case c == '\r':
				w.WriteString(`\r`)
			default:
				fmt.Fprintf(w, "\\%03o", c)
			}
		}
		if i < len(data)-1 {
			w.WriteString(`\0`)
		}
		w.WriteByte('"')
	}
}
// intType returns the name of the narrowest unsigned C integer type, out of
// UInt8, UInt16 and UInt32, chosen by comparing maxval against the 8-bit and
// 16-bit unsigned maximums.
func intType(maxval int) string {
	switch {
	case maxval <= math.MaxUint8:
		return "UInt8"
	case maxval <= math.MaxUint16:
		return "UInt16"
	default:
		return "UInt32"
	}
}
// arrayIntType returns the C integer type chosen by intType for the largest
// value in arr. The running maximum starts at zero, so an empty or
// all-negative array is treated as having a maximum of zero.
func arrayIntType(arr []int) string {
	largest := 0
	for i := range arr {
		if v := arr[i]; v > largest {
			largest = v
		}
	}
	return intType(largest)
}
type stringtable struct {
data []string
offset int
offsets map[string]int
}
func newStringtable() (s stringtable) {
s.offsets = make(map[string]int)
return
}
func (t *stringtable) add(s string) int {
if offset, exist := t.offsets[s]; exist {
return offset
}
t.data = append(t.data, s)
offset := t.offset
t.offset += len(s) + 1
t.offsets[s] = offset
return offset
}