From 5ad207f7858ca3df3e0ed1d507860e02abc327e6 Mon Sep 17 00:00:00 2001 From: Dietrich Epp Date: Thu, 24 Mar 2022 22:34:32 -0400 Subject: [PATCH] Embed character map tables in executable This simplifies the conversion test, since we don't need to be careful about which data we run the conversion test in. It will also simplify the command-line conversion tool and its distribution. The classic Mac OS version of this program will continue to embed conversion tables in the resource fork. --- convert/BUILD.bazel | 36 ++++------- convert/convert_test.c | 102 +++++++----------------------- convert/data.h | 25 ++++++++ convert/test.h | 3 - gen/BUILD.bazel | 2 +- gen/cdata.go | 117 ++++++++++++++++++++++++++++++++++ gen/data.go | 53 ++++++++++++---- gen/filenames.go | 27 -------- gen/main.go | 51 +++++---------- gen/rez.go | 8 +-- gen/source.go | 139 ++++++++++++++++++++++++++++++++++++++++- 11 files changed, 380 insertions(+), 183 deletions(-) create mode 100644 convert/data.h create mode 100644 gen/cdata.go delete mode 100644 gen/filenames.go diff --git a/convert/BUILD.bazel b/convert/BUILD.bazel index f4b0b68..ee554f1 100644 --- a/convert/BUILD.bazel +++ b/convert/BUILD.bazel @@ -1,20 +1,6 @@ load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") load("//bazel:copts.bzl", "COPTS") -_data = [ - "charmap_roman.dat", - "charmap_turkish.dat", - "charmap_croatian.dat", - "charmap_iceland.dat", - "charmap_romanian.dat", - "charmap_celtic.dat", - "charmap_gaelic.dat", - "charmap_greek.dat", - "charmap_cyrillic.dat", - "charmap_inuit.dat", - "charmap_centeuro.dat", -] - genrule( name = "data", srcs = [ @@ -22,11 +8,12 @@ genrule( "//scripts:data", ], outs = [ - "charmap.c", + "charmap_data.c", + "charmap_info.c", + "charmap_region.c", "charmap.r", - "charmap_name.c", - ] + _data, - cmd = "$(execpath //gen:macscript) -dest=$(RULEDIR) -src=. -quiet", + ], + cmd = "$(execpath //gen:macscript) -dest=$(RULEDIR) -src=. -quiet -format=false", tools = [ "//gen:macscript", ], @@ -35,14 +22,19 @@ genrule( cc_library( name = "convert", srcs = [ - "charmap.c", + "charmap_data.c", + "charmap_info.c", + "charmap_region.c", "convert.c", - "convert.h", "convert_1f.c", "convert_1r.c", + "toolbox.c", + ], + hdrs = [ + "convert.h", + "data.h", "defs.h", "test.h", - "toolbox.c", ], copts = COPTS, ) @@ -50,11 +42,9 @@ cc_library( cc_test( name = "convert_test", srcs = [ - "charmap_name.c", "convert_test.c", ], copts = COPTS, - data = _data, deps = [ ":convert", ], diff --git a/convert/convert_test.c b/convert/convert_test.c index e54785f..4e6fa4d 100644 --- a/convert/convert_test.c +++ b/convert/convert_test.c @@ -2,6 +2,7 @@ #define _XOPEN_SOURCE 500 #include "convert/convert.h" +#include "convert/data.h" #include "convert/test.h" #include @@ -65,65 +66,6 @@ static void StringPrintf(char *dest, size_t destsz, const char *fmt, ...) } } -/* Read a file in its entirety. */ -static void ReadFile(const char *filename, void **datap, size_t *sizep) -{ - char fnbuf[128]; - FILE *fp = NULL; - char *buf = NULL, *newbuf; - size_t size, alloc, newalloc, amt; - int err; - - StringPrintf(fnbuf, sizeof(fnbuf), "convert/%s", filename); - - fp = fopen(fnbuf, "rb"); - if (fp == NULL) { - err = errno; - goto error; - } - buf = malloc(kInitialBufSize); - if (buf == NULL) { - err = errno; - goto error; - } - size = 0; - alloc = kInitialBufSize; - for (;;) { - if (size >= alloc) { - newalloc = alloc * 2; - newbuf = realloc(buf, newalloc); - if (newbuf == NULL) { - err = errno; - goto error; - } - alloc = newalloc; - buf = newbuf; - } - amt = fread(buf + size, 1, alloc - size, fp); - if (amt == 0) { - if (feof(fp)) { - break; - } - err = errno; - goto error; - } - size += amt; - } - fclose(fp); - *datap = buf; - *sizep = size; - return; - -error: - if (fp != NULL) { - fclose(fp); - } - if (buf != NULL) { - free(buf); - } - DieErrorf(err, "read %s", filename); -} - static UInt8 *gBuffer[3]; static void PrintQuotedString(const UInt8 *buf, int len) @@ -203,10 +145,8 @@ static const char *const kLineBreakData[4] = { static const char *const kLineBreakName[4] = {"keep", "LF", "CR", "CRLF"}; -static void TestConverter(const char *filename) +static void TestConverter(const char *name, struct CharmapData data) { - void *data; - size_t datasz; Ptr datap; Handle datah; struct Converter cf, cr, cc; @@ -218,24 +158,22 @@ static void TestConverter(const char *filename) UInt8 *optr, *oend; int lblen[4]; - data = NULL; cf.data = NULL; cr.data = NULL; - StringPrintf(gTestName, sizeof(gTestName), "%s", filename); + StringPrintf(gTestName, sizeof(gTestName), "%s", name); /* Load the converter into memory and build the conversion table. */ - ReadFile(filename, &data, &datasz); - datap = data; + datap = (void *)data.ptr; datah = &datap; - r = ConverterBuild(&cf, datah, datasz, kToUTF8, &err); + r = ConverterBuild(&cf, datah, data.size, kToUTF8, &err); if (r != 0) { - Failf("ConverterBuild: %s (to UTF-8): %s", filename, ErrorName(r)); + Failf("ConverterBuild: to UTF-8: %s", ErrorName(r)); goto done; } - r = ConverterBuild(&cr, datah, datasz, kFromUTF8, &err); + r = ConverterBuild(&cr, datah, data.size, kFromUTF8, &err); if (r != 0) { - Failf("ConverterBuild: %s (from UTF-8): %s", filename, ErrorName(r)); + Failf("ConverterBuild: from UTF-8: %s", ErrorName(r)); goto done; } @@ -269,7 +207,7 @@ static void TestConverter(const char *filename) } for (j = 1; j <= jmax; j++) { StringPrintf(gTestName, sizeof(gTestName), "%s reverse i=%d j=%d", - filename, i, j); + name, i, j); st.data = 0; iptr = gBuffer[1]; optr = gBuffer[2]; @@ -300,7 +238,7 @@ static void TestConverter(const char *filename) len0 = lblen[i]; /* Expected output */ for (j = 1; j < len1; j++) { StringPrintf(gTestName, sizeof(gTestName), - "%s %s linebreak %s split=%d", filename, + "%s %s linebreak %s split=%d", name, k == 0 ? "forward" : "backward", kLineBreakName[i], j); st.data = 0; @@ -323,7 +261,6 @@ static void TestConverter(const char *filename) } done: - free(data); if (cf.data != NULL) { DisposeHandle(cf.data); } @@ -335,7 +272,8 @@ done: int main(int argc, char **argv) { void *buf; - const char *filename; + struct CharmapData data; + const char *name; int i; (void)argc; @@ -350,16 +288,24 @@ int main(int argc, char **argv) } for (i = 0;; i++) { - filename = kCharsetFilename[i]; - if (filename == NULL) { + name = CharmapName(i); + if (name == NULL) { break; } - TestConverter(filename); + data = CharmapData(i); + if (data.ptr != NULL) { + TestConverter(name, data); + } } for (i = 0; i < 3; i++) { free(gBuffer[i]); } - return gFailCount == 0 ? 0 : 1; + if (gFailCount > 0) { + fputs("failed\n", stderr); + return 1; + } + fputs("ok\n", stderr); + return 0; } diff --git a/convert/data.h b/convert/data.h new file mode 100644 index 0000000..5dd2320 --- /dev/null +++ b/convert/data.h @@ -0,0 +1,25 @@ +#ifndef data_h +#define data_h +/* data.h - charmap data, not used for classic Mac OS builds */ +#include "convert/defs.h" + +/* Get the ID of the given character map. Return NULL if no such character map + exists. */ +const char *CharmapID(int cmap); + +/* Get the human-readable name fo the given character map. Return NULL if no + such character map exists. */ +const char *CharmapName(int cmap); + +/* Conversion table data. */ +struct CharmapData { + const UInt8 *ptr; + Size size; +}; + +/* Get the conversion table data for the given charmap. Returns an empty buffer + with a NULL pointer if the character map does not exist or if no conversion + table exists for that character map. */ +struct CharmapData CharmapData(int cmap); + +#endif diff --git a/convert/test.h b/convert/test.h index f04fbf6..1c8d4cc 100644 --- a/convert/test.h +++ b/convert/test.h @@ -4,9 +4,6 @@ #include "convert/defs.h" -/* List of all data files, terminated by NULL. */ -extern const char *const kCharsetFilename[]; - /* Print an error message and exit. */ void Dief(const char *msg, ...) __attribute__((noreturn, format(printf, 1, 2))); diff --git a/gen/BUILD.bazel b/gen/BUILD.bazel index c34114e..ffbe110 100644 --- a/gen/BUILD.bazel +++ b/gen/BUILD.bazel @@ -3,8 +3,8 @@ load("@io_bazel_rules_go//go:def.bzl", "go_binary") go_binary( name = "macscript", srcs = [ + "cdata.go", "data.go", - "filenames.go", "main.go", "rez.go", "scriptmap.go", diff --git a/gen/cdata.go b/gen/cdata.go new file mode 100644 index 0000000..e224b8e --- /dev/null +++ b/gen/cdata.go @@ -0,0 +1,117 @@ +package main + +import "fmt" + +const strlookup = `const char *%s(int cmap) +{ + if (cmap < 0 || CHARMAP_COUNT <= cmap) { + return 0; + } + return kCharmapText + %s[cmap]; +} +` + +const datalookup = `struct CharmapData CharmapData(int cmap) { + struct CharmapData data; + UInt32 off0, off1; + data.ptr = 0; + data.size = 0; + if (cmap < 0 || CHARMAP_COUNT <= cmap) { + return data; + } + off0 = kCharmapOffset[cmap]; + off1 = kCharmapOffset[cmap+1]; + if (off0 == off1) { + return data; + } + data.ptr = kCharmapData + off0; + data.size = off1 - off0; + return data; +} +` + +func writeInfo(d *scriptdata, filename string) error { + strs := newStringtable() + ids := make([]int, len(d.charmaps)) + names := make([]int, len(d.charmaps)) + for i, cm := range d.charmaps { + ids[i] = strs.add(cm.id) + names[i] = strs.add(cm.name) + } + + s, err := createCSource(filename) + if err != nil { + return err + } + + w := s.writer + s.include("data.h") + + w.WriteString(formatOff) + + fmt.Fprintf(w, "#define CHARMAP_COUNT %d\n", len(d.charmaps)) + + fmt.Fprintf(w, "static const char kCharmapText[] =") + s.strings(strs.data) + w.WriteString(";\n") + + fmt.Fprintf(w, "static const %s kCharmapIDs[CHARMAP_COUNT] = {", arrayIntType(ids)) + s.ints(ids) + w.WriteString("\n};\n") + + fmt.Fprintf(w, "static const %s kCharmapNames[CHARMAP_COUNT] = {", arrayIntType(ids)) + s.ints(ids) + w.WriteString("\n};\n") + + w.WriteString(formatOn) + + fmt.Fprintf(w, strlookup, "CharmapID", "kCharmapIDs") + fmt.Fprintf(w, strlookup, "CharmapName", "kCharmapNames") + + return s.flush() +} + +func writeData(d *scriptdata, filename string) error { + offsets := make([]int, len(d.charmaps)+1) + var offset, last int + for i, cm := range d.charmaps { + offsets[i] = offset + offset += len(cm.data) + if len(cm.data) != 0 { + last = i + } + } + offsets[len(offsets)-1] = offset + + s, err := createCSource(filename) + if err != nil { + return err + } + + w := s.writer + w.WriteString(formatOff) + s.include("data.h") + fmt.Fprintf(w, "#define CHARMAP_COUNT %d\n", len(d.charmaps)) + + fmt.Fprintf(w, "static const %s kCharmapOffset[CHARMAP_COUNT + 1] = {", arrayIntType(offsets)) + s.ints(offsets) + w.WriteString("\n};\n") + + w.WriteString("static const UInt8 kCharmapData[] = {") + for i, cm := range d.charmaps { + if len(cm.data) != 0 { + fmt.Fprintf(w, "\n\t/* %s */", cm.name) + s.bytes(cm.data, i == last) + if i != last { + w.WriteByte('\n') + } + } + } + w.WriteString("\n};\n") + + w.WriteString(formatOn) + + w.WriteString(datalookup) + + return s.flush() +} diff --git a/gen/data.go b/gen/data.go index fcbf05c..117bfa2 100644 --- a/gen/data.go +++ b/gen/data.go @@ -10,9 +10,19 @@ import ( "regexp" "strconv" "strings" + + "moria.us/macscript/charmap" + "moria.us/macscript/table" ) -var isIdent = regexp.MustCompile("^[a-zA-Z][_a-zA-Z0-9]*$") +var ( + isIdent = regexp.MustCompile("^[a-zA-Z][_a-zA-Z0-9]*$") + nonIdentPart = regexp.MustCompile("[^a-zA-Z0-9]+") +) + +func makeID(name string) string { + return nonIdentPart.ReplaceAllLiteralString(name, "") +} // A dataError indicates an error in the contents of one of the data files. type dataError struct { @@ -113,14 +123,16 @@ func readConsts(filename string) (m constmap, err error) { } type charmapinfo struct { - name string - file string - script int - regions []int + name string + filename string + id string + script int + regions []int + data []byte } // readCharmaps reads and parses the charmaps.csv file. -func readCharmaps(filename string, scripts, regions map[string]int) ([]charmapinfo, error) { +func readCharmaps(srcdir, filename string, scripts, regions map[string]int) ([]charmapinfo, error) { fp, err := os.Open(filename) if err != nil { return nil, err @@ -152,9 +164,11 @@ func readCharmaps(filename string, scripts, regions map[string]int) ([]charmapin } index := len(arr) ifo := charmapinfo{ - name: row[0], - file: row[1], + name: row[0], + filename: strings.ToLower(strings.TrimSuffix(row[1], ".TXT")), + id: makeID(row[0]), } + file := row[1] sname := row[2] var e bool ifo.script, e = scripts[sname] @@ -178,15 +192,32 @@ func readCharmaps(filename string, scripts, regions map[string]int) ([]charmapin ifo.regions = append(ifo.regions, rg) case omap != index: line, _ := r.FieldPos(0) - return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previou charmaps: %q", arr[omap].name)} + return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previous charmaps: %q", arr[omap].name)} } } } else { if omap, e := gcharmaps[ifo.script]; e { line, _ := r.FieldPos(0) - return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previou charmaps: %q", arr[omap].name)} + return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previous charmaps: %q", arr[omap].name)} } } + if file != "" { + cm, err := charmap.ReadFile(filepath.Join(srcdir, "charmap", file)) + if err != nil { + return nil, err + } + t, err := table.Create(cm) + if err != nil { + if e, ok := err.(*table.UnsupportedError); ok { + if !flagQuiet { + fmt.Fprintf(os.Stderr, "Warning: unsupported charmap %q: %s\n", file, e.Message) + } + continue + } + return nil, fmt.Errorf("%s: %v", file, err) + } + ifo.data = t.Data() + } arr = append(arr, ifo) } return arr, nil @@ -207,6 +238,6 @@ func readData(srcdir string) (d scriptdata, err error) { if err != nil { return d, err } - d.charmaps, err = readCharmaps(filepath.Join(srcdir, "scripts/charmap.csv"), d.scripts.names, d.regions.names) + d.charmaps, err = readCharmaps(srcdir, filepath.Join(srcdir, "scripts/charmap.csv"), d.scripts.names, d.regions.names) return } diff --git a/gen/filenames.go b/gen/filenames.go deleted file mode 100644 index a3aba0b..0000000 --- a/gen/filenames.go +++ /dev/null @@ -1,27 +0,0 @@ -package main - -import ( - "strconv" -) - -func writeFilenames(charmaps []string, filename string) error { - s, err := createCSource(filename) - if err != nil { - return err - } - - w := s.writer - w.WriteString(header) - s.include("test.h") - w.WriteString("const char *const kCharsetFilename[] = {\n") - for _, fn := range charmaps { - if fn != "" { - w.WriteByte('\t') - w.WriteString(strconv.Quote(fn)) - w.WriteString(",\n") - } - } - w.WriteString("\tNULL\n};\n") - - return s.flush() -} diff --git a/gen/main.go b/gen/main.go index a65ad3b..145a00c 100644 --- a/gen/main.go +++ b/gen/main.go @@ -7,10 +7,6 @@ import ( "io/ioutil" "os" "path/filepath" - "strings" - - "moria.us/macscript/charmap" - "moria.us/macscript/table" ) const ( @@ -57,37 +53,19 @@ func mainE() error { } // Compile and emit charmap data. - cms := make([]string, len(d.charmaps)) var hascmap bool - for i, c := range d.charmaps { - if c.file == "" { - continue - } - cm, err := charmap.ReadFile(filepath.Join(srcdir, "charmap", c.file)) - if err != nil { - return err - } - t, err := table.Create(cm) - if err != nil { - if e, ok := err.(*table.UnsupportedError); ok { - if !flagQuiet { - fmt.Fprintf(os.Stderr, "Warning: unsupported charmap %q: %s\n", c.file, e.Message) - } - continue + for _, c := range d.charmaps { + if len(c.data) != 0 { + name := "charmap_" + c.filename + ".dat" + fpath := filepath.Join(destdir, name) + if !flagQuiet { + fmt.Fprintln(os.Stderr, "Writing:", fpath) } - return fmt.Errorf("%s: %v", c.file, err) + if err := ioutil.WriteFile(fpath, c.data, 0666); err != nil { + return err + } + hascmap = true } - data := t.Data() - name := "charmap_" + strings.ToLower(strings.TrimSuffix(c.file, ".TXT")) + ".dat" - fpath := filepath.Join(destdir, name) - if !flagQuiet { - fmt.Fprintln(os.Stderr, "Writing:", fpath) - } - if err := ioutil.WriteFile(fpath, data, 0666); err != nil { - return err - } - cms[i] = name - hascmap = true } if !hascmap { return errors.New("could not compile any character map") @@ -95,13 +73,16 @@ func mainE() error { // Write generated output. m := genMap(&d) - if err := writeMap(&d, m, filepath.Join(destdir, "charmap.c")); err != nil { + if err := writeMap(&d, m, filepath.Join(destdir, "charmap_region.c")); err != nil { return err } - if err := writeFilenames(cms, filepath.Join(destdir, "charmap_name.c")); err != nil { + if err := writeInfo(&d, filepath.Join(destdir, "charmap_info.c")); err != nil { return err } - if err := writeRez(&d, cms, filepath.Join(destdir, "charmap.r")); err != nil { + if err := writeData(&d, filepath.Join(destdir, "charmap_data.c")); err != nil { + return err + } + if err := writeRez(&d, filepath.Join(destdir, "charmap.r")); err != nil { return err } return nil diff --git a/gen/rez.go b/gen/rez.go index 28537d6..4364151 100644 --- a/gen/rez.go +++ b/gen/rez.go @@ -41,7 +41,7 @@ func constStrings(c *constmap) []string { return r } -func writeRez(d *scriptdata, charmaps []string, filename string) error { +func writeRez(d *scriptdata, filename string) error { if !flagQuiet { fmt.Fprintln(os.Stderr, "Writing:", filename) } @@ -59,9 +59,9 @@ func writeRez(d *scriptdata, charmaps []string, filename string) error { writeStrings(w, `rSTRS_Charmaps, "Character Maps"`, charmapNames(d)) writeStrings(w, `rSTRS_Scripts, "Scripts"`, constStrings(&d.scripts)) writeStrings(w, `rSTRS_Regions, "Regions"`, constStrings(&d.regions)) - for i, cm := range charmaps { - if cm != "" { - fmt.Fprintf(w, "read 'cmap' (%d, %q) %q;\n", 128+i, d.charmaps[i].name, cm) + for i, cm := range d.charmaps { + if cm.filename != "" { + fmt.Fprintf(w, "read 'cmap' (%d, %q) %q;\n", 128+i, cm.name, cm.filename) } } diff --git a/gen/source.go b/gen/source.go index e42f369..81cb494 100644 --- a/gen/source.go +++ b/gen/source.go @@ -3,9 +3,17 @@ package main import ( "bufio" "fmt" + "math" "os" "os/exec" "path" + "strconv" +) + +const ( + width = 80 + formatOff = "/* clang-format off */\n" + formatOn = "/* clang-format on */\n" ) type csource struct { @@ -23,10 +31,12 @@ func createCSource(filename string) (s csource, err error) { if err != nil { return s, err } + w := bufio.NewWriter(fp) + w.WriteString(header) return csource{ filename: filename, file: fp, - writer: bufio.NewWriter(fp), + writer: w, }, nil } @@ -67,3 +77,130 @@ func (s *csource) flush() error { func (s *csource) include(name string) { fmt.Fprintf(s.writer, "#include \"%s\"\n", path.Join(srcdirname, name)) } + +func (s *csource) bytes(data []byte, final bool) { + if len(data) == 0 { + return + } + line := make([]byte, 0, width+8) + for i, x := range data { + cur := line + line = strconv.AppendUint(line, uint64(x), 10) + if i < len(data)-1 || !final { + line = append(line, ',') + } + if len(line) > width-4 { + s.writer.WriteString("\n\t") + s.writer.Write(cur) + nline := line[len(cur):] + copy(line, nline) + line = line[:len(nline)] + } + } + s.writer.WriteString("\n\t") + s.writer.Write(line) +} + +func (s *csource) ints(data []int) { + if len(data) == 0 { + return + } + line := make([]byte, 0, width+16) + for i, x := range data { + cur := line + line = strconv.AppendInt(line, int64(x), 10) + if i < len(data)-1 { + line = append(line, ',') + } + if len(line) > width-4 { + s.writer.WriteString("\n\t") + s.writer.Write(cur) + nline := line[len(cur):] + copy(line, nline) + line = line[:len(nline)] + } + } + s.writer.WriteString("\n\t") + s.writer.Write(line) +} + +func (s *csource) strings(data []string) { + for i, x := range data { + s.writer.WriteString("\n\t\"") + var last byte + for _, c := range []byte(x) { + if 32 <= c && c <= 126 { + if c == '\\' || c == '"' { + s.writer.WriteByte('\\') + } else if '0' <= c && c <= '9' && last == 0 && i == 0 { + s.writer.WriteString("00") + } + s.writer.WriteByte(c) + } else { + var e string + switch c { + case 0: + e = `\0` + case '\t': + e = `\t` + case '\n': + e = `\n` + case '\r': + e = `\r` + } + if e == "" { + fmt.Fprintf(s.writer, "\\x%02x", c) + } else { + s.writer.WriteString(e) + } + } + last = c + } + if i < len(data)-1 { + s.writer.WriteString(`\0`) + } + s.writer.WriteByte('"') + } +} + +func intType(maxval int) string { + if maxval <= math.MaxUint8 { + return "UInt8" + } + if maxval <= math.MaxUint16 { + return "UInt16" + } + return "UInt32" +} + +func arrayIntType(arr []int) string { + var max int + for _, x := range arr { + if x > max { + max = x + } + } + return intType(max) +} + +type stringtable struct { + data []string + offset int + offsets map[string]int +} + +func newStringtable() (s stringtable) { + s.offsets = make(map[string]int) + return +} + +func (t *stringtable) add(s string) int { + if offset, exist := t.offsets[s]; exist { + return offset + } + t.data = append(t.data, s) + offset := t.offset + t.offset += len(s) + 1 + t.offsets[s] = offset + return offset +}