diff --git a/convert/BUILD.bazel b/convert/BUILD.bazel
index f4b0b68..ee554f1 100644
--- a/convert/BUILD.bazel
+++ b/convert/BUILD.bazel
@@ -1,20 +1,6 @@
 load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
 load("//bazel:copts.bzl", "COPTS")
 
-_data = [
-    "charmap_roman.dat",
-    "charmap_turkish.dat",
-    "charmap_croatian.dat",
-    "charmap_iceland.dat",
-    "charmap_romanian.dat",
-    "charmap_celtic.dat",
-    "charmap_gaelic.dat",
-    "charmap_greek.dat",
-    "charmap_cyrillic.dat",
-    "charmap_inuit.dat",
-    "charmap_centeuro.dat",
-]
-
 genrule(
     name = "data",
     srcs = [
@@ -22,11 +8,12 @@ genrule(
         "//scripts:data",
     ],
     outs = [
-        "charmap.c",
+        "charmap_data.c",
+        "charmap_info.c",
+        "charmap_region.c",
         "charmap.r",
-        "charmap_name.c",
-    ] + _data,
-    cmd = "$(execpath //gen:macscript) -dest=$(RULEDIR) -src=. -quiet",
+    ],
+    cmd = "$(execpath //gen:macscript) -dest=$(RULEDIR) -src=. -quiet -format=false",
     tools = [
         "//gen:macscript",
     ],
@@ -35,14 +22,19 @@ genrule(
 cc_library(
     name = "convert",
     srcs = [
-        "charmap.c",
+        "charmap_data.c",
+        "charmap_info.c",
+        "charmap_region.c",
         "convert.c",
-        "convert.h",
         "convert_1f.c",
         "convert_1r.c",
+        "toolbox.c",
+    ],
+    hdrs = [
+        "convert.h",
+        "data.h",
         "defs.h",
         "test.h",
-        "toolbox.c",
     ],
     copts = COPTS,
 )
@@ -50,11 +42,9 @@ cc_library(
 cc_test(
     name = "convert_test",
     srcs = [
-        "charmap_name.c",
         "convert_test.c",
     ],
     copts = COPTS,
-    data = _data,
     deps = [
         ":convert",
     ],
diff --git a/convert/convert_test.c b/convert/convert_test.c
index e54785f..4e6fa4d 100644
--- a/convert/convert_test.c
+++ b/convert/convert_test.c
@@ -2,6 +2,7 @@
 #define _XOPEN_SOURCE 500
 
 #include "convert/convert.h"
+#include "convert/data.h"
 #include "convert/test.h"
 
 #include
@@ -65,65 +66,6 @@ static void StringPrintf(char *dest, size_t destsz, const char *fmt, ...)
     }
 }
 
-/* Read a file in its entirety. */
-static void ReadFile(const char *filename, void **datap, size_t *sizep)
-{
-    char fnbuf[128];
-    FILE *fp = NULL;
-    char *buf = NULL, *newbuf;
-    size_t size, alloc, newalloc, amt;
-    int err;
-
-    StringPrintf(fnbuf, sizeof(fnbuf), "convert/%s", filename);
-
-    fp = fopen(fnbuf, "rb");
-    if (fp == NULL) {
-        err = errno;
-        goto error;
-    }
-    buf = malloc(kInitialBufSize);
-    if (buf == NULL) {
-        err = errno;
-        goto error;
-    }
-    size = 0;
-    alloc = kInitialBufSize;
-    for (;;) {
-        if (size >= alloc) {
-            newalloc = alloc * 2;
-            newbuf = realloc(buf, newalloc);
-            if (newbuf == NULL) {
-                err = errno;
-                goto error;
-            }
-            alloc = newalloc;
-            buf = newbuf;
-        }
-        amt = fread(buf + size, 1, alloc - size, fp);
-        if (amt == 0) {
-            if (feof(fp)) {
-                break;
-            }
-            err = errno;
-            goto error;
-        }
-        size += amt;
-    }
-    fclose(fp);
-    *datap = buf;
-    *sizep = size;
-    return;
-
-error:
-    if (fp != NULL) {
-        fclose(fp);
-    }
-    if (buf != NULL) {
-        free(buf);
-    }
-    DieErrorf(err, "read %s", filename);
-}
-
 static UInt8 *gBuffer[3];
 
 static void PrintQuotedString(const UInt8 *buf, int len)
@@ -203,10 +145,8 @@ static const char *const kLineBreakData[4] = {
 
 static const char *const kLineBreakName[4] = {"keep", "LF", "CR", "CRLF"};
 
-static void TestConverter(const char *filename)
+static void TestConverter(const char *name, struct CharmapData data)
 {
-    void *data;
-    size_t datasz;
     Ptr datap;
     Handle datah;
     struct Converter cf, cr, cc;
@@ -218,24 +158,22 @@ static void TestConverter(const char *filename)
     UInt8 *optr, *oend;
     int lblen[4];
 
-    data = NULL;
     cf.data = NULL;
     cr.data = NULL;
-    StringPrintf(gTestName, sizeof(gTestName), "%s", filename);
+    StringPrintf(gTestName, sizeof(gTestName), "%s", name);
 
     /* Load the converter into memory and build the conversion table. */
-    ReadFile(filename, &data, &datasz);
-    datap = data;
+    datap = (void *)data.ptr;
     datah = &datap;
-    r = ConverterBuild(&cf, datah, datasz, kToUTF8, &err);
+    r = ConverterBuild(&cf, datah, data.size, kToUTF8, &err);
     if (r != 0) {
-        Failf("ConverterBuild: %s (to UTF-8): %s", filename, ErrorName(r));
+        Failf("ConverterBuild: to UTF-8: %s", ErrorName(r));
         goto done;
     }
 
-    r = ConverterBuild(&cr, datah, datasz, kFromUTF8, &err);
+    r = ConverterBuild(&cr, datah, data.size, kFromUTF8, &err);
     if (r != 0) {
-        Failf("ConverterBuild: %s (from UTF-8): %s", filename, ErrorName(r));
+        Failf("ConverterBuild: from UTF-8: %s", ErrorName(r));
         goto done;
     }
 
@@ -269,7 +207,7 @@ static void TestConverter(const char *filename)
         }
         for (j = 1; j <= jmax; j++) {
             StringPrintf(gTestName, sizeof(gTestName), "%s reverse i=%d j=%d",
-                         filename, i, j);
+                         name, i, j);
             st.data = 0;
             iptr = gBuffer[1];
             optr = gBuffer[2];
@@ -300,7 +238,7 @@ static void TestConverter(const char *filename)
             len0 = lblen[i]; /* Expected output */
             for (j = 1; j < len1; j++) {
                 StringPrintf(gTestName, sizeof(gTestName),
-                             "%s %s linebreak %s split=%d", filename,
+                             "%s %s linebreak %s split=%d", name,
                              k == 0 ? "forward" : "backward",
                              kLineBreakName[i], j);
                 st.data = 0;
@@ -323,7 +261,6 @@ static void TestConverter(const char *filename)
     }
 
 done:
-    free(data);
     if (cf.data != NULL) {
         DisposeHandle(cf.data);
     }
@@ -335,7 +272,8 @@ done:
 int main(int argc, char **argv)
 {
     void *buf;
-    const char *filename;
+    struct CharmapData data;
+    const char *name;
     int i;
 
     (void)argc;
@@ -350,16 +288,24 @@ int main(int argc, char **argv)
     }
 
     for (i = 0;; i++) {
-        filename = kCharsetFilename[i];
-        if (filename == NULL) {
+        name = CharmapName(i);
+        if (name == NULL) {
             break;
         }
-        TestConverter(filename);
+        data = CharmapData(i);
+        if (data.ptr != NULL) {
+            TestConverter(name, data);
+        }
     }
 
     for (i = 0; i < 3; i++) {
         free(gBuffer[i]);
     }
 
-    return gFailCount == 0 ? 0 : 1;
+    if (gFailCount > 0) {
+        fputs("failed\n", stderr);
+        return 1;
+    }
+    fputs("ok\n", stderr);
+    return 0;
 }
diff --git a/convert/data.h b/convert/data.h
new file mode 100644
index 0000000..5dd2320
--- /dev/null
+++ b/convert/data.h
@@ -0,0 +1,25 @@
+#ifndef data_h
+#define data_h
+/* data.h - charmap data, not used for classic Mac OS builds */
+#include "convert/defs.h"
+
+/* Get the ID of the given character map. Return NULL if no such character map
+   exists. */
+const char *CharmapID(int cmap);
+
+/* Get the human-readable name of the given character map. Return NULL if no
+   such character map exists. */
+const char *CharmapName(int cmap);
+
+/* Conversion table data. */
+struct CharmapData {
+    const UInt8 *ptr;
+    Size size;
+};
+
+/* Get the conversion table data for the given charmap. Returns an empty buffer
+   with a NULL pointer if the character map does not exist or if no conversion
+   table exists for that character map. */
+struct CharmapData CharmapData(int cmap);
+
+#endif
diff --git a/convert/test.h b/convert/test.h
index f04fbf6..1c8d4cc 100644
--- a/convert/test.h
+++ b/convert/test.h
@@ -4,9 +4,6 @@
 
 #include "convert/defs.h"
 
-/* List of all data files, terminated by NULL. */
-extern const char *const kCharsetFilename[];
-
 /* Print an error message and exit. */
 void Dief(const char *msg, ...)
__attribute__((noreturn, format(printf, 1, 2)));
diff --git a/gen/BUILD.bazel b/gen/BUILD.bazel
index c34114e..ffbe110 100644
--- a/gen/BUILD.bazel
+++ b/gen/BUILD.bazel
@@ -3,8 +3,8 @@ load("@io_bazel_rules_go//go:def.bzl", "go_binary")
 go_binary(
     name = "macscript",
     srcs = [
+        "cdata.go",
         "data.go",
-        "filenames.go",
         "main.go",
         "rez.go",
         "scriptmap.go",
diff --git a/gen/cdata.go b/gen/cdata.go
new file mode 100644
index 0000000..e224b8e
--- /dev/null
+++ b/gen/cdata.go
@@ -0,0 +1,119 @@
+package main
+
+import "fmt"
+
+const strlookup = `const char *%s(int cmap)
+{
+	if (cmap < 0 || CHARMAP_COUNT <= cmap) {
+		return 0;
+	}
+	return kCharmapText + %s[cmap];
+}
+`
+
+const datalookup = `struct CharmapData CharmapData(int cmap) {
+	struct CharmapData data;
+	UInt32 off0, off1;
+	data.ptr = 0;
+	data.size = 0;
+	if (cmap < 0 || CHARMAP_COUNT <= cmap) {
+		return data;
+	}
+	off0 = kCharmapOffset[cmap];
+	off1 = kCharmapOffset[cmap+1];
+	if (off0 == off1) {
+		return data;
+	}
+	data.ptr = kCharmapData + off0;
+	data.size = off1 - off0;
+	return data;
+}
+`
+
+// writeInfo writes the C source defining kCharmapText, the kCharmapIDs and kCharmapNames offset tables, and the CharmapID and CharmapName lookups.
+func writeInfo(d *scriptdata, filename string) error {
+	strs := newStringtable()
+	ids := make([]int, len(d.charmaps))
+	names := make([]int, len(d.charmaps))
+	for i, cm := range d.charmaps {
+		ids[i] = strs.add(cm.id)
+		names[i] = strs.add(cm.name)
+	}
+
+	s, err := createCSource(filename)
+	if err != nil {
+		return err
+	}
+
+	w := s.writer
+	s.include("data.h")
+
+	w.WriteString(formatOff)
+
+	fmt.Fprintf(w, "#define CHARMAP_COUNT %d\n", len(d.charmaps))
+
+	fmt.Fprintf(w, "static const char kCharmapText[] =")
+	s.strings(strs.data)
+	w.WriteString(";\n")
+
+	fmt.Fprintf(w, "static const %s kCharmapIDs[CHARMAP_COUNT] = {", arrayIntType(ids))
+	s.ints(ids)
+	w.WriteString("\n};\n")
+
+	fmt.Fprintf(w, "static const %s kCharmapNames[CHARMAP_COUNT] = {", arrayIntType(names))
+	s.ints(names)
+	w.WriteString("\n};\n")
+
+	w.WriteString(formatOn)
+
+	fmt.Fprintf(w, strlookup, "CharmapID", "kCharmapIDs")
+	fmt.Fprintf(w, strlookup, "CharmapName", "kCharmapNames")
+
+	return s.flush()
+}
+
+// writeData writes the C source defining the kCharmapOffset table, the concatenated kCharmapData bytes, and the CharmapData lookup.
+func writeData(d *scriptdata, filename string) error {
+	offsets := make([]int, len(d.charmaps)+1)
+	var offset, last int
+	for i, cm := range d.charmaps {
+		offsets[i] = offset
+		offset += len(cm.data)
+		if len(cm.data) != 0 {
+			last = i
+		}
+	}
+	offsets[len(offsets)-1] = offset
+
+	s, err := createCSource(filename)
+	if err != nil {
+		return err
+	}
+
+	w := s.writer
+	w.WriteString(formatOff)
+	s.include("data.h")
+	fmt.Fprintf(w, "#define CHARMAP_COUNT %d\n", len(d.charmaps))
+
+	fmt.Fprintf(w, "static const %s kCharmapOffset[CHARMAP_COUNT + 1] = {", arrayIntType(offsets))
+	s.ints(offsets)
+	w.WriteString("\n};\n")
+
+	w.WriteString("static const UInt8 kCharmapData[] = {")
+	for i, cm := range d.charmaps {
+		if len(cm.data) != 0 {
+			fmt.Fprintf(w, "\n\t/* %s */", cm.name)
+			s.bytes(cm.data, i == last)
+			if i != last {
+				w.WriteByte('\n')
+			}
+		}
+	}
+	w.WriteString("\n};\n")
+
+	w.WriteString(formatOn)
+
+	w.WriteString(datalookup)
+
+	return s.flush()
+}
diff --git a/gen/data.go b/gen/data.go
index fcbf05c..117bfa2 100644
--- a/gen/data.go
+++ b/gen/data.go
@@ -10,9 +10,19 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+
+	"moria.us/macscript/charmap"
+	"moria.us/macscript/table"
 )
 
-var isIdent = regexp.MustCompile("^[a-zA-Z][_a-zA-Z0-9]*$")
+var (
+	isIdent      = regexp.MustCompile("^[a-zA-Z][_a-zA-Z0-9]*$")
+	nonIdentPart = regexp.MustCompile("[^a-zA-Z0-9]+")
+)
+
+func makeID(name string) string {
+	return nonIdentPart.ReplaceAllLiteralString(name, "")
+}
 
 // A dataError indicates an error in the contents of one of the data files.
 type dataError struct {
@@ -113,14 +123,16 @@ func readConsts(filename string) (m constmap, err error) {
 }
 
 type charmapinfo struct {
-	name    string
-	file    string
-	script  int
-	regions []int
+	name     string
+	filename string
+	id       string
+	script   int
+	regions  []int
+	data     []byte
 }
 
 // readCharmaps reads and parses the charmaps.csv file.
-func readCharmaps(filename string, scripts, regions map[string]int) ([]charmapinfo, error) {
+func readCharmaps(srcdir, filename string, scripts, regions map[string]int) ([]charmapinfo, error) {
 	fp, err := os.Open(filename)
 	if err != nil {
 		return nil, err
@@ -152,9 +164,11 @@ func readCharmaps(filename string, scripts, regions map[string]int) ([]charmapin
 		}
 		index := len(arr)
 		ifo := charmapinfo{
-			name: row[0],
-			file: row[1],
+			name:     row[0],
+			filename: strings.ToLower(strings.TrimSuffix(row[1], ".TXT")),
+			id:       makeID(row[0]),
 		}
+		file := row[1]
 		sname := row[2]
 		var e bool
 		ifo.script, e = scripts[sname]
@@ -178,15 +192,32 @@ func readCharmaps(filename string, scripts, regions map[string]int) ([]charmapin
 					ifo.regions = append(ifo.regions, rg)
 				case omap != index:
 					line, _ := r.FieldPos(0)
-					return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previou charmaps: %q", arr[omap].name)}
+					return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previous charmaps: %q", arr[omap].name)}
 				}
 			}
 		} else {
 			if omap, e := gcharmaps[ifo.script]; e {
 				line, _ := r.FieldPos(0)
-				return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previou charmaps: %q", arr[omap].name)}
+				return nil, &dataError{filename, line, 0, fmt.Errorf("charmap conflicts with previous charmaps: %q", arr[omap].name)}
 			}
 		}
+		if file != "" {
+			cm, err := charmap.ReadFile(filepath.Join(srcdir, "charmap", file))
+			if err != nil {
+				return nil, err
+			}
+			t, err := table.Create(cm)
+			if e, ok := err.(*table.UnsupportedError); ok {
+				// Keep the entry, without data, so that index still equals its position in arr.
+				if !flagQuiet {
+					fmt.Fprintf(os.Stderr, "Warning: unsupported charmap %q: %s\n", file, e.Message)
+				}
+			} else if err != nil {
+				return nil, fmt.Errorf("%s: %v", file, err)
+			} else {
+				ifo.data = t.Data()
+			}
+		}
 		arr = append(arr, ifo)
 	}
 	return arr, nil
@@ -207,6 +238,6 @@ func readData(srcdir string) (d scriptdata, err error) {
 	if err != nil {
 		return d, err
 	}
-	d.charmaps, err = readCharmaps(filepath.Join(srcdir, "scripts/charmap.csv"), d.scripts.names, d.regions.names)
+	d.charmaps, err = readCharmaps(srcdir, filepath.Join(srcdir, "scripts/charmap.csv"), d.scripts.names, d.regions.names)
 	return
 }
diff --git a/gen/filenames.go b/gen/filenames.go
deleted file mode 100644
index a3aba0b..0000000
--- a/gen/filenames.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package main
-
-import (
-	"strconv"
-)
-
-func writeFilenames(charmaps []string, filename string) error {
-	s, err := createCSource(filename)
-	if err != nil {
-		return err
-	}
-
-	w := s.writer
-	w.WriteString(header)
-	s.include("test.h")
-	w.WriteString("const char *const kCharsetFilename[] = {\n")
-	for _, fn := range charmaps {
-		if fn != "" {
-			w.WriteByte('\t')
-			w.WriteString(strconv.Quote(fn))
-			w.WriteString(",\n")
-		}
-	}
-	w.WriteString("\tNULL\n};\n")
-
-	return s.flush()
-}
diff --git a/gen/main.go b/gen/main.go
index a65ad3b..145a00c 100644
--- a/gen/main.go
+++ b/gen/main.go
@@ -7,10 +7,6 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
-	"strings"
-
-	"moria.us/macscript/charmap"
-	"moria.us/macscript/table"
 )
 
 const (
@@ -57,37 +53,19 @@ func mainE() error {
 	}
 
 	// Compile and emit charmap data.
-	cms := make([]string, len(d.charmaps))
 	var hascmap bool
-	for i, c := range d.charmaps {
-		if c.file == "" {
-			continue
-		}
-		cm, err := charmap.ReadFile(filepath.Join(srcdir, "charmap", c.file))
-		if err != nil {
-			return err
-		}
-		t, err := table.Create(cm)
-		if err != nil {
-			if e, ok := err.(*table.UnsupportedError); ok {
-				if !flagQuiet {
-					fmt.Fprintf(os.Stderr, "Warning: unsupported charmap %q: %s\n", c.file, e.Message)
-				}
-				continue
+	for _, c := range d.charmaps {
+		if len(c.data) != 0 {
+			name := "charmap_" + c.filename + ".dat"
+			fpath := filepath.Join(destdir, name)
+			if !flagQuiet {
+				fmt.Fprintln(os.Stderr, "Writing:", fpath)
 			}
-			return fmt.Errorf("%s: %v", c.file, err)
+			if err := ioutil.WriteFile(fpath, c.data, 0666); err != nil {
+				return err
+			}
+			hascmap = true
 		}
-		data := t.Data()
-		name := "charmap_" + strings.ToLower(strings.TrimSuffix(c.file, ".TXT")) + ".dat"
-		fpath := filepath.Join(destdir, name)
-		if !flagQuiet {
-			fmt.Fprintln(os.Stderr, "Writing:", fpath)
-		}
-		if err := ioutil.WriteFile(fpath, data, 0666); err != nil {
-			return err
-		}
-		cms[i] = name
-		hascmap = true
 	}
 	if !hascmap {
 		return errors.New("could not compile any character map")
@@ -95,13 +73,16 @@ func mainE() error {
 
 	// Write generated output.
 	m := genMap(&d)
-	if err := writeMap(&d, m, filepath.Join(destdir, "charmap.c")); err != nil {
+	if err := writeMap(&d, m, filepath.Join(destdir, "charmap_region.c")); err != nil {
 		return err
 	}
-	if err := writeFilenames(cms, filepath.Join(destdir, "charmap_name.c")); err != nil {
+	if err := writeInfo(&d, filepath.Join(destdir, "charmap_info.c")); err != nil {
 		return err
 	}
-	if err := writeRez(&d, cms, filepath.Join(destdir, "charmap.r")); err != nil {
+	if err := writeData(&d, filepath.Join(destdir, "charmap_data.c")); err != nil {
+		return err
+	}
+	if err := writeRez(&d, filepath.Join(destdir, "charmap.r")); err != nil {
 		return err
 	}
 	return nil
diff --git a/gen/rez.go b/gen/rez.go
index 28537d6..4364151 100644
--- a/gen/rez.go
+++ b/gen/rez.go
@@ -41,7 +41,7 @@ func constStrings(c *constmap) []string {
 	return r
 }
 
-func writeRez(d *scriptdata, charmaps []string, filename string) error {
+func writeRez(d *scriptdata, filename string) error {
 	if !flagQuiet {
 		fmt.Fprintln(os.Stderr, "Writing:", filename)
 	}
@@ -59,9 +59,10 @@ func writeRez(d *scriptdata, charmaps []string, filename string) error {
 	writeStrings(w, `rSTRS_Charmaps, "Character Maps"`, charmapNames(d))
 	writeStrings(w, `rSTRS_Scripts, "Scripts"`, constStrings(&d.scripts))
 	writeStrings(w, `rSTRS_Regions, "Regions"`, constStrings(&d.regions))
-	for i, cm := range charmaps {
-		if cm != "" {
-			fmt.Fprintf(w, "read 'cmap' (%d, %q) %q;\n", 128+i, d.charmaps[i].name, cm)
+	for i, cm := range d.charmaps {
+		// Reference the same "charmap_<filename>.dat" file that mainE writes, and only for charmaps that have data.
+		if len(cm.data) != 0 {
+			fmt.Fprintf(w, "read 'cmap' (%d, %q) %q;\n", 128+i, cm.name, "charmap_"+cm.filename+".dat")
 		}
 	}
 
diff --git a/gen/source.go b/gen/source.go
index e42f369..81cb494 100644
--- a/gen/source.go
+++ b/gen/source.go
@@ -3,9 +3,17 @@ package main
 import (
 	"bufio"
 	"fmt"
+	"math"
 	"os"
 	"os/exec"
 	"path"
+	"strconv"
+)
+
+const (
+	width     = 80
+	formatOff = "/* clang-format off */\n"
+	formatOn  = "/* clang-format on */\n"
 )
 
 type csource struct {
@@ -23,10 +31,12 @@ func createCSource(filename string) (s csource, err
error) {
 	if err != nil {
 		return s, err
 	}
+	w := bufio.NewWriter(fp)
+	w.WriteString(header)
 	return csource{
 		filename: filename,
 		file:     fp,
-		writer:   bufio.NewWriter(fp),
+		writer:   w,
 	}, nil
 }
 
@@ -67,3 +77,134 @@ func (s *csource) flush() error {
 func (s *csource) include(name string) {
 	fmt.Fprintf(s.writer, "#include \"%s\"\n", path.Join(srcdirname, name))
 }
+
+func (s *csource) bytes(data []byte, final bool) {
+	if len(data) == 0 {
+		return
+	}
+	line := make([]byte, 0, width+8)
+	for i, x := range data {
+		cur := line
+		line = strconv.AppendUint(line, uint64(x), 10)
+		if i < len(data)-1 || !final {
+			line = append(line, ',')
+		}
+		if len(line) > width-4 {
+			s.writer.WriteString("\n\t")
+			s.writer.Write(cur)
+			nline := line[len(cur):]
+			copy(line, nline)
+			line = line[:len(nline)]
+		}
+	}
+	s.writer.WriteString("\n\t")
+	s.writer.Write(line)
+}
+
+func (s *csource) ints(data []int) {
+	if len(data) == 0 {
+		return
+	}
+	line := make([]byte, 0, width+16)
+	for i, x := range data {
+		cur := line
+		line = strconv.AppendInt(line, int64(x), 10)
+		if i < len(data)-1 {
+			line = append(line, ',')
+		}
+		if len(line) > width-4 {
+			s.writer.WriteString("\n\t")
+			s.writer.Write(cur)
+			nline := line[len(cur):]
+			copy(line, nline)
+			line = line[:len(nline)]
+		}
+	}
+	s.writer.WriteString("\n\t")
+	s.writer.Write(line)
+}
+
+// strings writes each string as a C string literal on its own line, appending
+// a \0 escape to every string except the last.
+func (s *csource) strings(data []string) {
+	for i, x := range data {
+		s.writer.WriteString("\n\t\"")
+		afterNUL := false
+		for _, c := range []byte(x) {
+			if 32 <= c && c <= 126 {
+				if c == '\\' || c == '"' {
+					s.writer.WriteByte('\\')
+				} else if '0' <= c && c <= '9' && afterNUL {
+					// A digit right after \0 would extend the octal escape; pad it to \000.
+					s.writer.WriteString("00")
+				}
+				s.writer.WriteByte(c)
+			} else {
+				var e string
+				switch c {
+				case 0:
+					e = `\0`
+				case '\t':
+					e = `\t`
+				case '\n':
+					e = `\n`
+				case '\r':
+					e = `\r`
+				}
+				if e == "" {
+					// Three-digit octal escapes cannot be extended by following text, unlike \x escapes.
+					fmt.Fprintf(s.writer, "\\%03o", c)
+				} else {
+					s.writer.WriteString(e)
+				}
+			}
+			afterNUL = c == 0
+		}
+		if i < len(data)-1 {
+			s.writer.WriteString(`\0`)
+		}
+		s.writer.WriteByte('"')
+	}
+}
+
+func intType(maxval int) string {
+	if maxval <= math.MaxUint8 {
+		return "UInt8"
+	}
+	if maxval <= math.MaxUint16 {
+		return "UInt16"
+	}
+	return "UInt32"
+}
+
+func arrayIntType(arr []int) string {
+	var max int
+	for _, x := range arr {
+		if x > max {
+			max = x
+		}
+	}
+	return intType(max)
+}
+
+type stringtable struct {
+	data    []string
+	offset  int
+	offsets map[string]int
+}
+
+func newStringtable() (s stringtable) {
+	s.offsets = make(map[string]int)
+	return
+}
+
+func (t *stringtable) add(s string) int {
+	if offset, exist := t.offsets[s]; exist {
+		return offset
+	}
+	t.data = append(t.data, s)
+	offset := t.offset
+	t.offset += len(s) + 1
+	t.offsets[s] = offset
+	return offset
+}