diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..bc20b44 --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,12 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": ["${workspaceFolder}"], + "cStandard": "c89", + "intelliSenseMode": "linux-clang-x64", + "compilerPath": "/usr/bin/clang" + } + ], + "version": 4 +} diff --git a/gen/BUILD.bazel b/gen/BUILD.bazel index c74aa10..c34114e 100644 --- a/gen/BUILD.bazel +++ b/gen/BUILD.bazel @@ -4,9 +4,11 @@ go_binary( name = "macscript", srcs = [ "data.go", + "filenames.go", "main.go", "rez.go", "scriptmap.go", + "source.go", ], visibility = ["//visibility:public"], deps = [ diff --git a/gen/filenames.go b/gen/filenames.go new file mode 100644 index 0000000..3ce2deb --- /dev/null +++ b/gen/filenames.go @@ -0,0 +1,31 @@ +package main + +import "strconv" + +// writeFilenames writes a C source file that defines kCharsetFilename, a +// NULL-terminated array holding the non-empty names in charmaps. +func writeFilenames(charmaps []string, filename string) error { + s, err := createCSource(filename) + if err != nil { + return err + } + // Clean up the partially written file if a later step fails. After a + // successful flush, close does nothing. + defer s.close() + + w := s.writer + w.WriteString(header) + w.WriteString( + "#include \"src/test.h\"\n" + + "const char *const kCharsetFilename[] = {\n") + for _, fn := range charmaps { + if fn != "" { + w.WriteByte('\t') + w.WriteString(strconv.Quote(fn)) + w.WriteString(",\n") + } + } + w.WriteString("\tNULL\n};\n") + + return s.flush() +} diff --git a/gen/main.go b/gen/main.go index 2f7010b..2196355 100644 --- a/gen/main.go +++ b/gen/main.go @@ -16,9 +16,10 @@ import ( const header = "/* This file is automatically generated. 
*/\n" var ( - flagDest string - flagSrc string - flagQuiet bool + flagDest string + flagSrc string + flagQuiet bool + flagFormat bool ) func getSrcdir() (string, error) { @@ -90,6 +91,9 @@ func mainE() error { if err := writeMap(&d, m, filepath.Join(destdir, "charmap.c")); err != nil { return err } + if err := writeFilenames(cms, filepath.Join(destdir, "charmap_name.c")); err != nil { + return err + } if err := writeRez(&d, cms, filepath.Join(destdir, "charmap.r")); err != nil { return err } @@ -100,6 +104,7 @@ func main() { flag.StringVar(&flagDest, "dest", "", "output directory") flag.StringVar(&flagSrc, "src", "", "source directory") flag.BoolVar(&flagQuiet, "quiet", false, "only output error messages") + flag.BoolVar(&flagFormat, "format", true, "run clang-format on C output") flag.Parse() if args := flag.Args(); len(args) != 0 { fmt.Fprintf(os.Stderr, "Error: unexpected argument: %q\n", args[0]) diff --git a/gen/scriptmap.go b/gen/scriptmap.go index a4a85c6..9cdd24f 100644 --- a/gen/scriptmap.go +++ b/gen/scriptmap.go @@ -1,10 +1,7 @@ package main import ( - "bufio" "fmt" - "os" - "os/exec" "sort" ) @@ -73,17 +70,13 @@ func genMap(d *scriptdata) []*scriptmap { // writeMap writes out a C function that returns the correct character map for a // given script and region. 
func writeMap(d *scriptdata, m []*scriptmap, filename string) error { - if !flagQuiet { - fmt.Fprintln(os.Stderr, "Writing:", filename) - } - - fp, err := os.Create(filename) + s, err := createCSource(filename) if err != nil { return err } - defer fp.Close() - w := bufio.NewWriter(fp) + defer s.close() + w := s.writer w.WriteString(header) w.WriteString( "#include \"src/convert.h\"\n" + @@ -120,17 +113,5 @@ func writeMap(d *scriptdata, m []*scriptmap, filename string) error { "}\n" + "}\n") - if err := w.Flush(); err != nil { - return err - } - if err := fp.Close(); err != nil { - return err - } - - cmd := exec.Command("clang-format", "-i", filename) - if err := cmd.Run(); err != nil { - fmt.Fprintln(os.Stderr, "Warning: clang-format failed:", err) - } - - return nil + return s.flush() } diff --git a/gen/source.go b/gen/source.go new file mode 100644 index 0000000..fe66eeb --- /dev/null +++ b/gen/source.go @@ -0,0 +1,71 @@ +package main + +import ( + "bufio" + "fmt" + "os" + "os/exec" +) + +// csource is a generated C source file that is being written. +type csource struct { + filename string + file *os.File + writer *bufio.Writer +} + +// createCSource creates filename for writing and, unless -quiet is set, +// logs the name to stderr. +func createCSource(filename string) (s csource, err error) { + if !flagQuiet { + fmt.Fprintln(os.Stderr, "Writing:", filename) + } + + fp, err := os.Create(filename) + if err != nil { + return s, err + } + return csource{ + filename: filename, + file: fp, + writer: bufio.NewWriter(fp), + }, nil +} + +// close abandons the output: it closes the file if it is still open and +// removes it unless flush already succeeded. It is safe to call after flush. +func (s *csource) close() { + if s.file != nil { + s.file.Close() + s.file = nil + } + if s.filename != "" { + os.Remove(s.filename) + } +} + +// flush writes buffered data, closes the file, and runs clang-format on it +// when -format is set. It panics if the file was already closed. +func (s *csource) flush() error { + if s.file == nil { + panic("already closed") + } + err := s.writer.Flush() + s.writer = nil + if err != nil { + return err + } + err = s.file.Close() + s.file = nil + if err != nil { + return err + } + if flagFormat { + cmd := exec.Command("clang-format", "-i", s.filename) + if err := cmd.Run(); err != nil { + return fmt.Errorf("clang-format %s: %w", s.filename, err) + } + } + s.filename = "" + return nil +} diff --git a/src/BUILD.bazel b/src/BUILD.bazel index 
d1a620e..f4b0b68 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -1,6 +1,20 @@ -load("@rules_cc//cc:defs.bzl", "cc_library") +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") load("//bazel:copts.bzl", "COPTS") +_data = [ + "charmap_roman.dat", + "charmap_turkish.dat", + "charmap_croatian.dat", + "charmap_iceland.dat", + "charmap_romanian.dat", + "charmap_celtic.dat", + "charmap_gaelic.dat", + "charmap_greek.dat", + "charmap_cyrillic.dat", + "charmap_inuit.dat", + "charmap_centeuro.dat", +] + genrule( name = "data", srcs = [ @@ -10,18 +24,8 @@ genrule( outs = [ "charmap.c", "charmap.r", - "charmap_roman.dat", - "charmap_turkish.dat", - "charmap_croatian.dat", - "charmap_iceland.dat", - "charmap_romanian.dat", - "charmap_celtic.dat", - "charmap_gaelic.dat", - "charmap_greek.dat", - "charmap_cyrillic.dat", - "charmap_inuit.dat", - "charmap_centeuro.dat", - ], + "charmap_name.c", + ] + _data, cmd = "$(execpath //gen:macscript) -dest=$(RULEDIR) -src=. -quiet", tools = [ "//gen:macscript", @@ -32,9 +36,26 @@ cc_library( name = "convert", srcs = [ "charmap.c", + "convert.c", "convert.h", + "convert_1f.c", + "convert_1r.c", "defs.h", + "test.h", "toolbox.c", ], copts = COPTS, ) + +cc_test( + name = "convert_test", + srcs = [ + "charmap_name.c", + "convert_test.c", + ], + copts = COPTS, + data = _data, + deps = [ + ":convert", + ], +) diff --git a/src/README.md b/src/README.md new file mode 100644 index 0000000..d8872a4 --- /dev/null +++ b/src/README.md @@ -0,0 +1,10 @@ +# Converter + +## Debugging + +Tests can be debugged with GDB: + +```shell +bazel build -c dbg //src:convert_test +gdb -ex 'dir .' 
-ex 'cd bazel-bin' bazel-bin/src/convert_test +``` diff --git a/src/convert.c b/src/convert.c new file mode 100644 index 0000000..b6aba4d --- /dev/null +++ b/src/convert.c @@ -0,0 +1,38 @@ +#include "src/convert.h" + +struct ConvertEngine { + ConvertBuildf build; + ConvertRunf run; +}; + +const struct ConvertEngine kEngines[][2] = { + {{Convert1fBuild, Convert1fRun}, {Convert1rBuild, Convert1rRun}}}; + +int ConverterBuild(struct Converter *c, Handle data, Size datasz, + ConvertDirection direction, OSErr *errp) +{ + int engine, r; + const struct ConvertEngine *funcs; + Handle out; + + if (datasz == 0) { + return kErrorBadData; + } + engine = (UInt8) * *data - 1; + if (engine < 0 || (int)(sizeof(kEngines) / sizeof(*kEngines)) <= engine) { + /* Invalid engine. */ + return kErrorBadData; + } + funcs = &kEngines[engine][direction]; + if (funcs->build == NULL || funcs->run == NULL) { + /* Invalid engine. */ + return kErrorBadData; + } + r = funcs->build(&out, data, datasz, errp); + if (r != 0) { + return r; + } + c->data = out; + c->run = funcs->run; + return 0; +} diff --git a/src/convert.h b/src/convert.h index b5ada64..7960af9 100644 --- a/src/convert.h +++ b/src/convert.h @@ -2,8 +2,94 @@ #define convert_h /* convert.h - character set conversion routines. */ +#include "src/defs.h" + +/* Error codes. */ +enum +{ + /* No error. */ + kErrorOK, + + /* Memory allocation failed. */ + kErrorNoMemory, + + /* Invalid table data. */ + kErrorBadData +}; + +enum +{ + /* Constants for CR and LF. Note that we should not use '\n' or '\r' + anywhere, because these character constants may have unexpected values on + certain old Mac OS compilers, depending on the compiler settings. In + particular, the values of '\n' and '\r' will be swapped. */ + kCharLF = 10, + kCharCR = 13, + + /* Constant for substitution character: '?'. */ + kCharSubstitute = 63 +}; + +typedef enum +{ + /* Don't translate line breaks. */ + kLineBreakKeep, + + /* Convert line breaks to LF. 
*/ + kLineBreakLF, + + /* Convert line breaks to CR. */ + kLineBreakCR, + + /* Convert line breaks to CR LF. */ + kLineBreakCRLF +} LineBreakConversion; + +/* Directions that the converter runs in. */ +typedef enum +{ + kToUTF8, + kFromUTF8 +} ConvertDirection; + /* Get the character map used for the given Mac OS script and region codes. Return -1 if no known character map exists. */ int GetCharmap(int script, int region); +/* The state of a converter. Must be zeroed prior to first conversion. */ +struct ConverterState { + UInt32 data; +}; + +/* Implementation function for building a converter. */ +typedef int (*ConvertBuildf)(Handle *out, Handle data, Size datasz, + OSErr *errp); + +/* Implementation function for running a converter. */ +typedef void (*ConvertRunf)(const void *cvtptr, LineBreakConversion lc, + struct ConverterState *stateptr, UInt8 **optr, + UInt8 *oend, const UInt8 **iptr, const UInt8 *iend); + +/* A converter. The converter can be freed by disposing the handle. */ +struct Converter { + Handle data; + ConvertRunf run; +}; + +/* Build a converter from the given conversion table data. */ +int ConverterBuild(struct Converter *c, Handle data, Size datasz, + ConvertDirection direction, OSErr *errp); + +/* Engine 1: extended ASCII */ + +int Convert1fBuild(Handle *out, Handle data, Size datasz, OSErr *errp); +void Convert1fRun(const void *cvtptr, LineBreakConversion lc, + struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend, + const UInt8 **iptr, const UInt8 *iend); + +int Convert1rBuild(Handle *out, Handle data, Size datasz, OSErr *errp); +void Convert1rRun(const void *cvtptr, LineBreakConversion lc, + struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend, + const UInt8 **iptr, const UInt8 *iend); + #endif diff --git a/src/convert_1f.c b/src/convert_1f.c new file mode 100644 index 0000000..f987ef5 --- /dev/null +++ b/src/convert_1f.c @@ -0,0 +1,127 @@ +/* convert_1f.c - Forward conversion from extended ASCII to UTF-8. 
*/ +#include "src/convert.h" +#include "src/defs.h" + +struct Convert1fData { + /* Unicode characters, encoded in UTF-8, and packed MSB first. Always either + 2 bytes or 3 bytes. */ + UInt32 chars[128]; +}; + +struct Convert1fState { + UInt8 lastch; +}; + +int Convert1fBuild(Handle *out, Handle data, Size datasz, OSErr *errp) +{ + Handle h; + struct Convert1fData *cvt; + int i, n; + UInt32 uch; + const UInt8 *dptr, *dend; + + h = NewHandle(sizeof(struct Convert1fData)); + if (h == NULL) { + *errp = MemError(); + return kErrorNoMemory; + } + cvt = (void *)*h; + dptr = (void *)*data; + dend = dptr + datasz; /* End of table; datasz includes the engine byte. */ + dptr++; /* Skip the engine byte. */ + for (i = 0; i < 128; i++) { + if (dptr == dend) { + goto bad_table; + } + n = *dptr++; + if (n < 2 || 3 < n) { + goto bad_table; + } + if (dend - dptr < n) { + goto bad_table; + } + uch = 0; + while (n-- > 0) { + uch = (uch << 8) | *dptr++; + } + cvt->chars[i] = uch; + if (dptr == dend) { + goto bad_table; + } + n = *dptr++; + if (dend - dptr < n) { + goto bad_table; + } + dptr += n; + } + *out = h; + return 0; + +bad_table: + DisposeHandle(h); + return kErrorBadData; +} + +void Convert1fRun(const void *cvtptr, LineBreakConversion lc, + struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend, + const UInt8 **iptr, const UInt8 *iend) +{ + const struct Convert1fData *cvt = cvtptr; + struct Convert1fState *state = (struct Convert1fState *)stateptr; + UInt8 *opos = *optr; + const UInt8 *ipos = *iptr; + unsigned ch, lastch; + UInt32 uch; + + ch = state->lastch; + while (ipos < iend && oend - opos >= 3) { + lastch = ch; + ch = *ipos++; + if (ch < 128) { + if (ch == kCharLF || ch == kCharCR) { + /* Line breaks. 
*/ + if (ch == kCharLF && lastch == kCharCR) { + if (lc == kLineBreakKeep) { + *opos++ = ch; + } + } else { + switch (lc) { + case kLineBreakKeep: + *opos++ = ch; + break; + case kLineBreakLF: + *opos++ = kCharLF; + break; + case kLineBreakCR: + *opos++ = kCharCR; + break; + case kLineBreakCRLF: + *opos++ = kCharCR; + *opos++ = kCharLF; + break; + } + } + } else { + /* ASCII characters. */ + *opos++ = ch; + } + } else { + /* Unicode characters. */ + uch = cvt->chars[ch - 128]; + if (uch > 0xffff) { + opos[0] = uch >> 16; + opos[1] = uch >> 8; + opos[2] = uch; + opos += 3; + } else { + opos[0] = uch >> 8; + opos[1] = uch; + opos += 2; + } + } + } + state->lastch = ch; + + *optr = opos; + *iptr = ipos; +} diff --git a/src/convert_1r.c b/src/convert_1r.c new file mode 100644 index 0000000..ba502c6 --- /dev/null +++ b/src/convert_1r.c @@ -0,0 +1,376 @@ +/* convert_1r.c - Reverse conversion from UTF-8 to extended ASCII. */ +#include "src/convert.h" +#include "src/defs.h" + +enum +{ + /* Maximum length of encoded character. */ + kMaxEncodedLength = 8, + + /* Initial number of nodes to allocate when building the tree. */ + kInitialTableAlloc = 8 +}; + +struct TEntry { + /* The output character, or zero if no output. */ + UInt8 output; + /* The next node, or zero if no next node. */ + UInt8 next; +}; + +/* A node for building the converter. */ +struct TNode { + struct TEntry entries[256]; +}; + +struct TTree { + struct TNode **nodes; + int count; +}; + +/* Build an uncompressed tree of TNode from the table data. On success, store + the node handle and node count in tree and return 0. On failure, dispose of + the nodes, and return kErrorBadData, or kErrorNoMemory with *errp set. */ +static int CreateTree(struct TTree *tree, Handle data, Size datasz, OSErr *errp) +{ + struct TNode **nodes, *node; + int i, j, dpos, enclen, encend, state, cur, nodecount, nodealloc; + unsigned ch; + OSErr err; + + /* Create a tree with a root node mapping all the ASCII characters except + NUL, CR, and LF. NUL won't map because an output of 0 is interpreted as + no output. CR and LF are removed so they can be handled specially by the + decoder. 
*/ + nodes = + (struct TNode **)NewHandle(kInitialTableAlloc * sizeof(struct TNode)); + if (nodes == NULL) { + err = MemError(); + goto have_error; + } + nodecount = 1; + nodealloc = kInitialTableAlloc; + node = *nodes; + MemClear(node, sizeof(struct TNode)); + for (i = 0; i < 128; i++) { + node->entries[i].output = i; + } + node->entries[kCharLF].output = 0; + node->entries[kCharCR].output = 0; + + /* Parse the table data and build up a tree of TNode. */ + dpos = 1; + /* For each high character (128..255). */ + for (i = 0; i < 128; i++) { + /* For each encoding of that character. */ + for (j = 0; j < 2; j++) { + if (dpos >= datasz) { + goto bad_table; + } + enclen = (UInt8)(*data)[dpos++]; + if (enclen != 0) { + if (enclen < 2 || enclen > datasz - dpos || + enclen > kMaxEncodedLength) { + goto bad_table; + } + /* Iterate over all but last byte in encoding, to find the node + which will produce the decoded byte as output. */ + state = 0; + node = *nodes; + for (encend = dpos + enclen - 1; dpos < encend; dpos++) { + ch = (UInt8)(*data)[dpos]; + cur = state; + state = node->entries[ch].next; + if (state == 0) { + /* TEntry.next is a UInt8, so node indexes above 255 cannot be + stored. Reject tables that would need more nodes. */ + if (nodecount > 255) { + goto bad_table; + } + if (nodecount >= nodealloc) { + nodealloc *= 2; + SetHandleSize((Handle)nodes, + nodealloc * sizeof(struct TNode)); + err = MemError(); + if (err != 0) { + goto have_error; + } + node = *nodes + cur; + } + state = nodecount++; + node->entries[ch].next = state; + node = (*nodes) + state; + MemClear(node, sizeof(*node)); + } else { + node = *nodes + state; + } + } + ch = (UInt8)(*data)[dpos++]; + if (node->entries[ch].output != 0) { + goto bad_table; + } + node->entries[ch].output = i | 0x80; + } + } + } + SetHandleSize((Handle)nodes, nodecount * sizeof(struct TNode)); + tree->nodes = nodes; + tree->count = nodecount; + return 0; + +bad_table: + DisposeHandle((Handle)nodes); + return kErrorBadData; + +have_error: + DisposeHandle((Handle)nodes); + *errp = err; + return kErrorNoMemory; +} + +struct NodeInfo { + UInt8 min; + UInt8 max; + UInt16 offset; +}; + 
+struct CEntry { + UInt16 output; + UInt16 next; +}; + +/* A compressed table node. Followed by an array of centry. */ +struct CNode { + /* First byte in table. */ + UInt8 base; + /* Number of entries in table, minus one. */ + UInt8 span; +}; + +static int CompactTree(Handle *out, struct TNode **nodes, int nodecount, + OSErr *errp) +{ + Handle ctree; + struct TNode *node; + struct NodeInfo **infos, *info; + struct CNode *cnode; + struct CEntry *centry; + int i, j, min, max, count, next; + unsigned offset; + + /* Figure out where each compacted node will go. */ + infos = (struct NodeInfo **)NewHandle(sizeof(struct NodeInfo) * nodecount); + if (infos == NULL) { + *errp = MemError(); + return kErrorNoMemory; + } + offset = 0; + for (i = 0; i < nodecount; i++) { + node = *nodes + i; + min = 0; + while (node->entries[min].output == 0 && node->entries[min].next == 0) { + min++; + } + max = 255; + while (node->entries[max].output == 0 && node->entries[max].next == 0) { + max--; + } + info = *infos + i; + info->min = min; + info->max = max; + info->offset = offset; + count = max - min + 1; + offset += sizeof(struct CNode) + count * sizeof(struct CEntry); + } + + /* Create the compacted tree. 
*/ + ctree = NewHandle(offset); + if (ctree == NULL) { + *errp = MemError(); + DisposeHandle((Handle)infos); + return kErrorNoMemory; + } + for (i = 0; i < nodecount; i++) { + node = *nodes + i; + info = *infos + i; + min = info->min; + max = info->max; + offset = info->offset; + cnode = (void *)(*ctree + offset); + cnode->base = min; + cnode->span = max - min; + centry = (void *)(*ctree + offset + sizeof(struct CNode)); + for (j = min; j <= max; j++) { + centry->output = node->entries[j].output; + next = node->entries[j].next; + if (next != 0) { + next = (*infos)[next].offset; + } + centry->next = next; + centry++; + } + } + + DisposeHandle((Handle)infos); + *out = ctree; + return 0; +} + +int Convert1rBuild(Handle *out, Handle data, Size datasz, OSErr *errp) +{ + struct TTree table; + int r; + + r = CreateTree(&table, data, datasz, errp); + if (r != 0) { + return r; + } + r = CompactTree(out, table.nodes, table.count, errp); + DisposeHandle((Handle)table.nodes); + return r; +} + +struct Convert1rState { + UInt8 lastch; + UInt8 output; + UInt16 tableoffset; +}; + +void Convert1rRun(const void *cvtptr, LineBreakConversion lc, + struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend, + const UInt8 **iptr, const UInt8 *iend) +{ + struct Convert1rState *state = (struct Convert1rState *)stateptr; + const struct CNode *node; + const struct CEntry *entry; + UInt8 *opos = *optr; + const UInt8 *ipos = *iptr, *savein; + unsigned ch, lastch, chlen, output, saveout, toffset, savetoffset; + + ch = state->lastch; + savein = ipos; + saveout = state->output; + toffset = state->tableoffset; + savetoffset = toffset; + if (oend - opos < 2) { + goto done; + } + goto resume; + +next_out: + if (oend - opos < 2) { + goto done; + } + + /* Follow state machine to the end. 
*/ + savein = ipos; + saveout = 0; + toffset = 0; + savetoffset = 0; +resume: + for (;;) { + if (ipos >= iend) { + goto done; + } + lastch = ch; + ch = *ipos++; + + node = (const void *)((const UInt8 *)cvtptr + toffset); + ch -= node->base; + if (ch > node->span) { + toffset = 0; + goto bad_char; + } + entry = + (const void *)((const UInt8 *)cvtptr + toffset + + sizeof(struct CNode) + ch * sizeof(struct CEntry)); + output = entry->output; + toffset = entry->next; + if (toffset == 0) { + /* Reached end of tree. */ + if (output == 0) { + goto bad_char; + } + *opos++ = output; + goto next_out; + } + if (output != 0) { + /* Can produce output here, or can consume more input. We try + consuming more input, but save the state to rewind if that + fails. */ + savein = ipos; + saveout = output; + savetoffset = toffset; + } + } + +bad_char: + /* Bad character. Back up and try again. */ + ipos = savein; + if (saveout != 0) { + /* Produce saved output. */ + *opos++ = saveout; + ch = 0; + } else { + /* No saved output, this really is a bad character. Consume one + UTF-8 character, emit it as a fallback, and continue. */ + ch = *ipos++; + if ((ch & 0x80) == 0) { + /* ASCII character: either NUL, CR, or LF, because only + these + characters will result in a transition to state 0. 
*/ + if (ch == 0) { + *opos++ = ch; + } else if (ch == kCharLF && lastch == kCharCR) { + if (lc == kLineBreakKeep) { + *opos++ = ch; + } + } else { + switch (lc) { + case kLineBreakKeep: + *opos++ = ch; + break; + case kLineBreakLF: + *opos++ = kCharLF; + break; + case kLineBreakCR: + *opos++ = kCharCR; + break; + case kLineBreakCRLF: + *opos++ = kCharCR; + *opos++ = kCharLF; + break; + } + } + } else { + if ((ch & 0xe0) == 0xc0) { + chlen = 1; + } else if ((ch & 0xf0) == 0xe0) { + chlen = 2; + } else if ((ch & 0xf8) == 0xf0) { + chlen = 3; + } else { + chlen = 0; + } + for (; chlen > 0; chlen--) { + if (ipos == iend) { + goto done; + } + ch = *ipos; + if ((ch & 0xc0) != 0x80) { + break; + } + ipos++; + } + *opos++ = kCharSubstitute; + } + } + goto next_out; + +done: + state->lastch = ch; + state->output = saveout; + state->tableoffset = savetoffset; + *optr = opos; + *iptr = savein; +} diff --git a/src/convert_test.c b/src/convert_test.c new file mode 100644 index 0000000..e4e1dff --- /dev/null +++ b/src/convert_test.c @@ -0,0 +1,321 @@ +/* Converter test. */ +#define _XOPEN_SOURCE 500 + +#include "src/convert.h" +#include "src/test.h" + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +enum +{ + kInitialBufSize = 4 * 1024, + kConvertBufferSize = 1024 +}; + +static int gFailCount; +static char gTestName[128]; + +static void Failf(const char *msg, ...) __attribute__((format(printf, 1, 2))); + +static void Failf(const char *msg, ...) 
+{ + va_list ap; + + gFailCount++; + fputs("Error: ", stderr); + fputs(gTestName, stderr); + fputs(": ", stderr); + va_start(ap, msg); + vfprintf(stderr, msg, ap); + va_end(ap); + fputc('\n', stderr); + if (gFailCount >= 10) { + exit(1); + } +} + +static const char *const kErrorNames[] = {"ok", "no memory", "bad data"}; + +static const char *ErrorName(int err) +{ + if (err < 0 || (int)(sizeof(kErrorNames) / sizeof(*kErrorNames)) <= err) { + Dief("bad error code: %d", err); + } + return kErrorNames[err]; +} + +static void StringPrintf(char *dest, size_t destsz, const char *fmt, ...) + __attribute__((format(printf, 3, 4))); + +static void StringPrintf(char *dest, size_t destsz, const char *fmt, ...) +{ + va_list ap; + int n; + + va_start(ap, fmt); + n = vsnprintf(dest, destsz, fmt, ap); + va_end(ap); + + if (n < 0 || n >= (int)destsz) { + Dief("snprintf: overflow"); + } +} + +/* Read a file in its entirety. */ +static void ReadFile(const char *filename, void **datap, size_t *sizep) +{ + char fnbuf[128]; + FILE *fp = NULL; + char *buf = NULL, *newbuf; + size_t size, alloc, newalloc, amt; + int err; + + StringPrintf(fnbuf, sizeof(fnbuf), "src/%s", filename); + + fp = fopen(fnbuf, "rb"); + if (fp == NULL) { + err = errno; + goto error; + } + buf = malloc(kInitialBufSize); + if (buf == NULL) { + err = errno; + goto error; + } + size = 0; + alloc = kInitialBufSize; + for (;;) { + if (size >= alloc) { + newalloc = alloc * 2; + newbuf = realloc(buf, newalloc); + if (newbuf == NULL) { + err = errno; + goto error; + } + alloc = newalloc; + buf = newbuf; + } + amt = fread(buf + size, 1, alloc - size, fp); + if (amt == 0) { + if (feof(fp)) { + break; + } + err = errno; + goto error; + } + size += amt; + } + fclose(fp); + *datap = buf; + *sizep = size; + return; + +error: + if (fp != NULL) { + fclose(fp); + } + if (buf != NULL) { + free(buf); + } + DieErrorf(err, "read %s", filename); +} + +static UInt8 *gBuffer[3]; + +static void PrintQuotedString(const UInt8 *buf, int len) +{ 
+ int i, c; + + fputc('"', stderr); + for (i = 0; i < len; i++) { + c = buf[i]; + if (32 <= c && c <= 126) { + if (c == '\\' || c == '"') { + fputc('\\', stderr); + } + fputc(c, stderr); + } else { + fprintf(stderr, "\\x%02x", c); + } + } + fputc('"', stderr); +} + +static void Check(int len0, int len1, int len2) +{ + int i, n, col, diffcol, c1, c2; + + if (len0 == len2 && memcmp(gBuffer[0], gBuffer[2], len2) == 0) { + return; + } + Failf("incorrect output"); + n = len0; + if (n > len2) { + n = len2; + } + diffcol = -1; + col = 0; + for (i = 0; i < n; i++) { + c1 = gBuffer[0][i]; + c2 = gBuffer[2][i]; + if (c1 != c2) { + diffcol = col; + break; + } + if (32 <= c1 && c1 <= 126) { + col++; + if (c1 == '\\' || c1 == '"') { + col++; + } + } else { + col += 4; + } + } + fputs("Input: ", stderr); + PrintQuotedString(gBuffer[1], len1); + fputc('\n', stderr); + fputs("Expect: ", stderr); + PrintQuotedString(gBuffer[0], len0); + fputc('\n', stderr); + fputs("Output: ", stderr); + PrintQuotedString(gBuffer[2], len2); + fputc('\n', stderr); + if (diffcol >= 0) { + for (i = 0; i < diffcol + 9; i++) { + fputc(' ', stderr); + } + fputc('^', stderr); + } + fputc('\n', stderr); +} + +static void TestConverter(const char *filename) +{ + void *data; + size_t datasz; + Ptr datap; + Handle datah; + struct Converter cf, cr; + struct ConverterState st; + int r, i, j, jmax, len0, len1, len2; + OSErr err; + UInt8 *ptr; + const UInt8 *iptr, *iend; + UInt8 *optr, *oend; + + data = NULL; + cf.data = NULL; + cr.data = NULL; + + StringPrintf(gTestName, sizeof(gTestName), "%s", filename); + + /* Load the converter into memory and build the conversion table. 
*/ + ReadFile(filename, &data, &datasz); + datap = data; + datah = &datap; + r = ConverterBuild(&cf, datah, datasz, kToUTF8, &err); + if (r != 0) { + Failf("ConverterBuild: %s (to UTF-8): %s", filename, ErrorName(r)); + goto done; + } + r = ConverterBuild(&cr, datah, datasz, kFromUTF8, &err); + if (r != 0) { + Failf("ConverterBuild: %s (from UTF-8): %s", filename, ErrorName(r)); + goto done; + } + + /* Create sample data to convert: 0-255, followed by 0. */ + len0 = 257; + ptr = gBuffer[0]; + for (i = 0; i < 256; i++) { + ptr[i] = i; + } + ptr[256] = 0; + + /* Convert sample data. */ + iptr = gBuffer[0]; + iend = iptr + 257; + optr = gBuffer[1]; + oend = optr + kConvertBufferSize; + st.data = 0; + cf.run(*cf.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend); + if (iptr != iend) { + Failf("some data failed to convert"); + goto done; + } + len1 = optr - gBuffer[1]; + + /* Convert back, in three calls. The middle call will be to a 1-4 byte slice + in the middle. */ + for (i = 1; i < len1 - 2; i++) { + jmax = len1 - i; + if (jmax > 4) { + jmax = 4; + } + for (j = 1; j <= jmax; j++) { + StringPrintf(gTestName, sizeof(gTestName), "%s reverse i=%d j=%d", + filename, i, j); + st.data = 0; + iptr = gBuffer[1]; + optr = gBuffer[2]; + oend = optr + kConvertBufferSize; + iend = gBuffer[1] + i; + cr.run(*cr.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend); + iend = gBuffer[1] + i + j; + cr.run(*cr.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend); + iend = gBuffer[1] + len1; + cr.run(*cr.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend); + if (iptr != iend) { + Failf("some data failed to convert"); + continue; + } + len2 = optr - gBuffer[2]; + Check(len0, len1, len2); + } + } + +done: + free(data); + if (cf.data != NULL) { + DisposeHandle(cf.data); + } + if (cr.data != NULL) { + DisposeHandle(cr.data); + } +} + +int main(int argc, char **argv) +{ + void *buf; + const char *filename; + int i; + + (void)argc; + (void)argv; + + for (i = 0; i < 3; i++) { + buf = 
malloc(kConvertBufferSize); + if (buf == NULL) { + DieErrorf(errno, "malloc"); + } + gBuffer[i] = buf; + } + + for (i = 0;; i++) { + filename = kCharsetFilename[i]; + if (filename == NULL) { + break; + } + TestConverter(filename); + } + + for (i = 0; i < 3; i++) { + free(gBuffer[i]); + } + + return gFailCount == 0 ? 0 : 1; +} diff --git a/src/test.h b/src/test.h new file mode 100644 index 0000000..46aeaaa --- /dev/null +++ b/src/test.h @@ -0,0 +1,17 @@ +#ifndef test_h +#define test_h +/* test.h - unit testing definitions. */ + +#include "src/defs.h" + +/* List of all data files, terminated by NULL. */ +extern const char *const kCharsetFilename[]; + +/* Print an error message and exit. */ +void Dief(const char *msg, ...) __attribute__((noreturn, format(printf, 1, 2))); + +/* Print an error message with an error code and exit. */ +void DieErrorf(int errcode, const char *msg, ...) + __attribute__((noreturn, format(printf, 2, 3))); + +#endif diff --git a/src/toolbox.c b/src/toolbox.c index 4a505bb..68658a3 100644 --- a/src/toolbox.c +++ b/src/toolbox.c @@ -3,17 +3,16 @@ This is used to run conversion tests on non-Mac OS systems to make development easier. These are not intended to make it possible to port the converter to non-Mac OS systems. */ -#include "defs.h" +#include "src/defs.h" +#include "src/test.h" #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -static void Dief(const char *msg, ...) - __attribute__((noreturn, format(printf, 1, 2))); - -static void Dief(const char *msg, ...) { +void Dief(const char *msg, ...) +{ va_list ap; fputs("Error: ", stderr); va_start(ap, msg); @@ -23,7 +22,21 @@ static void Dief(const char *msg, ...) { exit(1); } -Handle NewHandle(Size byteCount) { +void DieErrorf(int errcode, const char *msg, ...) 
+{ + va_list ap; + fputs("Error: ", stderr); + va_start(ap, msg); + vfprintf(stderr, msg, ap); + va_end(ap); + fputs(": ", stderr); + fputs(strerror(errcode), stderr); + fputc('\n', stderr); + exit(1); +} + +Handle NewHandle(Size byteCount) +{ Ptr p; Handle h; @@ -42,22 +55,26 @@ Handle NewHandle(Size byteCount) { return h; } -void HLock(Handle h) { +void HLock(Handle h) +{ (void)h; } -void HUnlock(Handle h) { +void HUnlock(Handle h) +{ (void)h; } -void DisposeHandle(Handle h) { +void DisposeHandle(Handle h) +{ if (h != NULL) { free(*h); free(h); } } -void SetHandleSize(Handle h, Size newSize) { +void SetHandleSize(Handle h, Size newSize) +{ Ptr p; if (h == NULL) { Dief("SetHandleSize: h = NULL"); @@ -69,11 +86,13 @@ void SetHandleSize(Handle h, Size newSize) { *h = p; } -OSErr MemError(void) { +OSErr MemError(void) +{ /* Memory allocation failures abort the program. */ return 0; } -void MemClear(void *ptr, Size size) { +void MemClear(void *ptr, Size size) +{ memset(ptr, 0, size); }