Implement extended ASCII converter engine

This adds support for the simplest 8-bit character encodings, which are
compatible with ASCII.
This commit is contained in:
Dietrich Epp 2022-03-23 01:26:25 -04:00
parent 612aad382f
commit c96bb9cd0a
15 changed files with 1148 additions and 51 deletions

12
.vscode/c_cpp_properties.json vendored Normal file
View File

@ -0,0 +1,12 @@
{
"configurations": [
{
"name": "Linux",
"includePath": ["${workspaceFolder}"],
"cStandard": "c89",
"intelliSenseMode": "linux-clang-x64",
"compilerPath": "/usr/bin/clang"
}
],
"version": 4
}

View File

@ -4,9 +4,11 @@ go_binary(
name = "macscript",
srcs = [
"data.go",
"filenames.go",
"main.go",
"rez.go",
"scriptmap.go",
"source.go",
],
visibility = ["//visibility:public"],
deps = [

26
gen/filenames.go Normal file
View File

@ -0,0 +1,26 @@
package main
import "strconv"
// writeFilenames writes a C source file to filename that defines
// kCharsetFilename: an array containing every non-empty entry of charmaps as
// a quoted string, terminated by NULL.
func writeFilenames(charmaps []string, filename string) error {
	s, err := createCSource(filename)
	if err != nil {
		return err
	}
	// If flush fails, close the file and remove the partial output, matching
	// writeMap. After a successful flush this is a no-op.
	defer s.close()

	// Errors from the individual writes are not checked here; bufio.Writer
	// remembers the first write error and flush reports it.
	w := s.writer
	w.WriteString(header)
	w.WriteString(
		"#include \"src/test.h\"\n" +
			"const char *const kCharsetFilename[] = {\n")
	for _, fn := range charmaps {
		// Empty entries are skipped.
		if fn != "" {
			w.WriteByte('\t')
			w.WriteString(strconv.Quote(fn))
			w.WriteString(",\n")
		}
	}
	w.WriteString("\tNULL\n};\n")
	return s.flush()
}

View File

@ -19,6 +19,7 @@ var (
flagDest string
flagSrc string
flagQuiet bool
flagFormat bool
)
func getSrcdir() (string, error) {
@ -90,6 +91,9 @@ func mainE() error {
if err := writeMap(&d, m, filepath.Join(destdir, "charmap.c")); err != nil {
return err
}
if err := writeFilenames(cms, filepath.Join(destdir, "charmap_name.c")); err != nil {
return err
}
if err := writeRez(&d, cms, filepath.Join(destdir, "charmap.r")); err != nil {
return err
}
@ -100,6 +104,7 @@ func main() {
flag.StringVar(&flagDest, "dest", "", "output directory")
flag.StringVar(&flagSrc, "src", "", "source directory")
flag.BoolVar(&flagQuiet, "quiet", false, "only output error messages")
flag.BoolVar(&flagFormat, "format", true, "run clang-format on C output")
flag.Parse()
if args := flag.Args(); len(args) != 0 {
fmt.Fprintf(os.Stderr, "Error: unexpected argument: %q\n", args[0])

View File

@ -1,10 +1,7 @@
package main
import (
"bufio"
"fmt"
"os"
"os/exec"
"sort"
)
@ -73,17 +70,13 @@ func genMap(d *scriptdata) []*scriptmap {
// writeMap writes out a C function that returns the correct character map for a
// given script and region.
func writeMap(d *scriptdata, m []*scriptmap, filename string) error {
if !flagQuiet {
fmt.Fprintln(os.Stderr, "Writing:", filename)
}
fp, err := os.Create(filename)
s, err := createCSource(filename)
if err != nil {
return err
}
defer fp.Close()
w := bufio.NewWriter(fp)
defer s.close()
w := s.writer
w.WriteString(header)
w.WriteString(
"#include \"src/convert.h\"\n" +
@ -120,17 +113,5 @@ func writeMap(d *scriptdata, m []*scriptmap, filename string) error {
"}\n" +
"}\n")
if err := w.Flush(); err != nil {
return err
}
if err := fp.Close(); err != nil {
return err
}
cmd := exec.Command("clang-format", "-i", filename)
if err := cmd.Run(); err != nil {
fmt.Fprintln(os.Stderr, "Warning: clang-format failed:", err)
}
return nil
return s.flush()
}

64
gen/source.go Normal file
View File

@ -0,0 +1,64 @@
package main
import (
"bufio"
"fmt"
"os"
"os/exec"
)
// csource is a C source file that is in the process of being generated.
// Create it with createCSource, write through writer, and finish with flush.
// close discards an output that was not successfully flushed.
type csource struct {
	// filename is the path of the output file. flush clears it on success.
	filename string
	// file is the open output file, or nil once it has been closed.
	file *os.File
	// writer buffers writes to file. flush sets it to nil.
	writer *bufio.Writer
}
// createCSource creates (or truncates) the file at filename and returns a
// csource whose writer buffers output to it. Unless the quiet flag is set, the
// name of the file is announced on standard error first.
func createCSource(filename string) (s csource, err error) {
	if !flagQuiet {
		fmt.Fprintln(os.Stderr, "Writing:", filename)
	}
	out, err := os.Create(filename)
	if err != nil {
		return csource{}, err
	}
	s.filename = filename
	s.file = out
	s.writer = bufio.NewWriter(out)
	return s, nil
}
// close abandons the output: it closes the file if it is still open and
// removes the file from disk if flush has not completed successfully. It is
// meant to be deferred; after a successful flush, or a previous close, it
// does nothing.
func (s *csource) close() {
	if s.file != nil {
		// Best-effort cleanup on an error path; the error that led here is
		// the one worth reporting, so the Close error is dropped.
		s.file.Close()
		s.file = nil
	}
	if s.filename != "" {
		os.Remove(s.filename)
		// Forget the name so that a second close cannot remove the path again.
		s.filename = ""
	}
}
// flush finishes the output: it flushes the buffered data, closes the file,
// and, if the format flag is set, runs clang-format on the file in place. On
// success the csource is marked complete so that a later close is a no-op. On
// failure the filename is kept so that close can still remove the file.
//
// flush panics if the file has already been closed.
func (s *csource) flush() error {
	if s.file == nil {
		panic("already closed")
	}
	err := s.writer.Flush()
	s.writer = nil
	if err != nil {
		// Do not leak the descriptor when the caller has no deferred close.
		// The flush error is the one reported.
		s.file.Close()
		s.file = nil
		return err
	}
	err = s.file.Close()
	s.file = nil
	if err != nil {
		return err
	}
	if flagFormat {
		cmd := exec.Command("clang-format", "-i", s.filename)
		if err := cmd.Run(); err != nil {
			return fmt.Errorf("clang-format %q: %w", s.filename, err)
		}
	}
	s.filename = ""
	return nil
}

View File

@ -1,15 +1,7 @@
load("@rules_cc//cc:defs.bzl", "cc_library")
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load("//bazel:copts.bzl", "COPTS")
genrule(
name = "data",
srcs = [
"//charmap:data",
"//scripts:data",
],
outs = [
"charmap.c",
"charmap.r",
_data = [
"charmap_roman.dat",
"charmap_turkish.dat",
"charmap_croatian.dat",
@ -21,7 +13,19 @@ genrule(
"charmap_cyrillic.dat",
"charmap_inuit.dat",
"charmap_centeuro.dat",
]
genrule(
name = "data",
srcs = [
"//charmap:data",
"//scripts:data",
],
outs = [
"charmap.c",
"charmap.r",
"charmap_name.c",
] + _data,
cmd = "$(execpath //gen:macscript) -dest=$(RULEDIR) -src=. -quiet",
tools = [
"//gen:macscript",
@ -32,9 +36,26 @@ cc_library(
name = "convert",
srcs = [
"charmap.c",
"convert.c",
"convert.h",
"convert_1f.c",
"convert_1r.c",
"defs.h",
"test.h",
"toolbox.c",
],
copts = COPTS,
)
# Test for the converter library. The generated list of charmap file names is
# compiled in, and the charmap data files themselves are provided as runtime
# data.
cc_test(
    name = "convert_test",
    srcs = [
        "charmap_name.c",
        "convert_test.c",
    ],
    copts = COPTS,
    data = _data,
    deps = [
        ":convert",
    ],
)

10
src/README.md Normal file
View File

@ -0,0 +1,10 @@
# Converter
## Debugging
Tests can be debugged with GDB:
```shell
bazel build -c dbg //src:convert_test
gdb -ex 'dir .' -ex 'cd bazel-bin' bazel-bin/src/convert_test
```

38
src/convert.c Normal file
View File

@ -0,0 +1,38 @@
#include "src/convert.h"
/* The pair of functions implementing one conversion engine in one
   direction. */
struct ConvertEngine {
    /* Builds the converter data from the table data. */
    ConvertBuildf build;
    /* Runs a conversion using the data produced by build. */
    ConvertRunf run;
};
/* Table of all engines. The first index is the engine number from the table
   data minus one; the second index is the ConvertDirection. */
const struct ConvertEngine kEngines[][2] = {
    {{Convert1fBuild, Convert1fRun}, {Convert1rBuild, Convert1rRun}}};
/* Build a converter from conversion table data. The first byte of the table
   selects the engine, and direction selects which half of that engine to use.
   On success, fills in *c and returns 0; otherwise returns an error code and
   leaves *c untouched. */
int ConverterBuild(struct Converter *c, Handle data, Size datasz,
                   ConvertDirection direction, OSErr *errp)
{
    const struct ConvertEngine *impl;
    Handle built;
    int index, status;

    /* The engine number lives in the first byte, so the table cannot be
       empty. */
    if (datasz == 0) {
        return kErrorBadData;
    }
    /* Engine numbers in the table start at 1. */
    index = (UInt8) * *data - 1;
    if (index < 0 || index >= (int)(sizeof(kEngines) / sizeof(*kEngines))) {
        /* Invalid engine. */
        return kErrorBadData;
    }
    impl = &kEngines[index][direction];
    if (impl->build == NULL || impl->run == NULL) {
        /* Invalid engine. */
        return kErrorBadData;
    }
    status = impl->build(&built, data, datasz, errp);
    if (status == 0) {
        c->data = built;
        c->run = impl->run;
    }
    return status;
}

View File

@ -2,8 +2,94 @@
#define convert_h
/* convert.h - character set conversion routines. */
#include "src/defs.h"
/* Error codes returned when building a converter. kErrorOK is zero. */
enum
{
    /* No error. */
    kErrorOK,
    /* Memory allocation failed. */
    kErrorNoMemory,
    /* Invalid table data. */
    kErrorBadData
};
/* Character constants, given as numeric ASCII values. */
enum
{
    /* Constants for CR and LF. Note that we should not use '\n' or '\r'
       anywhere, because these character constants may have unexpected values
       on certain old Mac OS compilers, depending on the compiler settings. In
       particular, the values of '\n' and '\r' may be swapped. */
    kCharLF = 10,
    kCharCR = 13,
    /* Constant for substitution character: '?'. */
    kCharSubstitute = 63
};
/* How line breaks are rewritten during conversion. */
typedef enum
{
    /* Don't translate line breaks. */
    kLineBreakKeep,
    /* Convert line breaks to LF. */
    kLineBreakLF,
    /* Convert line breaks to CR. */
    kLineBreakCR,
    /* Convert line breaks to CR LF. */
    kLineBreakCRLF
} LineBreakConversion;
/* Directions that the converter runs in. The values are used as array indexes
   when selecting an engine implementation. */
typedef enum
{
    /* Convert to UTF-8. */
    kToUTF8,
    /* Convert from UTF-8. */
    kFromUTF8
} ConvertDirection;
/* Get the character map used for the given Mac OS script and region codes.
Return -1 if no known character map exists. */
int GetCharmap(int script, int region);
/* The state of a converter. Must be zeroed prior to first conversion. The
   engines cast a pointer to this structure to a pointer to their own private
   state structure, so the contents are opaque to callers. */
struct ConverterState {
    UInt32 data;
};
/* Implementation function for building a converter. Reads datasz bytes of
   table data from data. On success, stores a newly allocated handle holding
   the converter data in *out and returns 0. On failure, returns a nonzero
   error code; for memory errors, the Memory Manager error is stored in
   *errp. */
typedef int (*ConvertBuildf)(Handle *out, Handle data, Size datasz,
                             OSErr *errp);
/* Implementation function for running a converter. Reads input starting at
   *iptr and stopping before iend, and writes output starting at *optr and
   stopping before oend. On return, *iptr and *optr are updated to the
   positions reached. cvtptr points to the converter data created by the
   matching build function, lc selects how line breaks are rewritten, and
   stateptr carries state between calls. */
typedef void (*ConvertRunf)(const void *cvtptr, LineBreakConversion lc,
                            struct ConverterState *stateptr, UInt8 **optr,
                            UInt8 *oend, const UInt8 **iptr, const UInt8 *iend);
/* A converter. The converter can be freed by disposing the handle. */
struct Converter {
    /* Converter data created by the engine's build function. */
    Handle data;
    /* Function which runs the conversion using the contents of data. */
    ConvertRunf run;
};
/* Build a converter from the given conversion table data. */
int ConverterBuild(struct Converter *c, Handle data, Size datasz,
ConvertDirection direction, OSErr *errp);
/* Engine 1: extended ASCII */
int Convert1fBuild(Handle *out, Handle data, Size datasz, OSErr *errp);
void Convert1fRun(const void *cvtptr, LineBreakConversion lc,
struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend,
const UInt8 **iptr, const UInt8 *iend);
int Convert1rBuild(Handle *out, Handle data, Size datasz, OSErr *errp);
void Convert1rRun(const void *cvtptr, LineBreakConversion lc,
struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend,
const UInt8 **iptr, const UInt8 *iend);
#endif

127
src/convert_1f.c Normal file
View File

@ -0,0 +1,127 @@
/* convert_1f.c - Forward conversion from extended ASCII to UTF-8. */
#include "src/convert.h"
#include "src/defs.h"
/* Converter data for forward conversion, created by Convert1fBuild. */
struct Convert1fData {
    /* Unicode characters, encoded in UTF-8, and packed MSB first. Always either
       2 bytes or 3 bytes. Indexed by the input byte minus 128. */
    UInt32 chars[128];
};
/* Conversion state for forward conversion. Convert1fRun accesses the caller's
   struct ConverterState through a pointer to this type. */
struct Convert1fState {
    /* The last input byte consumed, used to recognize CR LF pairs that are
       split across calls. */
    UInt8 lastch;
};
/* Build the forward (extended ASCII to UTF-8) converter data from table data.

   The table is one engine byte followed by 128 records, one for each high
   character 128..255. Each record is a length byte (2 or 3), that many bytes
   of UTF-8, and then a second length-prefixed encoding which is skipped here.

   On success, stores a new handle containing a struct Convert1fData in *out
   and returns 0. Returns kErrorNoMemory (with the Memory Manager error in
   *errp) if allocation fails, or kErrorBadData if the table is malformed or
   truncated. */
int Convert1fBuild(Handle *out, Handle data, Size datasz, OSErr *errp)
{
    Handle h;
    struct Convert1fData *cvt;
    int i, n;
    UInt32 uch;
    const UInt8 *dptr, *dend;

    /* The table must contain at least the engine byte. */
    if (datasz < 1) {
        return kErrorBadData;
    }
    h = NewHandle(sizeof(struct Convert1fData));
    if (h == NULL) {
        *errp = MemError();
        return kErrorNoMemory;
    }
    cvt = (void *)*h;
    dptr = (void *)*data;
    /* Compute the end from the start of the table, before skipping the engine
       byte. Computing it afterwards would place the end one byte past the
       table and let the bounds checks below read beyond it. */
    dend = dptr + datasz;
    dptr++;
    for (i = 0; i < 128; i++) {
        /* Primary encoding: packed into one table entry. */
        if (dptr == dend) {
            goto bad_table;
        }
        n = *dptr++;
        if (n < 2 || 3 < n) {
            goto bad_table;
        }
        if (dend - dptr < n) {
            goto bad_table;
        }
        uch = 0;
        while (n-- > 0) {
            uch = (uch << 8) | *dptr++;
        }
        cvt->chars[i] = uch;
        /* Second encoding: not needed in this direction, skip over it. */
        if (dptr == dend) {
            goto bad_table;
        }
        n = *dptr++;
        if (dend - dptr < n) {
            goto bad_table;
        }
        dptr += n;
    }
    *out = h;
    return 0;

bad_table:
    DisposeHandle(h);
    return kErrorBadData;
}
/* Convert extended ASCII to UTF-8. Consumes input from *iptr up to iend and
   writes output from *optr up to oend, stopping when the input is exhausted
   or fewer than three bytes of output space remain. *iptr and *optr are
   advanced past the data consumed and produced. The last input byte is kept
   in the state so that a CR LF pair split across two calls is still
   recognized. */
void Convert1fRun(const void *cvtptr, LineBreakConversion lc,
                  struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend,
                  const UInt8 **iptr, const UInt8 *iend)
{
    const struct Convert1fData *table = cvtptr;
    struct Convert1fState *st = (struct Convert1fState *)stateptr;
    const UInt8 *in = *iptr;
    UInt8 *out = *optr;
    unsigned cur, prev;
    UInt32 packed;

    cur = st->lastch;
    /* One input byte produces at most three output bytes. */
    while (in < iend && oend - out >= 3) {
        prev = cur;
        cur = *in++;

        if (cur >= 128) {
            /* High characters: emit the packed UTF-8 sequence, MSB first. */
            packed = table->chars[cur - 128];
            if (packed > 0xffff) {
                *out++ = packed >> 16;
            }
            *out++ = packed >> 8;
            *out++ = packed;
            continue;
        }

        if (cur != kCharLF && cur != kCharCR) {
            /* Plain ASCII passes through unchanged. */
            *out++ = cur;
            continue;
        }

        if (cur == kCharLF && prev == kCharCR) {
            /* Second half of a CR LF pair. The CR already produced the
               converted line break, so the LF is only kept when line breaks
               are not being converted. */
            if (lc == kLineBreakKeep) {
                *out++ = cur;
            }
            continue;
        }

        /* Start of a line break. */
        switch (lc) {
        case kLineBreakKeep:
            *out++ = cur;
            break;
        case kLineBreakLF:
            *out++ = kCharLF;
            break;
        case kLineBreakCR:
            *out++ = kCharCR;
            break;
        case kLineBreakCRLF:
            *out++ = kCharCR;
            *out++ = kCharLF;
            break;
        }
    }

    st->lastch = cur;
    *optr = out;
    *iptr = in;
}

368
src/convert_1r.c Normal file
View File

@ -0,0 +1,368 @@
/* convert_1r.c - Reverse conversion from UTF-8 to extended ASCII. */
#include "src/convert.h"
#include "src/defs.h"
/* Limits used when building the reverse conversion tree. */
enum
{
    /* Maximum length of encoded character, in bytes. Longer encodings in the
       table data are rejected. */
    kMaxEncodedLength = 8,
    /* Initial number of nodes to allocate when building the tree. */
    kInitialTableAlloc = 8
};
/* One entry in a tree node, describing what happens for one input byte. */
struct TEntry {
    /* The output character, or zero if no output. */
    UInt8 output;
    /* The next node, or zero if no next node. This is an index into the
       tree's array of nodes. */
    UInt8 next;
};
/* A node for building the converter. Contains one entry for each possible
   input byte value. */
struct TNode {
    struct TEntry entries[256];
};
/* The uncompressed tree produced by CreateTree. */
struct TTree {
    /* Handle to the array of nodes. The first node is the root. */
    struct TNode **nodes;
    /* Number of nodes in the array. */
    int count;
};
/* Parse the table data and build an uncompressed tree which maps UTF-8 byte
   sequences to extended ASCII characters. Each node has one entry per input
   byte; following the "next" links byte by byte leads to an entry whose
   "output" is the decoded character.

   On success, stores the tree in *tree and returns 0; the caller owns the
   node handle. Returns kErrorBadData if the table is malformed, or
   kErrorNoMemory (with the Memory Manager error in *errp) if allocation
   fails. */
static int CreateTree(struct TTree *tree, Handle data, Size datasz, OSErr *errp)
{
    struct TNode **nodes, *node;
    int i, j, dpos, enclen, encend, state, cur, nodecount, nodealloc;
    unsigned ch;
    OSErr err;

    /* Create a tree with a root node mapping all the ASCII characters except
       NUL, CR, and LF. NUL won't map because an output of 0 is interpreted as
       no output. CR and LF are removed so they can be handled specially by the
       decoder. */
    nodes =
        (struct TNode **)NewHandle(kInitialTableAlloc * sizeof(struct TNode));
    if (nodes == NULL) {
        /* There is no handle to dispose of yet, so return directly. */
        *errp = MemError();
        return kErrorNoMemory;
    }
    nodecount = 1;
    nodealloc = kInitialTableAlloc;
    node = *nodes;
    MemClear(node, sizeof(struct TNode));
    for (i = 0; i < 128; i++) {
        node->entries[i].output = i;
    }
    node->entries[kCharLF].output = 0;
    node->entries[kCharCR].output = 0;

    /* Parse the table data and build up a tree of TNode. Position 0 holds the
       engine number, so parsing starts at position 1. */
    dpos = 1;
    /* For each high character (128..255). */
    for (i = 0; i < 128; i++) {
        /* For each encoding of that character. */
        for (j = 0; j < 2; j++) {
            if (dpos >= datasz) {
                goto bad_table;
            }
            enclen = (UInt8)(*data)[dpos++];
            /* A length of zero means that this encoding is absent. */
            if (enclen != 0) {
                if (enclen < 2 || enclen > datasz - dpos ||
                    enclen > kMaxEncodedLength) {
                    goto bad_table;
                }
                /* Iterate over all but last byte in encoding, to find the node
                   which will produce the decoded byte as output. */
                state = 0;
                node = *nodes;
                for (encend = dpos + enclen - 1; dpos < encend; dpos++) {
                    ch = (UInt8)(*data)[dpos];
                    cur = state;
                    state = node->entries[ch].next;
                    if (state == 0) {
                        /* A new node is needed. Its index is stored in an
                           8-bit field, so it must not exceed 255; reject the
                           table rather than silently truncating the index. */
                        if (nodecount > 255) {
                            goto bad_table;
                        }
                        if (nodecount >= nodealloc) {
                            nodealloc *= 2;
                            SetHandleSize((Handle)nodes,
                                          nodealloc * sizeof(struct TNode));
                            err = MemError();
                            if (err != 0) {
                                goto have_error;
                            }
                            /* Resizing may move the block, so recompute the
                               pointer to the current node. */
                            node = *nodes + cur;
                        }
                        state = nodecount++;
                        node->entries[ch].next = state;
                        node = (*nodes) + state;
                        MemClear(node, sizeof(*node));
                    } else {
                        node = *nodes + state;
                    }
                }
                /* The last byte produces the output. Two encodings may not
                   decode to different characters. */
                ch = (UInt8)(*data)[dpos++];
                if (node->entries[ch].output != 0) {
                    goto bad_table;
                }
                node->entries[ch].output = i | 0x80;
            }
        }
    }

    /* Trim the allocation to the nodes actually used. */
    SetHandleSize((Handle)nodes, nodecount * sizeof(struct TNode));
    tree->nodes = nodes;
    tree->count = nodecount;
    return 0;

bad_table:
    DisposeHandle((Handle)nodes);
    return kErrorBadData;

have_error:
    DisposeHandle((Handle)nodes);
    *errp = err;
    return kErrorNoMemory;
}
/* Layout information for one node, computed by CompactTree before the
   compacted tree is written. */
struct NodeInfo {
    /* Index of the first non-empty entry in the node. */
    UInt8 min;
    /* Index of the last non-empty entry in the node. */
    UInt8 max;
    /* Byte offset of the compacted node within the compacted tree. */
    UInt16 offset;
};
/* An entry in a compressed table node. */
struct CEntry {
    /* The output character, or zero if no output. */
    UInt16 output;
    /* Byte offset of the next node within the compacted tree, or zero if
       there is no next node. */
    UInt16 next;
};
/* A compressed table node. Followed by an array of CEntry, one for each byte
   value in the range base..base+span. */
struct CNode {
    /* First byte in table. */
    UInt8 base;
    /* Number of entries in table, minus one. */
    UInt8 span;
};
/* Convert the tree built by CreateTree into its compact form. Each compacted
   node is a CNode header followed by CEntry records covering only the range
   between the node's first and last non-empty entries. Links between nodes are
   stored as byte offsets from the start of the compacted tree.

   Every node is expected to contain at least one non-empty entry. CreateTree
   guarantees this: the root holds the ASCII characters, and every other node
   is created while walking an encoding and then receives an entry.

   On success, stores a new handle containing the compacted tree in *out and
   returns 0. Returns kErrorNoMemory (with the Memory Manager error in *errp)
   if allocation fails, or kErrorBadData if the tree is too large for node
   offsets to fit in 16 bits. The input nodes are not disposed. */
static int CompactTree(Handle *out, struct TNode **nodes, int nodecount,
                       OSErr *errp)
{
    Handle ctree;
    struct TNode *node;
    struct NodeInfo **infos, *info;
    struct CNode *cnode;
    struct CEntry *centry;
    int i, j, min, max, count, next;
    unsigned offset;

    /* Figure out where each compacted node will go. */
    infos = (struct NodeInfo **)NewHandle(sizeof(struct NodeInfo) * nodecount);
    if (infos == NULL) {
        *errp = MemError();
        return kErrorNoMemory;
    }
    offset = 0;
    for (i = 0; i < nodecount; i++) {
        /* Node offsets are stored in 16-bit fields. Reject a tree which does
           not fit instead of silently truncating the offset. */
        if (offset > 0xffff) {
            DisposeHandle((Handle)infos);
            return kErrorBadData;
        }
        node = *nodes + i;
        /* Find the first and last non-empty entries. */
        min = 0;
        while (node->entries[min].output == 0 && node->entries[min].next == 0) {
            min++;
        }
        max = 255;
        while (node->entries[max].output == 0 && node->entries[max].next == 0) {
            max--;
        }
        info = *infos + i;
        info->min = min;
        info->max = max;
        info->offset = offset;
        count = max - min + 1;
        offset += sizeof(struct CNode) + count * sizeof(struct CEntry);
    }

    /* Create the compacted tree. After the loop above, offset is the total
       size. */
    ctree = NewHandle(offset);
    if (ctree == NULL) {
        *errp = MemError();
        DisposeHandle((Handle)infos);
        return kErrorNoMemory;
    }
    for (i = 0; i < nodecount; i++) {
        node = *nodes + i;
        info = *infos + i;
        min = info->min;
        max = info->max;
        offset = info->offset;
        cnode = (void *)(*ctree + offset);
        cnode->base = min;
        cnode->span = max - min;
        centry = (void *)(*ctree + offset + sizeof(struct CNode));
        for (j = min; j <= max; j++) {
            centry->output = node->entries[j].output;
            /* Translate the node index into the byte offset of the compacted
               node. Zero still means that there is no next node. */
            next = node->entries[j].next;
            if (next != 0) {
                next = (*infos)[next].offset;
            }
            centry->next = next;
            centry++;
        }
    }
    DisposeHandle((Handle)infos);
    *out = ctree;
    return 0;
}
/* Build the reverse (UTF-8 to extended ASCII) converter data from table data.
   The table is first parsed into an uncompressed tree, which is then
   compacted into the final form stored in *out. Returns 0 on success, or the
   error code from whichever step failed. */
int Convert1rBuild(Handle *out, Handle data, Size datasz, OSErr *errp)
{
    struct TTree tree;
    int status;

    status = CreateTree(&tree, data, datasz, errp);
    if (status == 0) {
        status = CompactTree(out, tree.nodes, tree.count, errp);
        /* The uncompressed tree is only needed while compacting. */
        DisposeHandle((Handle)tree.nodes);
    }
    return status;
}
/* Conversion state for reverse conversion. Convert1rRun accesses the caller's
   struct ConverterState through a pointer to this type. */
struct Convert1rState {
    /* The last input byte read. */
    UInt8 lastch;
    /* Saved output character which can be emitted if the longer match fails,
       or zero if there is none. */
    UInt8 output;
    /* Offset of the table node to resume from. */
    UInt16 tableoffset;
};
/* Convert UTF-8 to extended ASCII. Consumes input from *iptr up to iend and
   writes output from *optr up to oend. *optr is advanced past the output
   produced, and *iptr is advanced to the end of the last input which was
   completely processed; input belonging to an unfinished sequence is left
   unconsumed so that it can be presented again, with more data, in the next
   call.

   The compacted tree at cvtptr is followed one input byte at a time. When a
   sequence can produce output but might also be the prefix of a longer
   sequence, the longer match is tried first and the shorter one is saved as a
   fallback. Input which matches nothing is handled at bad_char: NUL, CR, and
   LF are processed directly, and any other character becomes
   kCharSubstitute. */
void Convert1rRun(const void *cvtptr, LineBreakConversion lc,
                  struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend,
                  const UInt8 **iptr, const UInt8 *iend)
{
    struct Convert1rState *state = (struct Convert1rState *)stateptr;
    const struct CNode *node;
    const struct CEntry *entry;
    UInt8 *opos = *optr;
    const UInt8 *ipos = *iptr, *savein;
    unsigned ch, lastch, idx, chlen, output, saveout, toffset, savetoffset;

    /* Restore the state from the previous call. */
    ch = state->lastch;
    savein = ipos;
    saveout = state->output;
    toffset = state->tableoffset;
    savetoffset = toffset;
    /* Each step writes at most two bytes (CR LF). */
    if (oend - opos < 2) {
        goto done;
    }
    goto resume;

next_out:
    if (oend - opos < 2) {
        goto done;
    }

    /* Follow state machine to the end. */
    savein = ipos;
    saveout = 0;
    toffset = 0;
    savetoffset = 0;
resume:
    for (;;) {
        if (ipos >= iend) {
            goto done;
        }
        lastch = ch;
        ch = *ipos++;
        node = (const void *)((const UInt8 *)cvtptr + toffset);
        /* Index into the node's entries. This is kept separate from ch,
           because ch must remain the raw input byte: it becomes lastch, which
           is compared against CR to detect CR LF pairs. If the byte is below
           the base, the subtraction wraps and the range check fails. */
        idx = ch - node->base;
        if (idx > node->span) {
            toffset = 0;
            goto bad_char;
        }
        entry =
            (const void *)((const UInt8 *)cvtptr + toffset +
                           sizeof(struct CNode) + idx * sizeof(struct CEntry));
        output = entry->output;
        toffset = entry->next;
        if (toffset == 0) {
            /* Reached end of tree. */
            if (output == 0) {
                goto bad_char;
            }
            *opos++ = output;
            goto next_out;
        }
        if (output != 0) {
            /* Can produce output here, or can consume more input. We try
               consuming more input, but save the state to rewind if that
               fails. */
            savein = ipos;
            saveout = output;
            savetoffset = toffset;
        }
    }

bad_char:
    /* Bad character. Back up and try again. */
    ipos = savein;
    if (saveout != 0) {
        /* Produce saved output. */
        *opos++ = saveout;
        ch = 0;
    } else {
        /* No saved output, this really is a bad character. Consume one
           UTF-8 character, emit it as a fallback, and continue. */
        ch = *ipos++;
        if ((ch & 0x80) == 0) {
            /* ASCII character: either NUL, CR, or LF, because only these
               characters will result in a transition to state 0. */
            if (ch == 0) {
                *opos++ = ch;
            } else if (ch == kCharLF && lastch == kCharCR) {
                /* Second half of a CR LF pair. The CR already produced the
                   converted line break. */
                if (lc == kLineBreakKeep) {
                    *opos++ = ch;
                }
            } else {
                switch (lc) {
                case kLineBreakKeep:
                    *opos++ = ch;
                    break;
                case kLineBreakLF:
                    *opos++ = kCharLF;
                    break;
                case kLineBreakCR:
                    *opos++ = kCharCR;
                    break;
                case kLineBreakCRLF:
                    *opos++ = kCharCR;
                    *opos++ = kCharLF;
                    break;
                }
            }
        } else {
            /* Number of continuation bytes expected after the lead byte. */
            if ((ch & 0xe0) == 0xc0) {
                chlen = 1;
            } else if ((ch & 0xf0) == 0xe0) {
                chlen = 2;
            } else if ((ch & 0xf8) == 0xf0) {
                chlen = 3;
            } else {
                chlen = 0;
            }
            for (; chlen > 0; chlen--) {
                if (ipos == iend) {
                    /* The character is incomplete. Leave it unconsumed. */
                    goto done;
                }
                ch = *ipos;
                if ((ch & 0xc0) != 0x80) {
                    break;
                }
                ipos++;
            }
            *opos++ = kCharSubstitute;
        }
    }
    goto next_out;

done:
    /* Save the state for the next call. Input after savein has not been
       completely processed, so it is reported as unconsumed. */
    state->lastch = ch;
    state->output = saveout;
    state->tableoffset = savetoffset;
    *optr = opos;
    *iptr = savein;
}

321
src/convert_test.c Normal file
View File

@ -0,0 +1,321 @@
/* Converter test. */
#define _XOPEN_SOURCE 500
#include "src/convert.h"
#include "src/test.h"
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum
{
kInitialBufSize = 4 * 1024,
kConvertBufferSize = 1024
};
static int gFailCount;
static char gTestName[128];
static void Failf(const char *msg, ...) __attribute__((format(printf, 1, 2)));

/* Record a test failure and print a formatted message to standard error,
   prefixed with the name of the current test. The program exits once ten
   failures have been recorded. */
static void Failf(const char *msg, ...)
{
    va_list args;

    gFailCount += 1;

    fputs("Error: ", stderr);
    fputs(gTestName, stderr);
    fputs(": ", stderr);

    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    fputc('\n', stderr);

    /* Stop early rather than flooding the log. */
    if (gFailCount < 10) {
        return;
    }
    exit(1);
}
/* Human-readable names for the converter error codes, indexed by error
   code. */
static const char *const kErrorNames[] = {"ok", "no memory", "bad data"};
/* Return the human-readable name for a converter error code. Aborts the
   program if the code is out of range. */
static const char *ErrorName(int err)
{
    int count = (int)(sizeof(kErrorNames) / sizeof(*kErrorNames));

    if (!(0 <= err && err < count)) {
        Dief("bad error code: %d", err);
    }
    return kErrorNames[err];
}
static void StringPrintf(char *dest, size_t destsz, const char *fmt, ...)
    __attribute__((format(printf, 3, 4)));

/* Format a string into the buffer dest, which has room for destsz bytes.
   Aborts the program if formatting fails or the result does not fit. */
static void StringPrintf(char *dest, size_t destsz, const char *fmt, ...)
{
    va_list args;
    int written;

    va_start(args, fmt);
    written = vsnprintf(dest, destsz, fmt, args);
    va_end(args);

    if (written >= 0 && written < (int)destsz) {
        return;
    }
    Dief("snprintf: overflow");
}
/* Read a file in its entirety. The file is opened at "src/<filename>",
   relative to the current directory. On success, *datap receives a buffer
   allocated with malloc, which the caller must free, and *sizep receives the
   number of bytes read. On failure, prints an error naming the path that was
   opened and exits. */
static void ReadFile(const char *filename, void **datap, size_t *sizep)
{
    char fnbuf[128];
    FILE *fp = NULL;
    char *buf = NULL, *newbuf;
    size_t size, alloc, newalloc, amt;
    int err;

    StringPrintf(fnbuf, sizeof(fnbuf), "src/%s", filename);
    fp = fopen(fnbuf, "rb");
    if (fp == NULL) {
        err = errno;
        goto error;
    }
    buf = malloc(kInitialBufSize);
    if (buf == NULL) {
        err = errno;
        goto error;
    }
    size = 0;
    alloc = kInitialBufSize;
    for (;;) {
        /* Double the buffer when it is full. */
        if (size >= alloc) {
            newalloc = alloc * 2;
            newbuf = realloc(buf, newalloc);
            if (newbuf == NULL) {
                err = errno;
                goto error;
            }
            alloc = newalloc;
            buf = newbuf;
        }
        amt = fread(buf + size, 1, alloc - size, fp);
        if (amt == 0) {
            /* A zero-length read is either end of file or an error. */
            if (feof(fp)) {
                break;
            }
            err = errno;
            goto error;
        }
        size += amt;
    }
    fclose(fp);
    *datap = buf;
    *sizep = size;
    return;

error:
    /* err was captured before cleanup, which may change errno. */
    if (fp != NULL) {
        fclose(fp);
    }
    if (buf != NULL) {
        free(buf);
    }
    /* Report the path that was actually opened, not just the base name. */
    DieErrorf(err, "read %s", fnbuf);
}
/* Scratch buffers for the conversion test, allocated in main. Buffer 0 holds
   the sample data (which is also the expected round-trip result), buffer 1
   holds the sample data converted to UTF-8, and buffer 2 holds the result of
   converting buffer 1 back. */
static UInt8 *gBuffer[3];
/* Print len bytes from buf to standard error as a double-quoted string.
   Printable ASCII is printed as-is, with backslash and double quote escaped
   by a backslash; every other byte is printed as a \xNN hexadecimal escape. */
static void PrintQuotedString(const UInt8 *buf, int len)
{
    int pos, byte;

    fputc('"', stderr);
    for (pos = 0; pos < len; pos++) {
        byte = buf[pos];
        if (byte < 32 || byte > 126) {
            fprintf(stderr, "\\x%02x", byte);
            continue;
        }
        if (byte == '\\' || byte == '"') {
            fputc('\\', stderr);
        }
        fputc(byte, stderr);
    }
    fputc('"', stderr);
}
/* Compare the expected data in gBuffer[0] (len0 bytes) against the output in
   gBuffer[2] (len2 bytes). If they differ, record a failure and print the
   input from gBuffer[1] (len1 bytes), the expected data, and the output as
   quoted strings. When a differing byte is found within the common length, a
   caret is printed under the column where it appears. */
static void Check(int len0, int len1, int len2)
{
    int pos, common, col, diffcol, expect, actual;

    if (len0 == len2 && memcmp(gBuffer[0], gBuffer[2], len2) == 0) {
        return;
    }
    Failf("incorrect output");

    /* Find the printed column of the first differing byte. Columns follow the
       quoting used by PrintQuotedString: one column for a plain character,
       two for an escaped one, and four for a hexadecimal escape. */
    common = len0;
    if (len2 < common) {
        common = len2;
    }
    diffcol = -1;
    col = 0;
    for (pos = 0; pos < common && diffcol < 0; pos++) {
        expect = gBuffer[0][pos];
        actual = gBuffer[2][pos];
        if (expect != actual) {
            diffcol = col;
        } else if (expect < 32 || expect > 126) {
            col += 4;
        } else if (expect == '\\' || expect == '"') {
            col += 2;
        } else {
            col += 1;
        }
    }

    fputs("Input: ", stderr);
    PrintQuotedString(gBuffer[1], len1);
    fputc('\n', stderr);

    fputs("Expect: ", stderr);
    PrintQuotedString(gBuffer[0], len0);
    fputc('\n', stderr);

    fputs("Output: ", stderr);
    PrintQuotedString(gBuffer[2], len2);
    fputc('\n', stderr);

    if (diffcol >= 0) {
        /* Skip the 8-character label and the opening quote. */
        pos = diffcol + 9;
        while (pos-- > 0) {
            fputc(' ', stderr);
        }
        fputc('^', stderr);
    }
    fputc('\n', stderr);
}
/* Test the character map stored in the given data file. Builds converters for
   both directions, converts every byte value 0-255 (followed by 0) to UTF-8,
   and then converts the result back, checking that the original data is
   reproduced. The reverse conversion is repeated with the input split into
   three pieces at many different positions, to exercise the handling of
   sequences which span calls. Failures are reported through Failf. */
static void TestConverter(const char *filename)
{
    void *data;
    size_t datasz;
    Ptr datap;
    Handle datah;
    struct Converter cf, cr;
    struct ConverterState st;
    int r, i, j, jmax, len0, len1, len2;
    OSErr err;
    UInt8 *ptr;
    const UInt8 *iptr, *iend;
    UInt8 *optr, *oend;

    /* Initialize everything that the cleanup code at "done" looks at. */
    data = NULL;
    cf.data = NULL;
    cr.data = NULL;

    StringPrintf(gTestName, sizeof(gTestName), "%s", filename);

    /* Load the converter into memory and build the conversion table. */
    ReadFile(filename, &data, &datasz);
    /* Wrap the buffer in a pointer-to-pointer so it can be passed as a
       Handle. */
    datap = data;
    datah = &datap;
    r = ConverterBuild(&cf, datah, datasz, kToUTF8, &err);
    if (r != 0) {
        Failf("ConverterBuild: %s (to UTF-8): %s", filename, ErrorName(r));
        goto done;
    }
    r = ConverterBuild(&cr, datah, datasz, kFromUTF8, &err);
    if (r != 0) {
        Failf("ConverterBuild: %s (from UTF-8): %s", filename, ErrorName(r));
        goto done;
    }

    /* Create sample data to convert: 0-255, followed by 0. */
    len0 = 257;
    ptr = gBuffer[0];
    for (i = 0; i < 256; i++) {
        ptr[i] = i;
    }
    ptr[256] = 0;

    /* Convert sample data. */
    iptr = gBuffer[0];
    iend = iptr + 257;
    optr = gBuffer[1];
    oend = optr + kConvertBufferSize;
    st.data = 0;
    cf.run(*cf.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend);
    if (iptr != iend) {
        Failf("some data failed to convert");
        goto done;
    }
    len1 = optr - gBuffer[1];

    /* Convert back, in three calls. The middle call will be to a 1-4 byte slice
       in the middle. */
    for (i = 1; i < len1 - 2; i++) {
        /* The middle slice may not extend past the end of the input. */
        jmax = len1 - i;
        if (jmax > 4) {
            jmax = 4;
        }
        for (j = 1; j <= jmax; j++) {
            StringPrintf(gTestName, sizeof(gTestName), "%s reverse i=%d j=%d",
                         filename, i, j);
            st.data = 0;
            iptr = gBuffer[1];
            optr = gBuffer[2];
            oend = optr + kConvertBufferSize;
            /* Each call continues from wherever the previous call stopped
               consuming input; only the end of the input moves. */
            iend = gBuffer[1] + i;
            cr.run(*cr.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend);
            iend = gBuffer[1] + i + j;
            cr.run(*cr.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend);
            iend = gBuffer[1] + len1;
            cr.run(*cr.data, kLineBreakKeep, &st, &optr, oend, &iptr, iend);
            if (iptr != iend) {
                Failf("some data failed to convert");
                continue;
            }
            len2 = optr - gBuffer[2];
            Check(len0, len1, len2);
        }
    }

done:
    free(data);
    if (cf.data != NULL) {
        DisposeHandle(cf.data);
    }
    if (cr.data != NULL) {
        DisposeHandle(cr.data);
    }
}
/* Test entry point. Allocates the scratch buffers, runs the converter test on
   every file listed in kCharsetFilename, and returns 0 if no failures were
   recorded or 1 otherwise. Command-line arguments are ignored. */
int main(int argc, char **argv)
{
    const char *const *name;
    int i;

    (void)argc;
    (void)argv;

    for (i = 0; i < 3; i++) {
        gBuffer[i] = malloc(kConvertBufferSize);
        if (gBuffer[i] == NULL) {
            DieErrorf(errno, "malloc");
        }
    }

    /* The list of file names is terminated by NULL. */
    for (name = kCharsetFilename; *name != NULL; name++) {
        TestConverter(*name);
    }

    for (i = 0; i < 3; i++) {
        free(gBuffer[i]);
    }
    if (gFailCount == 0) {
        return 0;
    }
    return 1;
}

17
src/test.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef test_h
#define test_h
/* test.h - unit testing definitions. */
#include "src/defs.h"
/* List of all data files, terminated by NULL. */
extern const char *const kCharsetFilename[];
/* Print an error message and exit. */
void Dief(const char *msg, ...) __attribute__((noreturn, format(printf, 1, 2)));
/* Print an error message with an error code and exit. */
void DieErrorf(int errcode, const char *msg, ...)
__attribute__((noreturn, format(printf, 2, 3)));
#endif

View File

@ -3,17 +3,16 @@
This is used to run conversion tests on non-Mac OS systems to make
development easier. These are not intended to make it possible to port the
converter to non-Mac OS systems. */
#include "defs.h"
#include "src/defs.h"
#include "src/test.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static void Dief(const char *msg, ...)
__attribute__((noreturn, format(printf, 1, 2)));
static void Dief(const char *msg, ...) {
void Dief(const char *msg, ...)
{
va_list ap;
fputs("Error: ", stderr);
va_start(ap, msg);
@ -23,7 +22,21 @@ static void Dief(const char *msg, ...) {
exit(1);
}
Handle NewHandle(Size byteCount) {
void DieErrorf(int errcode, const char *msg, ...)
{
va_list ap;
fputs("Error: ", stderr);
va_start(ap, msg);
vfprintf(stderr, msg, ap);
va_end(ap);
fputs(": ", stderr);
fputs(strerror(errcode), stderr);
fputc('\n', stderr);
exit(1);
}
Handle NewHandle(Size byteCount)
{
Ptr p;
Handle h;
@ -42,22 +55,26 @@ Handle NewHandle(Size byteCount) {
return h;
}
void HLock(Handle h) {
void HLock(Handle h)
{
(void)h;
}
void HUnlock(Handle h) {
void HUnlock(Handle h)
{
(void)h;
}
void DisposeHandle(Handle h) {
void DisposeHandle(Handle h)
{
if (h != NULL) {
free(*h);
free(h);
}
}
void SetHandleSize(Handle h, Size newSize) {
void SetHandleSize(Handle h, Size newSize)
{
Ptr p;
if (h == NULL) {
Dief("SetHandleSize: h = NULL");
@ -69,11 +86,13 @@ void SetHandleSize(Handle h, Size newSize) {
*h = p;
}
OSErr MemError(void) {
OSErr MemError(void)
{
/* Memory allocation failures abort the program. */
return 0;
}
void MemClear(void *ptr, Size size) {
void MemClear(void *ptr, Size size)
{
memset(ptr, 0, size);
}