Use more modern C

This commit is contained in:
Dietrich Epp 2022-04-22 11:44:07 -04:00
parent 61e4168fbc
commit ec3c80360f
6 changed files with 79 additions and 76 deletions

View File

@ -24,12 +24,12 @@ int ConverterBuild(struct Converter *c, Handle data, Size datasz,
} }
engine = (UInt8)(**data) - 1; engine = (UInt8)(**data) - 1;
if (engine < 0 || (int)(sizeof(kEngines) / sizeof(*kEngines)) <= engine) { if (engine < 0 || (int)(sizeof(kEngines) / sizeof(*kEngines)) <= engine) {
/* Invalid engine. */ // Invalid engine.
return kErrorBadData; return kErrorBadData;
} }
funcs = &kEngines[engine][direction]; funcs = &kEngines[engine][direction];
if (funcs->build == NULL || funcs->run == NULL) { if (funcs->build == NULL || funcs->run == NULL) {
/* Invalid engine. */ // Invalid engine.
return kErrorBadData; return kErrorBadData;
} }
err = funcs->build(&out, data, datasz); err = funcs->build(&out, data, datasz);

View File

@ -3,68 +3,71 @@
// Mozilla Public License, version 2.0. See LICENSE.txt for details. // Mozilla Public License, version 2.0. See LICENSE.txt for details.
#ifndef CONVERT_CONVERT_H #ifndef CONVERT_CONVERT_H
#define CONVERT_CONVERT_H #define CONVERT_CONVERT_H
/* convert.h - character set conversion routines. */ // convert.h - character set conversion routines.
#include "lib/defs.h" #include "lib/defs.h"
#include "lib/error.h" #include "lib/error.h"
enum { enum {
/* Constants for CR and LF. Note that we should not use '\n' or '\r' // Constants for CR and LF. Note that we should not use '\n' or '\r'
anywhere, because these character constants may have unexpected values on // anywhere, because these character constants may have unexpected values on
certain old Mac OS compilers, depending on the compiler settings. In // certain old Mac OS compilers, depending on the compiler settings. In
particular, the values of '\n' and '\r' will be swapped. */ // particular, the values of '\n' and '\r' will be swapped.
kCharLF = 10, kCharLF = 10,
kCharCR = 13, kCharCR = 13,
/* Constant for substitution character: '?'. */ // Constant for substitution character: '?'.
kCharSubstitute = 63 kCharSubstitute = 63
}; };
typedef enum { typedef enum {
/* Don't translate line breaks. */ // Don't translate line breaks.
kLineBreakKeep, kLineBreakKeep,
/* Convert line breaks to LF. */ // Convert line breaks to LF.
kLineBreakLF, kLineBreakLF,
/* Convert line breaks to CR. */ // Convert line breaks to CR.
kLineBreakCR, kLineBreakCR,
/* Convert line breaks to CR LF. */ // Convert line breaks to CR LF.
kLineBreakCRLF kLineBreakCRLF
} LineBreakConversion; } LineBreakConversion;
/* Directions that the converter runs in. */ // Directions that the converter runs in.
typedef enum { kToUTF8, kFromUTF8 } ConvertDirection; typedef enum {
kToUTF8,
kFromUTF8,
} ConvertDirection;
/* Get the character map used for the given Mac OS script and region codes. // Get the character map used for the given Mac OS script and region codes.
Return -1 if no known character map exists. */ // Return -1 if no known character map exists.
int GetCharmap(int script, int region); int GetCharmap(int script, int region);
/* The state of a converter. Must be zeroed prior to first conversion. */ // The state of a converter. Must be zeroed prior to first conversion.
struct ConverterState { struct ConverterState {
UInt32 data; UInt32 data;
}; };
/* Implementation function for building a converter. */ // Implementation function for building a converter.
typedef ErrorCode (*ConvertBuildf)(Handle *out, Handle data, Size datasz); typedef ErrorCode (*ConvertBuildf)(Handle *out, Handle data, Size datasz);
/* Implementation function for running a converter. */ // Implementation function for running a converter.
typedef void (*ConvertRunf)(const void *cvtptr, LineBreakConversion lc, typedef void (*ConvertRunf)(const void *cvtptr, LineBreakConversion lc,
struct ConverterState *stateptr, UInt8 **optr, struct ConverterState *stateptr, UInt8 **optr,
UInt8 *oend, const UInt8 **iptr, const UInt8 *iend); UInt8 *oend, const UInt8 **iptr, const UInt8 *iend);
/* A converter. The converter can be freed by disposing the handle. */ // A converter. The converter can be freed by disposing the handle.
struct Converter { struct Converter {
Handle data; Handle data;
ConvertRunf run; ConvertRunf run;
}; };
/* Build a converter from the given conversion table data. */ // Build a converter from the given conversion table data.
int ConverterBuild(struct Converter *c, Handle data, Size datasz, int ConverterBuild(struct Converter *c, Handle data, Size datasz,
ConvertDirection direction); ConvertDirection direction);
/* Engine 1: extended ASCII */ // Engine 1: extended ASCII.
ErrorCode Convert1fBuild(Handle *out, Handle data, Size datasz); ErrorCode Convert1fBuild(Handle *out, Handle data, Size datasz);
void Convert1fRun(const void *cvtptr, LineBreakConversion lc, void Convert1fRun(const void *cvtptr, LineBreakConversion lc,

View File

@ -1,13 +1,14 @@
// Copyright 2022 Dietrich Epp. // Copyright 2022 Dietrich Epp.
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the // This file is part of SyncFiles. SyncFiles is licensed under the terms of the
// Mozilla Public License, version 2.0. See LICENSE.txt for details. // Mozilla Public License, version 2.0. See LICENSE.txt for details.
/* convert_1f.c - Forward conversion from extended ASCII to UTF-8. */
// convert_1f.c - Forward conversion from extended ASCII to UTF-8.
#include "convert/convert.h" #include "convert/convert.h"
#include "lib/defs.h" #include "lib/defs.h"
struct Convert1fData { struct Convert1fData {
/* Unicode characters, encoded in UTF-8, and packed MSB first. Always either // Unicode characters, encoded in UTF-8, and packed MSB first. Always either
2 bytes or 3 bytes. */ // 2 bytes or 3 bytes.
UInt32 chars[128]; UInt32 chars[128];
}; };
@ -81,7 +82,7 @@ void Convert1fRun(const void *cvtptr, LineBreakConversion lc,
ch = *ipos++; ch = *ipos++;
if (ch < 128) { if (ch < 128) {
if (ch == kCharLF || ch == kCharCR) { if (ch == kCharLF || ch == kCharCR) {
/* Line breaks. */ // Line breaks.
if (ch == kCharLF && lastch == kCharCR) { if (ch == kCharLF && lastch == kCharCR) {
if (lc == kLineBreakKeep) { if (lc == kLineBreakKeep) {
*opos++ = ch; *opos++ = ch;
@ -104,11 +105,11 @@ void Convert1fRun(const void *cvtptr, LineBreakConversion lc,
} }
} }
} else { } else {
/* ASCII characters. */ // ASCII characters.
*opos++ = ch; *opos++ = ch;
} }
} else { } else {
/* Unicode characters. */ // Unicode characters.
uch = cvt->chars[ch - 128]; uch = cvt->chars[ch - 128];
if (uch > 0xffff) { if (uch > 0xffff) {
opos[0] = uch >> 16; opos[0] = uch >> 16;

View File

@ -1,26 +1,27 @@
// Copyright 2022 Dietrich Epp. // Copyright 2022 Dietrich Epp.
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the // This file is part of SyncFiles. SyncFiles is licensed under the terms of the
// Mozilla Public License, version 2.0. See LICENSE.txt for details. // Mozilla Public License, version 2.0. See LICENSE.txt for details.
/* convert_1r.c - Reverse conversion from UTF-8 to extended ASCII. */
// convert_1r.c - Reverse conversion from UTF-8 to extended ASCII.
#include "convert/convert.h" #include "convert/convert.h"
#include "lib/defs.h" #include "lib/defs.h"
enum { enum {
/* Maximum length of encoded character. */ // Maximum length of encoded character.
kMaxEncodedLength = 8, kMaxEncodedLength = 8,
/* Initial number of nodes to allocate when building the tree. */ // Initial number of nodes to allocate when building the tree.
kInitialTableAlloc = 8 kInitialTableAlloc = 8
}; };
struct TEntry { struct TEntry {
/* The output character, or zero if no output. */ // The output character, or zero if no output.
UInt8 output; UInt8 output;
/* The next node, or zero if no next node. */ // The next node, or zero if no next node.
UInt8 next; UInt8 next;
}; };
/* A node for building the converter. */ // A node for building the converter.
struct TNode { struct TNode {
struct TEntry entries[256]; struct TEntry entries[256];
}; };
@ -36,10 +37,10 @@ static ErrorCode CreateTree(struct TTree *tree, Handle data, Size datasz)
int i, j, dpos, enclen, encend, state, cur, nodecount, nodealloc; int i, j, dpos, enclen, encend, state, cur, nodecount, nodealloc;
unsigned ch; unsigned ch;
/* Create a tree with a root node mapping all the ASCII characters except // Create a tree with a root node mapping all the ASCII characters except
NUL, CR, and LF. NUL won't map because an output of 0 is interpreted as // NUL, CR, and LF. NUL won't map because an output of 0 is interpreted as
no output. CR and LF are removed so they can be handled specially by the // no output. CR and LF are removed so they can be handled specially by the
decoder. */ // decoder.
nodes = nodes =
(struct TNode **)NewHandle(kInitialTableAlloc * sizeof(struct TNode)); (struct TNode **)NewHandle(kInitialTableAlloc * sizeof(struct TNode));
if (nodes == NULL) { if (nodes == NULL) {
@ -55,11 +56,11 @@ static ErrorCode CreateTree(struct TTree *tree, Handle data, Size datasz)
node->entries[kCharLF].output = 0; node->entries[kCharLF].output = 0;
node->entries[kCharCR].output = 0; node->entries[kCharCR].output = 0;
/* Parse the table data and build up a tree of TNode. */ // Parse the table data and build up a tree of TNode.
dpos = 1; dpos = 1;
/* For each high character (128..255). */ // For each high character (128..255).
for (i = 0; i < 128; i++) { for (i = 0; i < 128; i++) {
/* For each encoding of that character. */ // For each encoding of that character.
for (j = 0; j < 2; j++) { for (j = 0; j < 2; j++) {
if (dpos >= datasz) { if (dpos >= datasz) {
goto bad_table; goto bad_table;
@ -70,8 +71,8 @@ static ErrorCode CreateTree(struct TTree *tree, Handle data, Size datasz)
enclen > kMaxEncodedLength) { enclen > kMaxEncodedLength) {
goto bad_table; goto bad_table;
} }
/* Iterate over all but last byte in encoding, to find the node // Iterate over all but last byte in encoding, to find the node
which will produce the decoded byte as output. */ // which will produce the decoded byte as output.
state = 0; state = 0;
node = *nodes; node = *nodes;
for (encend = dpos + enclen - 1; dpos < encend; dpos++) { for (encend = dpos + enclen - 1; dpos < encend; dpos++) {
@ -129,11 +130,11 @@ struct CEntry {
UInt16 next; UInt16 next;
}; };
/* A compressed table node. Followed by an array of CEntry. */ // A compressed table node. Followed by an array of CEntry.
struct CNode { struct CNode {
/* First byte in table. */ // First byte in table.
UInt8 base; UInt8 base;
/* Number of entries in table, minus one. */ // Number of entries in table, minus one.
UInt8 span; UInt8 span;
}; };
@ -147,7 +148,7 @@ static ErrorCode CompactTree(Handle *out, struct TNode **nodes, int nodecount)
int i, j, min, max, count, next; int i, j, min, max, count, next;
unsigned offset; unsigned offset;
/* Figure out where each compacted node will go. */ // Figure out where each compacted node will go.
infos = (struct NodeInfo **)NewHandle(sizeof(struct NodeInfo) * nodecount); infos = (struct NodeInfo **)NewHandle(sizeof(struct NodeInfo) * nodecount);
if (infos == NULL) { if (infos == NULL) {
return kErrorNoMemory; return kErrorNoMemory;
@ -171,7 +172,7 @@ static ErrorCode CompactTree(Handle *out, struct TNode **nodes, int nodecount)
offset += sizeof(struct CNode) + count * sizeof(struct CEntry); offset += sizeof(struct CNode) + count * sizeof(struct CEntry);
} }
/* Create the compacted tree. */ // Create the compacted tree.
ctree = NewHandle(offset); ctree = NewHandle(offset);
if (ctree == NULL) { if (ctree == NULL) {
DisposeHandle((Handle)infos); DisposeHandle((Handle)infos);
@ -249,7 +250,7 @@ next_out:
goto done; goto done;
} }
/* Follow state machine to the end. */ // Follow state machine to the end.
savein = ipos; savein = ipos;
saveout = 0; saveout = 0;
toffset = 0; toffset = 0;
@ -274,7 +275,7 @@ resume:
output = entry->output; output = entry->output;
toffset = entry->next; toffset = entry->next;
if (toffset == 0) { if (toffset == 0) {
/* Reached end of tree. */ // Reached end of tree.
if (output == 0) { if (output == 0) {
goto bad_char; goto bad_char;
} }
@ -282,9 +283,8 @@ resume:
goto next_out; goto next_out;
} }
if (output != 0) { if (output != 0) {
/* Can produce output here, or can consume more input. We try // Can produce output here, or can consume more input. We try
consuming more input, but save the state to rewind if that // consuming more input, but save the state to rewind if that fails.
fails. */
savein = ipos; savein = ipos;
saveout = output; saveout = output;
savetoffset = toffset; savetoffset = toffset;
@ -292,20 +292,19 @@ resume:
} }
bad_char: bad_char:
/* Bad character. Back up and try again. */ // Bad character. Back up and try again.
ipos = savein; ipos = savein;
if (saveout != 0) { if (saveout != 0) {
/* Produce saved output. */ // Produce saved output.
*opos++ = saveout; *opos++ = saveout;
ch = 0; ch = 0;
} else { } else {
/* No saved output, this really is a bad character. Consume one // No saved output, this really is a bad character. Consume one UTF-8
UTF-8 character, emit it as a fallback, and continue. */ // character, emit it as a fallback, and continue.
ch = *ipos++; ch = *ipos++;
if ((ch & 0x80) == 0) { if ((ch & 0x80) == 0) {
/* ASCII character: either NUL, CR, or LF, because only // ASCII character: either NUL, CR, or LF, because only these
these // characters will result in a transition to state 0.
characters will result in a transition to state 0. */
if (ch == 0) { if (ch == 0) {
*opos++ = ch; *opos++ = ch;
} else if (ch == kCharLF && lastch == kCharCR) { } else if (ch == kCharLF && lastch == kCharCR) {

View File

@ -112,7 +112,7 @@ static void TestConverter(const char *name, struct CharmapData data)
SetTestName(name); SetTestName(name);
/* Load the converter into memory and build the conversion table. */ // Load the converter into memory and build the conversion table.
datap = (void *)data.ptr; datap = (void *)data.ptr;
datah = &datap; datah = &datap;
err = ConverterBuild(&cf, datah, data.size, kToUTF8); err = ConverterBuild(&cf, datah, data.size, kToUTF8);
@ -126,7 +126,7 @@ static void TestConverter(const char *name, struct CharmapData data)
goto done; goto done;
} }
/* Create sample data to convert: 0-255, followed by 0. */ // Create sample data to convert: 0-255, followed by 0.
len0 = 257; len0 = 257;
ptr = gBuffer[0]; ptr = gBuffer[0];
for (i = 0; i < 256; i++) { for (i = 0; i < 256; i++) {
@ -134,7 +134,7 @@ static void TestConverter(const char *name, struct CharmapData data)
} }
ptr[256] = 0; ptr[256] = 0;
/* Convert sample data. */ // Convert sample data.
iptr = gBuffer[0]; iptr = gBuffer[0];
iend = iptr + 257; iend = iptr + 257;
optr = gBuffer[1]; optr = gBuffer[1];
@ -147,8 +147,8 @@ static void TestConverter(const char *name, struct CharmapData data)
} }
len1 = optr - gBuffer[1]; len1 = optr - gBuffer[1];
/* Convert back, in three calls. The middle call will be to a 1-4 byte slice // Convert back, in three calls. The middle call will be to a 1-4 byte slice
in the middle. */ // in the middle.
for (i = 1; i < len1 - 2; i++) { for (i = 1; i < len1 - 2; i++) {
jmax = len1 - i; jmax = len1 - i;
if (jmax > 4) { if (jmax > 4) {
@ -182,8 +182,8 @@ static void TestConverter(const char *name, struct CharmapData data)
for (k = 0; k < 2; k++) { for (k = 0; k < 2; k++) {
cc = k == 0 ? cf : cr; cc = k == 0 ? cf : cr;
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
len1 = lblen[0]; /* Input data */ len1 = lblen[0]; // Input data
len0 = lblen[i]; /* Expected output */ len0 = lblen[i]; // Expected output
for (j = 1; j < len1; j++) { for (j = 1; j < len1; j++) {
SetTestNamef("%s %s linebreak %s split=%d", name, SetTestNamef("%s %s linebreak %s split=%d", name,
k == 0 ? "forward" : "backward", kLineBreakName[i], k == 0 ? "forward" : "backward", kLineBreakName[i],

View File

@ -3,26 +3,26 @@
// Mozilla Public License, version 2.0. See LICENSE.txt for details. // Mozilla Public License, version 2.0. See LICENSE.txt for details.
#ifndef CONVERT_DATA_H #ifndef CONVERT_DATA_H
#define CONVERT_DATA_H #define CONVERT_DATA_H
/* data.h - charmap data, not used for classic Mac OS builds */ // data.h - charmap data, not used for classic Mac OS builds
#include "lib/defs.h" #include "lib/defs.h"
/* Get the ID of the given character map. Return NULL if no such character map // Get the ID of the given character map. Return NULL if no such character map
exists. */ // exists.
const char *CharmapID(int cmap); const char *CharmapID(int cmap);
/* Get the human-readable name for the given character map. Return NULL if no // Get the human-readable name for the given character map. Return NULL if no
such character map exists. */ // such character map exists.
const char *CharmapName(int cmap); const char *CharmapName(int cmap);
/* Conversion table data. */ // Conversion table data.
struct CharmapData { struct CharmapData {
const UInt8 *ptr; const UInt8 *ptr;
Size size; Size size;
}; };
/* Get the conversion table data for the given charmap. Returns an empty buffer // Get the conversion table data for the given charmap. Returns an empty buffer
with a NULL pointer if the character map does not exist or if no conversion // with a NULL pointer if the character map does not exist or if no conversion
table exists for that character map. */ // table exists for that character map.
struct CharmapData CharmapData(int cmap); struct CharmapData CharmapData(int cmap);
#endif #endif