Use more modern C

This commit is contained in:
Dietrich Epp 2022-04-22 11:44:07 -04:00
parent 61e4168fbc
commit ec3c80360f
6 changed files with 79 additions and 76 deletions

View File

@ -24,12 +24,12 @@ int ConverterBuild(struct Converter *c, Handle data, Size datasz,
}
engine = (UInt8)(**data) - 1;
if (engine < 0 || (int)(sizeof(kEngines) / sizeof(*kEngines)) <= engine) {
/* Invalid engine. */
// Invalid engine.
return kErrorBadData;
}
funcs = &kEngines[engine][direction];
if (funcs->build == NULL || funcs->run == NULL) {
/* Invalid engine. */
// Invalid engine.
return kErrorBadData;
}
err = funcs->build(&out, data, datasz);

View File

@ -3,68 +3,71 @@
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
#ifndef CONVERT_CONVERT_H
#define CONVERT_CONVERT_H
/* convert.h - character set conversion routines. */
// convert.h - character set conversion routines.
#include "lib/defs.h"
#include "lib/error.h"
enum {
/* Constants for CR and LF. Note that we should not use '\n' or '\r'
anywhere, because these character constants may have unexpected values on
certain old Mac OS compilers, depending on the compiler settings. In
particular, the values of '\n' and '\r' will be swapped. */
// Constants for CR and LF. Note that we should not use '\n' or '\r'
// anywhere, because these character constants may have unexpected values on
// certain old Mac OS compilers, depending on the compiler settings. In
// particular, the values of '\n' and '\r' will be swapped.
kCharLF = 10,
kCharCR = 13,
/* Constant for substitution character: '?'. */
// Constant for substitution character: '?'.
kCharSubstitute = 63
};
typedef enum {
/* Don't translate line breaks. */
// Don't translate line breaks.
kLineBreakKeep,
/* Convert line breaks to LF. */
// Convert line breaks to LF.
kLineBreakLF,
/* Convert line breaks to CR. */
// Convert line breaks to CR.
kLineBreakCR,
/* Convert line breaks to CR LF. */
// Convert line breaks to CR LF.
kLineBreakCRLF
} LineBreakConversion;
/* Directions that the converter runs in. */
typedef enum { kToUTF8, kFromUTF8 } ConvertDirection;
// Directions that the converter runs in.
typedef enum {
kToUTF8,
kFromUTF8,
} ConvertDirection;
/* Get the character map used for the given Mac OS script and region codes.
Return -1 if no known character map exists. */
// Get the character map used for the given Mac OS script and region codes.
// Return -1 if no known character map exists.
int GetCharmap(int script, int region);
/* The state of a converter. Must be zeroed prior to first conversion. */
// The state of a converter. Must be zeroed prior to first conversion.
struct ConverterState {
UInt32 data;
};
/* Implementation function for building a converter. */
// Implementation function for building a converter.
typedef ErrorCode (*ConvertBuildf)(Handle *out, Handle data, Size datasz);
/* Implementation function for running a converter. */
// Implementation function for running a converter.
typedef void (*ConvertRunf)(const void *cvtptr, LineBreakConversion lc,
struct ConverterState *stateptr, UInt8 **optr,
UInt8 *oend, const UInt8 **iptr, const UInt8 *iend);
/* A converter. The converter can be freed by disposing the handle. */
// A converter. The converter can be freed by disposing the handle.
struct Converter {
Handle data;
ConvertRunf run;
};
/* Build a converter from the given conversion table data. */
// Build a converter from the given conversion table data.
int ConverterBuild(struct Converter *c, Handle data, Size datasz,
ConvertDirection direction);
/* Engine 1: extended ASCII */
// Engine 1: extended ASCII.
ErrorCode Convert1fBuild(Handle *out, Handle data, Size datasz);
void Convert1fRun(const void *cvtptr, LineBreakConversion lc,

View File

@ -1,13 +1,14 @@
// Copyright 2022 Dietrich Epp.
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
/* convert_1f.c - Forward conversion from extended ASCII to UTF-8. */
// convert_1f.c - Forward conversion from extended ASCII to UTF-8.
#include "convert/convert.h"
#include "lib/defs.h"
struct Convert1fData {
/* Unicode characters, encoded in UTF-8, and packed MSB first. Always either
2 bytes or 3 bytes. */
// Unicode characters, encoded in UTF-8, and packed MSB first. Always either
// 2 bytes or 3 bytes.
UInt32 chars[128];
};
@ -81,7 +82,7 @@ void Convert1fRun(const void *cvtptr, LineBreakConversion lc,
ch = *ipos++;
if (ch < 128) {
if (ch == kCharLF || ch == kCharCR) {
/* Line breaks. */
// Line breaks.
if (ch == kCharLF && lastch == kCharCR) {
if (lc == kLineBreakKeep) {
*opos++ = ch;
@ -104,11 +105,11 @@ void Convert1fRun(const void *cvtptr, LineBreakConversion lc,
}
}
} else {
/* ASCII characters. */
// ASCII characters.
*opos++ = ch;
}
} else {
/* Unicode characters. */
// Unicode characters.
uch = cvt->chars[ch - 128];
if (uch > 0xffff) {
opos[0] = uch >> 16;

View File

@ -1,26 +1,27 @@
// Copyright 2022 Dietrich Epp.
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
/* convert_1r.c - Reverse conversion from UTF-8 to extended ASCII. */
// convert_1r.c - Reverse conversion from UTF-8 to extended ASCII.
#include "convert/convert.h"
#include "lib/defs.h"
enum {
/* Maximum length of encoded character. */
// Maximum length of encoded character.
kMaxEncodedLength = 8,
/* Initial number of nodes to allocate when building the tree. */
// Initial number of nodes to allocate when building the tree.
kInitialTableAlloc = 8
};
struct TEntry {
/* The output character, or zero if no output. */
// The output character, or zero if no output.
UInt8 output;
/* The next node, or zero if no next node. */
// The next node, or zero if no next node.
UInt8 next;
};
/* A node for building the converter. */
// A node for building the converter.
struct TNode {
struct TEntry entries[256];
};
@ -36,10 +37,10 @@ static ErrorCode CreateTree(struct TTree *tree, Handle data, Size datasz)
int i, j, dpos, enclen, encend, state, cur, nodecount, nodealloc;
unsigned ch;
/* Create a tree with a root node mapping all the ASCII characters except
NUL, CR, and LF. NUL won't map because an output of 0 is interpreted as
no output. CR and LF are removed so they can be handled specially by the
decoder. */
// Create a tree with a root node mapping all the ASCII characters except
// NUL, CR, and LF. NUL won't map because an output of 0 is interpreted as
// no output. CR and LF are removed so they can be handled specially by the
// decoder.
nodes =
(struct TNode **)NewHandle(kInitialTableAlloc * sizeof(struct TNode));
if (nodes == NULL) {
@ -55,11 +56,11 @@ static ErrorCode CreateTree(struct TTree *tree, Handle data, Size datasz)
node->entries[kCharLF].output = 0;
node->entries[kCharCR].output = 0;
/* Parse the table data and build up a tree of TNode. */
// Parse the table data and build up a tree of TNode.
dpos = 1;
/* For each high character (128..255). */
// For each high character (128..255).
for (i = 0; i < 128; i++) {
/* For each encoding of that character. */
// For each encoding of that character.
for (j = 0; j < 2; j++) {
if (dpos >= datasz) {
goto bad_table;
@ -70,8 +71,8 @@ static ErrorCode CreateTree(struct TTree *tree, Handle data, Size datasz)
enclen > kMaxEncodedLength) {
goto bad_table;
}
/* Iterate over all but last byte in encoding, to find the node
which will produce the decoded byte as output. */
// Iterate over all but last byte in encoding, to find the node
// which will produce the decoded byte as output.
state = 0;
node = *nodes;
for (encend = dpos + enclen - 1; dpos < encend; dpos++) {
@ -129,11 +130,11 @@ struct CEntry {
UInt16 next;
};
/* A compressed table node. Followed by an array of CEntry. */
// A compressed table node. Followed by an array of CEntry.
struct CNode {
/* First byte in table. */
// First byte in table.
UInt8 base;
/* Number of entries in table, minus one. */
// Number of entries in table, minus one.
UInt8 span;
};
@ -147,7 +148,7 @@ static ErrorCode CompactTree(Handle *out, struct TNode **nodes, int nodecount)
int i, j, min, max, count, next;
unsigned offset;
/* Figure out where each compacted node will go. */
// Figure out where each compacted node will go.
infos = (struct NodeInfo **)NewHandle(sizeof(struct NodeInfo) * nodecount);
if (infos == NULL) {
return kErrorNoMemory;
@ -171,7 +172,7 @@ static ErrorCode CompactTree(Handle *out, struct TNode **nodes, int nodecount)
offset += sizeof(struct CNode) + count * sizeof(struct CEntry);
}
/* Create the compacted tree. */
// Create the compacted tree.
ctree = NewHandle(offset);
if (ctree == NULL) {
DisposeHandle((Handle)infos);
@ -249,7 +250,7 @@ next_out:
goto done;
}
/* Follow state machine to the end. */
// Follow state machine to the end.
savein = ipos;
saveout = 0;
toffset = 0;
@ -274,7 +275,7 @@ resume:
output = entry->output;
toffset = entry->next;
if (toffset == 0) {
/* Reached end of tree. */
// Reached end of tree.
if (output == 0) {
goto bad_char;
}
@ -282,9 +283,8 @@ resume:
goto next_out;
}
if (output != 0) {
/* Can produce output here, or can consume more input. We try
consuming more input, but save the state to rewind if that
fails. */
// Can produce output here, or can consume more input. We try
// consuming more input, but save the state to rewind if that fails.
savein = ipos;
saveout = output;
savetoffset = toffset;
@ -292,20 +292,19 @@ resume:
}
bad_char:
/* Bad character. Back up and try again. */
// Bad character. Back up and try again.
ipos = savein;
if (saveout != 0) {
/* Produce saved output. */
// Produce saved output.
*opos++ = saveout;
ch = 0;
} else {
/* No saved output, this really is a bad character. Consume one
UTF-8 character, emit it as a fallback, and continue. */
// No saved output, this really is a bad character. Consume one UTF-8
// character, emit it as a fallback, and continue.
ch = *ipos++;
if ((ch & 0x80) == 0) {
/* ASCII character: either NUL, CR, or LF, because only
these
characters will result in a transition to state 0. */
// ASCII character: either NUL, CR, or LF, because only these
// characters will result in a transition to state 0.
if (ch == 0) {
*opos++ = ch;
} else if (ch == kCharLF && lastch == kCharCR) {

View File

@ -112,7 +112,7 @@ static void TestConverter(const char *name, struct CharmapData data)
SetTestName(name);
/* Load the converter into memory and build the conversion table. */
// Load the converter into memory and build the conversion table.
datap = (void *)data.ptr;
datah = &datap;
err = ConverterBuild(&cf, datah, data.size, kToUTF8);
@ -126,7 +126,7 @@ static void TestConverter(const char *name, struct CharmapData data)
goto done;
}
/* Create sample data to convert: 0-255, followed by 0. */
// Create sample data to convert: 0-255, followed by 0.
len0 = 257;
ptr = gBuffer[0];
for (i = 0; i < 256; i++) {
@ -134,7 +134,7 @@ static void TestConverter(const char *name, struct CharmapData data)
}
ptr[256] = 0;
/* Convert sample data. */
// Convert sample data.
iptr = gBuffer[0];
iend = iptr + 257;
optr = gBuffer[1];
@ -147,8 +147,8 @@ static void TestConverter(const char *name, struct CharmapData data)
}
len1 = optr - gBuffer[1];
/* Convert back, in three calls. The middle call will be to a 1-4 byte slice
in the middle. */
// Convert back, in three calls. The middle call will be to a 1-4 byte slice
// in the middle.
for (i = 1; i < len1 - 2; i++) {
jmax = len1 - i;
if (jmax > 4) {
@ -182,8 +182,8 @@ static void TestConverter(const char *name, struct CharmapData data)
for (k = 0; k < 2; k++) {
cc = k == 0 ? cf : cr;
for (i = 0; i < 4; i++) {
len1 = lblen[0]; /* Input data */
len0 = lblen[i]; /* Expected output */
len1 = lblen[0]; // Input data
len0 = lblen[i]; // Expected output
for (j = 1; j < len1; j++) {
SetTestNamef("%s %s linebreak %s split=%d", name,
k == 0 ? "forward" : "backward", kLineBreakName[i],

View File

@ -3,26 +3,26 @@
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
#ifndef CONVERT_DATA_H
#define CONVERT_DATA_H
/* data.h - charmap data, not used for classic Mac OS builds */
// data.h - charmap data, not used for classic Mac OS builds
#include "lib/defs.h"
/* Get the ID of the given character map. Return NULL if no such character map
exists. */
// Get the ID of the given character map. Return NULL if no such character map
// exists.
const char *CharmapID(int cmap);
/* Get the human-readable name of the given character map. Return NULL if no
such character map exists. */
// Get the human-readable name of the given character map. Return NULL if no
// such character map exists.
const char *CharmapName(int cmap);
/* Conversion table data. */
// Conversion table data.
struct CharmapData {
const UInt8 *ptr;
Size size;
};
/* Get the conversion table data for the given charmap. Returns an empty buffer
with a NULL pointer if the character map does not exist or if no conversion
table exists for that character map. */
// Get the conversion table data for the given charmap. Returns an empty buffer
// with a NULL pointer if the character map does not exist or if no conversion
// table exists for that character map.
struct CharmapData CharmapData(int cmap);
#endif