From 910039b77a459f47282f73fb27b48b54e4349bf6 Mon Sep 17 00:00:00 2001 From: Dietrich Epp Date: Wed, 24 Mar 2021 03:31:00 -0400 Subject: [PATCH] Convert files incrementally The previous implementation read the entire file into memory and then processed it. This version uses reasonably-sized buffers and can handle files of any size. Since this new conversion code is a bit more complicated, a test suite has been added which can be run on ordinary Unix systems. GitOrigin-RevId: acc7be277103fad1da2d0ca16d1a84be11802fbf --- .gitignore | 1 + Makefile | 16 ++- README.md | 8 +- convert.c | 59 +++++++++ convert.h | 46 +++++++ convert_test.c | 235 +++++++++++++++++++++++++++++++++ defs.h | 23 +--- file.c | 337 ++++++++++++++++++++---------------------------- mac_from_unix.c | 233 +++++++++++++++++++-------------- mac_to_unix.c | 104 ++++++++++----- sync.c | 7 +- test.sh | 4 + util.c | 4 + 13 files changed, 718 insertions(+), 359 deletions(-) create mode 100644 .gitignore create mode 100644 convert.c create mode 100644 convert.h create mode 100644 convert_test.c create mode 100644 test.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6b0b7e2 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/convert_test diff --git a/Makefile b/Makefile index 7105efc..d7c20b4 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,7 @@ COptions-68K = {COptions} {Sym-68K} ### Source Files ### SrcFiles = ∂ + convert.c ∂ file.c ∂ mac_from_unix.c ∂ mac_to_unix.c ∂ @@ -17,6 +18,7 @@ SrcFiles = ∂ ### Object Files ### ObjFiles-PPC = ∂ + convert.c.x ∂ file.c.x ∂ mac_from_unix.c.x ∂ mac_to_unix.c.x ∂ @@ -24,6 +26,7 @@ ObjFiles-PPC = ∂ util.c.x ObjFiles-68K = ∂ + convert.c.o ∂ file.c.o ∂ mac_from_unix.c.o ∂ mac_to_unix.c.o ∂ @@ -88,7 +91,13 @@ Dependencies ƒ $OutOfDate {SrcFiles} #*** Dependencies: Cut here *** -# These dependencies were produced at 1:09:52 PM on Tue, Mar 16, 2021 by MakeDepend +# These dependencies were produced at 3:18:36 AM on Wed, Mar 24, 2021 by MakeDepend + 
+:convert.c.x :convert.c.o ƒ ∂ + :convert.c ∂ + :convert.h ∂ + :defs.h ∂ + :mac_from_unix_data.h :file.c.x :file.c.o ƒ ∂ :file.c ∂ @@ -96,12 +105,11 @@ Dependencies ƒ $OutOfDate :mac_from_unix.c.x :mac_from_unix.c.o ƒ ∂ :mac_from_unix.c ∂ - :defs.h ∂ - :mac_from_unix_data.h + :convert.h :mac_to_unix.c.x :mac_to_unix.c.o ƒ ∂ :mac_to_unix.c ∂ - :defs.h + :convert.h :sync.c.x :sync.c.o ƒ ∂ :sync.c ∂ diff --git a/README.md b/README.md index 26c49da..7123ce7 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,6 @@ SyncFiles is a tool for MPW (Macintosh Programmer’s Workshop) which synchroniz - Creates Macintosh files with MPW Shell creator code and text file type. -## Limitations - -There is a hard-coded maximum file size of 64 KiB. - ## File Patterns Copies files named Makefile, and files with the following extensions: @@ -58,6 +54,10 @@ SyncFiles -pull - `-delete`: Delete files in destination which are missing from source. +## Testing + +Run `sh test.sh` to test the text conversion code. + ## License SyncFiles is distributed under the terms of the MIT license. See LICENSE.txt for details. diff --git a/convert.c b/convert.c new file mode 100644 index 0000000..20dc613 --- /dev/null +++ b/convert.c @@ -0,0 +1,59 @@ +// convert.c - Conversion helper functions. 
+#include "convert.h" + +#include "defs.h" +#include "mac_from_unix_data.h" + +#include +#include +#include +#include + +int convert_read(short ref, long *count, void *data) { + OSErr err; + + SpinCursor(1); + err = FSRead(ref, count, data); + switch (err) { + case noErr: + return kConvertOK; + case eofErr: + return kConvertEOF; + default: + print_errcode(err, "could not read source file"); + return kConvertError; + } +} + +int convert_write(short ref, long count, const void *data) { + OSErr err; + + SpinCursor(1); + err = FSWrite(ref, &count, data); + if (err == noErr) { + return kConvertOK; + } + print_errcode(err, "could not write temp file"); + return kConvertError; +} + +static unsigned short *gFromUnixData; + +// Get the table for converting from Unix to Macintosh. +unsigned short *mac_from_unix_data(void) { + Ptr ptr, src, dest; + + if (gFromUnixData != NULL) { + return gFromUnixData; + } + ptr = NewPtr(FROM_UNIX_DATALEN); + if (ptr == NULL) { + print_memerr(FROM_UNIX_DATALEN); + return NULL; + } + src = (void *)kFromUnixData; + dest = ptr; + UnpackBits(&src, &dest, FROM_UNIX_DATALEN); + gFromUnixData = (void *)ptr; + return gFromUnixData; +} diff --git a/convert.h b/convert.h new file mode 100644 index 0000000..23bf093 --- /dev/null +++ b/convert.h @@ -0,0 +1,46 @@ +// These helper functions are written so the conversion functions can be written +// for a standard C environment without using Macintosh Toolbox functions. + +enum { + // Base size of temporary buffer for converting files, not counting the + // "extra". + kBufferBaseSize = 16 * 1024, + + // Extra space past the end of the buffer for converting files. + kBufferExtraSize = 16, + + // Total size of a buffer. 
+ kBufferTotalSize = kBufferBaseSize + kBufferExtraSize, +}; + +// ============================================================================= +// Helper functions +// ============================================================================= + +// Result codes for convert_read and convert_write. +enum { + kConvertOK, + kConvertError, + kConvertEOF, +}; + +// Read data from a file. +int convert_read(short ref, long *count, void *data); + +// Write data to a file. +int convert_write(short ref, long count, const void *data); + +// Get the table for converting from Unix to Macintosh. +unsigned short *mac_from_unix_data(void); + +// ============================================================================= +// Conversion functions +// ============================================================================= + +// Convert Macintosh encoding with CR line endings to UTF-8 with LF. The source +// and destinations are file handles. The buffers have size kBufferTotalSize. +int mac_to_unix(short srcRef, short destRef, void *srcBuf, void *destBuf); + +// Convert UTF-8 with LF line endings to Macintosh encoding with CR. The source +// and destinations are file handles. The buffers have size kBufferTotalSize. 
+int mac_from_unix(short srcRef, short destRef, void *srcBuf, void *destBuf); diff --git a/convert_test.c b/convert_test.c new file mode 100644 index 0000000..edbe3b6 --- /dev/null +++ b/convert_test.c @@ -0,0 +1,235 @@ +#include "convert.h" + +#include <stdio.h> +#include <stdlib.h> +#include <stdnoreturn.h> +#include <string.h> + +#include "mac_from_unix_data.h" + +static noreturn void malloc_fail(size_t sz) { + fprintf(stderr, "Error: malloc(%zu) failed\n", sz); + exit(1); +} + +static void *xmalloc(size_t sz) { + void *ptr = malloc(sz); + if (ptr == NULL) { + malloc_fail(sz); + } + return ptr; +} + +struct buf { + char *data; + size_t size; + size_t alloc; +}; + +static void buf_put(struct buf *buf, const void *data, size_t length) { + if (length > buf->alloc - buf->size) { + size_t nalloc = buf->alloc; + if (nalloc == 0) { + nalloc = 1024; + } + while (length > nalloc - buf->size) { + nalloc <<= 1; + } + void *narr = realloc(buf->data, nalloc); + if (narr == NULL) { + malloc_fail(nalloc); + } + buf->data = narr; + buf->alloc = nalloc; + } + memcpy(buf->data + buf->size, data, length); + buf->size += length; +} + +// ============================================================================= + +static unsigned short *gMacFromUnixData; + +static noreturn void bad_unpackbits(void) { + fputs("Error: invalid unpackbits data\n", stderr); + exit(1); +} + +static void unpackbits(void *dest, size_t destsz, const void *src, + size_t srcsz) { + const unsigned char *ip = src, *ie = ip + srcsz; + unsigned char *op = dest, *oe = op + destsz; + while (op < oe) { + if (ip >= ie) { + bad_unpackbits(); + } + int c = (signed char)*ip++; + if (c >= 0) { + int len = c + 1; + if (len > ie - ip || len > oe - op) { + bad_unpackbits(); + } + memcpy(op, ip, len); + op += len; + ip += len; + } else { + int len = -c + 1; + if (ip >= ie || len > oe - op) { + bad_unpackbits(); + } + memset(op, *ip, len); + op += len; + ip += 1; + } + } + if (ip != ie) { + bad_unpackbits(); + } +} + +unsigned short *mac_from_unix_data(void) { + 
unsigned short *ptr = gMacFromUnixData; + if (ptr == NULL) { + unsigned char *bytes = xmalloc(FROM_UNIX_DATALEN); + unpackbits(bytes, FROM_UNIX_DATALEN, kFromUnixData, + sizeof(kFromUnixData)); + ptr = xmalloc(FROM_UNIX_DATALEN); + for (int i = 0; i < FROM_UNIX_DATALEN / 2; i++) { + ptr[i] = (bytes[i * 2] << 8) | bytes[i * 2 + 1]; + } + free(bytes); + gMacFromUnixData = ptr; + } + return ptr; +} + +// ============================================================================= + +enum { + kSrcRef = 1234, + kDestRef = 5678, +}; + +static const char *gReadBuf; +static size_t gReadSize; +static size_t gReadPos; +static size_t gReadChunk; +static struct buf gWriteBuf; + +int convert_read(short ref, long *count, void *data) { + if (ref != kSrcRef) { + fputs("Wrong ref\n", stderr); + exit(1); + } + size_t amt = *count; + size_t rem = gReadSize - gReadPos; + if (amt > rem) { + amt = rem; + } + if (gReadChunk != 0 && amt > gReadChunk) { + amt = gReadChunk; + } + *count = amt; + memcpy(data, gReadBuf + gReadPos, amt); + gReadPos += amt; + if (gReadPos == gReadSize) { + return kConvertEOF; + } + return kConvertOK; +} + +int convert_write(short ref, long count, const void *data) { + if (ref != kDestRef) { + fputs("Wrong ref\n", stderr); + exit(1); + } + buf_put(&gWriteBuf, data, count); + return kConvertOK; +} + +// ============================================================================= + +enum { + kInputSize = 64 * 1024 - 2, +}; + +static char *gen_input(void) { + char *ptr = xmalloc(kInputSize); + unsigned state = 0x12345678; + for (int i = 0; i < kInputSize; i++) { + // Relatively common LCG. + state = (state * 1103515245 + 12345) & 0x7fffffff; + ptr[i] = state >> 23; + } + return ptr; +} + +int main(int argc, char **argv) { + (void)argc; + (void)argv; + + int r; + + void *sbuf = xmalloc(kBufferTotalSize); + void *dbuf = xmalloc(kBufferTotalSize); + + // Generate input. + char *input = gen_input(); + + // Convert Macintosh -> UTF-8. 
+ gReadBuf = input; + gReadSize = kInputSize; + gReadPos = 0; + r = mac_to_unix(kSrcRef, kDestRef, sbuf, dbuf); + if (r != 0) { + fputs("mac_to_unix failed\n", stderr); + return 1; + } + + // Check that we have no CR. + { + const char *data = gWriteBuf.data; + size_t size = gWriteBuf.size; + for (size_t i = 0; i < size; i++) { + if (data[i] == 0x0d) { + fprintf(stderr, "Error: CR at offset %zu\n", i); + return 1; + } + } + } + + // Convert back. + gReadBuf = gWriteBuf.data; + gReadSize = gWriteBuf.size; + gReadPos = 0; + gWriteBuf = (struct buf){NULL, 0, 0}; + r = mac_from_unix(kSrcRef, kDestRef, sbuf, dbuf); + if (r != 0) { + fputs("mac_from_unix failed\n", stderr); + return 1; + } + + // Check that this is equal to original, except with LF changed to CR. + { + const char *data = gWriteBuf.data; + size_t size = gWriteBuf.size; + if (kInputSize != size) { + fprintf(stderr, "Error: size = %zu, expect %d\n", size, kInputSize); + return 1; + } + for (size_t i = 0; i < kInputSize; i++) { + unsigned char x = input[i]; + if (x == 0x0a) { + x = 0x0d; + } + unsigned char y = data[i]; + if (x != y) { + fprintf(stderr, "Error: data[%zu] = 0x%02x, expect 0x%02x\n", i, + y, x); + return 1; + } + } + } + + return 0; +} diff --git a/defs.h b/defs.h index 7f2ef09..d5f0d3e 100644 --- a/defs.h +++ b/defs.h @@ -37,6 +37,11 @@ void print_errcode(OSErr err, const char *msg, ...); // Print an out-of-memory error. void print_memerr(unsigned long size); +// Print an abort message. +void print_abort_func(const char *file, int line); + +#define print_abort() print_abort_func(__FILE__, __LINE__) + // Log the error result of a function call. void log_call(OSErr err, const char *function); @@ -51,9 +56,7 @@ void p2cstr(char *ostr, const unsigned char *istr); // ============================================================================= // Text file conversion function. 
-typedef int (*convert_func)(unsigned char **outptr, unsigned char *outend, - const unsigned char **inptr, - const unsigned char *inend); +typedef int (*convert_func)(short src, short dest, void *srcBuf, void *destBuf); enum { kSrcDir, @@ -90,17 +93,3 @@ struct file_info { int sync_file(struct file_info *file, convert_func func, short srcVol, long srcDir, short destVol, long destDir, short tempVol, long tempDir); - -// ============================================================================= -// conversion -// ============================================================================= - -// mac_to_unix.c -int mac_to_unix(unsigned char **outptr, unsigned char *outend, - const unsigned char **inptr, const unsigned char *inend); - -// mac_from_unix.c -int mac_from_unix(unsigned char **outptr, unsigned char *outend, - const unsigned char **inptr, const unsigned char *inend); -int mac_from_unix_init(void); -void mac_from_unix_term(void); diff --git a/file.c b/file.c index 53cd5f6..623cbb2 100644 --- a/file.c +++ b/file.c @@ -1,82 +1,12 @@ #include "defs.h" +#include "convert.h" + #include #include #include -enum { - // Maximum file size that we will copy. - kMaxFileSize = 64 * 1024, -}; - -// Read the entire data fork of a file. The result must be freed with -// DisposePtr. -static int read_file(FSSpec *spec, Ptr *data, long *length) { - CInfoPBRec ci; - Ptr ptr; - long dataLength, pos, count; - OSErr err; - short fref; - - // Get file size. 
- memset(&ci, 0, sizeof(ci)); - ci.hFileInfo.ioNamePtr = spec->name; - ci.hFileInfo.ioVRefNum = spec->vRefNum; - ci.hFileInfo.ioDirID = spec->parID; - err = PBGetCatInfoSync(&ci); - if (err != 0) { - print_errcode(err, "could not get file metadata"); - return 1; - } - if ((ci.hFileInfo.ioFlAttrib & kioFlAttribDirMask) != 0) { - print_err("is a directory"); - return 1; - } - dataLength = ci.hFileInfo.ioFlLgLen; - if (dataLength > kMaxFileSize) { - print_err("file is too large: size=%ld, max=%ld", dataLength, - kMaxFileSize); - return 1; - } - if (dataLength == 0) { - *data = NULL; - *length = 0; - return 0; - } - - // Allocate memory. - ptr = NewPtr(dataLength); - if (ptr == NULL) { - print_memerr(dataLength); - return 1; - } - - // Read file. - err = FSpOpenDF(spec, fsRdPerm, &fref); - if (err != 0) { - DisposePtr(ptr); - print_errcode(err, "could not open file"); - return 1; - } - pos = 0; - while (pos < dataLength) { - count = dataLength - pos; - err = FSRead(fref, &count, ptr + pos); - if (err != 0) { - DisposePtr(ptr); - FSClose(fref); - print_errcode(err, "could not read file"); - return 1; - } - pos += count; - } - FSClose(fref); - *data = ptr; - *length = dataLength; - return 0; -} - // Make an FSSpec for a temporary file. static int make_temp(FSSpec *temp, short vRefNum, long dirID, const unsigned char *name) { @@ -99,91 +29,51 @@ static int make_temp(FSSpec *temp, short vRefNum, long dirID, return 0; } -// Write the entire contents of a file. -static int write_file(FSSpec *dest, short tempVol, long tempDir, Ptr data, - long length, long modTime, file_action action) { - OSType creator = 'MPS ', fileType = 'TEXT'; - FSSpec temp; - long pos, amt; - short ref; - HParamBlockRec pb; - CMovePBRec cm; +// Set the modification time for a file. +static int set_modtime(FSSpec *spec, long modTime) { CInfoPBRec ci; Str31 name; OSErr err; - int r; - bool mustMove, mustRename; - // Save the data to a temporary file. 
- r = make_temp(&temp, tempVol, tempDir, dest->name); - if (r != 0) { - return 1; - } - err = FSpCreate(&temp, creator, fileType, smSystemScript); - if (err == dupFNErr) { - err = FSpDelete(&temp); - if (err != 0) { - print_errcode(err, "could not delete existing temp file"); - return 1; - } - err = FSpCreate(&temp, creator, fileType, smSystemScript); - } - if (err != 0) { - print_errcode(err, "could not create file"); - return 1; - } - err = FSpOpenDF(&temp, fsRdWrPerm, &ref); - if (err != 0) { - print_errcode(err, "could not open temp file"); - goto error; - } - pos = 0; - while (pos < length) { - amt = length - pos; - err = FSWrite(ref, &amt, data + pos); - if (err != 0) { - FSClose(ref); - print_errcode(err, "could not write temp file"); - goto error; - } - pos += amt; - } - err = FSClose(ref); - if (err != 0) { - print_errcode(err, "could not close temp file"); - goto error; - } - - // Update the modification time. memset(&ci, 0, sizeof(ci)); - memcpy(name, temp.name, temp.name[0] + 1); + memcpy(name, spec->name, spec->name[0] + 1); ci.hFileInfo.ioNamePtr = name; - ci.hFileInfo.ioVRefNum = temp.vRefNum; - ci.hFileInfo.ioDirID = temp.parID; + ci.hFileInfo.ioVRefNum = spec->vRefNum; + ci.hFileInfo.ioDirID = spec->parID; err = PBGetCatInfoSync(&ci); if (err != 0) { print_errcode(err, "could not get temp file info"); - goto error; + return 1; } - memcpy(name, temp.name, temp.name[0] + 1); + memcpy(name, spec->name, spec->name[0] + 1); ci.hFileInfo.ioNamePtr = name; - ci.hFileInfo.ioVRefNum = temp.vRefNum; - ci.hFileInfo.ioDirID = temp.parID; + ci.hFileInfo.ioVRefNum = spec->vRefNum; + ci.hFileInfo.ioDirID = spec->parID; ci.hFileInfo.ioFlMdDat = modTime; err = PBSetCatInfoSync(&ci); if (err != 0) { print_errcode(err, "could not set temp file info"); - goto error; + return 1; } + return 0; +} + +// Move a temp file over a destination file. This may modify the temp file spec +// if it moves in multiple stages. 
+static int replace_file(FSSpec *temp, FSSpec *dest, file_action action) { + HParamBlockRec pb; + CMovePBRec cm; + OSErr err; + bool mustMove, mustRename; // First, try to exchange files if destination exists. if (action == kActionReplace) { - err = FSpExchangeFiles(&temp, dest); + err = FSpExchangeFiles(temp, dest); if (gLogLevel >= kLogVerbose) { log_call(err, "FSpExchangeFiles"); } if (err == 0) { - err = FSpDelete(&temp); + err = FSpDelete(temp); if (err != 0) { print_errcode(err, "could not remove temporary file"); return 1; @@ -193,27 +83,27 @@ static int write_file(FSSpec *dest, short tempVol, long tempDir, Ptr data, // paramErr: function not supported by volume. if (err != paramErr) { print_errcode(err, "could not exchange files"); - goto error; + return 1; } // Otherwise, delete destination and move temp file over. err = FSpDelete(dest); if (err != 0) { print_errcode(err, "could not remove destination file"); - goto error; + return 1; } } - mustMove = dest->parID != temp.parID; - mustRename = memcmp(dest->name, temp.name, dest->name[0] + 1) != 0; + mustMove = dest->parID != temp->parID; + mustRename = memcmp(dest->name, temp->name, dest->name[0] + 1) != 0; // Next, try MoveRename. if (mustMove && mustRename) { memset(&pb, 0, sizeof(pb)); - pb.copyParam.ioNamePtr = temp.name; - pb.copyParam.ioVRefNum = temp.vRefNum; + pb.copyParam.ioNamePtr = temp->name; + pb.copyParam.ioVRefNum = temp->vRefNum; pb.copyParam.ioNewName = dest->name; pb.copyParam.ioNewDirID = dest->parID; - pb.copyParam.ioDirID = temp.parID; + pb.copyParam.ioDirID = temp->parID; err = PBHMoveRenameSync(&pb); if (gLogLevel >= kLogVerbose) { log_call(err, "PBHMoveRename"); @@ -224,58 +114,53 @@ static int write_file(FSSpec *dest, short tempVol, long tempDir, Ptr data, // paramErr: function not supported by volume. if (err != paramErr) { print_errcode(err, "could not rename temporary file"); - goto error; + return 1; } } // Finally, try move and then rename. 
if (mustMove) { memset(&cm, 0, sizeof(cm)); - cm.ioNamePtr = temp.name; - cm.ioVRefNum = temp.vRefNum; + cm.ioNamePtr = temp->name; + cm.ioVRefNum = temp->vRefNum; cm.ioNewDirID = dest->parID; - cm.ioDirID = temp.parID; + cm.ioDirID = temp->parID; err = PBCatMoveSync(&cm); if (gLogLevel >= kLogVerbose) { log_call(err, "PBCatMove"); } if (err != 0) { print_errcode(err, "could not move temporary file"); - goto error; + return 1; } - temp.parID = dest->parID; + temp->parID = dest->parID; } if (mustRename) { - err = FSpRename(&temp, dest->name); + err = FSpRename(temp, dest->name); if (gLogLevel >= kLogVerbose) { log_call(err, "FSpRename"); } if (err != 0) { print_errcode(err, "could not rename temporary file"); - goto error; + return 1; } } return 0; - -error: - err = FSpDelete(&temp); - if (err != 0) { - print_errcode(err, "could not delete temp file"); - } - return 1; } +static Ptr gSrcBuffer; +static Ptr gDestBuffer; + int sync_file(struct file_info *file, convert_func func, short srcVol, long srcDir, short destVol, long destDir, short tempVol, long tempDir) { - FSSpec src, dest; - Ptr srcData = NULL, destData = NULL; - long srcLength, destLength; - int r, result = 1; + OSType creator = 'MPS ', fileType = 'TEXT'; + FSSpec src, dest, temp; + short srcRef = 0, destRef = 0; + bool has_temp = false; + int r; OSErr err; - unsigned char *outptr, *outend; - const unsigned char *inptr, *inend; // Handle actions which don't involve conversion. if (file->action == kActionNone) { @@ -306,53 +191,107 @@ int sync_file(struct file_info *file, convert_func func, short srcVol, print_errcode(err, "could not create destination spec"); return 1; } - - // Read the source file into memory. - r = read_file(&src, &srcData, &srcLength); + r = make_temp(&temp, tempVol, tempDir, dest.name); if (r != 0) { return 1; } + // Open the source file for reading. 
+ err = FSpOpenDF(&src, fsRdPerm, &srcRef); + if (err != 0) { + print_errcode(err, "could not open file"); + goto error; + } + + // Create and open the temporary file for writing. + err = FSpCreate(&temp, creator, fileType, smSystemScript); + if (err == dupFNErr) { + err = FSpDelete(&temp); + if (err != 0) { + print_errcode(err, "could not delete existing temp file"); + goto error; + } + err = FSpCreate(&temp, creator, fileType, smSystemScript); + } + if (err != 0) { + print_errcode(err, "could not create file"); + goto error; + } + has_temp = true; + err = FSpOpenDF(&temp, fsRdWrPerm, &destRef); + if (err != 0) { + print_errcode(err, "could not open temp file"); + goto error; + } + + // Get buffers for conversion. + if (gSrcBuffer == NULL) { + gSrcBuffer = NewPtr(kBufferTotalSize); + if (gSrcBuffer == NULL) { + print_memerr(kBufferTotalSize); + goto error; + } + } + if (gDestBuffer == NULL) { + gDestBuffer = NewPtr(kBufferTotalSize); + if (gDestBuffer == NULL) { + print_memerr(kBufferTotalSize); + goto error; + } + } + // Convert data. - if (srcLength > 0) { - destLength = srcLength + (srcLength >> 2) + 16; - destData = NewPtr(destLength); - if (destData == NULL) { - print_memerr(destLength); - goto done; - } - outptr = (unsigned char *)destData; - outend = outptr + destLength; - inptr = (unsigned char *)srcData; - inend = inptr + srcLength; - func(&outptr, outend, &inptr, inend); - if (inptr != inend) { - print_err("conversion function failed"); - goto done; - } - destLength = outptr - (unsigned char *)destData; - } else { - destLength = 0; - destData = NULL; - } - - // Write destination file. - r = write_file(&dest, tempVol, tempDir, destData, destLength, - file->meta[kSrcDir].modTime, file->action); + r = func(srcRef, destRef, gSrcBuffer, gDestBuffer); if (r != 0) { - goto done; + goto error; } - // Success. - result = 0; + // Close files. 
+ err = FSClose(srcRef); + srcRef = 0; + if (err != 0) { + print_errcode(err, "could not close source file"); + goto error; + } + err = FSClose(destRef); + destRef = 0; + if (err != 0) { + print_errcode(err, "could not close temp file"); + goto error; + } -done: + // Set modification time. + r = set_modtime(&temp, file->meta[kSrcDir].modTime); + if (r != 0) { + goto error; + } + + // Overwrite destination. + r = replace_file(&temp, &dest, file->action); + if (r != 0) { + goto error; + } + return 0; + +error: // Clean up. - if (srcData != NULL) { - DisposePtr(srcData); + if (srcRef != 0) { + err = FSClose(srcRef); + if (err != 0) { + print_errcode(err, "could not close source file"); + } } - if (destData != NULL) { - DisposePtr(destData); + if (destRef != 0) { + err = FSClose(destRef); + if (err != 0) { + print_errcode(err, "could not close destination file"); + } } - return result; + if (has_temp) { + err = FSpDelete(&temp); + if (err != 0) { + print_errcode(err, "could not delete temp file"); + } + } + return 1; } diff --git a/mac_from_unix.c b/mac_from_unix.c index 3ed399b..e24e5e6 100644 --- a/mac_from_unix.c +++ b/mac_from_unix.c @@ -1,115 +1,150 @@ -#include "defs.h" -#include "mac_from_unix_data.h" - -#include +#include "convert.h" #include -static unsigned short *gFromUnixData; - -static void print_uerr(const unsigned char *start, const unsigned char *end) { - const unsigned char *ptr; - int lineno = 1, colno = 0; - for (ptr = start; ptr != end; ptr++) { - colno++; - // Note: \r != 0x0d, \n != 0x0a on old Mac compilers. 
- if (*ptr == 0x0a || *ptr == 0x0d) { - lineno++; - colno = 0; - } - } - fprintf(stderr, "## Error: line %d, column %d: invalid character\n", lineno, - colno); -} - -int mac_from_unix(unsigned char **outptr, unsigned char *outend, - const unsigned char **inptr, const unsigned char *inend) { - unsigned char *op = *outptr; - const unsigned char *ip = *inptr, *curpos; - const unsigned short *table; +int mac_from_unix(short srcRef, short destRef, void *srcBuf, void *destBuf) { + unsigned char *op, *oe; // Output ptr, end. + unsigned char *ip, *ie; // Input ptr, end. + unsigned char *tmp, *curpos; + const unsigned short *table; // Conversion table. unsigned entry, value, state, c, last, curvalue; + long count, i; + int has_eof = 0, need_input = 1, do_unput; + int lineno = 1, r; - table = gFromUnixData; + table = mac_from_unix_data(); if (table == NULL) { - print_err("table not loaded"); return 1; } - last = 0; - while (ip < inend && op < outend) { - c = *ip; - if (c < 128) { - // Note: \r != 0x0d, \n != 0x0a on old Mac compilers. - if (c == 0x0a) { - c = 0x0d; - } - if (op == outend) { + + // Initialize buffer pointers. + ip = srcBuf; + ie = ip; + last = 0; // No previous output character to combine with yet. + op = destBuf; + // The destination buffer has an extra byte which may be combined with a + // diacritic. + oe = op + kBufferBaseSize + 1; + + for (;;) { + if (need_input || ip >= ie) { + if (has_eof && ip == ie) { break; } - *op = c; - last = c; - ip++; - op++; - } else { - // Find the longest matching Unicode character. - state = table[last] & 0xff00; - if (state != 0) { - // Continue from previous character. - op--; - curpos = ip; - curvalue = last; - } else { - // Continue from current character. 
- curpos = NULL; - curvalue = 0; - } - do { - entry = table[state | *ip++]; - state = entry & 0xff00; - value = entry & 0xff; - if (value != 0) { - curpos = ip; - curvalue = value; - } - } while (state != 0 && ip < inend); - if (curvalue == 0) { - print_uerr(*outptr, op); - *outptr = op; - *inptr = ip; + + // Save unprocessed input, move to beginning of buffer. + count = ie - ip; + if (count > kBufferExtraSize) { + fputs("## Internal error\n", stderr); return 1; } - ip = curpos; - *op++ = curvalue; - last = 0; + tmp = ip; + ip = (unsigned char *)srcBuf + kBufferExtraSize - count; + for (i = 0; i < count; i++) { + ip[i] = tmp[i]; + } + + // Try to fill remainder of buffer. + count = kBufferBaseSize; + + r = convert_read(srcRef, &count, (char *)srcBuf + kBufferExtraSize); + if (r != kConvertOK) { + if (r == kConvertEOF) { + has_eof = 1; + if (count == 0) { + break; + } + } else { + return 1; + } + } + ie = (unsigned char *)srcBuf + kBufferExtraSize + count; + need_input = 0; + } + + // If output buffer has a full chunk and an extra byte, write out the + // chunk and keep the extra byte. + if (op >= oe) { + count = kBufferBaseSize; + r = convert_write(destRef, count, destBuf); + if (r != 0) { + return 1; + } + tmp = destBuf; + tmp[0] = tmp[kBufferBaseSize]; + op -= kBufferBaseSize; + } + + while (ip < ie && op < oe) { + c = *ip; + if (c < 128) { + ip++; + // Note: \r = 0x0a, \n = 0x0d on old Mac compilers. + if (c == 0x0a || c == 0x0d) { + c = 0x0d; + lineno++; + } + *op++ = c; + last = c; + } else { + // Find the longest matching Unicode character. + // curpos: ip after longest match. + // curvalue: output character after longest match. + state = table[last] & 0xff00; + if (state != 0) { + // Continue from previous character. + do_unput = 1; + curpos = ip; + curvalue = last; + } else { + // Continue with new character. 
+ do_unput = 0; + curpos = NULL; + curvalue = 0; + } + tmp = ip; + do { + entry = table[state | *tmp++]; + state = entry & 0xff00; + value = entry & 0xff; + if (value != 0) { + curpos = tmp; + curvalue = value; + } + } while (state != 0 && tmp < ie); + if (state == 0 || has_eof) { + // We cannot consume more bytes. When state == 0, the state + // machine will not consume any more characters. When ip == + // ie && has_eof, there are no more bytes available. + if (curvalue == 0) { + fprintf(stderr, + "## Error: line %d: invalid character\n", + lineno); + return 1; + } + ip = curpos; + if (do_unput) { + op--; + } + *op++ = curvalue; + last = 0; + } else { + // We can consume more bytes. Get more, and come back. + need_input = 1; + break; + } + } } } - *outptr = op; - *inptr = ip; + + // Write remainder of output buffer. + if (op != destBuf) { + count = op - (unsigned char *)destBuf; + r = convert_write(destRef, count, destBuf); + if (r != 0) { + return 1; + } + } + return 0; } - -int mac_from_unix_init(void) { - Ptr ptr, src, dest; - OSErr err; - - if (gFromUnixData != NULL) { - return 0; - } - ptr = NewPtr(FROM_UNIX_DATALEN); - err = MemError(); - if (err != 0) { - print_errcode(err, "out of memory"); - return 1; - } - src = (void *)kFromUnixData; - dest = ptr; - UnpackBits(&src, &dest, FROM_UNIX_DATALEN); - gFromUnixData = (void *)ptr; - return 0; -} - -void mac_from_unix_term(void) { - if (gFromUnixData != NULL) { - DisposePtr((void *)gFromUnixData); - gFromUnixData = NULL; - } -} diff --git a/mac_to_unix.c b/mac_to_unix.c index ab576b4..109760e 100644 --- a/mac_to_unix.c +++ b/mac_to_unix.c @@ -1,6 +1,4 @@ -#include "defs.h" - -#include +#include "convert.h" // Table that converts Macintosh Roman characters to UTF-8, and CR to LF. 
static const unsigned short kToUnixTable[256] = { @@ -28,39 +26,83 @@ static const unsigned short kToUnixTable[256] = { 184, 733, 731, 711, }; -int mac_to_unix(unsigned char **outptr, unsigned char *outend, - const unsigned char **inptr, const unsigned char *inend) { - unsigned char *op = *outptr; - const unsigned char *ip = *inptr; - unsigned cp; +int mac_to_unix(short srcRef, short destRef, void *srcBuf, void *destBuf) { + unsigned char *op, *oe, *tmp; // Output ptr, end. + const unsigned char *ip, *ie; // Input ptr, end. + unsigned cp; // Code point. + int r; + long count; + int has_eof = 0; - while (ip < inend) { - cp = kToUnixTable[*ip]; - if (cp < 0x80) { - if (outend - op < 1) { + // Initialize buffer pointers. + ip = srcBuf; + ie = ip; + op = destBuf; + oe = op + kBufferBaseSize; + + for (;;) { + // If input buffer is consumed, read more. + if (ip >= ie) { + if (has_eof) { break; } - op[0] = cp; - op += 1; - } else if (cp < 0x400) { - if (outend - op < 2) { - break; + count = kBufferBaseSize; + r = convert_read(srcRef, &count, srcBuf); + if (r != kConvertOK) { + if (r == kConvertEOF) { + has_eof = 1; + if (count == 0) { + break; + } + } else { + return 1; + } + } + ip = srcBuf; + ie = ip + count; + } + + // If output buffer has a full chunk, write it out. + if (op >= oe) { + count = kBufferBaseSize; + r = convert_write(destRef, count, destBuf); + if (r != 0) { + return 1; + } + tmp = destBuf; + tmp[0] = tmp[kBufferBaseSize]; + tmp[1] = tmp[kBufferBaseSize + 1]; + op -= kBufferBaseSize; + } + + // Convert as much as possible. Note that the "extra" past the end of + // the destination buffer may be used, just to simplify bounds checking. 
+ while (ip < ie && op < oe) { + cp = kToUnixTable[*ip++]; + if (cp < 0x80) { + op[0] = cp; + op += 1; + } else if (cp < 0x400) { + op[0] = (cp >> 6) | 0xc0; + op[1] = (cp & 0x3f) | 0x80; + op += 2; + } else { + op[0] = (cp >> 12) | 0xe0; + op[1] = ((cp >> 6) & 0x3f) | 0x80; + op[2] = (cp & 0x3f) | 0x80; + op += 3; } - op[0] = (cp >> 6) | 0xc0; - op[1] = (cp & 0x3f) | 0x80; - op += 2; - } else { - if (outend - op < 3) { - break; - } - op[0] = (cp >> 12) | 0xe0; - op[1] = ((cp >> 6) & 0x3f) | 0x80; - op[2] = (cp & 0x3f) | 0x80; - op += 3; } - ip++; } - *outptr = op; - *inptr = ip; + + // Write remainder of output buffer. + if (op != destBuf) { + count = op - (unsigned char *)destBuf; + r = convert_write(destRef, count, destBuf); + if (r != 0) { + return 1; + } + } + return 0; } diff --git a/sync.c b/sync.c index 7f0642d..2e68bc7 100644 --- a/sync.c +++ b/sync.c @@ -1,5 +1,7 @@ #include "defs.h" +#include "convert.h" + #include #include #include @@ -317,10 +319,6 @@ static int command_main(char *localPath, char *remotePath, int mode) { // Synchronize the files. 
InitCursorCtl(NULL); if (mode == kModePull) { - r = mac_from_unix_init(); - if (r != 0) { - return 1; - } func = mac_from_unix; err = FindFolder(destVol, kTemporaryFolderType, true, &tempVol, &tempDir); @@ -405,7 +403,6 @@ int main(int argc, char **argv) { if (gFiles != NULL) { DisposeHandle(gFiles); } - mac_from_unix_term(); if (gLogLevel >= kLogVerbose) { fputs("## Done\n", stderr); } diff --git a/test.sh b/test.sh new file mode 100644 index 0000000..5afb0e5 --- /dev/null +++ b/test.sh @@ -0,0 +1,4 @@ +set -e +CFLAGS="-O0 -g -Wall -Wextra -Wstrict-prototypes" +cc -o convert_test $CFLAGS convert_test.c mac_to_unix.c mac_from_unix.c +exec ./convert_test diff --git a/util.c b/util.c index 894c5a2..eafaccd 100644 --- a/util.c +++ b/util.c @@ -86,6 +86,10 @@ void print_memerr(unsigned long size) { print_errcode(err, "out of memory; size=%lu", size); } +void print_abort_func(const char *file, int line) { + print_err("assertion failed: %s:%d", file, line); +} + void log_call(OSErr err, const char *function) { const char *emsg;