syncfiles/convert/convert_1f.c

131 lines
2.6 KiB
C

// Copyright 2022 Dietrich Epp.
// This file is part of SyncFiles. SyncFiles is licensed under the terms of the
// Mozilla Public License, version 2.0. See LICENSE.txt for details.
// convert_1f.c - Forward conversion from extended ASCII to UTF-8.
#include "convert/convert.h"
#include "lib/defs.h"
// Conversion table used by Convert1fRun. Convert1fBuild fills it in from the
// raw table data.
struct Convert1fData {
// Unicode characters, encoded in UTF-8, and packed MSB first. Always either
// 2 bytes or 3 bytes. Indexed by the input byte value minus 128, so entry 0
// is the character for input byte 128. Convert1fRun writes three bytes for a
// value above 0xffff and two bytes for any other value.
UInt32 chars[128];
};
// State carried from one call of Convert1fRun to the next. Convert1fRun
// accesses the caller's struct ConverterState through a pointer cast to this
// type.
struct Convert1fState {
// The last input byte consumed by the previous call. Convert1fRun uses it to
// recognize a CR LF pair whose two bytes arrive in separate calls.
UInt8 lastch;
};
// Build the forward conversion table from raw table data.
//
// The data in *data is one leading byte, which this function skips without
// interpreting, followed by 128 entries, one for each input byte value from
// 128 to 255 in order. Each entry consists of:
//
//   - a length byte, which must be 2 or 3;
//   - that many bytes, which are packed MSB first into chars[];
//   - a second length byte;
//   - that many bytes, which are skipped.
//
// Only the lengths are checked; the content of the bytes is not validated.
// Any data after the last entry is ignored.
//
// datasz is the total size of *data in bytes, including the leading byte.
//
// On success, stores a newly allocated handle containing a struct
// Convert1fData in *out and returns 0. Returns kErrorNoMemory if the handle
// cannot be allocated, or kErrorBadData if the data is truncated or an entry
// has an invalid length. On failure, *out is not modified.
ErrorCode Convert1fBuild(Handle *out, Handle data, Size datasz)
{
	Handle h;
	struct Convert1fData *cvt;
	const UInt8 *dptr, *dend;
	UInt32 uch;
	int i, n;

	h = NewHandle(sizeof(struct Convert1fData));
	if (h == NULL) {
		return kErrorNoMemory;
	}
	// The data must at least contain the leading byte, otherwise skipping it
	// would move the read pointer past the end.
	if (datasz < 1) {
		goto bad_table;
	}
	cvt = (void *)*h;

	// Compute the end from the start of the data, and only then skip the
	// leading byte. Computing the end after the skip would place it one byte
	// past the real end of the data and let a truncated table be read out of
	// bounds.
	dptr = (const UInt8 *)*data;
	dend = dptr + datasz;
	dptr++;

	for (i = 0; i < 128; i++) {
		// First field: length byte, then 2 or 3 bytes to store.
		if (dptr == dend) {
			goto bad_table;
		}
		n = *dptr++;
		if (n < 2 || 3 < n) {
			goto bad_table;
		}
		if (dend - dptr < n) {
			goto bad_table;
		}
		uch = 0;
		while (n-- > 0) {
			uch = (uch << 8) | *dptr++;
		}
		cvt->chars[i] = uch;

		// Second field: length byte, then that many bytes which are not used
		// by this converter.
		if (dptr == dend) {
			goto bad_table;
		}
		n = *dptr++;
		if (dend - dptr < n) {
			goto bad_table;
		}
		dptr += n;
	}

	*out = h;
	return 0;

bad_table:
	DisposeHandle(h);
	return kErrorBadData;
}
// Convert extended ASCII text to UTF-8.
//
// Reads input from *iptr up to iend and writes output from *optr up to oend.
// Conversion stops when the input is exhausted or when fewer than three bytes
// of output space remain. On return, *iptr and *optr are advanced past the
// bytes consumed and produced, and the last byte consumed is saved in the
// state so the next call can continue where this one stopped.
//
// cvtptr points to the struct Convert1fData table. lc selects how line breaks
// are written: CR, LF, and CR LF are each treated as one line break, and are
// either kept as they are or replaced with the requested sequence.
void Convert1fRun(const void *cvtptr, LineBreakConversion lc,
                  struct ConverterState *stateptr, UInt8 **optr, UInt8 *oend,
                  const UInt8 **iptr, const UInt8 *iend)
{
	const struct Convert1fData *table = cvtptr;
	struct Convert1fState *st = (struct Convert1fState *)stateptr;
	const UInt8 *in = *iptr;
	UInt8 *out = *optr;
	unsigned cur = st->lastch;
	unsigned prev;
	UInt32 packed;

	for (;;) {
		// Every input byte produces at most three output bytes, so three
		// bytes of headroom is enough for any single step.
		if (in >= iend || oend - out < 3) {
			break;
		}
		prev = cur;
		cur = *in++;

		if (cur >= 128) {
			// High half: look up the packed UTF-8 sequence. A value that
			// does not fit in 16 bits has a third, leading byte.
			packed = table->chars[cur - 128];
			if (packed > 0xffff) {
				*out++ = packed >> 16;
			}
			*out++ = packed >> 8;
			*out++ = packed;
			continue;
		}

		if (cur != kCharLF && cur != kCharCR) {
			// Plain ASCII is copied through unchanged.
			*out++ = cur;
			continue;
		}

		if (cur == kCharLF && prev == kCharCR) {
			// Second half of a CR LF pair. The line break was already
			// written when the CR was seen, so the LF is only written when
			// line breaks are being kept as they are.
			if (lc == kLineBreakKeep) {
				*out++ = cur;
			}
			continue;
		}

		// A line break: a CR, or an LF that does not follow a CR.
		if (lc == kLineBreakKeep) {
			*out++ = cur;
		} else if (lc == kLineBreakLF) {
			*out++ = kCharLF;
		} else if (lc == kLineBreakCR) {
			*out++ = kCharCR;
		} else if (lc == kLineBreakCRLF) {
			*out++ = kCharCR;
			*out++ = kCharLF;
		}
	}

	st->lastch = cur;
	*iptr = in;
	*optr = out;
}