// ciderpress/reformat/AppleWorks.cpp (C++; listed as 1167 lines, 35 KiB)

/*
* CiderPress
* Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved.
* See the file LICENSE for distribution terms.
*/
/*
* Convert AppleWorks 3.0 documents.
*/
#include "StdAfx.h"
#include "AppleWorks.h"
/*
* ===========================================================================
* AppleWorks WP
* ===========================================================================
*/
/*
* AppleWorks word processor file format, from FTN.1A.xxxx.
*
* The overall file format is:
*
* file header
* array of line records
* $ff $ff
* optional tags
*
* AppleWorks 5.0 introduced inverse and MouseText characters.
* These just use previously-unused byte ranges. The full set
* of values is thus:
* 00-1f special
* 20-7f plain ASCII
* 80-9f inverse upper (map to 40-5f)
* a0-bf inverse symbols/numbers (map to 20-3f)
* c0-df MouseText
* e0-ff inverse lower (map to 60-7f)
*
* We can output MouseText as Unicode symbols. Inverse text can use the Rich
* Text "highlight" feature; the "background color" feature doesn't seem to
* have any effect.
*/
/*
 * Report whether the AppleWorks WP reformatter applies to this file.
 *
 * The reformatter is marked "probably applicable" when the holder's file
 * type is kTypeAWP and "not applicable" otherwise.  The two trailing
 * applicability arguments passed to SetApplic() are always kApplicNot.
 */
void ReformatAWP::Examine(ReformatHolder* pHolder)
{
    const bool isAWP = (pHolder->GetFileType() == kTypeAWP);
    const ReformatHolder::ReformatApplies verdict =
        isAWP ? ReformatHolder::kApplicProbably : ReformatHolder::kApplicNot;

    pHolder->SetApplic(ReformatHolder::kReformatAWP, verdict,
        ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}
/*
 * Reformat an AppleWorks WP document.
 *
 * The fixed-size file header is copied out of the source buffer and its
 * signature byte is checked.  After that, two-byte line records (data byte
 * first, then code byte) are read until the EOF marker pair shows up; each
 * record is handed to ProcessLineRecord().
 *
 * Returns 0 once the result buffer has been handed to "pOutput".  Returns
 * -1 if the file is no longer than the header, the signature byte is wrong,
 * the remaining length goes negative, or ProcessLineRecord() fails.
 */
int ReformatAWP::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const uint8_t* cursor = pHolder->GetSourceBuf(part);
    const long totalLen = pHolder->GetSourceLen(part);
    long remaining = totalLen;

    fMouseTextToASCII =
        (pHolder->GetOption(ReformatHolder::kOptMouseTextToASCII) != 0);

    /* large inputs are rendered without RTF */
    if (totalLen > 65536)
        fUseRTF = false;

    //fUseRTF = false;
    //fShowEmbeds = false;

    /* expect header plus EOF bytes at least */
    if (totalLen <= kFileHeaderSize) {
        LOGI(" AWP truncated?");
        return -1;
    }

    RTFBegin(kRTFFlagColorTable);

    /* pull the file header out of the source buffer */
    assert(sizeof(fFileHeader) == kFileHeaderSize);
    memcpy(&fFileHeader, cursor, sizeof(fFileHeader));
    cursor += sizeof(fFileHeader);
    remaining -= sizeof(fFileHeader);

    /* quick sanity checks; only a bad signature byte is fatal */
    if (fFileHeader.seventyNine != kSeventyNine) {
        LOGI("ERROR: expected %d in signature byte, found %d",
            kSeventyNine, fFileHeader.seventyNine);
        return -1;
    }
    if (fFileHeader.sfMinVers && fFileHeader.sfMinVers != kSFMinVers30) {
        LOGI("WARNING: unexpected value %d for sfMinVers",
            fFileHeader.sfMinVers);
        /* keep going */
    }

    InitDocState();

    /* if first line record is invalid, skip it */
    bool pendingSkip = (fFileHeader.sfMinVers == kSFMinVers30);

    /* set margins to 1.0 inches at 10cpi */
    RTFLeftMargin(10);
    RTFRightMargin(10);

    /* walk the line records */
    for (;;) {
        uint8_t recData = 0;
        uint8_t recCode = 0;

        /* the length is checked before each read and after the last one */
        bool truncated = (remaining < 0);
        if (!truncated) {
            recData = Read8(&cursor, &remaining);
            truncated = (remaining < 0);
        }
        if (!truncated) {
            recCode = Read8(&cursor, &remaining);
            truncated = (remaining < 0);
        }
        if (truncated) {
            LOGI(" AWP truncated file");
            return -1;
        }

        if (pendingSkip) {
            pendingSkip = false;
            continue;
        }

        /* end of data reached? */
        if (recData == kEOFMarker && recCode == kEOFMarker)
            break;

        if (ProcessLineRecord(recData, recCode, &cursor, &remaining) != 0) {
            LOGI("ProcessLineRecord failed, bailing");
            return -1;
        }
    }

    /* the optional tags that may follow the EOF marker are not read */

    RTFEnd();
    SetResultBuffer(pOutput);
    return 0;
}
/*
 * Reset the document state: every byte of fDocState is cleared, then the
 * line counter is set to 1 (lines are counted from one).
 */
void ReformatAWP::InitDocState(void)
{
    memset(&fDocState, 0, sizeof fDocState);

    fDocState.line = 1;
}
/*
 * Process a line record.
 *
 * "lineRecData" and "lineRecCode" are the two bytes of the record header.
 * "pSrcPtr" and "pLength" track the read position and remaining byte count;
 * they are only advanced for text records, via HandleTextRecord().
 *
 * Carriage-return records start a new paragraph.  Text records are passed
 * to HandleTextRecord().  Command records in the range
 * [kLineRecordCommandMin, kLineRecordCommandMax] either change paragraph
 * formatting, are shown inline in blue (when fShowEmbeds is set), or are
 * logged and ignored.  Any other code counts as a soft failure.
 *
 * Returns 0 on success, -1 if a text record is found with no data left, if
 * HandleTextRecord() fails, or if more than kMaxSoftFailures unrecognized
 * codes have been seen.
 */
int ReformatAWP::ProcessLineRecord(uint8_t lineRecData, uint8_t lineRecCode,
    const uint8_t** pSrcPtr, long* pLength)
{
    int err = 0;

    //LOGI(" AWP line rec <0x%02x><0x%02x>", lineRecCode, lineRecData);

    if (lineRecCode == kLineRecordCarriageReturn) {
        /* ignore the horizontal offset for now */
        RTFNewPara();
    } else if (lineRecCode == kLineRecordText) {
        /*
         * Test the remaining byte count, not the pointer itself; the
         * pointer is never null here, so comparing it was always "true".
         */
        if (*pLength > 0)
            err = HandleTextRecord(lineRecData, pSrcPtr, pLength);
        else
            err = -1;
    } else if (lineRecCode >= kLineRecordCommandMin &&
               lineRecCode <= kLineRecordCommandMax)
    {
        switch (lineRecCode) {
        case kLineRecordCommandCenter:
            RTFParaCenter();
            break;
        case kLineRecordCommandRightJustify:
            RTFParaRight();
            break;
        case kLineRecordCommandUnjustify:
            RTFParaLeft();
            break;
        case kLineRecordCommandJustify:
            RTFParaJustify();
            break;
        case kLineRecordCommandLeftMargin:
            RTFLeftMargin(lineRecData);
            break;
        case kLineRecordCommandRightMargin:
            RTFRightMargin(lineRecData);
            break;

        /* we handle these by showing them in the text */
        case kLineRecordCommandPageNumber:
            if (fShowEmbeds) {
                RTFSetColor(kColorBlue);
                BufPrintf("<set-page-number %d>", lineRecData);
                RTFSetColor(kColorNone);
                RTFNewPara();
            }
            break;
        case kLineRecordCommandPageHeader:
            if (fShowEmbeds) {
                RTFSetColor(kColorBlue);
                BufPrintf("<page-header>");
                RTFSetColor(kColorNone);
                RTFNewPara();
            }
            break;
        case kLineRecordCommandPageHeaderEnd:
            if (fShowEmbeds) {
                RTFSetColor(kColorBlue);
                BufPrintf("</page-header>");
                RTFSetColor(kColorNone);
                RTFNewPara();
            }
            break;
        case kLineRecordCommandPageFooter:
            if (fShowEmbeds) {
                RTFSetColor(kColorBlue);
                BufPrintf("<page-footer>");
                RTFSetColor(kColorNone);
                RTFNewPara();
            }
            break;
        case kLineRecordCommandPageFooterEnd:
            if (fShowEmbeds) {
                RTFSetColor(kColorBlue);
                BufPrintf("</page-footer>");
                RTFSetColor(kColorNone);
                RTFNewPara();
            }
            break;

        case kLineRecordCommandNewPage:
            if (fUseRTF)
                RTFPageBreak();
            else if (fShowEmbeds) {
                RTFSetColor(kColorBlue);    // won't do anything
                BufPrintf("<page-break>");
                RTFSetColor(kColorNone);
            }
            break;

        /* recognized commands that are logged but otherwise ignored */
        case kLineRecordCommandPlatenWidth:
        case kLineRecordCommandCharsPerInch:
        case kLineRecordCommandProportional1:
        case kLineRecordCommandProportional2:
        case kLineRecordCommandIndent:
        case kLineRecordCommandPaperLength:
        case kLineRecordCommandTopMargin:
        case kLineRecordCommandBottomMargin:
        case kLineRecordCommandLinesPerInch:
        case kLineRecordCommandSingleSpace:
        case kLineRecordCommandDoubleSpace:
        case kLineRecordCommandTripleSpace:
        case kLineRecordCommandGroupBegin:
        case kLineRecordCommandGroupEnd:
        case kLineRecordCommandSkipLines:
        case kLineRecordCommandPauseEachPage:
        case kLineRecordCommandPauseHere:
        case kLineRecordCommandSetMarker:
        case kLineRecordCommandSetPageNumber:
        case kLineRecordCommandPageBreak:
        case kLineRecordCommandPageBreak256:
        case kLineRecordCommandPageBreakPara:
        case kLineRecordCommandPageBreakPara256:
        default:
            LOGD(" AWP cmd <0x%02x><0x%02x>", lineRecCode, lineRecData);
            break;
        }
    } else {
        /* bad command; tolerate a limited number of these */
        LOGW("WARNING: unrecognized code 0x%02x at 0x%p", lineRecCode,
            *pSrcPtr);
        fDocState.softFailures++;
        if (fDocState.softFailures > kMaxSoftFailures) {
            LOGE("ERROR: too many failures, giving up");
            err = -1;
        }
    }

    return err;
}
/*
 * Handle a text record.  The first two bytes are flags, the rest is
 * either the text or a ruler.  Special codes may be embedded in the text.
 *
 * "lineRecData" has the number of bytes of input that we have yet to read,
 * including the two flag bytes.  "pSrcPtr" and "pLength" are advanced past
 * whatever is consumed.
 *
 * Bytes below kMinTextChar are special codes (bold, underline, tab, ...).
 * Bytes 0x80-0xbf and 0xe0-0xff are inverse text, mapped back to ASCII and
 * wrapped in RTFInverseOn()/RTFInverseOff().  Bytes 0xc0-0xdf are MouseText,
 * converted to ASCII or UTF-16 depending on fMouseTextToASCII.
 *
 * Ruler records (tabFlags == kTabFlagsIsRuler) are consumed with no output.
 *
 * Returns 0 on success, -1 if the remaining length goes negative.
 */
int ReformatAWP::HandleTextRecord(uint8_t lineRecData,
    const uint8_t** pSrcPtr, long* pLength)
{
    int err = 0;
    uint8_t tabFlags;
    uint8_t byteCountPlusCR;
    int byteCount = lineRecData;
    bool noOutput = false;
    bool inverse = false;
    int ic;

    tabFlags = Read8(pSrcPtr, pLength);
    byteCount--;
    byteCountPlusCR = Read8(pSrcPtr, pLength);
    byteCount--;
    if (*pLength < 0) {
        err = -1;
        goto bail;
    }

    if (byteCount <= 0) {
        LOGI("WARNING: line %ld: short line (%d)",
            fDocState.line, byteCount);
        /* this is bad, but keep going anyway */
    }

    if ((byteCountPlusCR & ~kCRatEOL) != byteCount) {
        LOGI("WARNING: line %ld: byteCount now %d, offset 3 count %d",
            fDocState.line, byteCount, byteCountPlusCR & ~kCRatEOL);
        /* not sure why this would legally happen */
    }

    if (tabFlags == kTabFlagsIsRuler)
        noOutput = true;

    /*
     * Require a strictly positive count.  A record whose length byte is
     * 0 or 1 leaves byteCount negative; testing only for non-zero would
     * then consume the remainder of the file as if it were this line.
     */
    while (*pLength > 0 && byteCount > 0) {
        byteCount--;

        ic = Read8(pSrcPtr, pLength);
        if (*pLength < 0) {
            err = -1;
            goto bail;
        }

        if (noOutput)
            continue;

        if (ic < kMinTextChar) {
            switch (ic) {
            case kSpecialCharBoldBegin:
                RTFBoldOn();
                break;
            case kSpecialCharBoldEnd:
                RTFBoldOff();
                break;
            case kSpecialCharSuperscriptBegin:
                RTFSuperscriptOn();
                break;
            case kSpecialCharSuperscriptEnd:
                RTFSuperscriptOff();
                break;
            case kSpecialCharSubscriptBegin:
                RTFSubscriptOn();
                break;
            case kSpecialCharSubscriptEnd:
                RTFSubscriptOff();
                break;
            case kSpecialCharUnderlineBegin:
                RTFUnderlineOn();
                break;
            case kSpecialCharUnderlineEnd:
                RTFUnderlineOff();
                break;
            case kSpecialCharEnterKeyboard:
                if (fShowEmbeds) {
                    TextColor oldColor = RTFSetColor(kColorBlue);
                    BufPrintf("<kdb-entry>");
                    RTFSetColor(oldColor);
                }
                break;
            case kSpecialCharPrintPageNumber:
                if (fShowEmbeds) {
                    TextColor oldColor = RTFSetColor(kColorBlue);
                    BufPrintf("<page#>");
                    RTFSetColor(oldColor);
                }
                break;
            case kSpecialCharStickySpace:
                /* MSWord uses "\~", but RichEdit ignores that */
                BufPrintf("\u00a0");    // Unicode NO-BREAK SPACE
                break;
            case kSpecialCharMailMerge:
                if (fShowEmbeds) {
                    TextColor oldColor = RTFSetColor(kColorBlue);
                    BufPrintf("<mail-merge>");
                    RTFSetColor(oldColor);
                }
                /* must not fall through, or "<date>" is emitted as well */
                break;
            case kSpecialCharPrintDate:
                if (fShowEmbeds) {
                    TextColor oldColor = RTFSetColor(kColorBlue);
                    BufPrintf("<date>");
                    RTFSetColor(oldColor);
                }
                break;
            case kSpecialCharPrintTime:
                if (fShowEmbeds) {
                    TextColor oldColor = RTFSetColor(kColorBlue);
                    BufPrintf("<time>");
                    RTFSetColor(oldColor);
                }
                break;
            case kSpecialCharTab:
                if (fUseRTF)
                    RTFTab();
                else
                    BufPrintf("\t");
                break;
            case kSpecialCharTabFill:
                /* tab fill char, not vis in doc */
                BufPrintf(" ");
                break;
            default:
                LOGI(" AWP unhandled special char 0x%02x", ic);
                if (fShowEmbeds) {
                    TextColor oldColor = RTFSetColor(kColorBlue);
                    BufPrintf("^");
                    RTFSetColor(oldColor);
                }
                break;
            }
        } else {
            // Character.
            bool wantInverse = false;
            uint16_t mtLow = 0, mtHigh = 0;

            if (ic >= 0x80 && ic <= 0x9f) {
                // inverse upper; map 100x xxxx --> 010x xxxx
                ic ^= 0xc0;
                wantInverse = true;
            } else if ((ic >= 0xa0 && ic <= 0xbf) ||
                       (ic >= 0xe0 && ic <= 0xff)) {
                // inverse symbols; map 101x xxxx --> 001x xxxx
                // inverse lower; map 111x xxxx --> 011x xxxx
                ic ^= 0x80;
                wantInverse = true;
            } else if (ic >= 0xc0 && ic <= 0xdf) {
                // MouseText characters
                if (fMouseTextToASCII) {
                    ic = MouseTextToASCII(ic & 0x1f);
                } else {
                    MouseTextToUTF16(ic & 0x1f, &mtLow, &mtHigh);
                    ic = '?';   // used if the UTF-16 form isn't output
                }
            } else {
                // plain ASCII
            }

            /* only toggle inverse mode when the state actually changes */
            if (wantInverse && !inverse) {
                inverse = true;
                RTFInverseOn();
            } else if (!wantInverse && inverse) {
                inverse = false;
                RTFInverseOff();
            }

            if (fUseRTF) {
                if (mtLow != 0) {
                    if (mtHigh != 0) {
                        RTFPrintUTF16Char(mtHigh);
                    }
                    RTFPrintUTF16Char(mtLow);
                } else {
                    RTFPrintChar(ic);
                }
            } else {
                // Plain text output.
                BufPrintf("%c", PrintableChar(ic));
            }
        }
    }

    /* don't let inverse mode leak into the next line */
    if (inverse) {
        RTFInverseOff();
    }

    /* if there's a carriage return at the end of the line, output it now */
    if (byteCountPlusCR & kCRatEOL) {
        RTFNewPara();
    }

    /* another line processed, advance the line counter */
    fDocState.line++;

bail:
    return err;
}
/*
* ===========================================================================
* AppleWorks DB
* ===========================================================================
*/
/*
* AppleWorks database file format, from FTN.19.xxxx.
*
* The overall file format is:
*
* variable-sized file header
* 0 to 8 (0 to 30 in 3.0) report records, 600 bytes each
* variable-sized data records
* $ff $ff
* optional tags
*/
/*
 * Report whether the AppleWorks DB reformatter applies to this file.
 *
 * The reformatter is marked "probably applicable" when the holder's file
 * type is kTypeADB and "not applicable" otherwise.  The two trailing
 * applicability arguments passed to SetApplic() are always kApplicNot.
 */
void ReformatADB::Examine(ReformatHolder* pHolder)
{
    const bool isADB = (pHolder->GetFileType() == kTypeADB);
    const ReformatHolder::ReformatApplies verdict =
        isADB ? ReformatHolder::kApplicProbably : ReformatHolder::kApplicNot;

    pHolder->SetApplic(ReformatHolder::kReformatADB, verdict,
        ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}
/*
 * Reformat an AppleWorks DB document into CSV.
 *
 * The category names from the file header are written as the first CSV
 * line.  Each data record after the first (the "standard values" record,
 * which is skipped) becomes one line of quoted, comma-separated fields.
 * Date and time entries are expanded into readable text.
 *
 * Returns 0 on success (output kind is set to kOutputCSV), or -1 if the
 * file is too short, the header length is implausible, the category table
 * does not fit in the file, or the data records would start past the end.
 */
int ReformatADB::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
    long srcLen = pHolder->GetSourceLen(part);
    long length = srcLen;
    int retval = -1;
    int headerLen, numCats, numRecs, numReports;

    /*
     * Function-scope locals below are declared without initializers so
     * that the "goto bail" jumps do not cross an initialization.
     */

    fUseRTF = false;

    /* expect header plus EOF bytes at least */
    if (srcLen <= kMinHeaderLen) {
        LOGI(" ADB truncated?");
        goto bail;
    }

    headerLen = Get16LE(srcPtr);
    if (headerLen < kMinHeaderLen || headerLen > length) {
        LOGI(" ADB bad headerLen %d, file len is %ld", headerLen,
            srcLen);
        goto bail;
    }

    RTFBegin();

    /* offset +035: #of categories in file */
    numCats = *(srcPtr + 35);
    if (numCats < 1 || numCats > 0x1e) {
        LOGI(" ADB GLITCH: unexpected numCats %d", numCats);
        /* keep going... */
    }
    LOGI(" ADB should be %d categories", numCats);

    /* offset +036-037: #of records in file */
    numRecs = Get16LE(srcPtr + 36) & 0x7fff;
    LOGI(" ADB should be %d records", numRecs);

    /* offset +038: #of reports in file */
    numReports = *(srcPtr + 38);
    LOGI(" ADB should be %d reports", numReports);

    /*
     * Make sure the whole category-name table lies inside the buffer
     * before walking it; numCats comes straight from the file.
     */
    long catTableEnd;
    catTableEnd = 357L + static_cast<long>(numCats) * (kCatNameLen + 2);
    if (catTableEnd > length) {
        LOGI(" ADB GLITCH: category table ends at %ld, file len is %ld",
            catTableEnd, length);
        goto bail;
    }

    /* dump category names as first record */
    const uint8_t* catPtr;
    int catCount;
    catPtr = srcPtr + 357;
    catCount = numCats;
    while (catCount--) {
        if (catCount == numCats-1)
            BufPrintf("\"");
        else
            BufPrintf(",\"");

        /*
         * Length-prefixed name.  Clamp the length so a corrupt length
         * byte can't run into the next entry or off the end of the table.
         * NOTE(review): assumes a name never exceeds kCatNameLen bytes.
         */
        int nameLen = *catPtr;
        if (nameLen > kCatNameLen)
            nameLen = kCatNameLen;
        const uint8_t* namePtr = catPtr + 1;
        while (nameLen--) {
            if (*namePtr == '"')
                BufPrintf("\"\"");      /* CSV-escape embedded quotes */
            else
                BufPrintf("%c", *namePtr);
            namePtr++;
        }
        BufPrintf("\"");

        catPtr += kCatNameLen+2;
    }
    BufPrintf("\r\n");

    /*
     * Advance pointer to first data record.  The first record contains
     * "standard values".
     *
     * Each record looks like this:
     *  $00-$01: count of bytes in remainder of record
     *  $02    : control byte, one of:
     *      $01-$7f: number of following bytes for this category
     *      $81-$9e: this (minus $80) is #of categories to skip
     *      $ff    : end of record
     *
     * The data within the categories may have special meanings, e.g. if it
     * starts with $c0 it's a date record and $d4 is a time record.
     */
    int offsetToData;
    offsetToData = kOffsetToFirstCatHeader +
        numCats*kCatHeaderLen + numReports*kReportRecordLen;
    LOGI(" ADB data records begin at offset 0x%08lx",
        static_cast<unsigned long>(offsetToData));
    if (offsetToData >= length) {
        LOGI(" ADB GLITCH: offset >= length %ld", length);
        goto bail;
    }
    srcPtr += offsetToData;
    length -= offsetToData;

    int rr;
    for (rr = 0; rr < numRecs && length > 0; rr++) {
        int recordRem = Read16(&srcPtr, &length);

        if (rr == 0) {
            /* skip first record, but never past the end of the buffer */
            if (recordRem > length) {
                LOGI(" ADB GLITCH: first record len %d exceeds remaining %ld",
                    recordRem, length);
                break;
            }
            srcPtr += recordRem;
            length -= recordRem;
            if (*(srcPtr-1) != 0xff) {
                LOGI(" ADB GLITCH: first record skipped past 0x%02x",
                    *(srcPtr-1));
                /* keep going, I guess */
            }
            continue;
        }

        int catNum = 0;

        /* scan through all categories in this record */
        int ctrl = Read8(&srcPtr, &length);
        while (ctrl != 0xff && length > 0) {
            if (ctrl >= 0x01 && ctrl <= 0x7f) {
                /* just data */
                if (catNum == 0)
                    BufPrintf("\"");
                else
                    BufPrintf(",\"");

                if (*srcPtr == 0xc0) {
                    static const char kMonths[12][4] = {
                        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
                        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
                    };
                    /* date entry */
                    Read8(&srcPtr, &length);    // throw out the 0xc0
                    char year[2], month, day[2];
                    year[0] = Read8(&srcPtr, &length);
                    year[1] = Read8(&srcPtr, &length);
                    month = Read8(&srcPtr, &length);
                    day[0] = Read8(&srcPtr, &length);
                    day[1] = Read8(&srcPtr, &length);
                    if (day[0] == ' ')
                        day[0] = '0';
                    /* month is a letter 'A'-'L'; years below "70" are 20xx */
                    BufPrintf("%c%c-%s-%s%c%c",
                        day[0], day[1],
                        month >= 'A' && month <= 'L' ? kMonths[month-'A'] : "???",
                        year[0] < '7' ? "20" : "19", year[0], year[1]);
                } else if (*srcPtr == 0xd4) {
                    /* time entry; hour is a letter, 'A'-'L' AM, 'M'-'X' PM */
                    Read8(&srcPtr, &length);    // throw out the 0xd4
                    char hour, minute[2];
                    hour = Read8(&srcPtr, &length);
                    minute[0] = Read8(&srcPtr, &length);
                    minute[1] = Read8(&srcPtr, &length);
                    if (hour >= 'A' && hour < 'M') {
                        if (hour == 'A')        // don't show 00:00
                            hour = 'A' + 12;
                        BufPrintf("%02d:%c%c AM",
                            hour - 'A', minute[0], minute[1]);
                    } else if (hour >= 'M' && hour <= 'X') {
                        if (hour == 'M')        // don't show 00:00
                            hour = 'M' + 12;
                        BufPrintf("%02d:%c%c PM",
                            hour - 'M', minute[0], minute[1]);
                    }
                } else {
                    /* plain text; double any embedded quotes for CSV */
                    while (ctrl--) {
                        uint8_t ch = Read8(&srcPtr, &length);
                        BufPrintf("%c", ch);
                        if (ch == '"')
                            BufPrintf("%c", ch);
                    }
                }

                BufPrintf("\"");
            } else if (ctrl >= 0x81 && ctrl <= 0x9e) {
                /* skip over empty categories */
                ctrl -= 0x80;
                while (ctrl--) {
                    BufPrintf(",");
                    catNum++;
                }
                catNum--;       // don't double-count this category
            } else {
                LOGI(" ADB GLITCH: invalid ctrl byte 0x%02x", ctrl);
                break;
                /* keep going anyway? */
            }

            catNum++;
            ctrl = Read8(&srcPtr, &length);
        }

        /* pad out the line so every record has numCats fields */
        while (catNum < numCats) {
            BufPrintf(",");
            catNum++;
        }

        /* end of record */
        RTFNewPara();
    }
    LOGI(" ADB at exit rr=%d numRecs=%d", rr, numRecs);

    int checkEnd;
    checkEnd = Read16(&srcPtr, &length);
    if (checkEnd != 0xffff) {
        LOGI(" ADB GLITCH: last read returned 0x%04x", checkEnd);
    } else {
        LOGI(" ADB found EOF; success");
    }

    /* the optional tags that may follow the EOF marker are not read */

    RTFEnd();

    SetResultBuffer(pOutput);
    pOutput->SetOutputKind(ReformatOutput::kOutputCSV);
    retval = 0;

bail:
    return retval;
}
/*
* ===========================================================================
* AppleWorks SS
* ===========================================================================
*/
/*
* AppleWorks spreadsheet file format, from FTN.1b.xxxx.
*
* The overall file format is:
*
* fixed-sized file header
* series of variable-length row records
* collection of cell data
* $ff $ff
* optional tags
*
* The cell data can take several different forms.
*/
/*
 * Report whether the AppleWorks SS reformatter applies to this file.
 *
 * The reformatter is marked "probably applicable" when the holder's file
 * type is kTypeASP and "not applicable" otherwise.  The two trailing
 * applicability arguments passed to SetApplic() are always kApplicNot.
 */
void ReformatASP::Examine(ReformatHolder* pHolder)
{
    const bool isASP = (pHolder->GetFileType() == kTypeASP);
    const ReformatHolder::ReformatApplies verdict =
        isASP ? ReformatHolder::kApplicProbably : ReformatHolder::kApplicNot;

    pHolder->SetApplic(ReformatHolder::kReformatASP, verdict,
        ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}
/*
 * Reformat an AppleWorks SS document into CSV.
 *
 * After the fixed-size header (plus two extra bytes when the header's
 * ssMinVers field is non-zero), the file is read one row record at a time
 * until the $ffff marker or the end of the data.  Gaps in the row numbers
 * are filled with empty CSV lines so line N of the output is row N.
 *
 * Returns 0 on success (output kind is set to kOutputCSV), or -1 if the
 * file is shorter than the header.  A failure from ProcessRow() ends the
 * row loop but still yields the output produced so far.
 */
int ReformatASP::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
    long srcLen = pHolder->GetSourceLen(part);
    long length = srcLen;
    int retval = -1;
    const FileHeader* pFileHeader;
    bool aw30flag;

    ASSERT(sizeof(FileHeader) == kFileHeaderSize);

    fUseRTF = false;

    /* must at least have the header */
    if (length < kFileHeaderSize) {
        /* this is the spreadsheet converter, so say "ASP", not "ADB" */
        LOGI(" ASP truncated?");
        goto bail;
    }

    RTFBegin();

    pFileHeader = (const FileHeader*) srcPtr;
    aw30flag = false;
    if (pFileHeader->ssMinVers != 0)
        aw30flag = true;
    LOGI(" ASP ssMinVers=0x%02x, aw30flag=%d",
        pFileHeader->ssMinVers, aw30flag);

    /*
     * Advance pointer past file header.  v3.0 adds a couple of extra bytes
     * right after the end of the header that the FTN says we should just
     * ignore.
     */
    srcPtr += kFileHeaderSize;
    length -= kFileHeaderSize;
    if (aw30flag) {
        srcPtr += 2;
        length -= 2;
    }

    /*
     * Loop through the file, reading one row at a time.  There is no row
     * count because spreadsheets are sparse.
     *
     * We assume that rows are stored from top to bottom.  The count begins
     * with 1, not zero.
     */
    fCurrentRow = 1;
    while (length > 0) {
        uint16_t rowLen;
        int rowNum;

        /* row length or EOF marker */
        rowLen = Read16(&srcPtr, &length);
        if (rowLen == 0xffff) {
            LOGI(" ASP found EOF marker, we're done");
            break;
        }

        rowNum = Read16(&srcPtr, &length);
        //LOGI(" ASP process row %d (cur=%d)", rowNum, currentRow);

        /* fill out empty rows */
        ASSERT(fCurrentRow <= rowNum);
        while (fCurrentRow < rowNum) {
            BufPrintf("\"\"\r\n");
            fCurrentRow++;
        }

        if (ProcessRow(rowNum, &srcPtr, &length) != 0)
            break;

        fCurrentRow++;
    }

    /* the optional tags that may follow the EOF marker are not read */

    RTFEnd();

    SetResultBuffer(pOutput);
    pOutput->SetOutputKind(ReformatOutput::kOutputCSV);
    retval = 0;

bail:
    return retval;
}
/*
 * Process one row of spreadsheet data.
 *
 * Reads control bytes from the stream until the end-of-row marker ($ff):
 *  $01-$7f: a cell entry of that many bytes follows
 *  $81-$fe: skip (value minus $80) columns
 *  $ff    : end of row
 * Cells are written as CSV fields; skipped columns become empty fields.
 * fCurrentCol is kept in step with the column being processed.  The CSV
 * line is always terminated, even when the row turns out to be damaged.
 *
 * Returns 0 if the row ended normally or the data ran out, -1 if a cell
 * claims more bytes than remain or an invalid control byte ($00/$80) was
 * found.  After either glitch the stream position can't be trusted, so the
 * caller should stop reading rows.
 */
int ReformatASP::ProcessRow(int rowNum, const uint8_t** pSrcPtr, long* pLength)
{
    uint8_t ctrl;
    bool first = true;
    int result = 0;

    fCurrentCol = 0;
    while (*pLength > 0) {
        ctrl = Read8(pSrcPtr, pLength);

        if (ctrl >= 0x01 && ctrl <= 0x7f) {
            if (!first)
                BufPrintf(",");
            else
                first = false;

            /* read cell entry contents */
            if (ctrl > *pLength) {
                LOGI(" ASP GLITCH: cell len exceeds file len (%ld %d)",
                    *pLength, ctrl);
                result = -1;
                break;
            }
            ProcessCell(*pSrcPtr, ctrl);
            (*pSrcPtr) += ctrl;
            *pLength -= ctrl;
        } else if (ctrl >= 0x81 && ctrl <= 0xfe) {
            /* skip this many columns */
            if (!first)
                BufPrintf(",");
            else
                first = false;

            /*
             * One fewer comma than columns skipped: the separator for the
             * last skipped column is emitted by whatever comes next.
             */
            ctrl -= 0x80;
            ctrl--;
            while (ctrl--) {
                BufPrintf(",");
                fCurrentCol++;
            }
        } else if (ctrl == 0xff) {
            /* end of row */
            break;
        } else {
            /* unexpected 0x00 or 0x80 */
            LOGI(" ASP GLITCH: unexpected ctrl byte 0x%02x", ctrl);
            result = -1;
            break;
        }

        fCurrentCol++;
    }

    BufPrintf("\r\n");
    return result;
}
/*
 * Process the contents of a single cell, writing it as one quoted field.
 *
 * "srcPtr" points at the first byte of the cell and "cellLength" is the
 * number of bytes in it.  The first byte holds flags:
 *  bit 7 set: value cell; a second flag byte follows
 *      bit 5 of flag1 set: "value constant", a SANE double
 *      bit 3 of flag2 set: "value label", a cached string then tokens
 *      otherwise         : "value formula", a cached SANE double then tokens
 *  bit 7 clear: label cell
 *      bit 5 of flag1 set: propagated label, one char repeated kPropCount
 *                          times
 *      otherwise         : regular label text
 *
 * Every read is checked against the bytes left in the cell; a cell that is
 * too short for its type is written as "GLITCH".
 */
void ReformatASP::ProcessCell(const uint8_t* srcPtr, long cellLength)
{
    uint8_t flag1, flag2;
    double dval;
    int i;

    BufPrintf("\"");

    if (cellLength < 1) {
        /* nothing to read, not even the flag byte */
        LOGI(" ASP GLITCH: empty cell");
        BufPrintf("\"");
        return;
    }

    flag1 = *srcPtr++;
    cellLength--;
    if (flag1 & 0x80) {     /* bit 7 set? */
        /* this is a value, not a label */
        if (cellLength < 1) {
            LOGI(" ASP GLITCH: value cell has no second flag byte");
            BufPrintf("GLITCH");
        } else {
            flag2 = *srcPtr++;
            cellLength--;

            if (flag1 & 0x20) {     /* bit 5 set? */
                /* this is a "value constant" */
                if (cellLength < kSANELen) {
                    LOGI(" ASP GLITCH: invalid value constant len");
                    BufPrintf("GLITCH");
                } else {
                    dval = ConvertSANEDouble(srcPtr);
                    BufPrintf("%f", dval);
                }
            } else if (flag2 & 0x08) {  /* bit 3 set? */
                /* this is a "value label", AW30+ only */
                /* skip over cached string result */
                if (cellLength < 1 || *srcPtr >= cellLength) {
                    LOGI(" ASP GLITCH: invalid value label str len");
                    BufPrintf("GLITCH");
                } else {
                    /*
                     * The length byte and the string both count against
                     * the cell; without this the token loop would run
                     * past the end of the cell.
                     */
                    const long skip = static_cast<long>(*srcPtr) + 1;
                    srcPtr += skip;
                    cellLength -= skip;

                    /* output tokens */
                    while (cellLength > 0)
                        PrintToken(&srcPtr, &cellLength);
                }
            } else {
                /* this is a "value formula" */
                //dval = ConvertSANEDouble(srcPtr);
                /* skip over cached computation result */
                if (cellLength <= kSANELen) {
                    LOGI(" ASP GLITCH: invalid value formula len");
                    BufPrintf("GLITCH");
                } else {
                    srcPtr += kSANELen;
                    cellLength -= kSANELen;

                    /* [tokens] */
                    while (cellLength > 0)
                        PrintToken(&srcPtr, &cellLength);
                }
            }
        }
    } else {
        /* this is a label, not a value */
        if (flag1 & 0x20) {     /* bit 5 set? */
            /* propagated label cell */
            if (cellLength < 1) {
                LOGI(" ASP GLITCH: propagated label has no character");
                BufPrintf("GLITCH");
            } else {
                for (i = 0; i < kPropCount; i++)
                    BufPrintQChar(*srcPtr);
            }
        } else {
            /* regular label cell */
            for (i = 0; i < cellLength; i++)
                BufPrintQChar(srcPtr[i]);
        }
    }

    BufPrintf("\"");
}
/*
 * Print the AppleWorks SS token pointed to by *pSrcPtr.  Some tokens require
 * several bytes to express.
 *
 * "pSrcPtr" and "pLength" are advanced past whatever is consumed; at least
 * the token byte itself is always read.  Token values below $c0 are logged
 * and skipped.  Tokens that carry extra data:
 *  $e0, $e7: @Error / @NA, followed by three bytes that are skipped
 *  $fd     : followed by an 8-byte SANE double
 *  $fe     : followed by a column byte and a 16-bit row, both signed
 *            offsets relative to the current cell
 *  $ff     : followed by a length-prefixed (Pascal) string
 * If there aren't enough bytes left for the extra data, a glitch is logged
 * and the function returns without consuming them.
 */
void ReformatASP::PrintToken(const uint8_t** pSrcPtr, long* pLength)
{
    /* string constants; note these must NOT contain '"' chars */
    const int kTokenStart = 0xc0;
    /* static: built once rather than on every call */
    static const char* const tokenTable[0x100-kTokenStart] = {
        /*0xc0*/ "@Deg",    "@Rad",     "@Pi",          "@True",
        /*0xc4*/ "@False",  "@Not",     "@IsBlank",     "@IsNA",
        /*0xc8*/ "@IsError", "@Exp",    "@Ln",          "@Log",
        /*0xcc*/ "@Cos",    "@Sin",     "@Tan",         "@ACos",
        /*0xd0*/ "@ASin",   "@ATan2",   "@ATan",        "@Mod",
        /*0xd4*/ "@FV",     "@PV",      "@PMT",         "@Term",
        /*0xd8*/ "@Rate",   "@Round",   "@Or",          "@And",
        /*0xdc*/ "@Sum",    "@Avg",     "@Choose",      "@Count",
        /*0xe0*/ "@Error",  "@IRR",     "@If",          "@Int",
        /*0xe4*/ "@Lookup", "@Max",     "@Min",         "@NA",
        /*0xe8*/ "@NPV",    "@Sqrt",    "@Abs",         "",
        /*0xec*/ "<>",      ">=",       "<=",           "=",
        /*0xf0*/ ">",       "<",        ",",            "^",
        /*0xf4*/ ")",       "-",        "+",            "/",
        /*0xf8*/ "*",       "(",        "-" /*unary*/,  "+" /*unary*/,
        /*0xfc*/ "...",     "",         "",             ""
    };
    uint8_t token;

    token = Read8(pSrcPtr, pLength);
    if (token < kTokenStart) {
        LOGI(" ASP GLITCH: funky token 0x%02x", token);
        return;
    }

    BufPrintf("%s", tokenTable[token - kTokenStart]);

    if (token == 0xe0 || token == 0xe7) {
        /* @Error and @NA followed by three zero bytes */
        if (*pLength < 3) {
            LOGI(" ASP GLITCH: ran off end processing tokens");
            return;
        }
        *pSrcPtr += 3;
        *pLength -= 3;
    } else if (token == 0xfd) {
        /* SANE double number */
        if (*pLength < 8) {
            LOGI(" ASP GLITCH: not enough left to grab a SANE");
            return;
        }
        double dval = ConvertSANEDouble(*pSrcPtr);
        BufPrintf("%f", dval);
        *pSrcPtr += 8;
        *pLength -= 8;
    } else if (token == 0xfe) {
        /* row, column reference (relative to current cell) */
        if (*pLength < 3) {
            /* need one column byte plus a two-byte row */
            LOGI(" ASP GLITCH: ran off end processing tokens");
            return;
        }
        int row, col;
        col = Read8(pSrcPtr, pLength);
        if (col >= 128)         /* sign-extend the 8-bit offset */
            col -= 256;
        row = Read16(pSrcPtr, pLength);
        if (row >= 32768)       /* sign-extend the 16-bit offset */
            row -= 65536;
        BufPrintf("%s%d", PrintCol(fCurrentCol+col), fCurrentRow+row);
    } else if (token == 0xff) {
        /* Pascal string */
        int i;
        i = Read8(pSrcPtr, pLength);
        if (i > *pLength) {
            LOGI(" ASP GLITCH: string exceeds cell len");
            return;
        }
        while (i--) {
            uint8_t ch = Read8(pSrcPtr, pLength);
            BufPrintQChar(ch);
        }
    }
}
/*
 * Format a zero-based column number as spreadsheet column letters:
 * 0-25 become "A"-"Z", 26-701 become "AA"-"ZZ".  Anything outside that
 * range is logged and formatted as "??".
 *
 * This stores the value in fPrintColBuf and returns a pointer to it, so the
 * result is only valid until the next call.
 */
const char* ReformatASP::PrintCol(int col)
{
    if (col < 0 || col >= 702) {
        LOGI(" ASP can't PrintCol(%d)", col);
        fPrintColBuf[0] = fPrintColBuf[1] = '?';
        fPrintColBuf[2] = '\0';
    } else if (col < 26) {
        fPrintColBuf[0] = 'A' + col;
        fPrintColBuf[1] = '\0';
    } else {
        /*
         * The two-letter range starts at "AA" for column 26, so the
         * leading letter is (col / 26) - 1.  Without the -1, column 26
         * came out as "BA" and columns 676-701 used '[' as the first
         * letter.
         */
        fPrintColBuf[0] = 'A' + col / 26 - 1;
        fPrintColBuf[1] = 'A' + col % 26;
        fPrintColBuf[2] = '\0';
    }
    return fPrintColBuf;
}
/*
* Convert a 64-bit SANE double to an x86 double. The format is the same as
* IEEE 754, which happily is the same used by the VC++6.0 compiler.
*
* Info from http://www.cs.trinity.edu/About/The_Courses/cs2322/ieee-fp.html
* (also http://www.psc.edu/general/software/packages/ieee/ieee.html).
*
* -----
* The 64-bit double format is divided into three fields as shown below:
*
* 1 11 52
* +-------------------------------------+
* | s | e | f |
* +-------------------------------------+
*
* The value v of the number is determined by these fields as shown in
* the following table:
*
* Values of double-format numbers (64 bits)
* ___________________________________________________________
* e f v class of v
* ___________________________________________________________
* 0<e<2047 (any) v=(-1)^s x 2^(e-1023) x (1.f) normalized
* e=0 f!=0 v=(-1)^s x 2^(e-1022) x (0.f) denormalized
* e=0 f=0 v=(-1)^s x 0 zero
* e=2047 f=0 v=(-1)^s x infinity infinity
* e=2047 f!=0 v is a NaN NaN
*
* For example, the double representation (in hex notation) of 1.5 is
* 3FF8000000000000
* and that of 0.01 (with the fraction truncated rather than rounded) is
* 3F847AE147AE147A
* -----
*/
/*
 * Reinterpret the kSANELen bytes at "srcPtr" as a native double by copying
 * them, in order, into a local double's storage.  No byte swapping is done.
 */
double ReformatASP::ConvertSANEDouble(const uint8_t* srcPtr)
{
    double result;

    ASSERT(sizeof(result) == kSANELen);
    memcpy(&result, srcPtr, kSANELen);

    return result;
}