mirror of
https://github.com/fadden/ciderpress.git
synced 2024-11-29 20:49:27 +00:00
2adbe9591f
The AWP5 MouseText output is okay for some things but not others. This adds an ASCII conversion, enabled through a preference.
1167 lines
35 KiB
C++
1167 lines
35 KiB
C++
/*
|
|
* CiderPress
|
|
* Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved.
|
|
* See the file LICENSE for distribution terms.
|
|
*/
|
|
/*
|
|
* Convert AppleWorks 3.0 documents.
|
|
*/
|
|
#include "StdAfx.h"
|
|
#include "AppleWorks.h"
|
|
|
|
/*
|
|
* ===========================================================================
|
|
* AppleWorks WP
|
|
* ===========================================================================
|
|
*/
|
|
|
|
/*
|
|
* AppleWorks word processor file format, from FTN.1A.xxxx.
|
|
*
|
|
* The overall file format is:
|
|
*
|
|
* file header
|
|
* array of line records
|
|
* $ff $ff
|
|
* optional tags
|
|
*
|
|
* AppleWorks 5.0 introduced inverse and MouseText characters.
|
|
* These just use previously-unused byte ranges. The full set
|
|
* of values is thus:
|
|
* 00-1f special
|
|
* 20-7f plain ASCII
|
|
* 80-9f inverse upper (map to 40-5f)
|
|
* a0-bf inverse symbols/numbers (map to 20-3f)
|
|
* c0-df MouseText
|
|
* e0-ff inverse lower (map to 60-7f)
|
|
*
|
|
* We can output MouseText as Unicode symbols. Inverse text can use the Rich
|
|
* Text "highlight" feature; the "background color" feature doesn't seem to
|
|
* have any effect.
|
|
*/
|
|
|
|
/*
|
|
* Decide whether or not we want to handle this file.
|
|
*/
|
|
void ReformatAWP::Examine(ReformatHolder* pHolder)
|
|
{
|
|
ReformatHolder::ReformatApplies applies = ReformatHolder::kApplicNot;
|
|
|
|
if (pHolder->GetFileType() == kTypeAWP)
|
|
applies = ReformatHolder::kApplicProbably;
|
|
|
|
pHolder->SetApplic(ReformatHolder::kReformatAWP, applies,
|
|
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
|
|
}
|
|
|
|
/*
|
|
* Reformat an AppleWorks WP document.
|
|
*/
|
|
int ReformatAWP::Process(const ReformatHolder* pHolder,
|
|
ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
|
|
ReformatOutput* pOutput)
|
|
{
|
|
const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
|
|
long srcLen = pHolder->GetSourceLen(part);
|
|
long length = srcLen;
|
|
int retval = -1;
|
|
|
|
bool skipRecord;
|
|
uint8_t lineRecCode, lineRecData;
|
|
|
|
fMouseTextToASCII = pHolder->GetOption(ReformatHolder::kOptMouseTextToASCII) != 0;
|
|
|
|
if (srcLen > 65536)
|
|
fUseRTF = false;
|
|
|
|
//fUseRTF = false;
|
|
//fShowEmbeds = false;
|
|
|
|
/* expect header plus EOF bytes at least */
|
|
if (srcLen <= kFileHeaderSize) {
|
|
LOGI(" AWP truncated?");
|
|
goto bail;
|
|
}
|
|
|
|
RTFBegin(kRTFFlagColorTable);
|
|
|
|
/*
|
|
* Grab the file header.
|
|
*/
|
|
assert(sizeof(fFileHeader) == kFileHeaderSize);
|
|
|
|
memcpy(&fFileHeader, srcPtr, sizeof(fFileHeader));
|
|
srcPtr += sizeof(fFileHeader);
|
|
length -= sizeof(fFileHeader);
|
|
|
|
/* do some quick sanity checks */
|
|
if (fFileHeader.seventyNine != kSeventyNine) {
|
|
LOGI("ERROR: expected %d in signature byte, found %d",
|
|
kSeventyNine, fFileHeader.seventyNine);
|
|
goto bail;
|
|
}
|
|
if (fFileHeader.sfMinVers && fFileHeader.sfMinVers != kSFMinVers30) {
|
|
LOGI("WARNING: unexpected value %d for sfMinVers",
|
|
fFileHeader.sfMinVers);
|
|
/* keep going */
|
|
}
|
|
|
|
InitDocState();
|
|
|
|
/* if first line record is invalid, skip it */
|
|
skipRecord = false;
|
|
if (fFileHeader.sfMinVers == kSFMinVers30)
|
|
skipRecord = true;
|
|
|
|
/* set margins to 1.0 inches at 10cpi */
|
|
RTFLeftMargin(10);
|
|
RTFRightMargin(10);
|
|
|
|
/*
|
|
* Read the line records.
|
|
*/
|
|
while (1) {
|
|
if (length < 0) {
|
|
LOGI(" AWP truncated file");
|
|
goto bail;
|
|
}
|
|
lineRecData = Read8(&srcPtr, &length);
|
|
if (length < 0) {
|
|
LOGI(" AWP truncated file");
|
|
goto bail;
|
|
}
|
|
lineRecCode = Read8(&srcPtr, &length);
|
|
|
|
if (length < 0) {
|
|
LOGI(" AWP truncated file");
|
|
goto bail;
|
|
}
|
|
|
|
if (skipRecord) {
|
|
skipRecord = false;
|
|
continue;
|
|
}
|
|
|
|
/* end of data reached? */
|
|
if (lineRecData == kEOFMarker && lineRecCode == kEOFMarker)
|
|
break;
|
|
|
|
if (ProcessLineRecord(lineRecData, lineRecCode, &srcPtr, &length) != 0)
|
|
{
|
|
LOGI("ProcessLineRecord failed, bailing");
|
|
goto bail;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Read the optional tags.
|
|
*/
|
|
/* (nah) */
|
|
|
|
RTFEnd();
|
|
|
|
SetResultBuffer(pOutput);
|
|
retval = 0;
|
|
|
|
bail:
|
|
return retval;
|
|
}
|
|
|
|
/*
|
|
* Initialize the DocState structure.
|
|
*/
|
|
void ReformatAWP::InitDocState(void)
|
|
{
|
|
memset(&fDocState, 0, sizeof(fDocState));
|
|
fDocState.line = 1;
|
|
}
|
|
|
|
/*
|
|
* Process a line record.
|
|
*/
|
|
int ReformatAWP::ProcessLineRecord(uint8_t lineRecData, uint8_t lineRecCode,
|
|
const uint8_t** pSrcPtr, long* pLength)
|
|
{
|
|
int err = 0;
|
|
|
|
//LOGI(" AWP line rec <0x%02x><0x%02x>", lineRecCode, lineRecData);
|
|
|
|
if (lineRecCode == kLineRecordCarriageReturn) {
|
|
/* ignore the horizontal offset for now */
|
|
RTFNewPara();
|
|
} else if (lineRecCode == kLineRecordText) {
|
|
if (pLength > 0)
|
|
err = HandleTextRecord(lineRecData, pSrcPtr, pLength);
|
|
else
|
|
err = -1;
|
|
} else if (lineRecCode >= kLineRecordCommandMin &&
|
|
lineRecCode <= kLineRecordCommandMax)
|
|
{
|
|
switch (lineRecCode) {
|
|
case kLineRecordCommandCenter:
|
|
RTFParaCenter();
|
|
break;
|
|
case kLineRecordCommandRightJustify:
|
|
RTFParaRight();
|
|
break;
|
|
case kLineRecordCommandUnjustify:
|
|
RTFParaLeft();
|
|
break;
|
|
case kLineRecordCommandJustify:
|
|
RTFParaJustify();
|
|
break;
|
|
case kLineRecordCommandLeftMargin:
|
|
RTFLeftMargin(lineRecData);
|
|
break;
|
|
case kLineRecordCommandRightMargin:
|
|
RTFRightMargin(lineRecData);
|
|
break;
|
|
|
|
/* we handle these by showing them in the text */
|
|
case kLineRecordCommandPageNumber:
|
|
if (fShowEmbeds) {
|
|
RTFSetColor(kColorBlue);
|
|
BufPrintf("<set-page-number %d>", lineRecData);
|
|
RTFSetColor(kColorNone);
|
|
RTFNewPara();
|
|
}
|
|
break;
|
|
case kLineRecordCommandPageHeader:
|
|
if (fShowEmbeds) {
|
|
RTFSetColor(kColorBlue);
|
|
BufPrintf("<page-header>");
|
|
RTFSetColor(kColorNone);
|
|
RTFNewPara();
|
|
}
|
|
break;
|
|
case kLineRecordCommandPageHeaderEnd:
|
|
if (fShowEmbeds) {
|
|
RTFSetColor(kColorBlue);
|
|
BufPrintf("</page-header>");
|
|
RTFSetColor(kColorNone);
|
|
RTFNewPara();
|
|
}
|
|
break;
|
|
case kLineRecordCommandPageFooter:
|
|
if (fShowEmbeds) {
|
|
RTFSetColor(kColorBlue);
|
|
BufPrintf("<page-footer>");
|
|
RTFSetColor(kColorNone);
|
|
RTFNewPara();
|
|
}
|
|
break;
|
|
case kLineRecordCommandPageFooterEnd:
|
|
if (fShowEmbeds) {
|
|
RTFSetColor(kColorBlue);
|
|
BufPrintf("</page-footer>");
|
|
RTFSetColor(kColorNone);
|
|
RTFNewPara();
|
|
}
|
|
break;
|
|
case kLineRecordCommandNewPage:
|
|
if (fUseRTF)
|
|
RTFPageBreak();
|
|
else if (fShowEmbeds) {
|
|
RTFSetColor(kColorBlue); // won't do anything
|
|
BufPrintf("<page-break>");
|
|
RTFSetColor(kColorNone);
|
|
}
|
|
break;
|
|
|
|
case kLineRecordCommandPlatenWidth:
|
|
case kLineRecordCommandCharsPerInch:
|
|
case kLineRecordCommandProportional1:
|
|
case kLineRecordCommandProportional2:
|
|
case kLineRecordCommandIndent:
|
|
case kLineRecordCommandPaperLength:
|
|
case kLineRecordCommandTopMargin:
|
|
case kLineRecordCommandBottomMargin:
|
|
case kLineRecordCommandLinesPerInch:
|
|
case kLineRecordCommandSingleSpace:
|
|
case kLineRecordCommandDoubleSpace:
|
|
case kLineRecordCommandTripleSpace:
|
|
case kLineRecordCommandGroupBegin:
|
|
case kLineRecordCommandGroupEnd:
|
|
case kLineRecordCommandSkipLines:
|
|
case kLineRecordCommandPauseEachPage:
|
|
case kLineRecordCommandPauseHere:
|
|
case kLineRecordCommandSetMarker:
|
|
case kLineRecordCommandSetPageNumber:
|
|
case kLineRecordCommandPageBreak:
|
|
case kLineRecordCommandPageBreak256:
|
|
case kLineRecordCommandPageBreakPara:
|
|
case kLineRecordCommandPageBreakPara256:
|
|
default:
|
|
LOGD(" AWP cmd <0x%02x><0x%02x>", lineRecCode, lineRecData);
|
|
break;
|
|
}
|
|
} else {
|
|
/* bad command */
|
|
LOGW("WARNING: unrecognized code 0x%02x at 0x%p", lineRecCode,
|
|
*pSrcPtr);
|
|
fDocState.softFailures++;
|
|
if (fDocState.softFailures > kMaxSoftFailures) {
|
|
LOGE("ERROR: too many failures, giving up");
|
|
err = -1;
|
|
}
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* Handle a text record. The first two bytes are flags, the rest is
|
|
* either the text or a ruler. Special codes may be embedded in the text.
|
|
*
|
|
* "lineRecData" has the number of bytes of input that we have yet to read.
|
|
*/
|
|
int ReformatAWP::HandleTextRecord(uint8_t lineRecData,
|
|
const uint8_t** pSrcPtr, long* pLength)
|
|
{
|
|
int err = 0;
|
|
uint8_t tabFlags;
|
|
uint8_t byteCountPlusCR;
|
|
int byteCount = lineRecData;
|
|
bool noOutput = false;
|
|
bool inverse = false;
|
|
int ic;
|
|
|
|
tabFlags = Read8(pSrcPtr, pLength);
|
|
byteCount--;
|
|
byteCountPlusCR = Read8(pSrcPtr, pLength);
|
|
byteCount--;
|
|
if (*pLength < 0) {
|
|
err = -1;
|
|
goto bail;
|
|
}
|
|
|
|
if (byteCount <= 0) {
|
|
LOGI("WARNING: line %ld: short line (%d)",
|
|
fDocState.line, byteCount);
|
|
/* this is bad, but keep going anyway */
|
|
}
|
|
|
|
if ((byteCountPlusCR & ~kCRatEOL) != byteCount) {
|
|
LOGI("WARNING: line %ld: byteCount now %d, offset 3 count %d",
|
|
fDocState.line, byteCount, byteCountPlusCR & ~kCRatEOL);
|
|
/* not sure why this would legally happen */
|
|
}
|
|
|
|
if (tabFlags == kTabFlagsIsRuler)
|
|
noOutput = true;
|
|
|
|
while ((*pLength > 0) && (byteCount--)) {
|
|
ic = Read8(pSrcPtr, pLength);
|
|
if (*pLength < 0) {
|
|
err = -1;
|
|
goto bail;
|
|
}
|
|
|
|
if (noOutput)
|
|
continue;
|
|
|
|
if (ic < kMinTextChar) {
|
|
switch (ic) {
|
|
case kSpecialCharBoldBegin:
|
|
RTFBoldOn();
|
|
break;
|
|
case kSpecialCharBoldEnd:
|
|
RTFBoldOff();
|
|
break;
|
|
case kSpecialCharSuperscriptBegin:
|
|
RTFSuperscriptOn();
|
|
break;
|
|
case kSpecialCharSuperscriptEnd:
|
|
RTFSuperscriptOff();
|
|
break;
|
|
case kSpecialCharSubscriptBegin:
|
|
RTFSubscriptOn();
|
|
break;
|
|
case kSpecialCharSubscriptEnd:
|
|
RTFSubscriptOff();
|
|
break;
|
|
case kSpecialCharUnderlineBegin:
|
|
RTFUnderlineOn();
|
|
break;
|
|
case kSpecialCharUnderlineEnd:
|
|
RTFUnderlineOff();
|
|
break;
|
|
case kSpecialCharEnterKeyboard:
|
|
if (fShowEmbeds) {
|
|
TextColor oldColor = RTFSetColor(kColorBlue);
|
|
BufPrintf("<kdb-entry>");
|
|
RTFSetColor(oldColor);
|
|
}
|
|
break;
|
|
case kSpecialCharPrintPageNumber:
|
|
if (fShowEmbeds) {
|
|
TextColor oldColor = RTFSetColor(kColorBlue);
|
|
BufPrintf("<page#>");
|
|
RTFSetColor(oldColor);
|
|
}
|
|
break;
|
|
case kSpecialCharStickySpace:
|
|
/* MSWord uses "\~", but RichEdit ignores that */
|
|
BufPrintf("\u00a0"); // Unicode NO-BREAK SPACE
|
|
break;
|
|
case kSpecialCharMailMerge:
|
|
if (fShowEmbeds) {
|
|
TextColor oldColor = RTFSetColor(kColorBlue);
|
|
BufPrintf("<mail-merge>");
|
|
RTFSetColor(oldColor);
|
|
}
|
|
case kSpecialCharPrintDate:
|
|
if (fShowEmbeds) {
|
|
TextColor oldColor = RTFSetColor(kColorBlue);
|
|
BufPrintf("<date>");
|
|
RTFSetColor(oldColor);
|
|
}
|
|
break;
|
|
case kSpecialCharPrintTime:
|
|
if (fShowEmbeds) {
|
|
TextColor oldColor = RTFSetColor(kColorBlue);
|
|
BufPrintf("<time>");
|
|
RTFSetColor(oldColor);
|
|
}
|
|
break;
|
|
case kSpecialCharTab:
|
|
if (fUseRTF)
|
|
RTFTab();
|
|
else
|
|
BufPrintf("\t");
|
|
break;
|
|
case kSpecialCharTabFill:
|
|
/* tab fill char, not vis in doc */
|
|
BufPrintf(" ");
|
|
break;
|
|
default:
|
|
LOGI(" AWP unhandled special char 0x%02x", ic);
|
|
if (fShowEmbeds) {
|
|
TextColor oldColor = RTFSetColor(kColorBlue);
|
|
BufPrintf("^");
|
|
RTFSetColor(oldColor);
|
|
}
|
|
}
|
|
} else {
|
|
// Character.
|
|
bool wantInverse = false;
|
|
uint16_t mtLow = 0, mtHigh = 0;
|
|
|
|
if (ic >= 0x80 && ic <= 0x9f) {
|
|
// inverse upper; map 100x xxxx --> 010x xxxx
|
|
ic ^= 0xc0;
|
|
wantInverse = true;
|
|
} else if (ic >= 0xa0 && ic <= 0xbf || ic >= 0xe0 && ic <= 0xff) {
|
|
// inverse symbols; map 101x xxxx --> 001x xxxx
|
|
// inverse lower; map 111x xxxx --> 011x xxxx
|
|
ic ^= 0x80;
|
|
wantInverse = true;
|
|
} else if (ic >= 0xc0 && ic <= 0xdf) {
|
|
// MouseText characters
|
|
if (fMouseTextToASCII) {
|
|
ic = MouseTextToASCII(ic & 0x1f);
|
|
} else {
|
|
MouseTextToUTF16(ic & 0x1f, &mtLow, &mtHigh);
|
|
ic = '?';
|
|
}
|
|
} else {
|
|
// plain ASCII
|
|
}
|
|
|
|
if (wantInverse && !inverse) {
|
|
inverse = true;
|
|
RTFInverseOn();
|
|
} else if (!wantInverse && inverse) {
|
|
inverse = false;
|
|
RTFInverseOff();
|
|
}
|
|
|
|
if (fUseRTF) {
|
|
if (mtLow != 0) {
|
|
if (mtHigh != 0) {
|
|
RTFPrintUTF16Char(mtHigh);
|
|
}
|
|
RTFPrintUTF16Char(mtLow);
|
|
} else {
|
|
RTFPrintChar(ic);
|
|
}
|
|
} else {
|
|
// Plain text output.
|
|
BufPrintf("%c", PrintableChar(ic));
|
|
}
|
|
}
|
|
}
|
|
|
|
if (inverse) {
|
|
RTFInverseOff();
|
|
}
|
|
|
|
/* if there's a carriage return at the end of the line, output it now */
|
|
if (byteCountPlusCR & kCRatEOL) {
|
|
RTFNewPara();
|
|
}
|
|
|
|
/* another line processed, advance the line counter */
|
|
fDocState.line++;
|
|
|
|
bail:
|
|
return err;
|
|
}
|
|
|
|
|
|
/*
|
|
* ===========================================================================
|
|
* AppleWorks DB
|
|
* ===========================================================================
|
|
*/
|
|
|
|
/*
|
|
* AppleWorks database file format, from FTN.19.xxxx.
|
|
*
|
|
* The overall file format is:
|
|
*
|
|
* variable-sized file header
|
|
* 0 to 8 (0 to 30 in 3.0) report records, 600 bytes each
|
|
* variable-sized data records
|
|
* $ff $ff
|
|
* optional tags
|
|
*/
|
|
|
|
/*
|
|
* Decide whether or not we want to handle this file.
|
|
*/
|
|
void ReformatADB::Examine(ReformatHolder* pHolder)
|
|
{
|
|
ReformatHolder::ReformatApplies applies = ReformatHolder::kApplicNot;
|
|
|
|
if (pHolder->GetFileType() == kTypeADB)
|
|
applies = ReformatHolder::kApplicProbably;
|
|
|
|
pHolder->SetApplic(ReformatHolder::kReformatADB, applies,
|
|
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
|
|
}
|
|
|
|
/*
|
|
* Reformat an AppleWorks DB document.
|
|
*/
|
|
int ReformatADB::Process(const ReformatHolder* pHolder,
|
|
ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
|
|
ReformatOutput* pOutput)
|
|
{
|
|
const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
|
|
long srcLen = pHolder->GetSourceLen(part);
|
|
long length = srcLen;
|
|
int retval = -1;
|
|
int headerLen, numCats, numRecs, numReports;
|
|
|
|
fUseRTF = false;
|
|
|
|
/* expect header plus EOF bytes at least */
|
|
if (srcLen <= kMinHeaderLen) {
|
|
LOGI(" ADB truncated?");
|
|
goto bail;
|
|
}
|
|
|
|
headerLen = Get16LE(srcPtr);
|
|
if (headerLen < kMinHeaderLen || headerLen > length) {
|
|
LOGI(" ADB bad headerLen %d, file len is %d", headerLen,
|
|
srcLen);
|
|
goto bail;
|
|
}
|
|
|
|
RTFBegin();
|
|
|
|
/* offset +035: #of categories in file */
|
|
numCats = *(srcPtr + 35);
|
|
if (numCats < 1 || numCats > 0x1e) {
|
|
LOGI(" ADB GLITCH: unexpected numCats %d", numCats);
|
|
/* keep going... */
|
|
}
|
|
LOGI(" ADB should be %d categories", numCats);
|
|
|
|
/* offset +036-037: #of records in file */
|
|
numRecs = Get16LE(srcPtr + 36) & 0x7fff;
|
|
LOGI(" ADB should be %d records", numRecs);
|
|
|
|
/* offset +038: #of reports in file */
|
|
numReports = *(srcPtr + 38);
|
|
LOGI(" ADB should be %d reports", numReports);
|
|
|
|
/* dump category names as first record */
|
|
const uint8_t* catPtr;
|
|
int catCount;
|
|
catPtr = srcPtr + 357;
|
|
catCount = numCats;
|
|
while (catCount--) {
|
|
if (catCount == numCats-1)
|
|
BufPrintf("\"");
|
|
else
|
|
BufPrintf(",\"");
|
|
|
|
int nameLen = *catPtr;
|
|
const uint8_t* namePtr = catPtr + 1;
|
|
while (nameLen--) {
|
|
if (*namePtr == '"')
|
|
BufPrintf("\"\"");
|
|
else
|
|
BufPrintf("%c", *namePtr);
|
|
namePtr++;
|
|
}
|
|
|
|
BufPrintf("\"");
|
|
|
|
catPtr += kCatNameLen+2;
|
|
}
|
|
BufPrintf("\r\n");
|
|
|
|
/*
|
|
* Advance pointer to first data record. The first record contains
|
|
* "standard values".
|
|
*
|
|
* Each record looks like this:
|
|
* $00-$01: count of bytes in remainder of record
|
|
* $02 : control byte, one of:
|
|
* $01-$7f: number of following bytes for this category
|
|
* $81-$9e: this (minus $80) is #of categories to skip
|
|
* $ff : end of record
|
|
*
|
|
* The data within the categories may have special meanings, e.g. if it
|
|
* starts with $c0 it's a date record and $d4 is a time record.
|
|
*/
|
|
int offsetToData;
|
|
offsetToData = kOffsetToFirstCatHeader +
|
|
numCats*kCatHeaderLen + numReports*kReportRecordLen;
|
|
LOGI(" ADB data records begin at offset 0x%08lx", offsetToData);
|
|
if (offsetToData >= length) {
|
|
LOGI(" ADB GLITCH: offset >= length %ld", length);
|
|
goto bail;
|
|
}
|
|
|
|
srcPtr += offsetToData;
|
|
length -= offsetToData;
|
|
|
|
int rr;
|
|
for (rr = 0; rr < numRecs && length > 0; rr++) {
|
|
int recordRem = Read16(&srcPtr, &length);
|
|
if (rr == 0) {
|
|
/* skip first record */
|
|
srcPtr += recordRem;
|
|
length -= recordRem;
|
|
if (*(srcPtr-1) != 0xff) {
|
|
LOGI(" ADB GLITCH: first record skipped past 0x%02x",
|
|
*(srcPtr-1));
|
|
/* keep going, I guess */
|
|
}
|
|
continue;
|
|
}
|
|
|
|
int catNum = 0;
|
|
|
|
/* scan through all categories in this record */
|
|
int ctrl = Read8(&srcPtr, &length);
|
|
while (ctrl != 0xff && length > 0) {
|
|
if (ctrl >= 0x01 && ctrl <= 0x7f) {
|
|
/* just data */
|
|
if (catNum == 0)
|
|
BufPrintf("\"");
|
|
else
|
|
BufPrintf(",\"");
|
|
if (*srcPtr == 0xc0) {
|
|
static const char kMonths[12][4] = {
|
|
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
|
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
|
|
};
|
|
/* date entry */
|
|
Read8(&srcPtr, &length); // throw out the 0xc0
|
|
char year[2], month, day[2];
|
|
year[0] = Read8(&srcPtr, &length);
|
|
year[1] = Read8(&srcPtr, &length);
|
|
month = Read8(&srcPtr, &length);
|
|
day[0] = Read8(&srcPtr, &length);
|
|
day[1] = Read8(&srcPtr, &length);
|
|
if (day[0] == ' ')
|
|
day[0] = '0';
|
|
BufPrintf("%c%c-%s-%s%c%c",
|
|
day[0], day[1],
|
|
month >= 'A' && month <= 'L' ? kMonths[month-'A'] : "???",
|
|
year[0] < '7' ? "20" : "19", year[0], year[1]);
|
|
} else if (*srcPtr == 0xd4) {
|
|
/* time entry */
|
|
Read8(&srcPtr, &length); // throw out the 0xd4
|
|
char hour, minute[2];
|
|
hour = Read8(&srcPtr, &length);
|
|
minute[0] = Read8(&srcPtr, &length);
|
|
minute[1] = Read8(&srcPtr, &length);
|
|
if (hour >= 'A' && hour < 'M') {
|
|
if (hour == 'A') // don't show 00:00
|
|
hour = 'A' + 12;
|
|
BufPrintf("%02d:%c%c AM",
|
|
hour - 'A', minute[0], minute[1]);
|
|
} else if (hour >= 'M' && hour <= 'X') {
|
|
if (hour == 'M') // don't show 00:00
|
|
hour = 'M' + 12;
|
|
BufPrintf("%02d:%c%c PM",
|
|
hour - 'M', minute[0], minute[1]);
|
|
}
|
|
} else {
|
|
while (ctrl--) {
|
|
uint8_t ch = Read8(&srcPtr, &length);
|
|
BufPrintf("%c", ch);
|
|
if (ch == '"')
|
|
BufPrintf("%c", ch);
|
|
}
|
|
}
|
|
BufPrintf("\"");
|
|
} else if (ctrl >= 0x81 && ctrl <= 0x9e) {
|
|
/* skip over empty categories */
|
|
ctrl -= 0x80;
|
|
while (ctrl--) {
|
|
BufPrintf(",");
|
|
catNum++;
|
|
}
|
|
catNum--; // don't double-count this category
|
|
} else {
|
|
LOGI(" ADB GLITCH: invalid ctrl byte 0x%02x", ctrl);
|
|
break;
|
|
/* keep going anyway? */
|
|
}
|
|
|
|
catNum++;
|
|
ctrl = Read8(&srcPtr, &length);
|
|
}
|
|
while (catNum < numCats) {
|
|
BufPrintf(",");
|
|
catNum++;
|
|
}
|
|
|
|
/* end of record */
|
|
RTFNewPara();
|
|
}
|
|
LOGI(" ADB at exit rr=%d numRecs=%d", rr, numRecs);
|
|
|
|
int checkEnd;
|
|
checkEnd = Read16(&srcPtr, &length);
|
|
if (checkEnd != 0xffff) {
|
|
LOGI(" ADB GLITCH: last read returned 0x%04x", checkEnd);
|
|
} else {
|
|
LOGI(" ADB found EOF; success");
|
|
}
|
|
|
|
/*
|
|
* Read the optional tags.
|
|
*/
|
|
/* (nah) */
|
|
|
|
RTFEnd();
|
|
|
|
SetResultBuffer(pOutput);
|
|
pOutput->SetOutputKind(ReformatOutput::kOutputCSV);
|
|
retval = 0;
|
|
|
|
bail:
|
|
return retval;
|
|
}
|
|
|
|
|
|
/*
|
|
* ===========================================================================
|
|
* AppleWorks SS
|
|
* ===========================================================================
|
|
*/
|
|
|
|
/*
|
|
* AppleWorks spreadsheet file format, from FTN.1b.xxxx.
|
|
*
|
|
* The overall file format is:
|
|
*
|
|
* fixed-sized file header
|
|
* series of variable-length row records
|
|
* collection of cell data
|
|
* $ff $ff
|
|
* optional tags
|
|
*
|
|
* The cell data can take several different forms.
|
|
*/
|
|
|
|
/*
|
|
* Decide whether or not we want to handle this file.
|
|
*/
|
|
void ReformatASP::Examine(ReformatHolder* pHolder)
|
|
{
|
|
ReformatHolder::ReformatApplies applies = ReformatHolder::kApplicNot;
|
|
|
|
if (pHolder->GetFileType() == kTypeASP)
|
|
applies = ReformatHolder::kApplicProbably;
|
|
|
|
pHolder->SetApplic(ReformatHolder::kReformatASP, applies,
|
|
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
|
|
}
|
|
|
|
/*
|
|
* Reformat an AppleWorks SS document.
|
|
*/
|
|
int ReformatASP::Process(const ReformatHolder* pHolder,
|
|
ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
|
|
ReformatOutput* pOutput)
|
|
{
|
|
const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
|
|
long srcLen = pHolder->GetSourceLen(part);
|
|
long length = srcLen;
|
|
int retval = -1;
|
|
const FileHeader* pFileHeader;
|
|
bool aw30flag;
|
|
|
|
ASSERT(sizeof(FileHeader) == kFileHeaderSize);
|
|
|
|
fUseRTF = false;
|
|
|
|
/* must at least have the header */
|
|
if (length < kFileHeaderSize) {
|
|
LOGI(" ADB truncated?");
|
|
goto bail;
|
|
}
|
|
|
|
RTFBegin();
|
|
|
|
pFileHeader = (const FileHeader*) srcPtr;
|
|
aw30flag = false;
|
|
if (pFileHeader->ssMinVers != 0)
|
|
aw30flag = true;
|
|
LOGI(" ASP ssMinVers=0x%02x, aw30flag=%d",
|
|
pFileHeader->ssMinVers, aw30flag);
|
|
|
|
/*
|
|
* Advance pointer past file header. v3.0 adds a couple of extra bytes
|
|
* right after the end of the header that the FTN says we should just
|
|
* ignore.
|
|
*/
|
|
srcPtr += kFileHeaderSize;
|
|
length -= kFileHeaderSize;
|
|
if (aw30flag) {
|
|
srcPtr += 2;
|
|
length -= 2;
|
|
}
|
|
|
|
/*
|
|
* Loop through the file, reading one row at a time. There is no row
|
|
* count because spreadsheets are sparse.
|
|
*
|
|
* We assume that rows are stored from top to bottom. The count begins
|
|
* with 1, not zero.
|
|
*/
|
|
fCurrentRow = 1;
|
|
while (length > 0) {
|
|
uint16_t rowLen;
|
|
int rowNum;
|
|
|
|
/* row length or EOF marker */
|
|
rowLen = Read16(&srcPtr, &length);
|
|
if (rowLen == 0xffff) {
|
|
LOGI(" ASP found EOF marker, we're done");
|
|
break;
|
|
}
|
|
|
|
rowNum = Read16(&srcPtr, &length);
|
|
//LOGI(" ASP process row %d (cur=%d)", rowNum, currentRow);
|
|
|
|
/* fill out empty rows */
|
|
ASSERT(fCurrentRow <= rowNum);
|
|
while (fCurrentRow < rowNum) {
|
|
BufPrintf("\"\"\r\n");
|
|
fCurrentRow++;
|
|
}
|
|
|
|
if (ProcessRow(rowNum, &srcPtr, &length) != 0)
|
|
break;
|
|
|
|
fCurrentRow++;
|
|
}
|
|
|
|
/*
|
|
* Read the optional tags.
|
|
*/
|
|
/* (nah) */
|
|
|
|
RTFEnd();
|
|
|
|
SetResultBuffer(pOutput);
|
|
pOutput->SetOutputKind(ReformatOutput::kOutputCSV);
|
|
retval = 0;
|
|
|
|
bail:
|
|
return retval;
|
|
}
|
|
|
|
/*
|
|
* Process one row of spreadsheet data.
|
|
*/
|
|
int ReformatASP::ProcessRow(int rowNum, const uint8_t** pSrcPtr, long* pLength)
|
|
{
|
|
uint8_t ctrl;
|
|
bool first = true;
|
|
|
|
fCurrentCol = 0;
|
|
while (*pLength > 0) {
|
|
ctrl = Read8(pSrcPtr, pLength);
|
|
if (ctrl >= 0x01 && ctrl <= 0x7f) {
|
|
if (!first)
|
|
BufPrintf(",");
|
|
else
|
|
first = false;
|
|
/* read cell entry contents */
|
|
if (ctrl > *pLength) {
|
|
LOGI(" ASP GLITCH: cell len exceeds file len (%d %d)",
|
|
*pLength, ctrl);
|
|
break;
|
|
}
|
|
ProcessCell(*pSrcPtr, ctrl);
|
|
(*pSrcPtr) += ctrl;
|
|
*pLength -= ctrl;
|
|
} else if (ctrl >= 0x81 && ctrl <= 0xfe) {
|
|
/* skip this many columns */
|
|
if (!first)
|
|
BufPrintf(",");
|
|
else
|
|
first = false;
|
|
|
|
ctrl -= 0x80;
|
|
ctrl--;
|
|
while (ctrl--) {
|
|
BufPrintf(",");
|
|
fCurrentCol++;
|
|
}
|
|
} else if (ctrl == 0xff) {
|
|
/* end of row */
|
|
break;
|
|
} else {
|
|
/* unexpected 0x00 or 0x80 */
|
|
LOGI(" ASP GLITCH: unexpected ctrl byte 0x%02x", ctrl);
|
|
break;
|
|
}
|
|
|
|
fCurrentCol++;
|
|
}
|
|
|
|
BufPrintf("\r\n");
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Process the contents of a single cell.
|
|
*/
|
|
void ReformatASP::ProcessCell(const uint8_t* srcPtr, long cellLength)
|
|
{
|
|
uint8_t flag1, flag2;
|
|
double dval;
|
|
int i;
|
|
|
|
BufPrintf("\"");
|
|
|
|
flag1 = *srcPtr++;
|
|
cellLength--;
|
|
|
|
if (flag1 & 0x80) { /* bit 7 set? */
|
|
/* this is a value, not a label */
|
|
flag2 = *srcPtr++;
|
|
cellLength--;
|
|
|
|
if (flag1 & 0x20) { /* bit 5 set? */
|
|
/* this is a "value constant" */
|
|
dval = ConvertSANEDouble(srcPtr);
|
|
BufPrintf("%f", dval);
|
|
} else {
|
|
if (flag2 & 0x08) { /* bit 3 set? */
|
|
/* this is a "value label", AW30+ only */
|
|
/* skip over cached string result */
|
|
if (*srcPtr >= cellLength) {
|
|
LOGI(" ASP GLITCH: invalid value label str len");
|
|
BufPrintf("GLITCH");
|
|
} else {
|
|
srcPtr += *srcPtr +1;
|
|
/* output tokens */
|
|
while (cellLength > 0)
|
|
PrintToken(&srcPtr, &cellLength);
|
|
}
|
|
} else {
|
|
/* this is a "value formula" */
|
|
//dval = ConvertSANEDouble(srcPtr);
|
|
/* skip over cached computation result */
|
|
if (cellLength <= kSANELen) {
|
|
LOGI(" ASP GLITCH: invalid value formula len");
|
|
BufPrintf("GLITCH");
|
|
} else {
|
|
srcPtr += kSANELen;
|
|
cellLength -= kSANELen;
|
|
/* [tokens] */
|
|
while (cellLength > 0)
|
|
PrintToken(&srcPtr, &cellLength);
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
/* this is a label, not a value */
|
|
|
|
if (flag1 & 0x20) { /* bit 5 set? */
|
|
/* propagated label cell */
|
|
for (i = 0; i < kPropCount; i++)
|
|
BufPrintQChar(*srcPtr);
|
|
} else {
|
|
/* regular label cell */
|
|
for (i = 0; i < cellLength; i++)
|
|
BufPrintQChar(srcPtr[i]);
|
|
}
|
|
}
|
|
|
|
BufPrintf("\"");
|
|
}
|
|
|
|
/*
|
|
* Print the AppleWorks SS token pointed to by srcPtr. Some tokens require
|
|
* several bytes to express.
|
|
*/
|
|
void ReformatASP::PrintToken(const uint8_t** pSrcPtr, long* pLength)
|
|
{
|
|
/* string constants; note these must NOT contain '"' chars */
|
|
const int kTokenStart = 0xc0;
|
|
const char* tokenTable[0x100-kTokenStart] = {
|
|
/*0xc0*/ "@Deg", "@Rad", "@Pi", "@True",
|
|
/*0xc4*/ "@False", "@Not", "@IsBlank", "@IsNA",
|
|
/*0xc8*/ "@IsError", "@Exp", "@Ln", "@Log",
|
|
/*0xcc*/ "@Cos", "@Sin", "@Tan", "@ACos",
|
|
/*0xd0*/ "@ASin", "@ATan2", "@ATan", "@Mod",
|
|
/*0xd4*/ "@FV", "@PV", "@PMT", "@Term",
|
|
/*0xd8*/ "@Rate", "@Round", "@Or", "@And",
|
|
/*0xdc*/ "@Sum", "@Avg", "@Choose", "@Count",
|
|
/*0xe0*/ "@Error", "@IRR", "@If", "@Int",
|
|
/*0xe4*/ "@Lookup", "@Max", "@Min", "@NA",
|
|
/*0xe8*/ "@NPV", "@Sqrt", "@Abs", "",
|
|
/*0xec*/ "<>", ">=", "<=", "=",
|
|
/*0xf0*/ ">", "<", ",", "^",
|
|
/*0xf4*/ ")", "-", "+", "/",
|
|
/*0xf8*/ "*", "(", "-" /*unary*/, "+" /*unary*/,
|
|
/*0xfc*/ "...", "", "", ""
|
|
};
|
|
uint8_t token;
|
|
|
|
token = Read8(pSrcPtr, pLength);
|
|
if (token < kTokenStart) {
|
|
LOGI(" ASP GLITCH: funky token 0x%02x", token);
|
|
return;
|
|
}
|
|
|
|
BufPrintf("%s", tokenTable[token - kTokenStart]);
|
|
if (token == 0xe0 || token == 0xe7) {
|
|
/* @Error and @NA followed by three zero bytes */
|
|
if (*pLength < 3) {
|
|
LOGI(" ASP GLITCH: ran off end processing tokens");
|
|
return;
|
|
}
|
|
*pSrcPtr += 3;
|
|
*pLength -= 3;
|
|
} else if (token == 0xfd) {
|
|
/* SANE double number */
|
|
if (*pLength < 8) {
|
|
LOGI(" ASP GLITCH: not enough left to grab a SANE");
|
|
return;
|
|
}
|
|
double dval = ConvertSANEDouble(*pSrcPtr);
|
|
BufPrintf("%f", dval);
|
|
*pSrcPtr += 8;
|
|
*pLength -= 8;
|
|
} else if (token == 0xfe) {
|
|
/* row, column reference (relative to current cell) */
|
|
int row, col;
|
|
col = Read8(pSrcPtr, pLength);
|
|
if (col >= 128)
|
|
col -= 256;
|
|
row = Read16(pSrcPtr, pLength);
|
|
if (row >= 32768)
|
|
row -= 65536;
|
|
BufPrintf("%s%d", PrintCol(fCurrentCol+col), fCurrentRow+row);
|
|
} else if (token == 0xff) {
|
|
/* Pascal string */
|
|
int i;
|
|
i = Read8(pSrcPtr, pLength);
|
|
if (i > *pLength) {
|
|
LOGI(" ASP GLITCH: string exceeds cell len");
|
|
return;
|
|
}
|
|
while (i--) {
|
|
uint8_t ch = Read8(pSrcPtr, pLength);
|
|
BufPrintQChar(ch);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Format the current column number into something like 'A' or 'BA'. This
|
|
* stores the value in fPrintColBuf and returns a pointer to it.
|
|
*/
|
|
const char* ReformatASP::PrintCol(int col)
|
|
{
|
|
if (col < 0 || col >= 702) {
|
|
LOGI(" ASP can't PrintCol(%d)", col);
|
|
fPrintColBuf[0] = fPrintColBuf[1] = '?';
|
|
fPrintColBuf[2] = '\0';
|
|
} else if (col < 26) {
|
|
fPrintColBuf[0] = 'A' + col;
|
|
fPrintColBuf[1] = '\0';
|
|
} else {
|
|
fPrintColBuf[0] = 'A' + col / 26;
|
|
fPrintColBuf[1] = 'A' + col % 26;
|
|
fPrintColBuf[2] = '\0';
|
|
}
|
|
return fPrintColBuf;
|
|
}
|
|
|
|
/*
|
|
* Convert a 64-bit SANE double to an x86 double. The format is the same as
|
|
* IEEE 754, which happily is the same used by the VC++6.0 compiler.
|
|
*
|
|
* Info from http://www.cs.trinity.edu/About/The_Courses/cs2322/ieee-fp.html
|
|
* (also http://www.psc.edu/general/software/packages/ieee/ieee.html).
|
|
*
|
|
* -----
|
|
* The 64-bit double format is divided into three fields as shown below:
|
|
*
|
|
* 1 11 52
|
|
* +-------------------------------------+
|
|
* | s | e | f |
|
|
* +-------------------------------------+
|
|
*
|
|
* The value v of the number is determined by these fields as shown in
|
|
* the following table:
|
|
*
|
|
* Values of double-format numbers (64 bits)
|
|
* ___________________________________________________________
|
|
* e f v class of v
|
|
* ___________________________________________________________
|
|
* 0<e<2047 (any) v=(-1)s x 2(e-1023) x (1.f) normalized
|
|
* e=0 f!=0 v=(-1)s x 2(e-1022) x (0.f) denormalized
|
|
* e=0 f=0 v=(-1)s x 0 zero
|
|
* e=2047 f=0 v=(-1)s x infinity infinity
|
|
* e=2047 f!=0 v is a NaN NaN
|
|
*
|
|
 * For example, the double representation (in hex notation) of 1.5 is
 *      3FF8000000000000
 * and the double representation of 0.01 (with the mantissa truncated) is
 *      3F847AE147AE147A
|
|
* -----
|
|
*/
|
|
double ReformatASP::ConvertSANEDouble(const uint8_t* srcPtr)
|
|
{
|
|
double newVal;
|
|
uint8_t* dptr;
|
|
int i;
|
|
|
|
ASSERT(sizeof(newVal) == kSANELen);
|
|
|
|
dptr = (uint8_t*) &newVal;
|
|
for (i = 0; i < kSANELen; i++)
|
|
*dptr++ = *srcPtr++;
|
|
|
|
return newVal;
|
|
}
|