/*
 * CiderPress
 * Copyright (C) 2007 by faddenSoft, LLC.  All Rights Reserved.
 * See the file LICENSE for distribution terms.
 */
/*
 * Convert AppleWorks 3.0 documents.
 */
#include "StdAfx.h"
#include "AppleWorks.h"

/*
 * ===========================================================================
 *      AppleWorks WP
 * ===========================================================================
 */

/*
 * AppleWorks word processor file format, from FTN.1A.xxxx.
 *
 * The overall file format is:
 *
 *  file header
 *  array of line records
 *  $ff $ff
 *  optional tags
 *
 * AppleWorks 5.0 introduced inverse and MouseText characters.
 * These just use previously-unused byte ranges.  The full set
 * of values is thus:
 *  00-1f   special
 *  20-7f   plain ASCII
 *  80-9f   inverse upper (map to 40-5f)
 *  a0-bf   inverse symbols/numbers (map to 20-3f)
 *  c0-df   MouseText
 *  e0-ff   inverse lower (map to 60-7f)
 *
 * We can output MouseText as Unicode symbols.  Inverse text can use the Rich
 * Text "highlight" feature; the "background color" feature doesn't seem to
 * have any effect.
 */

/*
 * Report whether this reformatter applies to the file held by "pHolder".
 *
 * The first applicability value passed to SetApplic() is "probably" when
 * the holder's file type is kTypeAWP and "not" otherwise; the other two
 * values are always "not".
 */
void ReformatAWP::Examine(ReformatHolder* pHolder)
{
    const bool isAWP = (pHolder->GetFileType() == kTypeAWP);
    const ReformatHolder::ReformatApplies verdict =
        isAWP ? ReformatHolder::kApplicProbably : ReformatHolder::kApplicNot;

    pHolder->SetApplic(ReformatHolder::kReformatAWP, verdict,
        ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}

/*
 * Reformat an AppleWorks WP document.
*/ int ReformatAWP::Process(const ReformatHolder* pHolder, ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part, ReformatOutput* pOutput) { const uint8_t* srcPtr = pHolder->GetSourceBuf(part); long srcLen = pHolder->GetSourceLen(part); long length = srcLen; int retval = -1; bool skipRecord; uint8_t lineRecCode, lineRecData; fMouseTextToASCII = pHolder->GetOption(ReformatHolder::kOptMouseTextToASCII) != 0; if (srcLen > 65536) fUseRTF = false; //fUseRTF = false; //fShowEmbeds = false; /* expect header plus EOF bytes at least */ if (srcLen <= kFileHeaderSize) { LOGI(" AWP truncated?"); goto bail; } RTFBegin(kRTFFlagColorTable); /* * Grab the file header. */ assert(sizeof(fFileHeader) == kFileHeaderSize); memcpy(&fFileHeader, srcPtr, sizeof(fFileHeader)); srcPtr += sizeof(fFileHeader); length -= sizeof(fFileHeader); /* do some quick sanity checks */ if (fFileHeader.seventyNine != kSeventyNine) { LOGI("ERROR: expected %d in signature byte, found %d", kSeventyNine, fFileHeader.seventyNine); goto bail; } if (fFileHeader.sfMinVers && fFileHeader.sfMinVers != kSFMinVers30) { LOGI("WARNING: unexpected value %d for sfMinVers", fFileHeader.sfMinVers); /* keep going */ } InitDocState(); /* if first line record is invalid, skip it */ skipRecord = false; if (fFileHeader.sfMinVers == kSFMinVers30) skipRecord = true; /* set margins to 1.0 inches at 10cpi */ RTFLeftMargin(10); RTFRightMargin(10); /* * Read the line records. */ while (1) { if (length < 0) { LOGI(" AWP truncated file"); goto bail; } lineRecData = Read8(&srcPtr, &length); if (length < 0) { LOGI(" AWP truncated file"); goto bail; } lineRecCode = Read8(&srcPtr, &length); if (length < 0) { LOGI(" AWP truncated file"); goto bail; } if (skipRecord) { skipRecord = false; continue; } /* end of data reached? 
*/ if (lineRecData == kEOFMarker && lineRecCode == kEOFMarker) break; if (ProcessLineRecord(lineRecData, lineRecCode, &srcPtr, &length) != 0) { LOGI("ProcessLineRecord failed, bailing"); goto bail; } } /* * Read the optional tags. */ /* (nah) */ RTFEnd(); SetResultBuffer(pOutput); retval = 0; bail: return retval; } /* * Initialize the DocState structure. */ void ReformatAWP::InitDocState(void) { memset(&fDocState, 0, sizeof(fDocState)); fDocState.line = 1; } /* * Process a line record. */ int ReformatAWP::ProcessLineRecord(uint8_t lineRecData, uint8_t lineRecCode, const uint8_t** pSrcPtr, long* pLength) { int err = 0; //LOGI(" AWP line rec <0x%02x><0x%02x>", lineRecCode, lineRecData); if (lineRecCode == kLineRecordCarriageReturn) { /* ignore the horizontal offset for now */ RTFNewPara(); } else if (lineRecCode == kLineRecordText) { if (pLength > 0) err = HandleTextRecord(lineRecData, pSrcPtr, pLength); else err = -1; } else if (lineRecCode >= kLineRecordCommandMin && lineRecCode <= kLineRecordCommandMax) { switch (lineRecCode) { case kLineRecordCommandCenter: RTFParaCenter(); break; case kLineRecordCommandRightJustify: RTFParaRight(); break; case kLineRecordCommandUnjustify: RTFParaLeft(); break; case kLineRecordCommandJustify: RTFParaJustify(); break; case kLineRecordCommandLeftMargin: RTFLeftMargin(lineRecData); break; case kLineRecordCommandRightMargin: RTFRightMargin(lineRecData); break; /* we handle these by showing them in the text */ case kLineRecordCommandPageNumber: if (fShowEmbeds) { RTFSetColor(kColorBlue); BufPrintf("", lineRecData); RTFSetColor(kColorNone); RTFNewPara(); } break; case kLineRecordCommandPageHeader: if (fShowEmbeds) { RTFSetColor(kColorBlue); BufPrintf(""); RTFSetColor(kColorNone); RTFNewPara(); } break; case kLineRecordCommandPageHeaderEnd: if (fShowEmbeds) { RTFSetColor(kColorBlue); BufPrintf(""); RTFSetColor(kColorNone); RTFNewPara(); } break; case kLineRecordCommandPageFooter: if (fShowEmbeds) { RTFSetColor(kColorBlue); 
BufPrintf(""); RTFSetColor(kColorNone); RTFNewPara(); } break; case kLineRecordCommandPageFooterEnd: if (fShowEmbeds) { RTFSetColor(kColorBlue); BufPrintf(""); RTFSetColor(kColorNone); RTFNewPara(); } break; case kLineRecordCommandNewPage: if (fUseRTF) RTFPageBreak(); else if (fShowEmbeds) { RTFSetColor(kColorBlue); // won't do anything BufPrintf(""); RTFSetColor(kColorNone); } break; case kLineRecordCommandPlatenWidth: case kLineRecordCommandCharsPerInch: case kLineRecordCommandProportional1: case kLineRecordCommandProportional2: case kLineRecordCommandIndent: case kLineRecordCommandPaperLength: case kLineRecordCommandTopMargin: case kLineRecordCommandBottomMargin: case kLineRecordCommandLinesPerInch: case kLineRecordCommandSingleSpace: case kLineRecordCommandDoubleSpace: case kLineRecordCommandTripleSpace: case kLineRecordCommandGroupBegin: case kLineRecordCommandGroupEnd: case kLineRecordCommandSkipLines: case kLineRecordCommandPauseEachPage: case kLineRecordCommandPauseHere: case kLineRecordCommandSetMarker: case kLineRecordCommandSetPageNumber: case kLineRecordCommandPageBreak: case kLineRecordCommandPageBreak256: case kLineRecordCommandPageBreakPara: case kLineRecordCommandPageBreakPara256: default: LOGD(" AWP cmd <0x%02x><0x%02x>", lineRecCode, lineRecData); break; } } else { /* bad command */ LOGW("WARNING: unrecognized code 0x%02x at 0x%p", lineRecCode, *pSrcPtr); fDocState.softFailures++; if (fDocState.softFailures > kMaxSoftFailures) { LOGE("ERROR: too many failures, giving up"); err = -1; } } return err; } /* * Handle a text record. The first two bytes are flags, the rest is * either the text or a ruler. Special codes may be embedded in the text. * * "lineRecData" has the number of bytes of input that we have yet to read. 
*/ int ReformatAWP::HandleTextRecord(uint8_t lineRecData, const uint8_t** pSrcPtr, long* pLength) { int err = 0; uint8_t tabFlags; uint8_t byteCountPlusCR; int byteCount = lineRecData; bool noOutput = false; bool inverse = false; int ic; tabFlags = Read8(pSrcPtr, pLength); byteCount--; byteCountPlusCR = Read8(pSrcPtr, pLength); byteCount--; if (*pLength < 0) { err = -1; goto bail; } if (byteCount <= 0) { LOGI("WARNING: line %ld: short line (%d)", fDocState.line, byteCount); /* this is bad, but keep going anyway */ } if ((byteCountPlusCR & ~kCRatEOL) != byteCount) { LOGI("WARNING: line %ld: byteCount now %d, offset 3 count %d", fDocState.line, byteCount, byteCountPlusCR & ~kCRatEOL); /* not sure why this would legally happen */ } if (tabFlags == kTabFlagsIsRuler) noOutput = true; while ((*pLength > 0) && (byteCount--)) { ic = Read8(pSrcPtr, pLength); if (*pLength < 0) { err = -1; goto bail; } if (noOutput) continue; if (ic < kMinTextChar) { switch (ic) { case kSpecialCharBoldBegin: RTFBoldOn(); break; case kSpecialCharBoldEnd: RTFBoldOff(); break; case kSpecialCharSuperscriptBegin: RTFSuperscriptOn(); break; case kSpecialCharSuperscriptEnd: RTFSuperscriptOff(); break; case kSpecialCharSubscriptBegin: RTFSubscriptOn(); break; case kSpecialCharSubscriptEnd: RTFSubscriptOff(); break; case kSpecialCharUnderlineBegin: RTFUnderlineOn(); break; case kSpecialCharUnderlineEnd: RTFUnderlineOff(); break; case kSpecialCharEnterKeyboard: if (fShowEmbeds) { TextColor oldColor = RTFSetColor(kColorBlue); BufPrintf(""); RTFSetColor(oldColor); } break; case kSpecialCharPrintPageNumber: if (fShowEmbeds) { TextColor oldColor = RTFSetColor(kColorBlue); BufPrintf(""); RTFSetColor(oldColor); } break; case kSpecialCharStickySpace: /* MSWord uses "\~", but RichEdit ignores that */ BufPrintf("\u00a0"); // Unicode NO-BREAK SPACE break; case kSpecialCharMailMerge: if (fShowEmbeds) { TextColor oldColor = RTFSetColor(kColorBlue); BufPrintf(""); RTFSetColor(oldColor); } case 
kSpecialCharPrintDate: if (fShowEmbeds) { TextColor oldColor = RTFSetColor(kColorBlue); BufPrintf(""); RTFSetColor(oldColor); } break; case kSpecialCharPrintTime: if (fShowEmbeds) { TextColor oldColor = RTFSetColor(kColorBlue); BufPrintf("