mirror of
https://github.com/fadden/ciderpress.git
synced 2025-01-05 04:29:52 +00:00
bb24f51ccb
We weren't doing a MOR-to-UNI conversion on the sub-volume name, so HFS volumes with non-ASCII characters didn't look right. This also relocates the character-conversion code to a new source file. It's currently part of the reformat lib, though it arguably belongs in util (but that would introduce a new dependency between reformat and util).
536 lines
15 KiB
C++
536 lines
15 KiB
C++
/*
|
|
* CiderPress
|
|
* Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved.
|
|
* See the file LICENSE for distribution terms.
|
|
*/
|
|
/*
|
|
* Reformat AWGS files.
|
|
*/
|
|
#include "StdAfx.h"
|
|
#include "AWGS.h"
|
|
|
|
/*
|
|
* Decide whether or not we want to handle this file.
|
|
*/
|
|
void ReformatAWGS_WP::Examine(ReformatHolder* pHolder)
|
|
{
|
|
ReformatHolder::ReformatApplies applies = ReformatHolder::kApplicNot;
|
|
|
|
if (pHolder->GetFileType() == kTypeGWP && pHolder->GetAuxType() == 0x8010)
|
|
applies = ReformatHolder::kApplicYes;
|
|
|
|
pHolder->SetApplic(ReformatHolder::kReformatAWGS_WP, applies,
|
|
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
|
|
}
|
|
|
|
/*
|
|
* Convert AWGS into formatted text.
|
|
*/
|
|
int ReformatAWGS_WP::Process(const ReformatHolder* pHolder,
|
|
ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
|
|
ReformatOutput* pOutput)
|
|
{
|
|
const uint8_t* srcBuf = pHolder->GetSourceBuf(part);
|
|
long srcLen = pHolder->GetSourceLen(part);
|
|
fUseRTF = true;
|
|
Chunk doc, header, footer;
|
|
uint16_t val;
|
|
|
|
Charset::CheckGSCharConv();
|
|
|
|
/* must at least have the doc header and globals */
|
|
if (srcLen < kMinExpectedLen) {
|
|
LOGI("Too short to be AWGS");
|
|
return -1;
|
|
}
|
|
|
|
RTFBegin(kRTFFlagColorTable);
|
|
|
|
/*
|
|
* Pull interesting values out of the document header.
|
|
*/
|
|
val = Get16LE(srcBuf + 0);
|
|
if (val != kExpectedVersion1 && val != kExpectedVersion2) {
|
|
LOGI("AWGS_WP: unexpected version number (got 0x%04x, wanted 0x%04x)",
|
|
val, kExpectedVersion1);
|
|
DebugBreak();
|
|
}
|
|
val = Get16LE(srcBuf + 2);
|
|
if (val != kDocHeaderLen) {
|
|
LOGI("Unexpected doc header len (got 0x%04x, wanted 0x%04x)",
|
|
val, kDocHeaderLen);
|
|
return -1;
|
|
}
|
|
/* the color table is 32 bytes at +56, should we be interested */
|
|
|
|
srcBuf += kDocHeaderLen;
|
|
srcLen -= kDocHeaderLen;
|
|
|
|
/*
|
|
* Pull interesting values out of the WP global variables section.
|
|
*/
|
|
val = Get16LE(srcBuf + 0);
|
|
if (val > kExpectedIntVersion) {
|
|
LOGI("Unexpected internal version number (got %d, expected %d)",
|
|
val, kExpectedIntVersion);
|
|
return -1;
|
|
}
|
|
|
|
/* date/time are pascal strings */
|
|
LOGI("File saved at '%.26hs' '%.10s'", srcBuf + 6, srcBuf + 32);
|
|
|
|
srcBuf += kWPGlobalsLen;
|
|
srcLen -= kWPGlobalsLen;
|
|
|
|
/*
|
|
* Now come the three chunks, in order: main document, header, footer.
|
|
*/
|
|
LOGI("AWGS_WP: scanning doc");
|
|
if (!ReadChunk(&srcBuf, &srcLen, &doc))
|
|
return -1;
|
|
LOGI("AWGS_WP: scanning header");
|
|
if (!ReadChunk(&srcBuf, &srcLen, &header))
|
|
return -1;
|
|
LOGI("AWGS_WP: scanning footer");
|
|
if (!ReadChunk(&srcBuf, &srcLen, &footer))
|
|
return -1;
|
|
|
|
if (srcLen != 0) {
|
|
LOGI("AWGS NOTE: %ld bytes left in file", srcLen);
|
|
}
|
|
|
|
/*
|
|
* Dump the chunks, starting with header and footer.
|
|
*/
|
|
RTFSetColor(kColorMediumBlue);
|
|
RTFSetFont(kFontCourierNew);
|
|
RTFSetFontSize(10);
|
|
BufPrintf("<header>");
|
|
RTFSetColor(kColorNone);
|
|
RTFNewPara();
|
|
|
|
PrintChunk(&header);
|
|
|
|
RTFSetColor(kColorMediumBlue);
|
|
RTFSetFont(kFontCourierNew);
|
|
RTFSetFontSize(10);
|
|
BufPrintf("</header>");
|
|
RTFSetColor(kColorNone);
|
|
RTFNewPara();
|
|
|
|
RTFSetColor(kColorMediumBlue);
|
|
RTFSetFont(kFontCourierNew);
|
|
RTFSetFontSize(10);
|
|
BufPrintf("<footer>");
|
|
RTFSetColor(kColorNone);
|
|
RTFNewPara();
|
|
|
|
PrintChunk(&footer);
|
|
|
|
RTFSetColor(kColorMediumBlue);
|
|
RTFSetFont(kFontCourierNew);
|
|
RTFSetFontSize(10);
|
|
BufPrintf("</footer>");
|
|
RTFSetColor(kColorNone);
|
|
RTFNewPara();
|
|
|
|
LOGI("AWGS_WP: rendering document");
|
|
PrintChunk(&doc);
|
|
|
|
RTFEnd();
|
|
|
|
SetResultBuffer(pOutput, true);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Read one of the chunks of the file.
|
|
*/
|
|
bool ReformatAWGS_WP::ReadChunk(const uint8_t** pSrcBuf, long* pSrcLen,
|
|
Chunk* pChunk)
|
|
{
|
|
/* starts with the saveArray count */
|
|
pChunk->saveArrayCount = Get16LE(*pSrcBuf);
|
|
if (pChunk->saveArrayCount == 0) {
|
|
/* AWGS always has at least 1 paragraph */
|
|
LOGI("Save array is empty");
|
|
return false;
|
|
}
|
|
|
|
*pSrcBuf += 2;
|
|
*pSrcLen -= 2;
|
|
|
|
/* locate and move past the SaveArray */
|
|
pChunk->saveArray = *pSrcBuf;
|
|
|
|
*pSrcBuf += pChunk->saveArrayCount * kSaveArrayEntryLen;
|
|
*pSrcLen -= pChunk->saveArrayCount * kSaveArrayEntryLen;
|
|
if (*pSrcLen <= 0) {
|
|
LOGI("SaveArray exceeds file length (count=%d len now %ld)",
|
|
pChunk->saveArrayCount, *pSrcLen);
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Scan the "save array" to find the highest-numbered ruler. This tells
|
|
* us how many rulers there are.
|
|
*/
|
|
pChunk->numRulers = GetNumRulers(pChunk->saveArray, pChunk->saveArrayCount);
|
|
if (*pSrcLen < pChunk->numRulers * kRulerEntryLen) {
|
|
LOGI("Not enough room for rulers (rem=%ld, needed=%ld)",
|
|
*pSrcLen, pChunk->numRulers * kRulerEntryLen);
|
|
return false;
|
|
}
|
|
LOGI("+++ found %d rulers", pChunk->numRulers);
|
|
|
|
pChunk->rulers = *pSrcBuf;
|
|
*pSrcBuf += pChunk->numRulers * kRulerEntryLen;
|
|
*pSrcLen -= pChunk->numRulers * kRulerEntryLen;
|
|
|
|
/*
|
|
* Now we're at the docTextBlocks section.
|
|
*/
|
|
pChunk->textBlocks = *pSrcBuf;
|
|
pChunk->numTextBlocks = GetNumTextBlocks(pChunk->saveArray,
|
|
pChunk->saveArrayCount);
|
|
if (!SkipTextBlocks(pSrcBuf, pSrcLen, pChunk->numTextBlocks))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Output a single chunk. We do this by walking down the saveArray.
|
|
*/
|
|
void ReformatAWGS_WP::PrintChunk(const Chunk* pChunk)
|
|
{
|
|
const int kDefaultStatusBits = kAWGSJustifyLeft | kAWGSSingleSpace;
|
|
SaveArrayEntry sae;
|
|
const uint8_t* saveArray;
|
|
int saCount;
|
|
const uint8_t* blockPtr;
|
|
long blockLen;
|
|
const uint8_t* pRuler;
|
|
uint16_t rulerStatusBits;
|
|
|
|
saveArray = pChunk->saveArray;
|
|
saCount = pChunk->saveArrayCount;
|
|
for ( ; saCount > 0; saCount--, saveArray += kSaveArrayEntryLen) {
|
|
UnpackSaveArrayEntry(saveArray, &sae);
|
|
|
|
/*
|
|
* Page-break paragraphs have no real data and an invalid value
|
|
* in the "rulerNum" field. So we just throw out a page break
|
|
* here and call it a day.
|
|
*/
|
|
if (sae.attributes == 0x0001) {
|
|
/* this is a page-break paragraph */
|
|
RTFSetColor(kColorMediumBlue);
|
|
RTFSetFont(kFontCourierNew);
|
|
RTFSetFontSize(10);
|
|
BufPrintf("<page-break>");
|
|
RTFSetColor(kColorNone);
|
|
RTFNewPara();
|
|
RTFPageBreak(); // only supported by Word
|
|
continue;
|
|
}
|
|
|
|
if (sae.rulerNum < pChunk->numRulers) {
|
|
pRuler = pChunk->rulers + sae.rulerNum * kRulerEntryLen;
|
|
rulerStatusBits = Get16LE(pRuler + 2);
|
|
} else {
|
|
LOGI("AWGS_WP GLITCH: invalid ruler index %d", sae.rulerNum);
|
|
rulerStatusBits = kDefaultStatusBits;
|
|
}
|
|
|
|
if (rulerStatusBits & kAWGSJustifyFull)
|
|
RTFParaJustify();
|
|
else if (rulerStatusBits & kAWGSJustifyRight)
|
|
RTFParaRight();
|
|
else if (rulerStatusBits & kAWGSJustifyCenter)
|
|
RTFParaCenter();
|
|
else if (rulerStatusBits & kAWGSJustifyLeft)
|
|
RTFParaLeft();
|
|
RTFSetPara();
|
|
|
|
/*
|
|
* Find the text block that holds this paragraph. We could speed
|
|
* this up by creating an array of entries rather than walking the
|
|
* list every time. However, the block count tends to be fairly
|
|
* small (e.g. 7 for a 16K doc).
|
|
*/
|
|
blockPtr = FindTextBlock(pChunk, sae.textBlock);
|
|
if (blockPtr == NULL) {
|
|
LOGI("AWGS_WP bad textBlock %d", sae.textBlock);
|
|
return;
|
|
}
|
|
blockLen = (long) Get32LE(blockPtr);
|
|
if (blockLen <= 0 || blockLen > 65535) {
|
|
LOGI("AWGS_WP invalid block len %d", blockLen);
|
|
return;
|
|
}
|
|
blockPtr += 4;
|
|
|
|
if (sae.offset >= blockLen) {
|
|
LOGI("AWGS_WP bad offset: %d, blockLen=%ld",
|
|
sae.offset, blockLen);
|
|
return;
|
|
}
|
|
PrintParagraph(blockPtr + sae.offset, blockLen - sae.offset);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Print the contents of the text blocks.
|
|
*
|
|
* We're assured that the text block format is correct because we had to
|
|
* skip through them earlier. We don't really need to worry about running
|
|
* off the end due to a bad file.
|
|
*/
|
|
const uint8_t* ReformatAWGS_WP::FindTextBlock(const Chunk* pChunk, int blockNum)
|
|
{
|
|
const uint8_t* blockPtr = pChunk->textBlocks;
|
|
uint32_t blockSize;
|
|
|
|
while (blockNum--) {
|
|
blockSize = Get32LE(blockPtr);
|
|
blockPtr += 4 + blockSize;
|
|
}
|
|
|
|
return blockPtr;
|
|
}
|
|
|
|
|
|
/*
|
|
* Print one paragraph.
|
|
*
|
|
* Stop when we hit '\r'. We watch "maxLen" just to be safe.
|
|
*
|
|
* Returns the #of bytes consumed.
|
|
*/
|
|
int ReformatAWGS_WP::PrintParagraph(const uint8_t* ptr, long maxLen)
|
|
{
|
|
const uint8_t* startPtr = ptr;
|
|
uint16_t firstFont;
|
|
uint8_t firstStyle, firstSize, firstColor;
|
|
uint8_t uch;
|
|
|
|
if (maxLen < 7) {
|
|
LOGI("AWGS_WP GLITCH: not enough storage for para header (%d)",
|
|
maxLen);
|
|
return 1; // don't return zero or we might loop forever
|
|
}
|
|
/* pull out the paragraph header */
|
|
firstFont = Get16LE(ptr);
|
|
firstStyle = *(ptr + 2);
|
|
firstSize = *(ptr + 3);
|
|
firstColor = *(ptr + 4);
|
|
|
|
ptr += 7;
|
|
maxLen -= 7;
|
|
|
|
/*
|
|
* Set the font first; that defines the point size multiplier. Set
|
|
* the size second, because the point size determines whether we
|
|
* show underline. Set the style last.
|
|
*/
|
|
//LOGI("+++ Para start: font=0x%04x size=%d style=0x%02x",
|
|
// firstFont, firstSize, firstStyle);
|
|
RTFSetGSFont(firstFont);
|
|
RTFSetGSFontSize(firstSize);
|
|
RTFSetGSFontStyle(firstStyle);
|
|
|
|
while (maxLen > 0) {
|
|
uch = *ptr++;
|
|
maxLen--;
|
|
switch (uch) {
|
|
case 0x01: // font change - two bytes follow
|
|
if (maxLen >= 2) {
|
|
RTFSetGSFont(Get16LE(ptr));
|
|
ptr += 2;
|
|
maxLen -= 2;
|
|
}
|
|
break;
|
|
case 0x02: // text style change
|
|
if (maxLen >= 1) {
|
|
RTFSetGSFontStyle(*ptr++);
|
|
maxLen--;
|
|
}
|
|
break;
|
|
case 0x03: // text size change
|
|
if (maxLen >= 1) {
|
|
RTFSetGSFontSize(*ptr++);
|
|
maxLen--;
|
|
}
|
|
break;
|
|
case 0x04: // color change (0-15)
|
|
if (maxLen >= 1) {
|
|
ptr++;
|
|
maxLen--;
|
|
}
|
|
break;
|
|
case 0x05: // page token (replace with page #)
|
|
case 0x06: // date token (replace with date)
|
|
case 0x07: // time token (replace with time)
|
|
RTFSetColor(kColorMediumBlue);
|
|
if (uch == 0x05)
|
|
BufPrintf("<page>");
|
|
else if (uch == 0x06)
|
|
BufPrintf("<date>");
|
|
else
|
|
BufPrintf("<time>");
|
|
RTFSetColor(kColorNone);
|
|
break;
|
|
case '\r':
|
|
RTFNewPara();
|
|
return ptr - startPtr;
|
|
case '\t':
|
|
RTFTab();
|
|
break;
|
|
default:
|
|
RTFPrintUTF16Char(Charset::ConvertMacRomanToUTF16(uch));
|
|
break;
|
|
}
|
|
}
|
|
|
|
LOGI("AWGS_WP: WARNING: ran out of data before hitting '\r'");
|
|
return ptr - startPtr;
|
|
}
|
|
|
|
|
|
/*
|
|
* Run through the SaveArray and find the highest-numbered ruler index.
|
|
*/
|
|
uint16_t ReformatAWGS_WP::GetNumRulers(const uint8_t* pSaveArray,
|
|
uint16_t saveArrayCount)
|
|
{
|
|
SaveArrayEntry sa;
|
|
int maxRuler = -1;
|
|
|
|
while (saveArrayCount--) {
|
|
UnpackSaveArrayEntry(pSaveArray, &sa);
|
|
|
|
/*
|
|
* Ignore the record if sa.attributes == 1 (page break).
|
|
*/
|
|
if (sa.attributes == 0 && sa.rulerNum > maxRuler)
|
|
maxRuler = sa.rulerNum;
|
|
|
|
pSaveArray += kSaveArrayEntryLen;
|
|
}
|
|
|
|
/* there must be at least one paragraph, so this must hold */
|
|
assert(maxRuler >= 0);
|
|
|
|
return (uint16_t) (maxRuler+1);
|
|
}
|
|
|
|
/*
|
|
* Run through the SaveArray and find the highest-numbered text block
|
|
* index.
|
|
*
|
|
* These are stored linearly, so we just need to look at the last entry.
|
|
*/
|
|
uint16_t ReformatAWGS_WP::GetNumTextBlocks(const uint8_t* pSaveArray,
|
|
uint16_t saveArrayCount)
|
|
{
|
|
SaveArrayEntry sa;
|
|
uint16_t maxTextBlock;
|
|
|
|
assert(saveArrayCount > 0);
|
|
UnpackSaveArrayEntry(pSaveArray + (saveArrayCount-1) * kSaveArrayEntryLen,
|
|
&sa);
|
|
maxTextBlock = sa.textBlock;
|
|
|
|
#ifdef _DEBUG
|
|
int maxPara = -1;
|
|
|
|
while (saveArrayCount--) {
|
|
UnpackSaveArrayEntry(pSaveArray, &sa);
|
|
|
|
/*
|
|
* Ignore the record if sa.attributes == 1 (page break).
|
|
*/
|
|
if (sa.attributes == 0 && sa.textBlock > maxPara)
|
|
maxPara = sa.textBlock;
|
|
|
|
pSaveArray += kSaveArrayEntryLen;
|
|
}
|
|
/* always at least one paragraph */
|
|
assert(maxPara >= 0);
|
|
|
|
/* verify our result */
|
|
if (maxPara != maxTextBlock) {
|
|
LOGI("Max para mismatch (%d vs %d)", maxPara, maxTextBlock);
|
|
assert(false);
|
|
}
|
|
#endif
|
|
|
|
return (uint16_t) (maxTextBlock+1);
|
|
}
|
|
|
|
/*
|
|
* Unpack a SaveArray entry.
|
|
*/
|
|
void ReformatAWGS_WP::UnpackSaveArrayEntry(const uint8_t* pSaveArray,
|
|
SaveArrayEntry* pSAE)
|
|
{
|
|
pSAE->textBlock = Get16LE(pSaveArray + 0);
|
|
pSAE->offset = Get16LE(pSaveArray + 2);
|
|
pSAE->attributes = Get16LE(pSaveArray + 4);
|
|
pSAE->rulerNum = Get16LE(pSaveArray + 6);
|
|
pSAE->pixelHeight = Get16LE(pSaveArray + 8);
|
|
pSAE->numLines = Get16LE(pSaveArray + 10);
|
|
|
|
//LOGI("SA: textBlock=%d off=%d attr=%d ruler=%d lines=%d",
|
|
// pSAE->textBlock, pSAE->offset, pSAE->attributes, pSAE->rulerNum,
|
|
// pSAE->numLines);
|
|
}
|
|
|
|
/*
|
|
* Skip past a series of text blocks.
|
|
*
|
|
* Returns "true" on success, "false" on failure.
|
|
*/
|
|
bool ReformatAWGS_WP::SkipTextBlocks(const uint8_t** pSrcBuf,
|
|
long* pSrcLen, int textBlockCount)
|
|
{
|
|
uint32_t blockSize;
|
|
const uint8_t* srcBuf = *pSrcBuf;
|
|
long srcLen = *pSrcLen;
|
|
|
|
LOGI("Scanning %d text blocks", textBlockCount);
|
|
|
|
if (srcLen < 4)
|
|
return false;
|
|
|
|
while (textBlockCount--) {
|
|
blockSize = Get32LE(srcBuf);
|
|
srcBuf += 4;
|
|
srcLen -= 4;
|
|
|
|
LOGI("+++ blockSize=%lu srcLen=%ld", blockSize, srcLen);
|
|
if ((long) blockSize < kMinTextBlockSize) {
|
|
LOGI("Block size too small (%d - %d)",
|
|
blockSize, Get16LE(srcBuf));
|
|
return false;
|
|
}
|
|
if ((long) blockSize > srcLen) {
|
|
LOGI("Ran off the end in doc text blocks");
|
|
return false;
|
|
}
|
|
if (Get16LE(srcBuf) != blockSize || Get16LE(srcBuf+2) != blockSize) {
|
|
LOGI("AWGS WARNING: inconsistent block size values (%ld vs %d/%d)",
|
|
blockSize, Get16LE(srcBuf), Get16LE(srcBuf+2));
|
|
/* okay to ignore it, so long as everything else works out */
|
|
}
|
|
srcBuf += blockSize;
|
|
srcLen -= blockSize;
|
|
}
|
|
|
|
*pSrcBuf = srcBuf;
|
|
*pSrcLen = srcLen;
|
|
|
|
return true;
|
|
}
|