ciderpress/reformat/Text8.cpp

220 lines
6.7 KiB
C++

/*
* CiderPress
* Copyright (C) 2009 by CiderPress authors. All Rights Reserved.
* Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved.
* See the file LICENSE for distribution terms.
*/
/*
* Convert 8-bit word processor files.
*
* Most formats convert reasonably well with "Converted Text", but this
* allows the files to be handled more transparently (e.g. Magic Window
* "formatted files", which can be mistaken for code.
*/
#include "StdAfx.h"
#include "Text8.h"
/*
* ===========================================================================
* Magic Window / Magic Window II
* ===========================================================================
*/
/*
* Magic Window and Magic Window II appear to use the same format for their
* "formatted files". The files are of type 'B', with a valid address field,
* and what looks like junk in the length field. The files have a 256-byte
* header that seems to hold some sort of title string as well as some
* binary goodies that I'm not sure what they are.
*
* The data from offset 256 on is entirely mixed-case high-ASCII text. It
* may contain printer-specific escape codes for bold, italic, etc.
*
* A ".MW" filename suffix is enforced by the program.
*/
/*
* Decide whether or not we want to handle this file.
*/
void ReformatMagicWindow::Examine(ReformatHolder* pHolder)
{
if (pHolder->GetFileType() == kTypeBIN) {
bool isMW = ReformatMagicWindow::IsFormatted(pHolder);
bool isDotMW = stricmp(pHolder->GetNameExt(), ".MW") == 0;
if (isMW && isDotMW) {
/* gotta be */
pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
ReformatHolder::kApplicYes,
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
} else if (isDotMW) {
/* right type and name; maybe our test is broken? */
pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
ReformatHolder::kApplicProbably,
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
} else if (isMW) {
/* not likely, but offer it as non-default option */
pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
ReformatHolder::kApplicProbablyNot,
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
} else {
/* not one of ours */
pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
ReformatHolder::kApplicNot,
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}
} else {
/* "unformatted" text even if ".MW"; nothing special required */
pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
ReformatHolder::kApplicNot,
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}
}
/*
* Figure out if this is a Magic Window "formatted" file.
*
* I don't know much about the format, so this is based on the similarities
* observed between half a dozen documents from different sources.
*/
/*static*/ bool ReformatMagicWindow::IsFormatted(const ReformatHolder* pHolder)
{
const uint8_t* ptr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);
int i, count00, count20;
/* want 256-byte header, plus a few bytes to check text */
if (srcLen < kHeaderLen+8)
return false;
/*
* First byte always seems to be 0x8d.
*/
if (ptr[0x00] != 0x8d)
return false;
/*
* 0x58 - 0xa0 is mostly filled with 0x00 (for Magic Window) or 0x20
* (for Magic Window II). Both seem to have space for the title in the
* preceeding part, but it's high-ASCII for MW and low-ASCII for MW2.
*
* Expect 50 out of 72 to match. If this is actually just uninitialized
* data then this test will be bogus.
*/
count00 = count20 = 0;
for (i = 0x58; i < 0xa0; i++) {
if (ptr[i] == 0x00)
count00++;
if (ptr[i] == 0x20)
count20++;
}
if (count00 < 50 && count20 < 50)
return false;
/*
* 0xa2 has some recognizeable bytes; sample values:
* MW 42 06 36 50 08 40
* MW2 42 06 36 55 08 40
* MW2 42 04 3a 50 00 50
* Not really sure what to make of these. If we can bracket these
* values we might have something.
*/
if (ptr[0xa2] != 0x42 ||
(ptr[0xa3] < 2 && ptr[0xa3] > 10) ||
(ptr[0xa4] < 0x30 && ptr[0xa4] > 0x40))
return false;
/*
* Make sure the rest of the file is 100% high ASCII.
*/
ptr += kHeaderLen;
srcLen -= kHeaderLen;
while (srcLen--) {
if ((*ptr & 0x80) == 0)
return false;
}
return true;
}
/*
* Skip the header and text-convert the rest.
*/
int ReformatMagicWindow::Process(const ReformatHolder* pHolder,
ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
ReformatOutput* pOutput)
{
const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
long srcLen = pHolder->GetSourceLen(part);
long length = srcLen;
int retval = -1;
fUseRTF = false;
RTFBegin();
if (srcLen <= kHeaderLen)
goto bail;
ConvertEOL(srcPtr + kHeaderLen, srcLen - kHeaderLen, true);
//done:
RTFEnd();
SetResultBuffer(pOutput);
retval = 0;
bail:
return retval;
}
/*
* ===========================================================================
* Gutenberg Word Processor
* ===========================================================================
*/
/*
* Decide whether or not we want to handle this file.
*/
void ReformatGutenberg::Examine(ReformatHolder* pHolder)
{
if ((pHolder->GetFileType() == kTypeTXT) &&
(pHolder->GetSourceFormat() == ReformatHolder::kSourceFormatGutenberg)) {
pHolder->SetApplic(ReformatHolder::kReformatGutenberg,
ReformatHolder::kApplicYes,
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}
}
/*
* Convert the text.
*/
int ReformatGutenberg::Process(const ReformatHolder* pHolder,
ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
ReformatOutput* pOutput)
{
const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
long srcLen = pHolder->GetSourceLen(part);
long length = srcLen;
int retval = -1;
fUseRTF = false;
RTFBegin();
ConvertEOL(srcPtr, srcLen, true, true);
RTFEnd();
SetResultBuffer(pOutput);
retval = 0;
return retval;
}