2014-11-04 00:26:53 +00:00
|
|
|
/*
|
|
|
|
* CiderPress
|
|
|
|
* Copyright (C) 2009 by CiderPress authors. All Rights Reserved.
|
|
|
|
* Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved.
|
|
|
|
* See the file LICENSE for distribution terms.
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* Convert 8-bit word processor files.
|
|
|
|
*
|
|
|
|
* Most formats convert reasonably well with "Converted Text", but this
|
|
|
|
* allows the files to be handled more transparently (e.g. Magic Window
|
|
|
|
* "formatted files", which can be mistaken for code).
|
|
|
|
*/
|
|
|
|
#include "StdAfx.h"
|
|
|
|
#include "Text8.h"
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ===========================================================================
|
|
|
|
* Magic Window / Magic Window II
|
|
|
|
* ===========================================================================
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Magic Window and Magic Window II appear to use the same format for their
|
|
|
|
* "formatted files". The files are of type 'B', with a valid address field,
|
|
|
|
* and what looks like junk in the length field. The files have a 256-byte
|
|
|
|
* header that seems to hold some sort of title string as well as some
|
|
|
|
* binary data whose purpose I have not identified.
|
|
|
|
*
|
|
|
|
* The data from offset 256 on is entirely mixed-case high-ASCII text. It
|
|
|
|
* may contain printer-specific escape codes for bold, italic, etc.
|
|
|
|
*
|
|
|
|
* A ".MW" filename suffix is enforced by the program.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Decide whether or not we want to handle this file.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ReformatMagicWindow::Examine(ReformatHolder* pHolder)
|
|
|
|
{
|
|
|
|
if (pHolder->GetFileType() == kTypeBIN) {
|
|
|
|
bool isMW = ReformatMagicWindow::IsFormatted(pHolder);
|
2014-11-19 01:10:23 +00:00
|
|
|
bool isDotMW = stricmp(pHolder->GetNameExt(), ".MW") == 0;
|
2014-11-04 00:26:53 +00:00
|
|
|
|
|
|
|
if (isMW && isDotMW) {
|
|
|
|
/* gotta be */
|
|
|
|
pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
|
|
|
|
ReformatHolder::kApplicYes,
|
|
|
|
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
|
|
|
|
} else if (isDotMW) {
|
|
|
|
/* right type and name; maybe our test is broken? */
|
|
|
|
pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
|
|
|
|
ReformatHolder::kApplicProbably,
|
|
|
|
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
|
|
|
|
} else if (isMW) {
|
|
|
|
/* not likely, but offer it as non-default option */
|
|
|
|
pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
|
|
|
|
ReformatHolder::kApplicProbablyNot,
|
|
|
|
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
|
|
|
|
} else {
|
|
|
|
/* not one of ours */
|
|
|
|
pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
|
|
|
|
ReformatHolder::kApplicNot,
|
|
|
|
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* "unformatted" text even if ".MW"; nothing special required */
|
|
|
|
pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
|
|
|
|
ReformatHolder::kApplicNot,
|
|
|
|
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Figure out if this is a Magic Window "formatted" file.
|
|
|
|
*
|
|
|
|
* I don't know much about the format, so this is based on the similarities
|
|
|
|
* observed between half a dozen documents from different sources.
|
|
|
|
*/
|
|
|
|
/*static*/ bool
|
|
|
|
ReformatMagicWindow::IsFormatted(const ReformatHolder* pHolder)
|
|
|
|
{
|
2014-11-21 02:10:18 +00:00
|
|
|
const uint8_t* ptr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
|
2014-11-04 00:26:53 +00:00
|
|
|
long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);
|
|
|
|
int i, count00, count20;
|
|
|
|
|
|
|
|
|
|
|
|
/* want 256-byte header, plus a few bytes to check text */
|
|
|
|
if (srcLen < kHeaderLen+8)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First byte always seems to be 0x8d.
|
|
|
|
*/
|
|
|
|
if (ptr[0x00] != 0x8d)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 0x58 - 0xa0 is mostly filled with 0x00 (for Magic Window) or 0x20
|
|
|
|
* (for Magic Window II). Both seem to have space for the title in the
|
|
|
|
* preceeding part, but it's high-ASCII for MW and low-ASCII for MW2.
|
|
|
|
*
|
|
|
|
* Expect 50 out of 72 to match. If this is actually just uninitialized
|
|
|
|
* data then this test will be bogus.
|
|
|
|
*/
|
|
|
|
count00 = count20 = 0;
|
|
|
|
for (i = 0x58; i < 0xa0; i++) {
|
|
|
|
if (ptr[i] == 0x00)
|
|
|
|
count00++;
|
|
|
|
if (ptr[i] == 0x20)
|
|
|
|
count20++;
|
|
|
|
}
|
|
|
|
if (count00 < 50 && count20 < 50)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 0xa2 has some recognizeable bytes; sample values:
|
|
|
|
* MW 42 06 36 50 08 40
|
|
|
|
* MW2 42 06 36 55 08 40
|
|
|
|
* MW2 42 04 3a 50 00 50
|
|
|
|
* Not really sure what to make of these. If we can bracket these
|
|
|
|
* values we might have something.
|
|
|
|
*/
|
|
|
|
if (ptr[0xa2] != 0x42 ||
|
|
|
|
(ptr[0xa3] < 2 && ptr[0xa3] > 10) ||
|
|
|
|
(ptr[0xa4] < 0x30 && ptr[0xa4] > 0x40))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make sure the rest of the file is 100% high ASCII.
|
|
|
|
*/
|
|
|
|
ptr += kHeaderLen;
|
|
|
|
srcLen -= kHeaderLen;
|
|
|
|
while (srcLen--) {
|
|
|
|
if ((*ptr & 0x80) == 0)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Skip the header and text-convert the rest.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
ReformatMagicWindow::Process(const ReformatHolder* pHolder,
|
|
|
|
ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
|
|
|
|
ReformatOutput* pOutput)
|
|
|
|
{
|
2014-11-21 02:10:18 +00:00
|
|
|
const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
|
2014-11-04 00:26:53 +00:00
|
|
|
long srcLen = pHolder->GetSourceLen(part);
|
|
|
|
long length = srcLen;
|
|
|
|
int retval = -1;
|
|
|
|
|
|
|
|
fUseRTF = false;
|
|
|
|
|
|
|
|
RTFBegin();
|
|
|
|
|
|
|
|
if (srcLen <= kHeaderLen)
|
|
|
|
goto bail;
|
|
|
|
|
|
|
|
ConvertEOL(srcPtr + kHeaderLen, srcLen - kHeaderLen, true);
|
|
|
|
|
|
|
|
//done:
|
|
|
|
RTFEnd();
|
|
|
|
|
|
|
|
SetResultBuffer(pOutput);
|
|
|
|
retval = 0;
|
|
|
|
|
|
|
|
bail:
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ===========================================================================
|
|
|
|
* Gutenberg Word Processor
|
|
|
|
* ===========================================================================
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Decide whether or not we want to handle this file.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ReformatGutenberg::Examine(ReformatHolder* pHolder)
|
|
|
|
{
|
|
|
|
if ((pHolder->GetFileType() == kTypeTXT) &&
|
|
|
|
(pHolder->GetSourceFormat() == ReformatHolder::kSourceFormatGutenberg)) {
|
|
|
|
|
|
|
|
pHolder->SetApplic(ReformatHolder::kReformatGutenberg,
|
|
|
|
ReformatHolder::kApplicYes,
|
|
|
|
ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert the text.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
ReformatGutenberg::Process(const ReformatHolder* pHolder,
|
|
|
|
ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
|
|
|
|
ReformatOutput* pOutput)
|
|
|
|
{
|
2014-11-21 02:10:18 +00:00
|
|
|
const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
|
2014-11-04 00:26:53 +00:00
|
|
|
long srcLen = pHolder->GetSourceLen(part);
|
|
|
|
long length = srcLen;
|
|
|
|
int retval = -1;
|
|
|
|
|
|
|
|
fUseRTF = false;
|
|
|
|
|
|
|
|
RTFBegin();
|
|
|
|
|
|
|
|
ConvertEOL(srcPtr, srcLen, true, true);
|
|
|
|
|
|
|
|
RTFEnd();
|
|
|
|
|
|
|
|
SetResultBuffer(pOutput);
|
|
|
|
retval = 0;
|
|
|
|
|
|
|
|
return retval;
|
|
|
|
}
|