ciderpress/reformat/Text8.cpp
Andy McFadden 8f61f84585 Use types with explicit sizes
Much of what the "reformat" code does involves processing data that is
8, 16, or 32 bits.  We want to use size-specific types from stdint.h
(e.g. uint16_t) rather than "unsigned short".

This was a quick pass to replace the various "unsigned" declarations.
More can be done here and elsewhere.
2014-11-20 18:10:18 -08:00

226 lines
6.7 KiB
C++

/*
* CiderPress
* Copyright (C) 2009 by CiderPress authors. All Rights Reserved.
* Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved.
* See the file LICENSE for distribution terms.
*/
/*
* Convert 8-bit word processor files.
*
* Most formats convert reasonably well with "Converted Text", but this
* allows the files to be handled more transparently (e.g. Magic Window
* "formatted files", which can be mistaken for code).
*/
#include "StdAfx.h"
#include "Text8.h"
/*
* ===========================================================================
* Magic Window / Magic Window II
* ===========================================================================
*/
/*
* Magic Window and Magic Window II appear to use the same format for their
* "formatted files". The files are of type 'B', with a valid address field,
* and what looks like junk in the length field. The files have a 256-byte
* header that seems to hold some sort of title string, along with some
* binary data whose purpose I have not worked out.
*
* The data from offset 256 on is entirely mixed-case high-ASCII text. It
* may contain printer-specific escape codes for bold, italic, etc.
*
* A ".MW" filename suffix is enforced by the program.
*/
/*
 * Rate how applicable the Magic Window reformatter is to this file.
 *
 * Only the data-fork rating ever varies; the other two ratings passed to
 * SetApplic() are always kApplicNot.  The data-fork rating is:
 *
 *   not a BIN file                      -> kApplicNot
 *   BIN, ".MW" suffix, passes test      -> kApplicYes
 *   BIN, ".MW" suffix, fails test       -> kApplicProbably
 *   BIN, other suffix, passes test      -> kApplicProbablyNot
 *   BIN, other suffix, fails test       -> kApplicNot
 *
 * "Test" is IsFormatted(); the suffix comparison is case-insensitive.
 */
void
ReformatMagicWindow::Examine(ReformatHolder* pHolder)
{
    if (pHolder->GetFileType() != kTypeBIN) {
        /* "unformatted" text even if ".MW"; nothing special required */
        pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
            ReformatHolder::kApplicNot,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
        return;
    }

    const bool looksFormatted = ReformatMagicWindow::IsFormatted(pHolder);
    const bool hasMWSuffix = (stricmp(pHolder->GetNameExt(), ".MW") == 0);

    if (hasMWSuffix) {
        /*
         * Right type and name.  If the content test also passes it's
         * gotta be ours; if not, maybe our test is broken.
         */
        pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
            looksFormatted ? ReformatHolder::kApplicYes
                           : ReformatHolder::kApplicProbably,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
    } else {
        /*
         * Wrong name.  A content match is not likely to be real, but offer
         * it as a non-default option; otherwise it's not one of ours.
         */
        pHolder->SetApplic(ReformatHolder::kReformatMagicWindow,
            looksFormatted ? ReformatHolder::kApplicProbablyNot
                           : ReformatHolder::kApplicNot,
            ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
    }
}
/*
 * Figure out if this is a Magic Window "formatted" file.
 *
 * I don't know much about the format, so this is based on the similarities
 * observed between half a dozen documents from different sources.
 *
 * Examines the data fork (kPartData) of the file held by "pHolder".
 * Returns true only if every heuristic below passes:
 *   - the file is at least kHeaderLen+8 bytes long
 *   - byte 0x00 is 0x8d
 *   - at least 50 of the 72 bytes in 0x58-0x9f are 0x00, or at least 50
 *     of them are 0x20
 *   - byte 0xa2 is 0x42, byte 0xa3 is in [2,10], byte 0xa4 is in [0x30,0x40]
 *   - every byte after the header has its high bit set
 */
/*static*/ bool
ReformatMagicWindow::IsFormatted(const ReformatHolder* pHolder)
{
    const uint8_t* ptr = pHolder->GetSourceBuf(ReformatHolder::kPartData);
    long srcLen = pHolder->GetSourceLen(ReformatHolder::kPartData);

    /*
     * Want the whole header (documented as 256 bytes), plus a few bytes
     * to check text.  This also guarantees that the fixed header offsets
     * read below are in bounds -- assuming kHeaderLen is at least 0xa5;
     * its value is defined outside this function.
     */
    if (srcLen < kHeaderLen+8)
        return false;

    /*
     * First byte always seems to be 0x8d.
     */
    if (ptr[0x00] != 0x8d)
        return false;

    /*
     * 0x58 - 0xa0 is mostly filled with 0x00 (for Magic Window) or 0x20
     * (for Magic Window II).  Both seem to have space for the title in the
     * preceding part, but it's high-ASCII for MW and low-ASCII for MW2.
     *
     * Expect 50 out of 72 to match.  If this is actually just uninitialized
     * data then this test will be bogus.
     */
    int count00 = 0;
    int count20 = 0;
    for (int i = 0x58; i < 0xa0; i++) {
        if (ptr[i] == 0x00)
            count00++;
        else if (ptr[i] == 0x20)
            count20++;
    }
    if (count00 < 50 && count20 < 50)
        return false;

    /*
     * 0xa2 has some recognizable bytes; sample values:
     *   MW   42 06 36 50 08 40
     *   MW2  42 06 36 55 08 40
     *   MW2  42 04 3a 50 00 50
     * Not really sure what to make of these.  If we can bracket these
     * values we might have something.
     *
     * The brackets must be joined with "||": a value is out of range when
     * it is below the low bound OR above the high bound.  (Joining them
     * with "&&" yields a condition that can never be true, which silently
     * disables the check.)
     */
    if (ptr[0xa2] != 0x42 ||
        (ptr[0xa3] < 2 || ptr[0xa3] > 10) ||
        (ptr[0xa4] < 0x30 || ptr[0xa4] > 0x40))
    {
        return false;
    }

    /*
     * Make sure the rest of the file is 100% high ASCII.  The pointer has
     * to advance on every iteration, otherwise only the first byte after
     * the header is ever examined.
     */
    ptr += kHeaderLen;
    srcLen -= kHeaderLen;
    while (srcLen-- > 0) {
        if ((*ptr++ & 0x80) == 0)
            return false;
    }

    return true;
}
/*
 * Skip the header and text-convert the rest.
 *
 * Reads the requested part of the file from "pHolder", passes everything
 * past the first kHeaderLen bytes to ConvertEOL(), and hands the result to
 * "pOutput" via SetResultBuffer().  The "id" argument is not used.
 *
 * Returns 0 on success.  Returns -1, without setting a result buffer, if
 * the source is not longer than the header (i.e. there is no text).
 */
int
ReformatMagicWindow::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
    long srcLen = pHolder->GetSourceLen(part);

    /* NOTE(review): presumably selects plain-text output -- confirm */
    fUseRTF = false;

    /*
     * RTFBegin() is deliberately called before the length check so that the
     * sequence of calls on the failure path matches what callers have
     * always seen.
     */
    RTFBegin();

    if (srcLen <= kHeaderLen)
        return -1;

    /*
     * NOTE(review): the meaning of the trailing "true" is defined by
     * ConvertEOL(), which is declared elsewhere; given the high-ASCII body
     * it presumably requests high-bit stripping -- confirm.
     */
    ConvertEOL(srcPtr + kHeaderLen, srcLen - kHeaderLen, true);

    RTFEnd();
    SetResultBuffer(pOutput);
    return 0;
}
/*
* ===========================================================================
* Gutenberg Word Processor
* ===========================================================================
*/
/*
 * Decide whether or not we want to handle this file.
 *
 * The Gutenberg reformatter is rated kApplicYes for the data fork when the
 * file type is TXT and the holder reports the Gutenberg source format.  In
 * every other case this function makes no SetApplic() call at all, so the
 * holder keeps whatever rating it already had.
 */
void
ReformatGutenberg::Examine(ReformatHolder* pHolder)
{
    if (pHolder->GetFileType() != kTypeTXT)
        return;
    if (pHolder->GetSourceFormat() != ReformatHolder::kSourceFormatGutenberg)
        return;

    pHolder->SetApplic(ReformatHolder::kReformatGutenberg,
        ReformatHolder::kApplicYes,
        ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}
/*
 * Convert the text.
 *
 * Reads the requested part of the file from "pHolder", runs all of it
 * through ConvertEOL(), and hands the result to "pOutput" via
 * SetResultBuffer().  The "id" argument is not used.
 *
 * Always returns 0.
 */
int
ReformatGutenberg::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const uint8_t* srcPtr = pHolder->GetSourceBuf(part);
    long srcLen = pHolder->GetSourceLen(part);

    /* NOTE(review): presumably selects plain-text output -- confirm */
    fUseRTF = false;

    RTFBegin();

    /*
     * NOTE(review): the meaning of the two trailing "true" flags is defined
     * by ConvertEOL(), which is declared elsewhere -- confirm against its
     * declaration before changing them.
     */
    ConvertEOL(srcPtr, srcLen, true, true);

    RTFEnd();
    SetResultBuffer(pOutput);
    return 0;
}