ciderpress/reformat/CPMFiles.cpp

146 lines
4.9 KiB
C++
Raw Normal View History

2007-03-27 17:47:10 +00:00
/*
* CiderPress
* Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved.
* See the file LICENSE for distribution terms.
*/
/*
* Special handling for files on CP/M disks.
*/
#include "StdAfx.h"
#include "CPMFiles.h"
const int kCtrlZ = 0x1a; // Ctrl-Z: end-of-file indicator in CP/M text files
/*
* Table determining what's a binary character and what isn't. This is
* roughly the same table as is used in GenericArchive.cpp. The code will
* additionally allow Ctrl-Z, and will allow occurrences of 0x00 that appear
* after the Ctrl-Z.
*
* Even if we don't allow high ASCII, we must still allow 0xe5 if it occurs
* after a Ctrl-Z.
*
* After looking at the generic ISO-latin-1 table, Paul Schlyter writes:
* -----
* Remove 88, 89, 8A, 8C and 8D as well from this table. The CP/M version of
* WordStar uses the hi bit of any character for its own uses - for instance
* 0D 0A is a "soft end-of-line" which WordStar can move around, while 8D 8A is
* a "hard end-of-line" which WordStar does not move around. Other characters
* can have this bit used to signal highlighted text. On a lot of CP/M systems
* the hi bit is ignored when displaying characters (= sending the characters to
* the standard console output), thus one can often "type" a WordStar file and
* have it displayed as readable text.
* -----
*/
static const char gIsBinary[256] = {
/* indexed by byte value: nonzero = treated as binary, 0 = acceptable in text */
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, /* ^@-^O: BS, TAB, LF, FF, CR are text */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* ^P-^_: all binary; Ctrl-Z (0x1a) is special-cased by Examine() before this table is consulted */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ' ' - / */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - ? */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* @ - O */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* P - _ */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ` - o */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* p - DEL: only DEL (0x7f) is binary */
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, /* 0x80: 0x88-0x8a, 0x8c, 0x8d are text (same pattern as ^@-^O with the hi bit set) */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90: all binary */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0: includes 0xe5, which is therefore accepted */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xf0 */
};
/*
 * Decide whether or not this is a CP/M text file.
 *
 * Only sources whose format is kSourceFormatCPM are considered; anything
 * else is reported as kApplicNot.  For CP/M sources the verdict starts at
 * kApplicProbablyNot and is raised to kApplicProbably only when the whole
 * buffer scans clean (an empty buffer counts as clean).  ".com" files are
 * never scanned, so they keep kApplicProbablyNot.
 *
 * End-of-file is at the first Ctrl-Z, but we can't stop there because it
 * could be a binary file with a leading Ctrl-Z (e.g. PNG).  Instead the
 * entire buffer is checked: Ctrl-Z is always accepted, 0x00 is accepted
 * once a Ctrl-Z has been seen, and every other byte must be marked as
 * text in gIsBinary[].
 *
 * The verdict is reported through pHolder->SetApplic(); nothing is
 * returned.
 */
void
ReformatCPMText::Examine(ReformatHolder* pHolder)
{
    ReformatHolder::ReformatApplies applies = ReformatHolder::kApplicNot;
    const unsigned char* const bufStart =
        pHolder->GetSourceBuf(ReformatHolder::kPartData);
    const long fileLen = pHolder->GetSourceLen(ReformatHolder::kPartData);
    const char* nameExt = pHolder->GetNameExt();
    bool foundCtrlZ = false;
    long offset = 0;

    /* only show this on CP/M disks */
    if (pHolder->GetSourceFormat() != ReformatHolder::kSourceFormatCPM)
        goto done;

    applies = ReformatHolder::kApplicProbablyNot;

    /*
     * Allow, but don't default to, text conversion of ".com" files.
     *
     * NOTE(review): GetNameExt()'s contract isn't visible from here, so a
     * null extension is tolerated rather than handed to strcasecmp().
     */
    if (nameExt && strcasecmp(nameExt, ".com") == 0) {
        WMSG0("Not reformatting '.com' file as text\n");
        goto done;
    }

    /*
     * Scan file, looking for illegal chars.
     *
     * Thought for the day: could also require that Ctrl-Z appear in the
     * last 128 bytes of the file.  May want to count all high-ASCII values
     * as illegal but allow a certain percentage of "illegal" characters in
     * the mix.
     *
     * The loop is bounded by "offset < fileLen", so a zero or negative
     * length never causes the buffer to be read.
     */
    for (offset = 0; offset < fileLen; offset++) {
        const unsigned char ch = bufStart[offset];

        if (ch == kCtrlZ) {
            foundCtrlZ = true;
        } else if (foundCtrlZ && ch == 0x00) {
            /* do nothing -- 0x00 is okay if it comes after Ctrl-Z */
        } else if (gIsBinary[ch]) {
            /*
             * "%x" consumes an unsigned int, so narrow the offset
             * explicitly instead of passing a wider integer through the
             * variable argument list (assumes WMSG2 is printf-style).
             */
            WMSG2("CP/M found binary char 0x%02x at offset 0x%04x\n",
                ch, static_cast<unsigned int>(offset));
            break;
        }
    }

    /* reaching the end without a break means every byte was acceptable */
    if (offset == fileLen)
        applies = ReformatHolder::kApplicProbably;

done:
    pHolder->SetApplic(ReformatHolder::kReformatCPMText, applies,
        ReformatHolder::kApplicNot, ReformatHolder::kApplicNot);
}
/*
 * Convert EOL markers.
 *
 * What sets "CP/M text" apart from the other formats is that the input is
 * cut off at the first occurrence of Ctrl-Z: only the bytes ahead of it are
 * handed to ConvertEOL(), after which SetResultBuffer(pOutput) is called.
 *
 * CP/M text files should generally be in CRLF format already, so this will
 * go quickly.
 *
 * Returns 0 on success, or -1 if the selected part has a length of zero.
 */
int
ReformatCPMText::Process(const ReformatHolder* pHolder,
    ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part,
    ReformatOutput* pOutput)
{
    const unsigned char* const dataStart = pHolder->GetSourceBuf(part);
    long textLen = pHolder->GetSourceLen(part);

    fUseRTF = false;

    if (textLen == 0)
        return -1;

    /*
     * Walk forward to the first Ctrl-Z, if there is one.  Only Ctrl-Z ends
     * the text; an alternative test for '\0' is left disabled.
     */
    long eofOffset = 0;
    while (eofOffset < textLen && dataStart[eofOffset] != kCtrlZ)
        eofOffset++;

    /* shorten the text only if a Ctrl-Z was actually found */
    if (eofOffset < textLen)
        textLen = eofOffset;

    ConvertEOL(dataStart, textLen, true);
    SetResultBuffer(pOutput);
    return 0;
}