/* * CiderPress * Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved. * See the file LICENSE for distribution terms. */ /* * Special handling for files on CP/M disks. */ #include "StdAfx.h" #include "CPMFiles.h" const int kCtrlZ = 0x1a; // end-of-file indicator /* * Table determining what's a binary character and what isn't. This is * roughly the same table as is used in GenericArchive.cpp. The code will * additionally allow Ctrl-Z, and will allow occurrences of 0x00 that appear * after the Ctrl-Z. * * Even if we don't allow high ASCII, we must still allow 0xe5 if it occurs * after a Ctrl-Z. * * After looking at the generic ISO-latin-1 table, Paul Schlyter writes: * ----- * Remove 88, 89, 8A, 8C and 8D as well from this table. The CP/M version of * Wordstar uses the hi bit of any character for its own uses - for instance * 0D 0A is a "soft end-of-line" which Wordstar can move around, while 8D 8A is * a "hard end-of-line" which WordStar does not move around. Other characters * can have this bit used to signal hilighted text. On a lot of CP/M systems * the hi bit is ignored when displaying characters (= sending the characters to * the standard console output), thus one can often "type" a WordStar file and * have it displayed as readable text. * ----- */ static const char gIsBinary[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, /* ^@-^O */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* ^P-^_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* - / */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - ? */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* @ - O */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* P - _ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* ` - o */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* p - DEL */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xf0 */ }; /* * Decide whether or not this is a CP/M text file. * * End-of-file is at the first Ctrl-Z, but we can't stop there because it * could be a binary file with a leading Ctrl-Z (e.g. PNG). */ void ReformatCPMText::Examine(ReformatHolder* pHolder) { ReformatHolder::ReformatApplies applies = ReformatHolder::kApplicNot; const uint8_t* ptr = pHolder->GetSourceBuf(ReformatHolder::kPartData); long fileLen = pHolder->GetSourceLen(ReformatHolder::kPartData); const char* nameExt = pHolder->GetNameExt(); bool foundCtrlZ = false; /* only show this on CP/M disks */ if (pHolder->GetSourceFormat() != ReformatHolder::kSourceFormatCPM) goto done; applies = ReformatHolder::kApplicProbablyNot; /* allow, but don't default to, text conversion of ".com" files */ if (stricmp(nameExt, ".com") == 0) { LOGI("Not reformatting '.com' file as text"); goto done; } /* * Scan file, looking for illegal chars. * * Thought for the day: could also require that Ctrl-Z appear in the * last 128 bytes of the file. May want to count all high-ASCII values * as illegal but allow a certain percentage of "illegal" characters in * the mix. */ while (fileLen--) { if (*ptr == kCtrlZ) { foundCtrlZ = true; } else if (foundCtrlZ && *ptr == 0x00) { /* do nothing -- 0x00 is okay if it comes after Ctrl-Z */ } else { if (gIsBinary[*ptr]) { LOGI("CP/M found binary char 0x%02x at offset 0x%04x", *ptr, ptr - pHolder->GetSourceBuf(ReformatHolder::kPartData)); break; } } ptr++; } if (fileLen == -1) applies = ReformatHolder::kApplicProbably; done: pHolder->SetApplic(ReformatHolder::kReformatCPMText, applies, ReformatHolder::kApplicNot, ReformatHolder::kApplicNot); } /* * Convert EOL markers. * * The primary difference between "CP/M text" and other formats is that we * stop on the first occurrence of Ctrl-Z. * * Generally speaking, CP/M text files should already be in CRLF format, so * this will go quickly. */ int ReformatCPMText::Process(const ReformatHolder* pHolder, ReformatHolder::ReformatID id, ReformatHolder::ReformatPart part, ReformatOutput* pOutput) { const uint8_t* srcBuf = pHolder->GetSourceBuf(part); long srcLen = pHolder->GetSourceLen(part); fUseRTF = false; if (pHolder->GetSourceLen(part) == 0) return -1; for (long ll = 0; ll < srcLen; ll++) { if (*srcBuf == kCtrlZ /*|| *srcBuf == '\0'*/) { srcLen = ll; break; } srcBuf++; } ConvertEOL(pHolder->GetSourceBuf(part), srcLen, true); SetResultBuffer(pOutput); return 0; }