ciderpress/app/BasicImport.cpp
Andy McFadden d8223dbcfd Relocate method comments
This moves method comments from the .cpp file to the .h file,
where users of the methods can find them.  This also makes it
possible for the IDE to show the comments when you mouse-hover over
the method name, though Visual Studio is a bit weak in this regard.

Also, added "override" keywords on overridden methods.  Reasonably
current versions of popular compilers seem to support this.

Also, don't have the return type on a separate line in the .cpp file.
The motivation for the practice -- quickly finding a method definition
with "^name" -- is less useful in C++ than C, and modern IDEs provide
more convenient ways to do the same thing.

Also, do some more conversion from unsigned types to uintXX_t.

This commit is primarily for the "app" directory.
2014-11-21 22:33:39 -08:00

660 lines
20 KiB
C++

/*
* CiderPress
* Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved.
* See the file LICENSE for distribution terms.
*/
/*
* Import BASIC programs stored in a text file.
*
* The current implementation is a bit lame. It just dumps text strings into
* a read-only edit buffer, instead of providing a nicer UI. The real trouble
* with this style of interface is that i18n is even more awkward.
*/
#include "StdAfx.h"
#include "../reformat/BASIC.h"
#include "BasicImport.h"
#include "HelpTopics.h"
/*
* ==========================================================================
* BASTokenLookup
* ==========================================================================
*/
void BASTokenLookup::Init(const char* tokenList, int numTokens, int tokenLen)
{
int i;
ASSERT(tokenList != NULL);
ASSERT(numTokens > 0);
ASSERT(tokenLen > 0);
delete[] fTokenPtr; // in case we're being re-initialized
delete[] fTokenLen;
fTokenPtr = new const char*[numTokens];
fTokenLen = new int[numTokens];
fNumTokens = numTokens;
for (i = 0; i < numTokens; i++) {
fTokenPtr[i] = tokenList;
fTokenLen[i] = strlen(tokenList);
tokenList += tokenLen;
}
}
int BASTokenLookup::Lookup(const char* str, int len, int* pFoundLen)
{
int longestIndex, longestLen;
int i;
longestIndex = longestLen = -1;
for (i = 0; i < fNumTokens; i++) {
if (fTokenLen[i] <= len && fTokenLen[i] > longestLen &&
strnicmp(str, fTokenPtr[i], fTokenLen[i]) == 0)
{
longestIndex = i;
longestLen = fTokenLen[i];
}
}
*pFoundLen = longestLen;
return longestIndex;
}
/*
* ==========================================================================
* ImportBASDialog
* ==========================================================================
*/
BEGIN_MESSAGE_MAP(ImportBASDialog, CDialog)
ON_COMMAND(IDHELP, OnHelp)
END_MESSAGE_MAP()
BOOL ImportBASDialog::OnInitDialog(void)
{
CDialog::OnInitDialog(); // base class init
PathName path(fFileName);
CString fileNameOnly(path.GetFileName());
CString ext(fileNameOnly.Right(4));
if (ext.CompareNoCase(L".txt") == 0) {
LOGI("removing extension from '%ls'", (LPCWSTR) fileNameOnly);
fileNameOnly = fileNameOnly.Left(fileNameOnly.GetLength() - 4);
}
CEdit* pEdit = (CEdit*) GetDlgItem(IDC_IMPORT_BAS_SAVEAS);
pEdit->SetWindowText(fileNameOnly);
pEdit->SetSel(0, -1);
pEdit->SetFocus();
/*
* Do the actual import. If it fails, disable the "save" button.
*/
if (!ImportBAS(fFileName)) {
CButton* pButton = (CButton*) GetDlgItem(IDOK);
pButton->EnableWindow(FALSE);
pEdit->EnableWindow(FALSE);
}
return FALSE; // keep our focus
}
static const char kFailed[] = "failed.\r\n\r\n";
static const char kSuccess[] = "success!\r\n\r\n";
bool ImportBASDialog::ImportBAS(const WCHAR* fileName)
{
FILE* fp = NULL;
ExpandBuffer msgs(1024);
long fileLen, outLen, count;
char* buf = NULL;
char* outBuf = NULL;
bool result = false;
msgs.Printf("Importing from '%ls'...", fileName);
fp = _wfopen(fileName, L"rb"); // EOL unknown, open as binary and deal
if (fp == NULL) {
msgs.Printf("%sUnable to open file.", kFailed);
goto bail;
}
/* determine file length, and verify that it looks okay */
fseek(fp, 0, SEEK_END);
fileLen = ftell(fp);
rewind(fp);
if (ferror(fp) || fileLen < 0) {
msgs.Printf("%sUnable to determine file length.", kFailed);
goto bail;
}
if (fileLen == 0) {
msgs.Printf("%sFile is empty.", kFailed);
goto bail;
}
if (fileLen >= 128*1024) {
msgs.Printf("%sFile is too large to be Applesoft.", kFailed);
goto bail;
}
buf = new char[fileLen];
if (buf == NULL) {
msgs.Printf("%sUnable to allocate memory.", kFailed);
goto bail;
}
/* read the entire thing into memory */
count = fread(buf, 1, fileLen, fp);
if (count != fileLen) {
msgs.Printf("%sCould only read %ld of %ld bytes.", kFailed,
count, fileLen);
goto bail;
}
/* process it */
if (!ConvertTextToBAS(buf, fileLen, &outBuf, &outLen, &msgs))
goto bail;
result = true;
SetOutput(outBuf, outLen);
bail:
if (fp != NULL)
fclose(fp);
delete[] buf;
/* copy our error messages out */
CEdit* pEdit = (CEdit*) GetDlgItem(IDC_IMPORT_BAS_RESULTS);
char* msgBuf = NULL;
long msgLen;
msgs.SeizeBuffer(&msgBuf, &msgLen);
CString msgStr(msgBuf);
pEdit->SetWindowText(msgStr);
delete[] msgBuf;
return result;
}
bool ImportBASDialog::ConvertTextToBAS(const char* buf, long fileLen,
char** pOutBuf, long* pOutLen, ExpandBuffer* pMsgs)
{
ExpandBuffer output(32768);
CString msg;
const char* lineStart;
const char* lineEnd;
long textRemaining;
int lineNum;
fBASLookup.Init(ReformatApplesoft::GetApplesoftTokens(),
ReformatApplesoft::kTokenCount, ReformatApplesoft::kTokenLen);
lineEnd = buf;
textRemaining = fileLen;
lineNum = 0;
while (textRemaining > 0) {
lineNum++;
lineStart = lineEnd;
lineEnd = FindEOL(lineStart, textRemaining);
if (!ProcessBASLine(lineStart, lineEnd - lineStart, &output,
/*ref*/ msg))
{
pMsgs->Printf("%sLine %d: %ls", kFailed, lineNum, (LPCWSTR) msg);
return false;
}
textRemaining -= lineEnd - lineStart;
}
/* output EOF marker */
output.Putc(0x00);
output.Putc(0x00);
/* grab the buffer */
char* outBuf;
long outLen;
output.SeizeBuffer(&outBuf, &outLen);
if (outLen >= 0xc000) {
pMsgs->Printf("%sOutput is too large to be valid", kFailed);
delete[] outBuf;
return false;
}
/* go back and fix up the "next line" pointers, assuming a $0801 start */
if (!FixBASLinePointers(outBuf, outLen, 0x0801)) {
pMsgs->Printf("%sFailed while fixing line pointers", kFailed);
delete[] outBuf;
return false;
}
*pOutBuf = outBuf;
*pOutLen = outLen;
pMsgs->Printf("%sProcessed %d lines", kSuccess, lineNum);
pMsgs->Printf("\r\nTokenized file is %d bytes long", *pOutLen);
return true;
}
/*
From an Applesoft disassembly by Bob Sander-Cederlof:
D56C- E8 1420 PARSE INX NEXT INPUT CHARACTER
D56D- BD 00 02 1430 .1 LDA INPUT.BUFFER,X
D570- 24 13 1440 BIT DATAFLG IN A "DATA" STATEMENT?
D572- 70 04 1450 BVS .2 YES (DATAFLG = $49)
D574- C9 20 1460 CMP #' ' IGNORE BLANKS
D576- F0 F4 1470 BEQ PARSE
D578- 85 0E 1480 .2 STA ENDCHR
D57A- C9 22 1490 CMP #'" START OF QUOTATION?
D57C- F0 74 1500 BEQ .13
D57E- 70 4D 1510 BVS .9 BRANCH IF IN "DATA" STATEMENT
D580- C9 3F 1520 CMP #'? SHORTHAND FOR "PRINT"?
D582- D0 04 1530 BNE .3 NO
D584- A9 BA 1540 LDA #TOKEN.PRINT YES, REPLACE WITH "PRINT" TOKEN
D586- D0 45 1550 BNE .9 ...ALWAYS
D588- C9 30 1560 .3 CMP #'0 IS IT A DIGIT, COLON, OR SEMI-COLON?
D58A- 90 04 1570 BCC .4 NO, PUNCTUATION !"#$%&'()*+,-./
D58C- C9 3C 1580 CMP #';'+1
D58E- 90 3D 1590 BCC .9 YES, NOT A TOKEN
1600 *--------------------------------
1610 * SEARCH TOKEN NAME TABLE FOR MATCH STARTING
1620 * WITH CURRENT CHAR FROM INPUT LINE
1630 *--------------------------------
D590- 84 AD 1640 .4 STY STRNG2 SAVE INDEX TO OUTPUT LINE
D592- A9 D0 1650 LDA #TOKEN.NAME.TABLE-$100
D594- 85 9D 1660 STA FAC MAKE PNTR FOR SEARCH
D596- A9 CF 1670 LDA /TOKEN.NAME.TABLE-$100
D598- 85 9E 1680 STA FAC+1
D59A- A0 00 1690 LDY #0 USE Y-REG WITH (FAC) TO ADDRESS TABLE
D59C- 84 0F 1700 STY TKN.CNTR HOLDS CURRENT TOKEN-$80
D59E- 88 1710 DEY PREPARE FOR "INY" A FEW LINES DOWN
D59F- 86 B8 1720 STX TXTPTR SAVE POSITION IN INPUT LINE
D5A1- CA 1730 DEX PREPARE FOR "INX" A FEW LINES DOWN
D5A2- C8 1740 .5 INY ADVANCE POINTER TO TOKEN TABLE
D5A3- D0 02 1750 BNE .6 Y=Y+1 IS ENOUGH
D5A5- E6 9E 1760 INC FAC+1 ALSO NEED TO BUMP THE PAGE
D5A7- E8 1770 .6 INX ADVANCE POINTER TO INPUT LINE
D5A8- BD 00 02 1780 .7 LDA INPUT.BUFFER,X NEXT CHAR FROM INPUT LINE
D5AB- C9 20 1790 CMP #' ' THIS CHAR A BLANK?
D5AD- F0 F8 1800 BEQ .6 YES, IGNORE ALL BLANKS
D5AF- 38 1810 SEC NO, COMPARE TO CHAR IN TABLE
D5B0- F1 9D 1820 SBC (FAC),Y SAME AS NEXT CHAR OF TOKEN NAME?
D5B2- F0 EE 1830 BEQ .5 YES, CONTINUE MATCHING
D5B4- C9 80 1840 CMP #$80 MAYBE; WAS IT SAME EXCEPT FOR BIT 7?
D5B6- D0 41 1850 BNE .14 NO, SKIP TO NEXT TOKEN
D5B8- 05 0F 1860 ORA TKN.CNTR YES, END OF TOKEN; GET TOKEN #
D5BA- C9 C5 1870 CMP #TOKEN.AT DID WE MATCH "AT"?
D5BC- D0 0D 1880 BNE .8 NO, SO NO AMBIGUITY
D5BE- BD 01 02 1890 LDA INPUT.BUFFER+1,X "AT" COULD BE "ATN" OR "A TO"
D5C1- C9 4E 1900 CMP #'N "ATN" HAS PRECEDENCE OVER "AT"
D5C3- F0 34 1910 BEQ .14 IT IS "ATN", FIND IT THE HARD WAY
D5C5- C9 4F 1920 CMP #'O "TO" HAS PRECEDENCE OVER "AT"
D5C7- F0 30 1930 BEQ .14 IT IS "A TO", FIN IT THE HARD WAY
D5C9- A9 C5 1940 LDA #TOKEN.AT NOT "ATN" OR "A TO", SO USE "AT"
1950 *--------------------------------
1960 * STORE CHARACTER OR TOKEN IN OUTPUT LINE
1970 *--------------------------------
Note the special handling for "AT" and "TO". When it examines the next
character, it does NOT skip whitespace, making spaces significant when
differentiating between "at n"/"atn" and "at o"/"ato".
*/
bool ImportBASDialog::ProcessBASLine(const char* buf, int len,
ExpandBuffer* pOutput, CString& msg)
{
const int kMaxTokenLen = 7; // longest token; must also hold linenum
const int kTokenAT = 0xc5 - 128;
const int kTokenATN = 0xe1 - 128;
char tokenBuf[kMaxTokenLen+1];
bool gotOne = false;
bool haveLineNum = false;
char ch;
int tokenLen;
int lineNum;
int foundToken;
if (!len)
return false;
/*
* Remove the CR, LF, or CRLF from the end of the line.
*/
if (len > 1 && buf[len-2] == '\r' && buf[len-1] == '\n') {
//LOGI("removed CRLF");
len -= 2;
} else if (buf[len-1] == '\r') {
//LOGI("removed CR");
len--;
} else if (buf[len-1] == '\n') {
//LOGI("removed LF");
len--;
} else {
//LOGI("no EOL marker found");
}
if (!len)
return true; // blank lines are okay
/*
* Extract the line number.
*/
tokenLen = 0;
while (len > 0) {
if (!GetNextNWC(&buf, &len, &ch)) {
if (!gotOne)
return true; // blank lines with whitespace are okay
else {
// end of line reached while scanning line number is bad
msg = L"found nothing except line number";
return false;
}
}
gotOne = true;
if (!isdigit(ch))
break;
if (tokenLen == 5) { // theoretical max is "65535"
msg = L"line number has too many digits";
return false;
}
tokenBuf[tokenLen++] = ch;
}
if (!tokenLen) {
msg = L"line did not start with a line number";
return false;
}
tokenBuf[tokenLen] = '\0';
lineNum = atoi(tokenBuf);
LOGI("FOUND line %d", lineNum);
pOutput->Putc((char) 0xcc); // placeholder
pOutput->Putc((char) 0xcc);
pOutput->Putc(lineNum & 0xff);
pOutput->Putc((lineNum >> 8) & 0xff);
/*
* Start scanning tokens.
*
* We need to find the longest matching token (i.e. prefer "ONERR" over
* "ON"). Grab a bunch of characters, ignoring whitespace, and scan
* for a match.
*/
buf--; // back up
len++;
foundToken = -1;
while (len > 0) {
const char* dummy = buf;
int remaining = len;
/* load up the buffer */
for (tokenLen = 0; tokenLen < kMaxTokenLen; tokenLen++) {
if (!GetNextNWC(&dummy, &remaining, &ch))
break;
if (ch == '"')
break;
tokenBuf[tokenLen] = ch;
}
if (tokenLen == 0) {
if (ch == '"') {
/*
* Note it's possible for strings to be unterminated. This
* will go unnoticed by Applesoft if it's at the end of a
* line.
*/
GetNextNWC(&buf, &len, &ch);
pOutput->Putc(ch);
while (len--) {
ch = *buf++;
pOutput->Putc(ch);
if (ch == '"')
break;
}
} else {
/* end of line reached */
break;
}
} else {
int token, foundLen;
token = fBASLookup.Lookup(tokenBuf, tokenLen, &foundLen);
if (token >= 0) {
/* match! */
if (token == kTokenAT || token == kTokenATN) {
/* have to go back and re-scan original */
const char* tp = buf +1;
while (toupper(*tp++) != 'T')
;
if (toupper(*tp) == 'N') {
/* keep this token */
assert(token == kTokenATN);
} else if (toupper(*tp) == 'O') {
/* eat and emit the 'A' so we get the "TO" instead */
goto output_single;
} else {
if (token == kTokenATN) {
/* reduce to "AT" */
token = kTokenAT;
foundLen--;
}
}
}
pOutput->Putc(token + 128);
/* consume token chars, including whitespace */
for (int j = 0; j < foundLen; j++)
GetNextNWC(&buf, &len, &ch);
//LOGI("TOKEN '%s' (%d)",
// fBASLookup.GetToken(token), tokenLen);
/* special handling for REM or DATA */
if (token == 0xb2 - 128) {
/* for a REM statement, copy verbatim to end of line */
if (*buf == ' ') {
/* eat one leading space, if present */
buf++;
len--;
}
while (len--) {
ch = *buf++;
pOutput->Putc(ch);
}
} else if (token == 0x83 - 128) {
bool inQuote = false;
/* for a DATA statement, copy until ':' */
if (*buf == ' ') {
/* eat one leading space */
buf++;
len--;
}
while (len--) {
ch = *buf++;
if (ch == '"') // ignore ':' in quoted strings
inQuote = !inQuote;
if (!inQuote && ch == ':') {
len++;
buf--;
break;
}
pOutput->Putc(ch);
}
}
} else {
/*
* Not a quote, and no token begins with this character.
* Output it and advance.
*/
output_single:
GetNextNWC(&buf, &len, &ch);
pOutput->Putc(toupper(ch));
}
}
}
pOutput->Putc('\0');
return true;
}
bool ImportBASDialog::FixBASLinePointers(char* buf, long len,
uint16_t addr)
{
uint16_t val;
char* start;
while (len >= 4) {
start = buf;
val = (*buf) & 0xff | (*(buf+1)) << 8;
if (val == 0)
break;
if (val != 0xcccc) {
LOGI("unexpected value 0x%04x found", val);
return false;
}
buf += 4;
len -= 4;
/*
* Find the next end-of-line marker.
*/
while (*buf != '\0' && len > 0) {
buf++;
len--;
}
if (!len) {
LOGI("ran off the end?");
return false;
}
buf++;
len--;
/*
* Set the value.
*/
val = (unsigned short) (buf - start);
ASSERT((int) val == buf - start);
addr += val;
*start = addr & 0xff;
*(start+1) = (addr >> 8) & 0xff;
}
return true;
}
const char* ImportBASDialog::FindEOL(const char* buf, long max)
{
ASSERT(max >= 0);
if (max == 0)
return NULL;
while (max) {
if (*buf == '\r' || *buf == '\n') {
if (*buf == '\r' && max > 0 && *(buf+1) == '\n')
return buf+2;
return buf+1;
}
buf++;
max--;
}
/*
* Looks like the last line didn't have an EOL. That's okay.
*/
return buf;
}
bool ImportBASDialog::GetNextNWC(const char** pBuf, int* pLen, char* pCh)
{
static const char* kWhitespace = " \t\r\n";
while (*pLen > 0) {
const char* ptr;
char ch;
ch = **pBuf;
ptr = strchr(kWhitespace, ch);
(*pBuf)++;
(*pLen)--;
if (ptr == NULL) {
*pCh = ch;
return true;
}
}
return false;
}
void ImportBASDialog::OnOK(void)
{
CEdit* pEdit = (CEdit*) GetDlgItem(IDC_IMPORT_BAS_SAVEAS);
CString fileName;
pEdit->GetWindowText(fileName);
if (fileName.IsEmpty()) {
CString appName;
appName.LoadString(IDS_MB_APP_NAME);
MessageBox(L"You must specify a filename.",
appName, MB_OK);
}
/*
* Write the file to the currently-open archive.
*/
GenericArchive::FileDetails details;
details.entryKind = GenericArchive::FileDetails::kFileKindDataFork;
details.origName = L"Imported BASIC";
details.storageName = fileName;
details.access = 0xe3; // unlocked, backup bit set
details.fileType = kFileTypeBAS;
details.extraType = 0x0801;
details.storageType = DiskFS::kStorageSeedling;
time_t now = time(NULL);
GenericArchive::UNIXTimeToDateTime(&now, &details.createWhen);
GenericArchive::UNIXTimeToDateTime(&now, &details.archiveWhen);
GenericArchive::UNIXTimeToDateTime(&now, &details.modWhen);
CString errMsg;
fDirty = true;
if (!MainWindow::SaveToArchive(&details, (const unsigned char*) fOutput,
fOutputLen, NULL, -1, /*ref*/errMsg, this))
{
goto bail;
}
/* success! close the dialog */
CDialog::OnOK();
bail:
if (!errMsg.IsEmpty()) {
CString msg;
msg.Format(L"Unable to import file: %ls.", (LPCWSTR) errMsg);
ShowFailureMsg(this, msg, IDS_FAILED);
return;
}
return;
}
void ImportBASDialog::OnHelp(void)
{
WinHelp(HELP_TOPIC_IMPORT_BASIC, HELP_CONTEXT);
}