ciderpress/app/BasicImport.cpp
Andy McFadden b79498da50 Improve filename handling when adding files
Most of this change is a conversion of the old FileDetails struct
into a new LocalFileDetails class.  The new class keeps the
members private, and keeps the Unicode and MOR representations of
the string separate.

The NuFX and DiskImg libraries don't support UTF-16 filenames,
so we stil can't add files with non-CP-1252 filenames, but we're
a step closer.

Also, update NufxLib with a couple of fixes from the main project.

Also, fix handling of "%00" when adding files.

Also, mark most of the A2FileDOS fields private.  Not sure why
they weren't.
2015-01-08 14:16:20 -08:00

656 lines
20 KiB
C++

/*
* CiderPress
* Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved.
* See the file LICENSE for distribution terms.
*/
/*
* Import BASIC programs stored in a text file.
*
* The current implementation is a bit lame. It just dumps text strings into
* a read-only edit buffer, instead of providing a nicer UI. The real trouble
* with this style of interface is that i18n is even more awkward.
*/
#include "StdAfx.h"
#include "../reformat/BASIC.h"
#include "BasicImport.h"
/*
* ==========================================================================
* BASTokenLookup
* ==========================================================================
*/
void BASTokenLookup::Init(const char* tokenList, int numTokens, int tokenLen)
{
int i;
ASSERT(tokenList != NULL);
ASSERT(numTokens > 0);
ASSERT(tokenLen > 0);
delete[] fTokenPtr; // in case we're being re-initialized
delete[] fTokenLen;
fTokenPtr = new const char*[numTokens];
fTokenLen = new int[numTokens];
fNumTokens = numTokens;
for (i = 0; i < numTokens; i++) {
fTokenPtr[i] = tokenList;
fTokenLen[i] = strlen(tokenList);
tokenList += tokenLen;
}
}
int BASTokenLookup::Lookup(const char* str, int len, int* pFoundLen)
{
int longestIndex, longestLen;
int i;
longestIndex = longestLen = -1;
for (i = 0; i < fNumTokens; i++) {
if (fTokenLen[i] <= len && fTokenLen[i] > longestLen &&
strnicmp(str, fTokenPtr[i], fTokenLen[i]) == 0)
{
longestIndex = i;
longestLen = fTokenLen[i];
}
}
*pFoundLen = longestLen;
return longestIndex;
}
/*
* ==========================================================================
* ImportBASDialog
* ==========================================================================
*/
BEGIN_MESSAGE_MAP(ImportBASDialog, CDialog)
ON_COMMAND(IDHELP, OnHelp)
END_MESSAGE_MAP()
BOOL ImportBASDialog::OnInitDialog(void)
{
CDialog::OnInitDialog(); // base class init
PathName path(fFileName);
CString fileNameOnly(path.GetFileName());
CString ext(fileNameOnly.Right(4));
if (ext.CompareNoCase(L".txt") == 0) {
LOGI("removing extension from '%ls'", (LPCWSTR) fileNameOnly);
fileNameOnly = fileNameOnly.Left(fileNameOnly.GetLength() - 4);
}
CEdit* pEdit = (CEdit*) GetDlgItem(IDC_IMPORT_BAS_SAVEAS);
pEdit->SetWindowText(fileNameOnly);
pEdit->SetSel(0, -1);
pEdit->SetFocus();
/*
* Do the actual import. If it fails, disable the "save" button.
*/
if (!ImportBAS(fFileName)) {
CButton* pButton = (CButton*) GetDlgItem(IDOK);
pButton->EnableWindow(FALSE);
pEdit->EnableWindow(FALSE);
}
return FALSE; // keep our focus
}
static const char kFailed[] = "failed.\r\n\r\n";
static const char kSuccess[] = "success!\r\n\r\n";
bool ImportBASDialog::ImportBAS(const WCHAR* fileName)
{
FILE* fp = NULL;
ExpandBuffer msgs(1024);
long fileLen, outLen, count;
char* buf = NULL;
char* outBuf = NULL;
bool result = false;
msgs.Printf("Importing from '%ls'...", fileName);
fp = _wfopen(fileName, L"rb"); // EOL unknown, open as binary and deal
if (fp == NULL) {
msgs.Printf("%sUnable to open file.", kFailed);
goto bail;
}
/* determine file length, and verify that it looks okay */
fseek(fp, 0, SEEK_END);
fileLen = ftell(fp);
rewind(fp);
if (ferror(fp) || fileLen < 0) {
msgs.Printf("%sUnable to determine file length.", kFailed);
goto bail;
}
if (fileLen == 0) {
msgs.Printf("%sFile is empty.", kFailed);
goto bail;
}
if (fileLen >= 128*1024) {
msgs.Printf("%sFile is too large to be Applesoft.", kFailed);
goto bail;
}
buf = new char[fileLen];
if (buf == NULL) {
msgs.Printf("%sUnable to allocate memory.", kFailed);
goto bail;
}
/* read the entire thing into memory */
count = fread(buf, 1, fileLen, fp);
if (count != fileLen) {
msgs.Printf("%sCould only read %ld of %ld bytes.", kFailed,
count, fileLen);
goto bail;
}
/* process it */
if (!ConvertTextToBAS(buf, fileLen, &outBuf, &outLen, &msgs))
goto bail;
result = true;
SetOutput(outBuf, outLen);
bail:
if (fp != NULL)
fclose(fp);
delete[] buf;
/* copy our error messages out */
CEdit* pEdit = (CEdit*) GetDlgItem(IDC_IMPORT_BAS_RESULTS);
char* msgBuf = NULL;
long msgLen;
msgs.SeizeBuffer(&msgBuf, &msgLen);
CString msgStr(msgBuf);
pEdit->SetWindowText(msgStr);
delete[] msgBuf;
return result;
}
bool ImportBASDialog::ConvertTextToBAS(const char* buf, long fileLen,
char** pOutBuf, long* pOutLen, ExpandBuffer* pMsgs)
{
ExpandBuffer output(32768);
CString msg;
const char* lineStart;
const char* lineEnd;
long textRemaining;
int lineNum;
fBASLookup.Init(ReformatApplesoft::GetApplesoftTokens(),
ReformatApplesoft::kTokenCount, ReformatApplesoft::kTokenLen);
lineEnd = buf;
textRemaining = fileLen;
lineNum = 0;
while (textRemaining > 0) {
lineNum++;
lineStart = lineEnd;
lineEnd = FindEOL(lineStart, textRemaining);
if (!ProcessBASLine(lineStart, lineEnd - lineStart, &output,
/*ref*/ msg))
{
pMsgs->Printf("%sLine %d: %ls", kFailed, lineNum, (LPCWSTR) msg);
return false;
}
textRemaining -= lineEnd - lineStart;
}
/* output EOF marker */
output.Putc(0x00);
output.Putc(0x00);
/* grab the buffer */
char* outBuf;
long outLen;
output.SeizeBuffer(&outBuf, &outLen);
if (outLen >= 0xc000) {
pMsgs->Printf("%sOutput is too large to be valid", kFailed);
delete[] outBuf;
return false;
}
/* go back and fix up the "next line" pointers, assuming a $0801 start */
if (!FixBASLinePointers(outBuf, outLen, 0x0801)) {
pMsgs->Printf("%sFailed while fixing line pointers", kFailed);
delete[] outBuf;
return false;
}
*pOutBuf = outBuf;
*pOutLen = outLen;
pMsgs->Printf("%sProcessed %d lines", kSuccess, lineNum);
pMsgs->Printf("\r\nTokenized file is %d bytes long", *pOutLen);
return true;
}
/*
From an Applesoft disassembly by Bob Sander-Cederlof:
D56C- E8 1420 PARSE INX NEXT INPUT CHARACTER
D56D- BD 00 02 1430 .1 LDA INPUT.BUFFER,X
D570- 24 13 1440 BIT DATAFLG IN A "DATA" STATEMENT?
D572- 70 04 1450 BVS .2 YES (DATAFLG = $49)
D574- C9 20 1460 CMP #' ' IGNORE BLANKS
D576- F0 F4 1470 BEQ PARSE
D578- 85 0E 1480 .2 STA ENDCHR
D57A- C9 22 1490 CMP #'" START OF QUOTATION?
D57C- F0 74 1500 BEQ .13
D57E- 70 4D 1510 BVS .9 BRANCH IF IN "DATA" STATEMENT
D580- C9 3F 1520 CMP #'? SHORTHAND FOR "PRINT"?
D582- D0 04 1530 BNE .3 NO
D584- A9 BA 1540 LDA #TOKEN.PRINT YES, REPLACE WITH "PRINT" TOKEN
D586- D0 45 1550 BNE .9 ...ALWAYS
D588- C9 30 1560 .3 CMP #'0 IS IT A DIGIT, COLON, OR SEMI-COLON?
D58A- 90 04 1570 BCC .4 NO, PUNCTUATION !"#$%&'()*+,-./
D58C- C9 3C 1580 CMP #';'+1
D58E- 90 3D 1590 BCC .9 YES, NOT A TOKEN
1600 *--------------------------------
1610 * SEARCH TOKEN NAME TABLE FOR MATCH STARTING
1620 * WITH CURRENT CHAR FROM INPUT LINE
1630 *--------------------------------
D590- 84 AD 1640 .4 STY STRNG2 SAVE INDEX TO OUTPUT LINE
D592- A9 D0 1650 LDA #TOKEN.NAME.TABLE-$100
D594- 85 9D 1660 STA FAC MAKE PNTR FOR SEARCH
D596- A9 CF 1670 LDA /TOKEN.NAME.TABLE-$100
D598- 85 9E 1680 STA FAC+1
D59A- A0 00 1690 LDY #0 USE Y-REG WITH (FAC) TO ADDRESS TABLE
D59C- 84 0F 1700 STY TKN.CNTR HOLDS CURRENT TOKEN-$80
D59E- 88 1710 DEY PREPARE FOR "INY" A FEW LINES DOWN
D59F- 86 B8 1720 STX TXTPTR SAVE POSITION IN INPUT LINE
D5A1- CA 1730 DEX PREPARE FOR "INX" A FEW LINES DOWN
D5A2- C8 1740 .5 INY ADVANCE POINTER TO TOKEN TABLE
D5A3- D0 02 1750 BNE .6 Y=Y+1 IS ENOUGH
D5A5- E6 9E 1760 INC FAC+1 ALSO NEED TO BUMP THE PAGE
D5A7- E8 1770 .6 INX ADVANCE POINTER TO INPUT LINE
D5A8- BD 00 02 1780 .7 LDA INPUT.BUFFER,X NEXT CHAR FROM INPUT LINE
D5AB- C9 20 1790 CMP #' ' THIS CHAR A BLANK?
D5AD- F0 F8 1800 BEQ .6 YES, IGNORE ALL BLANKS
D5AF- 38 1810 SEC NO, COMPARE TO CHAR IN TABLE
D5B0- F1 9D 1820 SBC (FAC),Y SAME AS NEXT CHAR OF TOKEN NAME?
D5B2- F0 EE 1830 BEQ .5 YES, CONTINUE MATCHING
D5B4- C9 80 1840 CMP #$80 MAYBE; WAS IT SAME EXCEPT FOR BIT 7?
D5B6- D0 41 1850 BNE .14 NO, SKIP TO NEXT TOKEN
D5B8- 05 0F 1860 ORA TKN.CNTR YES, END OF TOKEN; GET TOKEN #
D5BA- C9 C5 1870 CMP #TOKEN.AT DID WE MATCH "AT"?
D5BC- D0 0D 1880 BNE .8 NO, SO NO AMBIGUITY
D5BE- BD 01 02 1890 LDA INPUT.BUFFER+1,X "AT" COULD BE "ATN" OR "A TO"
D5C1- C9 4E 1900 CMP #'N "ATN" HAS PRECEDENCE OVER "AT"
D5C3- F0 34 1910 BEQ .14 IT IS "ATN", FIND IT THE HARD WAY
D5C5- C9 4F 1920 CMP #'O "TO" HAS PRECEDENCE OVER "AT"
D5C7- F0 30 1930 BEQ .14 IT IS "A TO", FIN IT THE HARD WAY
D5C9- A9 C5 1940 LDA #TOKEN.AT NOT "ATN" OR "A TO", SO USE "AT"
1950 *--------------------------------
1960 * STORE CHARACTER OR TOKEN IN OUTPUT LINE
1970 *--------------------------------
Note the special handling for "AT" and "TO". When it examines the next
character, it does NOT skip whitespace, making spaces significant when
differentiating between "at n"/"atn" and "at o"/"ato".
*/
bool ImportBASDialog::ProcessBASLine(const char* buf, int len,
ExpandBuffer* pOutput, CString& msg)
{
const int kMaxTokenLen = 7; // longest token; must also hold linenum
const int kTokenAT = 0xc5 - 128;
const int kTokenATN = 0xe1 - 128;
char tokenBuf[kMaxTokenLen+1];
bool gotOne = false;
bool haveLineNum = false;
char ch;
int tokenLen;
int lineNum;
int foundToken;
if (!len)
return false;
/*
* Remove the CR, LF, or CRLF from the end of the line.
*/
if (len > 1 && buf[len-2] == '\r' && buf[len-1] == '\n') {
//LOGI("removed CRLF");
len -= 2;
} else if (buf[len-1] == '\r') {
//LOGI("removed CR");
len--;
} else if (buf[len-1] == '\n') {
//LOGI("removed LF");
len--;
} else {
//LOGI("no EOL marker found");
}
if (!len)
return true; // blank lines are okay
/*
* Extract the line number.
*/
tokenLen = 0;
while (len > 0) {
if (!GetNextNWC(&buf, &len, &ch)) {
if (!gotOne)
return true; // blank lines with whitespace are okay
else {
// end of line reached while scanning line number is bad
msg = L"found nothing except line number";
return false;
}
}
gotOne = true;
if (!isdigit(ch))
break;
if (tokenLen == 5) { // theoretical max is "65535"
msg = L"line number has too many digits";
return false;
}
tokenBuf[tokenLen++] = ch;
}
if (!tokenLen) {
msg = L"line did not start with a line number";
return false;
}
tokenBuf[tokenLen] = '\0';
lineNum = atoi(tokenBuf);
LOGI("FOUND line %d", lineNum);
pOutput->Putc((char) 0xcc); // placeholder
pOutput->Putc((char) 0xcc);
pOutput->Putc(lineNum & 0xff);
pOutput->Putc((lineNum >> 8) & 0xff);
/*
* Start scanning tokens.
*
* We need to find the longest matching token (i.e. prefer "ONERR" over
* "ON"). Grab a bunch of characters, ignoring whitespace, and scan
* for a match.
*/
buf--; // back up
len++;
foundToken = -1;
while (len > 0) {
const char* dummy = buf;
int remaining = len;
/* load up the buffer */
for (tokenLen = 0; tokenLen < kMaxTokenLen; tokenLen++) {
if (!GetNextNWC(&dummy, &remaining, &ch))
break;
if (ch == '"')
break;
tokenBuf[tokenLen] = ch;
}
if (tokenLen == 0) {
if (ch == '"') {
/*
* Note it's possible for strings to be unterminated. This
* will go unnoticed by Applesoft if it's at the end of a
* line.
*/
GetNextNWC(&buf, &len, &ch);
pOutput->Putc(ch);
while (len--) {
ch = *buf++;
pOutput->Putc(ch);
if (ch == '"')
break;
}
} else {
/* end of line reached */
break;
}
} else {
int token, foundLen;
token = fBASLookup.Lookup(tokenBuf, tokenLen, &foundLen);
if (token >= 0) {
/* match! */
if (token == kTokenAT || token == kTokenATN) {
/* have to go back and re-scan original */
const char* tp = buf +1;
while (toupper(*tp++) != 'T')
;
if (toupper(*tp) == 'N') {
/* keep this token */
assert(token == kTokenATN);
} else if (toupper(*tp) == 'O') {
/* eat and emit the 'A' so we get the "TO" instead */
goto output_single;
} else {
if (token == kTokenATN) {
/* reduce to "AT" */
token = kTokenAT;
foundLen--;
}
}
}
pOutput->Putc(token + 128);
/* consume token chars, including whitespace */
for (int j = 0; j < foundLen; j++)
GetNextNWC(&buf, &len, &ch);
//LOGI("TOKEN '%s' (%d)",
// fBASLookup.GetToken(token), tokenLen);
/* special handling for REM or DATA */
if (token == 0xb2 - 128) {
/* for a REM statement, copy verbatim to end of line */
if (*buf == ' ') {
/* eat one leading space, if present */
buf++;
len--;
}
while (len--) {
ch = *buf++;
pOutput->Putc(ch);
}
} else if (token == 0x83 - 128) {
bool inQuote = false;
/* for a DATA statement, copy until ':' */
if (*buf == ' ') {
/* eat one leading space */
buf++;
len--;
}
while (len--) {
ch = *buf++;
if (ch == '"') // ignore ':' in quoted strings
inQuote = !inQuote;
if (!inQuote && ch == ':') {
len++;
buf--;
break;
}
pOutput->Putc(ch);
}
}
} else {
/*
* Not a quote, and no token begins with this character.
* Output it and advance.
*/
output_single:
GetNextNWC(&buf, &len, &ch);
pOutput->Putc(toupper(ch));
}
}
}
pOutput->Putc('\0');
return true;
}
bool ImportBASDialog::FixBASLinePointers(char* buf, long len,
uint16_t addr)
{
uint16_t val;
char* start;
while (len >= 4) {
start = buf;
val = (*buf) & 0xff | (*(buf+1)) << 8;
if (val == 0)
break;
if (val != 0xcccc) {
LOGI("unexpected value 0x%04x found", val);
return false;
}
buf += 4;
len -= 4;
/*
* Find the next end-of-line marker.
*/
while (*buf != '\0' && len > 0) {
buf++;
len--;
}
if (!len) {
LOGI("ran off the end?");
return false;
}
buf++;
len--;
/*
* Set the value.
*/
val = (unsigned short) (buf - start);
ASSERT((int) val == buf - start);
addr += val;
*start = addr & 0xff;
*(start+1) = (addr >> 8) & 0xff;
}
return true;
}
const char* ImportBASDialog::FindEOL(const char* buf, long max)
{
ASSERT(max >= 0);
if (max == 0)
return NULL;
while (max) {
if (*buf == '\r' || *buf == '\n') {
if (*buf == '\r' && max > 0 && *(buf+1) == '\n')
return buf+2;
return buf+1;
}
buf++;
max--;
}
/*
* Looks like the last line didn't have an EOL. That's okay.
*/
return buf;
}
bool ImportBASDialog::GetNextNWC(const char** pBuf, int* pLen, char* pCh)
{
static const char* kWhitespace = " \t\r\n";
while (*pLen > 0) {
const char* ptr;
char ch;
ch = **pBuf;
ptr = strchr(kWhitespace, ch);
(*pBuf)++;
(*pLen)--;
if (ptr == NULL) {
*pCh = ch;
return true;
}
}
return false;
}
void ImportBASDialog::OnOK(void)
{
CEdit* pEdit = (CEdit*) GetDlgItem(IDC_IMPORT_BAS_SAVEAS);
CString fileName;
pEdit->GetWindowText(fileName);
if (fileName.IsEmpty()) {
CString appName;
CheckedLoadString(&appName, IDS_MB_APP_NAME);
MessageBox(L"You must specify a filename.",
appName, MB_OK);
}
/*
* Write the file to the currently-open archive.
*/
GenericArchive::LocalFileDetails details;
details.SetEntryKind(GenericArchive::LocalFileDetails::kFileKindDataFork);
details.SetLocalPathName(L"Imported BASIC");
details.SetStrippedLocalPathName(fileName);
details.SetAccess(0xe3); // unlocked, backup bit set
details.SetFileType(kFileTypeBAS);
details.SetExtraType(0x0801);
details.SetStorageType(DiskFS::kStorageSeedling);
time_t now = time(NULL);
NuDateTime ndt;
GenericArchive::UNIXTimeToDateTime(&now, &ndt);
details.SetCreateWhen(ndt);
details.SetArchiveWhen(ndt);
details.SetModWhen(ndt);
CString errMsg;
fDirty = true;
if (!MainWindow::SaveToArchive(&details, (const unsigned char*) fOutput,
fOutputLen, NULL, -1, &errMsg, this))
{
goto bail;
}
/* success! close the dialog */
CDialog::OnOK();
bail:
if (!errMsg.IsEmpty()) {
CString msg;
msg.Format(L"Unable to import file: %ls.", (LPCWSTR) errMsg);
ShowFailureMsg(this, msg, IDS_FAILED);
return;
}
return;
}