Ignore spaces more aggressively

It turns out that Applesoft happily accepts `T H E N` for `THEN`,
but the parser did not. This change makes the parser skip spaces
inside tokens as well, and adds tests for some odd cases.

Interestingly, this means that there is some valid input that
Applesoft can never tokenize as intended, because its tokenizer is
greedy and ignores (most) spaces. For example, `NOT RACE` will always
be tokenized as `NOTRACE`, even though `NOT RACE` is a valid expression.
This commit is contained in:
Ian Flanigan 2022-01-31 08:48:21 +01:00
parent cdba120526
commit 6cd703e6b7
2 changed files with 77 additions and 9 deletions

View File

@ -172,6 +172,7 @@ function writeWord(mem: Memory, addr: word, val: byte) {
}
class LineBuffer implements IterableIterator<string> {
private prevChar: number = 0;
constructor(private readonly line: string, private curChar: number = 0) { }
[Symbol.iterator](): IterableIterator<string> {
@ -183,9 +184,10 @@ class LineBuffer implements IterableIterator<string> {
}
next(): IteratorResult<string> {
if (this.curChar >= this.line.length) {
if (this.atEnd()) {
return { done: true, value: undefined };
}
this.prevChar = this.curChar;
return { done: false, value: this.line[this.curChar++] };
}
@ -201,18 +203,31 @@ class LineBuffer implements IterableIterator<string> {
* @param token An all-uppercase string to match.
*/
lookingAtToken(token: string): boolean {
// Back up one since next() has already consumed the first character.
const possibleToken = this.line.substring(
this.curChar, this.curChar + token.length).toUpperCase();
if (possibleToken === token) {
this.curChar += token.length;
const oldCurChar = this.curChar;
const oldPrevChar = this.prevChar;
let possibleToken = '';
for (const char of this) {
if (char === ' ') {
continue;
}
possibleToken += char;
if (possibleToken.length === token.length) {
break;
}
}
if (possibleToken.toUpperCase() === token) {
// Matched; set prevChar to before the match.
this.prevChar = oldCurChar;
return true;
}
// No match; restore state.
this.curChar = oldCurChar;
this.prevChar = oldPrevChar;
return false;
}
backup(chars: number = 1) {
this.curChar = Math.max(this.curChar - chars, 0);
backup() {
this.curChar = this.prevChar;
}
peek(): string {
@ -302,7 +317,7 @@ export default class ApplesoftCompiler {
// TO takes precedence over AT
if (lookAhead === 'O') {
// Backup to before the token
lineBuffer.backup(possibleToken.length);
lineBuffer.backup();
// and emit the 'A' (upper- or lower-case)
return lineBuffer.next().value.charCodeAt(0);
}

View File

@ -375,4 +375,57 @@ describe('ApplesoftCompiler', () => {
0x4c, 0x44, 0x21, 0x22, 0x00, 0x00, 0x00
]));
});
// Spaces between the letters of a keyword must not prevent it from being
// recognized: 'T H E N' is expected to compile to the same single token
// byte (0xc4) as 'THEN' would.
it('skips spaces when reading tokens', () => {
const compiler = new ApplesoftCompiler();
compiler.compile('10 T H E N');
expect(compiler.program()).toEqual(new Uint8Array([
// Expected layout (little-endian words): next-line link 0x0807,
// line number 10 (0x000a), THEN token 0xc4, 0x00 line terminator,
// then a 0x0000 link marking the end of the program.
0x07, 0x08, 0x0a, 0x00, 0xc4, 0x00, 0x00, 0x00,
]));
});
// Same as the plain space-skipping case, but with mixed-case letters:
// 'T h E n' must still be recognized as the THEN token (0xc4), so the
// expected program bytes are identical to those for '10 T H E N'.
it('skips spaces and ignores case when reading tokens', () => {
const compiler = new ApplesoftCompiler();
compiler.compile('10 T h E n');
expect(compiler.program()).toEqual(new Uint8Array([
// link 0x0807, line number 10, THEN token, line terminator, end link
0x07, 0x08, 0x0a, 0x00, 0xc4, 0x00, 0x00, 0x00,
]));
});
// Because spaces are ignored and matching is greedy, keywords are found
// across word boundaries in the source text:
//   'NOT RACE' -> the single token 0x9c (NOTRACE per the commit message)
//   'A THEN B' -> token 0xc5 (the 'A T' is taken as AT) followed by the
//                 plain characters 'H', 'E', 'N', 'B'
it('smashes tokens together', () => {
const compiler = new ApplesoftCompiler();
compiler.compile('10 NOT RACE A THEN B');
expect(compiler.program()).toEqual(new Uint8Array([
// link 0x080c, line number 10, 0x9c, 0xc5, 'H' (0x48), 'E' (0x45),
0x0c, 0x08, 0x0a, 0x00, 0x9c, 0xc5, 0x48, 0x45,
// 'N' (0x4e), 'B' (0x42), line terminator, 0x0000 end-of-program link
0x4e, 0x42, 0x00, 0x00, 0x00,
]));
});
// Exercises the "TO takes precedence over AT" special case in the compiler:
// 'ATOZ' must come out as the letter 'A', the token 0xc1 (TO) and the
// letter 'Z' -- not as the 0xc5 (AT) token followed by 'OZ'.
it('parses 10ATOZ correctly', () => {
const compiler = new ApplesoftCompiler();
compiler.compile('10ATOZ');
expect(compiler.program()).toEqual(new Uint8Array([
// link 0x0809, line number 10, 'A' (0x41), 0xc1, 'Z' (0x5a), terminator
0x09, 0x08, 0x0a, 0x00, 0x41, 0xc1, 0x5a, 0x00,
// 0x0000 end-of-program link
0x00, 0x00,
]));
});
// Multi-line regression test for greedy, space-insensitive token matching.
// Expected tokenization of each source line (after its link and line number):
//   10 A THEN B       -> 0xc5, 'H', 'E', 'N', 'B'   ('A T' matched as one token)
//   30 A TO Z         -> 'A', 0xc1, 'Z'             (TO wins over AT)
//   40 AT N           -> 0xc5, 'N'
//   50 A TN           -> 0xe1                       (single token)
//   60 N O T R A C E  -> 0x9c                       (single token)
//   70 NOT RACE       -> 0x9c                       (same token as line 60)
it('parses a bunch of crazy correctly', () => {
const compiler = new ApplesoftCompiler();
compiler.compile([
'10 A THEN B',
'30 A TO Z',
'40 AT N',
'50 A TN',
'60 N O T R A C E',
'70 NOT RACE'].join('\n'));
// Each line is encoded as: 2-byte little-endian link to the next line,
// 2-byte little-endian line number, the token/character bytes, then 0x00.
// The links below advance 0x080b -> 0x0813 -> 0x081a -> 0x0820 -> 0x0826
// -> 0x082c, and a final 0x0000 link ends the program.
expect(compiler.program()).toEqual(new Uint8Array([
0x0b, 0x08, 0x0a, 0x00, 0xc5, 0x48, 0x45,
0x4e, 0x42, 0x00, 0x13, 0x08, 0x1e, 0x00, 0x41,
0xc1, 0x5a, 0x00, 0x1a, 0x08, 0x28, 0x00, 0xc5,
0x4e, 0x00, 0x20, 0x08, 0x32, 0x00, 0xe1, 0x00,
0x26, 0x08, 0x3c, 0x00, 0x9c, 0x00, 0x2c, 0x08,
0x46, 0x00, 0x9c, 0x00, 0x00, 0x00,
]));
});
});