apple2js/js/applesoft/compiler.ts
2022-07-13 20:34:50 -07:00

320 lines
10 KiB
TypeScript

import { byte, KnownValues, Memory, word } from '../types';
import { STRING_TO_TOKEN } from './tokens';
import { TXTTAB, PRGEND, VARTAB, ARYTAB, STREND } from './zeropage';
/**
 * Default address at which a tokenized program starts (0x801). Used when a
 * caller does not supply an explicit `programStart`.
 */
const PROGRAM_START = 0x801;
/**
* Parse states of the per-line tokenizer. Every line starts in `NORMAL`.
*/
enum STATES {
/**
* Tries to tokenize the input. Transitions:
* * `"`: `STRING`
* * `REM`: `COMMENT`
* * `DATA`: `DATA`
*/
NORMAL = 0,
/**
* Inside a string literal. Stores the input exactly. Transitions:
* * `"`: `NORMAL`
*/
STRING = 1,
/**
* After `REM`. Stores the input exactly up until the end of the line.
* No transitions.
*/
COMMENT = 2,
/**
* After `DATA`. Stores the input exactly. Transitions:
* * `:`: `NORMAL`
* * `"`: `DATA_QUOTE`
*/
DATA = 3,
/**
* Inside a quoted item of a `DATA` statement. Stores the input exactly.
* Transitions:
* * `"`: `DATA`
*/
DATA_QUOTE = 4,
}
/**
 * Writes a single byte to memory at a 16-bit address.
 *
 * The address is split into a page (high 8 bits) and an offset within that
 * page (low 8 bits), which is the form `mem.write` takes.
 *
 * @param mem Memory to write to.
 * @param addr Address to write.
 * @param val Byte value to store.
 * @returns Whatever `mem.write` returns.
 */
function writeByte(mem: Memory, addr: word, val: byte) {
    return mem.write(addr >> 8, addr & 0xff, val);
}
/**
 * Writes a 16-bit value to memory in little-endian order: the low byte at
 * `addr` and the high byte at `addr + 1`.
 *
 * @param mem Memory to write to.
 * @param addr Address of the low byte.
 * @param val 16-bit value to store. (It is split into two bytes, so it is a
 *     `word`, not a `byte`.)
 */
function writeWord(mem: Memory, addr: word, val: word) {
    const lsb = val & 0xff;
    // Mask so that an out-of-range value can never produce a "byte" > 0xff.
    const msb = (val >> 8) & 0xff;
    writeByte(mem, addr, lsb);
    writeByte(mem, addr + 1, msb);
}
/**
 * A cursor over one line of source text.
 *
 * The buffer is its own iterator: each `next()` yields one character and
 * advances the cursor. It remembers a single "previous" position so that the
 * most recent read (or token match) can be undone with `backup()`.
 */
class LineBuffer implements IterableIterator<string> {
    /** Position that `backup()` rewinds to. */
    private prevChar: number = 0;

    /**
     * @param line The text to read.
     * @param curChar Index of the next character to be read. Defaults to 0.
     */
    constructor(private readonly line: string, private curChar: number = 0) { }

    /** The buffer iterates over itself, so iteration consumes characters. */
    [Symbol.iterator](): IterableIterator<string> {
        return this;
    }

    /**
     * Returns an independent copy positioned at the same place, including
     * the remembered backup position so `backup()` behaves the same on the
     * copy as on the original.
     */
    clone(): LineBuffer {
        const copy = new LineBuffer(this.line, this.curChar);
        copy.prevChar = this.prevChar;
        return copy;
    }

    /**
     * Reads the next character and advances. The position before the read
     * is remembered for `backup()`.
     */
    next(): IteratorResult<string, string | undefined> {
        if (this.atEnd()) {
            return { done: true, value: undefined };
        }
        this.prevChar = this.curChar;
        return { done: false, value: this.line[this.curChar++] };
    }

    /**
     * Tries to match the input token at the current buffer location. If
     * the token matches, the current buffer location is advanced past
     * the token and this method returns `true`. Otherwise, the buffer is
     * left unchanged and this method returns `false`.
     *
     * Spaces in the buffer are skipped while matching. The input is assumed
     * to be an all-uppercase string and the characters read from the buffer
     * are uppercased before the comparison.
     *
     * After a successful match, `backup()` rewinds to the position before
     * the match.
     *
     * @param token An all-uppercase string to match.
     */
    lookingAtToken(token: string): boolean {
        const oldCurChar = this.curChar;
        const oldPrevChar = this.prevChar;

        // Collect as many non-space characters as the token is long (or
        // fewer, if the line ends first).
        let possibleToken = '';
        for (const char of this) {
            if (char === ' ') {
                continue;
            }
            possibleToken += char;
            if (possibleToken.length === token.length) {
                break;
            }
        }

        if (possibleToken.toUpperCase() === token) {
            // Matched; set prevChar to before the match.
            this.prevChar = oldCurChar;
            return true;
        }

        // No match; restore state.
        this.curChar = oldCurChar;
        this.prevChar = oldPrevChar;
        return false;
    }

    /** Rewinds the cursor to the remembered previous position. */
    backup() {
        this.curChar = this.prevChar;
    }

    /**
     * Returns the next character without consuming it.
     *
     * @throws RangeError if the buffer is at the end of the line.
     */
    peek(): string {
        if (this.atEnd()) {
            throw new RangeError(`Reading past the end of ${this.line}`);
        }
        return this.line[this.curChar];
    }

    /** Returns `true` when every character of the line has been read. */
    atEnd(): boolean {
        return this.curChar >= this.line.length;
    }
}
/**
 * Tokenizes ("compiles") Applesoft BASIC source text.
 *
 * Lines are tokenized one at a time and kept keyed by line number; `program()`
 * then lays them out, in ascending line-number order, as a linked list of
 * lines terminated by a zero word.
 */
export default class ApplesoftCompiler {
    /**
     * Tokenized bytes of each line, keyed by line number. The stored bytes do
     * not include the next-line link, the line number, or the terminating
     * zero; those are added by `program()`.
     */
    private lines: Map<number, byte[]> = new Map();

    /**
     * Loads an Applesoft BASIC program into memory.
     *
     * @param mem Memory, including zero page, into which the program is
     *     loaded.
     * @param program A string with a BASIC program to compile (tokenize).
     * @param programStart Optional start address of the program. Defaults to
     *     standard Applesoft program address, 0x801.
     */
    static compileToMemory(mem: Memory, program: string, programStart: word = PROGRAM_START) {
        const compiler = new ApplesoftCompiler();
        compiler.compile(program);
        const compiledProgram: Uint8Array = compiler.program(programStart);

        for (let i = 0; i < compiledProgram.byteLength; i++) {
            writeByte(mem, programStart + i, compiledProgram[i]);
        }

        // Set zero page locations. Applesoft is weird because, when a line
        // is inserted, PRGEND is copied to VARTAB in the beginning, but then
        // VARTAB is manipulated to make space for the line, then PRGEND is
        // set from VARTAB. There's also a bug in NEW at D657 where the carry
        // flag is not cleared, so it can add 2 or 3. The upshot, though, is
        // that PRGEND and VARTAB end up being 1 or 2 bytes past the end of
        // the program. From my tests in the emulator, it's usually 1, so
        // that's what we're going with here.
        const prgend = programStart + compiledProgram.byteLength + 1;
        writeWord(mem, TXTTAB, programStart);
        writeWord(mem, PRGEND, prgend);
        writeWord(mem, VARTAB, prgend);
        writeWord(mem, ARYTAB, prgend);
        writeWord(mem, STREND, prgend);
    }

    /**
     * Reads the run of decimal digits at the current buffer position and
     * leaves the buffer positioned at the first non-digit character.
     *
     * @returns The parsed line number.
     * @throws Error if the buffer does not start with a digit.
     */
    private readLineNumber(lineBuffer: LineBuffer): number {
        let lineNoStr = '';
        for (const character of lineBuffer) {
            if (/\d/.test(character)) {
                lineNoStr += character;
            } else {
                // Un-read the first non-digit so the caller sees it.
                lineBuffer.backup();
                break;
            }
        }
        if (lineNoStr.length === 0) {
            throw new Error('Missing line number');
        }
        return parseInt(lineNoStr, 10);
    }

    /**
     * Reads one token or one literal character from the buffer.
     *
     * @returns The token value if a keyword from `STRING_TO_TOKEN` matches
     *     at the current position; otherwise the character code of the next
     *     character, upper-cased (or 0 if the buffer is exhausted).
     */
    private readToken(lineBuffer: LineBuffer): byte {
        // Try to match a token
        for (const possibleToken in STRING_TO_TOKEN) {
            if (lineBuffer.lookingAtToken(possibleToken)) {
                // NOTE(flan): This special token-preference
                // logic is straight from the Applesoft BASIC
                // code (D5BE-D5CA in the Apple //e ROM).
                if (possibleToken === 'AT' && !lineBuffer.atEnd()) {
                    // Token matching is case-insensitive, so the lookahead
                    // has to be as well; otherwise lowercase `atn` would be
                    // tokenized as AT followed by N.
                    const lookAhead = lineBuffer.peek().toUpperCase();
                    // ATN takes precedence over AT
                    if (lookAhead === 'N') {
                        lineBuffer.next();
                        return STRING_TO_TOKEN['ATN'];
                    }
                    // TO takes precedence over AT
                    if (lookAhead === 'O') {
                        // Backup to before the token
                        lineBuffer.backup();
                        // and emit the 'A' (upper- or lower-case)
                        return lineBuffer.next().value?.charCodeAt(0) ?? 0;
                    }
                }
                return STRING_TO_TOKEN[possibleToken];
            }
        }
        // If not a token, output the character upper-cased
        return lineBuffer.next().value?.toUpperCase().charCodeAt(0) ?? 0;
    }

    /**
     * Tokenizes one source line and stores the result under its line number,
     * replacing any line previously stored with the same number. Empty,
     * `null` and `undefined` lines are ignored.
     *
     * @throws Error if the line has no line number or the number is outside
     *     0..65535.
     */
    private compileLine(line: string | null | undefined) {
        if (!line) {
            return;
        }

        const result: byte[] = [];
        const lineBuffer = new LineBuffer(line);
        let state: KnownValues<typeof STATES> = STATES.NORMAL;

        const lineNumber = this.readLineNumber(lineBuffer);
        if (lineNumber < 0 || lineNumber > 65535) {
            throw new Error('Line number out of range');
        }

        // Read the rest of the line
        for (const character of lineBuffer) {
            const charCode = character.charCodeAt(0);
            switch (state) {
                case STATES.NORMAL:
                    // Skip spaces
                    if (character === ' ') {
                        break;
                    }
                    // Transition to parsing a string
                    if (character === '"') {
                        result.push(charCode);
                        state = STATES.STRING;
                        break;
                    }
                    // Shorthand for PRINT (D580 in Apple //e ROM)
                    if (character === '?') {
                        result.push(STRING_TO_TOKEN['PRINT']);
                        break;
                    }
                    // Try to parse a token or character. The loop already
                    // consumed `character`, so un-read it first.
                    lineBuffer.backup();
                    {
                        const token = this.readToken(lineBuffer);
                        if (token === STRING_TO_TOKEN['REM']) {
                            state = STATES.COMMENT;
                        }
                        if (token === STRING_TO_TOKEN['DATA']) {
                            state = STATES.DATA;
                        }
                        result.push(token);
                    }
                    break;
                case STATES.COMMENT:
                    result.push(charCode);
                    break;
                case STATES.STRING:
                    if (character === '"') {
                        state = STATES.NORMAL;
                    }
                    result.push(charCode);
                    break;
                case STATES.DATA:
                    if (character === ':') {
                        state = STATES.NORMAL;
                    }
                    if (character === '"') {
                        state = STATES.DATA_QUOTE;
                    }
                    result.push(charCode);
                    break;
                case STATES.DATA_QUOTE:
                    if (character === '"') {
                        state = STATES.DATA;
                    }
                    result.push(charCode);
                    break;
            }
        }

        this.lines.set(lineNumber, result);
    }

    /**
     * Tokenizes every line of the given program text. Lines are separated by
     * any run of CR and/or LF characters. May be called more than once; later
     * lines replace earlier lines with the same line number.
     *
     * @param program BASIC source text.
     */
    compile(program: string) {
        for (const line of program.split(/[\r\n]+/g)) {
            this.compileLine(line);
        }
    }

    /**
     * Returns the compiled program at the given start address.
     *
     * Each line is laid out as: address of the next line (2 bytes,
     * little-endian), line number (2 bytes, little-endian), the tokenized
     * line, and a terminating 0x00. The program ends with 0x00 0x00.
     *
     * @param programStart Address the program will be loaded at; used to
     *     compute the next-line links.
     */
    program(programStart: word = PROGRAM_START): Uint8Array {
        const result: byte[] = [];

        // Lines can be inserted out of order, but they should be in order
        // when tokenized. The comparator is required: the default sort
        // compares elements as strings, which would put 100 before 20.
        const lineNumbers = [...this.lines.keys()].sort((a, b) => a - b);

        for (const lineNo of lineNumbers) {
            const lineBytes = this.lines.get(lineNo) || [];
            const nextLineAddr = programStart + result.length + 4
                + lineBytes.length + 1; // +1 for the zero at end of line
            result.push(nextLineAddr & 0xff, nextLineAddr >> 8);
            result.push(lineNo & 0xff, lineNo >> 8);
            result.push(...lineBytes);
            result.push(0x00);
        }
        result.push(0x00, 0x00);

        return new Uint8Array(result);
    }
}