import { byte, KnownValues, Memory, word } from '../types';
import { STRING_TO_TOKEN } from './tokens';
import { TXTTAB, PRGEND, VARTAB, ARYTAB, STREND } from './zeropage';

/** Default address for program start */
const PROGRAM_START = 0x801;

/** Parse states. Starts in `NORMAL`. */
enum STATES {
    /**
     * Tries to tokenize the input. Transitions:
     *   * `"`: `STRING`
     *   * `REM`: `COMMENT`
     *   * `DATA`: `DATA`
     */
    NORMAL = 0,
    /**
     * Stores the input exactly. Transitions:
     *   * `"`: `NORMAL`
     */
    STRING = 1,
    /** Stores the input exactly up until the end of the line. No transitions. */
    COMMENT = 2,
    /**
     * Stores the input exactly. Transitions:
     *   * `:`: `NORMAL`
     *   * `"`: `DATA_QUOTE`
     */
    DATA = 3,
    /**
     * Stores the input exactly. Transitions:
     *   * `"`: `DATA`
     */
    DATA_QUOTE = 4,
}

/**
 * Writes a single byte to memory, splitting the address into the
 * page (high byte) and offset (low byte) that `Memory.write` takes.
 *
 * @param mem Memory to write to.
 * @param addr Address to write.
 * @param val Byte value to store.
 */
function writeByte(mem: Memory, addr: word, val: byte) {
    const page = addr >> 8;
    const off = addr & 0xff;

    return mem.write(page, off, val);
}

/**
 * Writes a 16-bit value to memory, low byte at `addr` and high byte
 * at `addr + 1`.
 *
 * @param mem Memory to write to.
 * @param addr Address of the low byte.
 * @param val 16-bit value to store.
 */
function writeWord(mem: Memory, addr: word, val: word) {
    const lsb = val & 0xff;
    const msb = val >> 8;

    writeByte(mem, addr, lsb);
    writeByte(mem, addr + 1, msb);
}

/**
 * A cursor over the characters of a single source line. It is its own
 * iterator, so `for...of` loops consume characters from the current
 * position, and it remembers the position before the most recent read
 * so that one read (or one matched token) can be undone with `backup()`.
 */
class LineBuffer implements IterableIterator<string> {
    /** Position to which `backup()` rewinds. */
    private prevChar: number = 0;

    constructor(private readonly line: string, private curChar: number = 0) { }

    [Symbol.iterator](): IterableIterator<string> {
        return this;
    }

    /** Returns a new buffer over the same line at the current position. */
    clone(): LineBuffer {
        return new LineBuffer(this.line, this.curChar);
    }

    /** Returns the next character and advances, or `done` at end of line. */
    next(): IteratorResult<string> {
        if (this.atEnd()) {
            return { done: true, value: undefined };
        }
        this.prevChar = this.curChar;
        return { done: false, value: this.line[this.curChar++] };
    }

    /**
     * Tries to match the input token at the current buffer location. If
     * the token matches, the current buffer location is advanced past
     * the token and this method returns `true`. Otherwise, this method
     * returns `false`.
     *
     * Spaces in the buffer are skipped while matching. The input is
     * assumed to be an all-uppercase string and the characters read from
     * the buffer are uppercased before the comparison.
     *
     * @param token An all-uppercase string to match.
     */
    lookingAtToken(token: string): boolean {
        const oldCurChar = this.curChar;
        const oldPrevChar = this.prevChar;
        let possibleToken = '';
        for (const char of this) {
            if (char === ' ') {
                continue;
            }
            possibleToken += char;
            if (possibleToken.length === token.length) {
                break;
            }
        }
        if (possibleToken.toUpperCase() === token) {
            // Matched; set prevChar to before the match.
            this.prevChar = oldCurChar;
            return true;
        }
        // No match; restore state.
        this.curChar = oldCurChar;
        this.prevChar = oldPrevChar;
        return false;
    }

    /** Rewinds to the position before the last read or matched token. */
    backup() {
        this.curChar = this.prevChar;
    }

    /**
     * Returns the character at the current position without advancing.
     *
     * @throws RangeError if the buffer is at the end of the line.
     */
    peek(): string {
        if (this.atEnd()) {
            throw new RangeError(`Reading past the end of ${this.line}`);
        }
        return this.line[this.curChar];
    }

    /** Returns `true` when every character has been consumed. */
    atEnd(): boolean {
        return this.curChar >= this.line.length;
    }
}

/**
 * Tokenizes Applesoft BASIC source text. Lines are collected by line
 * number with `compile()` and laid out as a program image by `program()`.
 */
export default class ApplesoftCompiler {
    /** Tokenized bytes of each line, keyed by line number. */
    private lines: Map<number, byte[]> = new Map();

    /**
     * Loads an Applesoft BASIC program into memory.
     *
     * @param mem Memory, including zero page, into which the program is
     *     loaded.
     * @param program A string with a BASIC program to compile (tokenize).
     * @param programStart Optional start address of the program. Defaults to
     *     standard Applesoft program address, 0x801.
     */
    static compileToMemory(mem: Memory, program: string, programStart: word = PROGRAM_START) {
        const compiler = new ApplesoftCompiler();
        compiler.compile(program);
        const compiledProgram: Uint8Array = compiler.program(programStart);

        for (let i = 0; i < compiledProgram.byteLength; i++) {
            writeByte(mem, programStart + i, compiledProgram[i]);
        }
        // Set zero page locations. Applesoft is weird because, when a line
        // is inserted, PRGEND is copied to VARTAB in the beginning, but then
        // VARTAB is manipulated to make space for the line, then PRGEND is
        // set from VARTAB. There's also a bug in NEW at D657 where the carry
        // flag is not cleared, so it can add 2 or 3. The upshot, though, is
        // that PRGEND and VARTAB end up being 1 or 2 bytes past the end of
        // the program. From my tests in the emulator, it's usually 1, so
        // that's what we're going with here.
        const prgend = programStart + compiledProgram.byteLength + 1;
        writeWord(mem, TXTTAB, programStart);
        writeWord(mem, PRGEND, prgend);
        writeWord(mem, VARTAB, prgend);
        writeWord(mem, ARYTAB, prgend);
        writeWord(mem, STREND, prgend);
    }

    /**
     * Reads the run of decimal digits at the current buffer position and
     * leaves the buffer positioned at the first non-digit character.
     *
     * @param lineBuffer Buffer positioned at the start of a line.
     * @returns The parsed line number.
     * @throws Error if the buffer does not start with a digit.
     */
    private readLineNumber(lineBuffer: LineBuffer): number {
        let lineNoStr = '';

        for (const character of lineBuffer) {
            if (/\d/.test(character)) {
                lineNoStr += character;
            } else {
                // Un-read the non-digit so the caller sees it.
                lineBuffer.backup();
                break;
            }
        }
        if (lineNoStr.length === 0) {
            throw new Error('Missing line number');
        }

        return parseInt(lineNoStr, 10);
    }

    /**
     * Consumes one token or one character from the buffer.
     *
     * @param lineBuffer Buffer positioned at the text to tokenize.
     * @returns The token byte if a keyword from `STRING_TO_TOKEN` matches
     *     at the current position; otherwise the character code of the
     *     next character, upper-cased (0 if the buffer is exhausted).
     */
    private readToken(lineBuffer: LineBuffer): byte {
        // Try to match a token
        for (const possibleToken in STRING_TO_TOKEN) {
            if (lineBuffer.lookingAtToken(possibleToken)) {
                // NOTE(flan): This special token-preference
                // logic is straight from the Applesoft BASIC
                // code (D5BE-D5CA in the Apple //e ROM).

                // Found a token
                if (possibleToken === 'AT' && !lineBuffer.atEnd()) {
                    const lookAhead = lineBuffer.peek();
                    // ATN takes precedence over AT
                    if (lookAhead === 'N') {
                        lineBuffer.next();
                        return STRING_TO_TOKEN['ATN'];
                    }
                    // TO takes precedence over AT
                    if (lookAhead === 'O') {
                        // Backup to before the token
                        lineBuffer.backup();
                        // and emit the 'A' (upper- or lower-case)
                        return lineBuffer.next().value?.charCodeAt(0) ?? 0;
                    }
                }
                return STRING_TO_TOKEN[possibleToken];
            }
        }

        // If not a token, output the character upper-cased
        return lineBuffer.next().value?.toUpperCase().charCodeAt(0) ?? 0;
    }

    /**
     * Tokenizes one source line and stores the result under its line
     * number, replacing any line previously stored with that number.
     * Empty, `null` and `undefined` lines are ignored.
     *
     * @param line A single line of BASIC source starting with its line
     *     number.
     * @throws Error if the line number is missing or greater than 65535.
     */
    private compileLine(line: string | null | undefined) {
        if (!line) {
            return;
        }

        const result: byte[] = [];
        const lineBuffer = new LineBuffer(line);
        let state: KnownValues<typeof STATES> = STATES.NORMAL;

        const lineNumber = this.readLineNumber(lineBuffer);
        if (lineNumber < 0 || lineNumber > 65535) {
            throw new Error('Line number out of range');
        }

        // Read the rest of the line
        for (const character of lineBuffer) {
            const charCode = character.charCodeAt(0);
            switch (state) {
                case STATES.NORMAL:
                    // Skip spaces
                    if (character === ' ') {
                        break;
                    }
                    // Transition to parsing a string
                    if (character === '"') {
                        result.push(charCode);
                        state = STATES.STRING;
                        break;
                    }
                    // Shorthand for PRINT (D580 in Apple //e ROM)
                    if (character === '?') {
                        result.push(STRING_TO_TOKEN['PRINT']);
                        break;
                    }
                    // Try to parse a token or character. The loop already
                    // consumed `character`, so un-read it first.
                    lineBuffer.backup();
                    {
                        const token = this.readToken(lineBuffer);
                        if (token === STRING_TO_TOKEN['REM']) {
                            state = STATES.COMMENT;
                        }
                        if (token === STRING_TO_TOKEN['DATA']) {
                            state = STATES.DATA;
                        }
                        result.push(token);
                    }
                    break;
                case STATES.COMMENT:
                    result.push(charCode);
                    break;
                case STATES.STRING:
                    if (character === '"') {
                        state = STATES.NORMAL;
                    }
                    result.push(charCode);
                    break;
                case STATES.DATA:
                    if (character === ':') {
                        state = STATES.NORMAL;
                    }
                    if (character === '"') {
                        state = STATES.DATA_QUOTE;
                    }
                    result.push(charCode);
                    break;
                case STATES.DATA_QUOTE:
                    if (character === '"') {
                        state = STATES.DATA;
                    }
                    result.push(charCode);
                    break;
            }
        }

        this.lines.set(lineNumber, result);
    }

    /**
     * Tokenizes every line of the given program text. Lines are separated
     * by any run of carriage returns and/or line feeds.
     *
     * @param program BASIC source text.
     */
    compile(program: string) {
        for (const line of program.split(/[\r\n]+/g)) {
            this.compileLine(line);
        }
    }

    /**
     * Returns the compiled program at the given start address.
     *
     * Each line is emitted as the address of the following line (two
     * bytes, low byte first), the line number (two bytes, low byte
     * first), the tokenized bytes and a terminating zero. The program
     * ends with two zero bytes.
     *
     * @param programStart Address at which the program will be loaded;
     *     used to compute the next-line addresses.
     */
    program(programStart: word = PROGRAM_START): Uint8Array {
        const result: byte[] = [];

        // Lines can be inserted out of order, but they should be in order
        // when tokenized. The comparator is required: the default sort
        // compares string forms, which would put 100 before 20.
        const lineNumbers = [...this.lines.keys()].sort((a, b) => a - b);

        for (const lineNo of lineNumbers) {
            const lineBytes = this.lines.get(lineNo) ?? [];
            const nextLineAddr = programStart + result.length + 4
                + lineBytes.length + 1; // +1 for the zero at end of line
            result.push(nextLineAddr & 0xff, nextLineAddr >> 8);
            result.push(lineNo & 0xff, lineNo >> 8);
            result.push(...lineBytes);
            result.push(0x00);
        }
        result.push(0x00, 0x00);

        return new Uint8Array(result);
    }
}