diff --git a/js/applesoft/compiler.ts b/js/applesoft/compiler.ts index 2fff0fc..63490d3 100644 --- a/js/applesoft/compiler.ts +++ b/js/applesoft/compiler.ts @@ -1,288 +1,321 @@ -import { byte, KnownKeys, KnownValues, Memory, word } from '../types'; +import { byte, KnownValues, Memory, word } from '../types'; +import { STRING_TO_TOKEN } from './tokens'; +import { TXTTAB, PRGEND, VARTAB, ARYTAB, STREND } from './zeropage'; -/** Map from keyword to token. */ -const TOKENS = { - 'END': 0x80, - 'FOR': 0x81, - 'NEXT': 0x82, - 'DATA': 0x83, - 'INPUT': 0x84, - 'DEL': 0x85, - 'DIM': 0x86, - 'READ': 0x87, - 'GR': 0x88, - 'TEXT': 0x89, - 'PR#': 0x8a, - 'IN#': 0x8b, - 'CALL': 0x8c, - 'PLOT': 0x8d, - 'HLIN': 0x8e, - 'VLIN': 0x8f, - 'HGR2': 0x90, - 'HGR': 0x91, - 'HCOLOR=': 0x92, - 'HPLOT': 0x93, - 'DRAW': 0x94, - 'XDRAW': 0x95, - 'HTAB': 0x96, - 'HOME': 0x97, - 'ROT=': 0x98, - 'SCALE=': 0x99, - 'SHLOAD': 0x9a, - 'TRACE': 0x9b, - 'NOTRACE': 0x9c, - 'NORMAL': 0x9d, - 'INVERSE': 0x9e, - 'FLASH': 0x9f, - 'COLOR=': 0xa0, - 'POP=': 0xa1, - 'VTAB': 0xa2, - 'HIMEM:': 0xa3, - 'LOMEM:': 0xa4, - 'ONERR': 0xa5, - 'RESUME': 0xa6, - 'RECALL': 0xa7, - 'STORE': 0xa8, - 'SPEED=': 0xa9, - 'LET': 0xaa, - 'GOTO': 0xab, - 'RUN': 0xac, - 'IF': 0xad, - 'RESTORE': 0xae, - '&': 0xaf, - 'GOSUB': 0xb0, - 'RETURN': 0xb1, - 'REM': 0xb2, - 'STOP': 0xb3, - 'ON': 0xb4, - 'WAIT': 0xb5, - 'LOAD': 0xb6, - 'SAVE': 0xb7, - 'DEF': 0xb8, - 'POKE': 0xb9, - 'PRINT': 0xba, - 'CONT': 0xbb, - 'LIST': 0xbc, - 'CLEAR': 0xbd, - 'GET': 0xbe, - 'NEW': 0xbf, - 'TAB(': 0xc0, - 'TO': 0xc1, - 'FN': 0xc2, - 'SPC(': 0xc3, - 'THEN': 0xc4, - 'AT': 0xc5, - 'NOT': 0xc6, - 'STEP': 0xc7, - '+': 0xc8, - '-': 0xc9, - '*': 0xca, - '/': 0xcb, - '^': 0xcc, - 'AND': 0xcd, - 'OR': 0xce, - '>': 0xcf, - '=': 0xd0, - '<': 0xd1, - 'SGN': 0xd2, - 'INT': 0xd3, - 'ABS': 0xd4, - 'USR': 0xd5, - 'FRE': 0xd6, - 'SCRN(': 0xd7, - 'PDL': 0xd8, - 'POS': 0xd9, - 'SQR': 0xda, - 'RND': 0xdb, - 'LOG': 0xdc, - 'EXP': 0xdd, - 'COS': 0xde, - 'SIN': 0xdf, - 'TAN': 
0xe0, - 'ATN': 0xe1, - 'PEEK': 0xe2, - 'LEN': 0xe3, - 'STR$': 0xe4, - 'VAL': 0xe5, - 'ASC': 0xe6, - 'CHR$': 0xe7, - 'LEFT$': 0xe8, - 'RIGHT$': 0xe9, - 'MID$': 0xea -} as const; - -const LOMEM = 0x69; -const ARRAY_START = 0x6B; -const ARRAY_END = 0x6D; +/** Default address for program start */ const PROGRAM_START = 0x801; -const STATES = { - NORMAL: 0, - STRING: 1, - COMMENT: 2, - DATA: 3 -} as const; +/** Parse states. Starts in `NORMAL`. */ +enum STATES { + /** + * Tries to tokenize the input. Transitions: + * * `"`: `STRING` + * * `REM`: `COMMENT` + * * `DATA`: `DATA` + */ + NORMAL = 0, + /** + * Stores the input exactly. Tranistions: + * * `"`: `NORMAL` + */ + STRING = 1, + /** Stores the input exactly up until the end of the line. No transitions. */ + COMMENT = 2, + /** + * Stores the input exactly. Transitions: + * * `:`: `NORMAL` + * * `"`: `DATA_QUOTE` + */ + DATA = 3, + /** + * Stores the input exactly. Transitions: + * * `"`: `DATA` + */ + DATA_QUOTE = 4, +} -export default class ApplesoftCompiler { - constructor(private mem: Memory) { } +function writeByte(mem: Memory, addr: word, val: byte) { + const page = addr >> 8; + const off = addr & 0xff; - private writeByte(addr: word, val: byte) { - const page = addr >> 8; - const off = addr & 0xff; + return mem.write(page, off, val); +} - return this.mem.write(page, off, val); +function writeWord(mem: Memory, addr: word, val: byte) { + const lsb = val & 0xff; + const msb = val >> 8; + + writeByte(mem, addr, lsb); + writeByte(mem, addr + 1, msb); +} + +class LineBuffer implements IterableIterator { + private prevChar: number = 0; + constructor(private readonly line: string, private curChar: number = 0) { } + + [Symbol.iterator](): IterableIterator { + return this; } - private writeWord(addr: word, val: byte) { - const lsb = val & 0xff; - const msb = val >> 8; + clone(): LineBuffer { + return new LineBuffer(this.line, this.curChar); + } - this.writeByte(addr, lsb); - this.writeByte(addr + 1, msb); + next(): 
IteratorResult { + if (this.atEnd()) { + return { done: true, value: undefined }; + } + this.prevChar = this.curChar; + return { done: false, value: this.line[this.curChar++] }; + } + + /** + * Tries to match the input token at the current buffer location. If + * the token matches, the current buffer location is advanced past + * the token and this method returns `true`. Otherwise, this method + * returns `false`. + * + * The input is assumed to be an all-uppercase string and the tokens + * in the buffer are uppercased before the comparison. + * + * @param token An all-uppercase string to match. + */ + lookingAtToken(token: string): boolean { + const oldCurChar = this.curChar; + const oldPrevChar = this.prevChar; + let possibleToken = ''; + for (const char of this) { + if (char === ' ') { + continue; + } + possibleToken += char; + if (possibleToken.length === token.length) { + break; + } + } + if (possibleToken.toUpperCase() === token) { + // Matched; set prevChar to before the match. + this.prevChar = oldCurChar; + return true; + } + // No match; restore state. + this.curChar = oldCurChar; + this.prevChar = oldPrevChar; + return false; + } + + backup() { + this.curChar = this.prevChar; + } + + peek(): string { + if (this.atEnd()) { + throw new RangeError(`Reading past the end of ${this.line}`); + } + return this.line[this.curChar]; + } + + atEnd(): boolean { + return this.curChar >= this.line.length; + } +} + +export default class ApplesoftCompiler { + private lines: Map = new Map(); + + constructor() { } + + /** + * Loads an AppleSoft BASIC program into memory. + * + * @param mem Memory, including zero page, into which the program is + * loaded. + * @param program A string with a BASIC program to compile (tokenize). + * @param programStart Optional start address of the program. Defaults to + * standard AppleSoft program address, 0x801. 
+ */ + static compileToMemory(mem: Memory, program: string, programStart: word = PROGRAM_START) { + const compiler = new ApplesoftCompiler(); + compiler.compile(program); + const compiledProgram: Uint8Array = compiler.program(programStart); + + for (let i = 0; i < compiledProgram.byteLength; i++) { + writeByte(mem, programStart + i, compiledProgram[i]); + } + // Set zero page locations. Applesoft is weird because, when a line + // is inserted, PRGEND is copied to VARTAB in the beginning, but then + // VARTAB is manipulated to make space for the line, then PRGEND is + // set from VARTAB. There's also a bug in NEW at D657 where the carry + // flag is not cleared, so it can add 2 or 3. The upshot, though, is + // that PRGEND and VARTAB end up being 1 or 2 bytes past the end of + // the program. From my tests in the emulator, it's usually 1, so + // that's what we're going with here. + const prgend = programStart + compiledProgram.byteLength + 1; + writeWord(mem, TXTTAB, programStart); + writeWord(mem, PRGEND, prgend); + writeWord(mem, VARTAB, prgend); + writeWord(mem, ARYTAB, prgend); + writeWord(mem, STREND, prgend); + } + + private readLineNumber(lineBuffer: LineBuffer): number { + let lineNoStr = ''; + + for (const character of lineBuffer) { + if (/\d/.test(character)) { + lineNoStr += character; + } else { + lineBuffer.backup(); + break; + } + } + if (lineNoStr.length === 0) { + throw new Error('Missing line number'); + } + + return parseInt(lineNoStr, 10); + } + + private readToken(lineBuffer: LineBuffer): byte { + // Try to match a token + for (const possibleToken in STRING_TO_TOKEN) { + if (lineBuffer.lookingAtToken(possibleToken)) { + // NOTE(flan): This special token-preference + // logic is straight from the AppleSoft BASIC + // code (D5BE-D5CA in the Apple //e ROM). 
+ + // Found a token + if (possibleToken === 'AT' && !lineBuffer.atEnd()) { + const lookAhead = lineBuffer.peek(); + // ATN takes precedence over AT + if (lookAhead === 'N') { + lineBuffer.next(); + return STRING_TO_TOKEN['ATN']; + } + // TO takes precedence over AT + if (lookAhead === 'O') { + // Backup to before the token + lineBuffer.backup(); + // and emit the 'A' (upper- or lower-case) + return lineBuffer.next().value.charCodeAt(0); + } + } + return STRING_TO_TOKEN[possibleToken]; + } + } + + // If not a token, output the character upper-cased + return lineBuffer.next().value.toUpperCase().charCodeAt(0); + } + + private compileLine(line: string | null | undefined) { + const result: byte[] = []; + if (!line) { + return; + } + + const lineBuffer = new LineBuffer(line); + let state: KnownValues = STATES.NORMAL; + + const lineNumber = this.readLineNumber(lineBuffer); + if (lineNumber < 0 || lineNumber > 65535) { + throw new Error('Line number out of range'); + } + + // Read the rest of the line + for (const character of lineBuffer) { + const charCode = character.charCodeAt(0); + switch (state) { + case STATES.NORMAL: + // Skip spaces + if (character === ' ') { + break; + } + + // Transition to parsing a string + if (character === '"') { + result.push(charCode); + state = STATES.STRING; + break; + } + + // Shorthand for PRINT (D580 in Apple //e ROM) + if (character === '?') { + result.push(STRING_TO_TOKEN['PRINT']); + break; + } + + // Try to parse a token or character + lineBuffer.backup(); + { + const token = this.readToken(lineBuffer); + if (token === STRING_TO_TOKEN['REM']) { + state = STATES.COMMENT; + } + if (token === STRING_TO_TOKEN['DATA']) { + state = STATES.DATA; + } + result.push(token); + } + break; + case STATES.COMMENT: + result.push(character.charCodeAt(0)); + break; + case STATES.STRING: + if (character === '"') { + state = STATES.NORMAL; + } + result.push(character.charCodeAt(0)); + break; + case STATES.DATA: + if (character === ':') { + state = 
STATES.NORMAL; + } + if (character === '"') { + state = STATES.DATA_QUOTE; + } + result.push(character.charCodeAt(0)); + break; + case STATES.DATA_QUOTE: + if (character === '"') { + state = STATES.DATA; + } + result.push(character.charCodeAt(0)); + break; + } + } + + this.lines.set(lineNumber, result); } compile(program: string) { - const lineNos: { [line: string]: byte[]} = {}; - - function compileLine(line: string | null | undefined, offset: number) { - if (!line) { - return []; - } - - let state: KnownValues = STATES.NORMAL; - const result = [0, 0, 0, 0]; - let curChar = 0; - let character; - let lineNoStr = ''; - - while (line.length) { - character = line.charAt(curChar); - if (/\d/.test(character)) { - lineNoStr += character; - curChar++; - } else { - break; - } - } - - while (curChar < line.length) { - character = line.charAt(curChar).toUpperCase(); - switch (state) { - case STATES.NORMAL: - if (character !== ' ') { - if (character === '"') { - result.push(character.charCodeAt(0)); - state = STATES.STRING; - curChar++; - } else { - let foundToken = ''; - let tokenIdx = -1; - for (const possibleToken in TOKENS) { - if (possibleToken.charAt(0) === character) { - tokenIdx = curChar + 1; - let idx = 1; - while (idx < possibleToken.length) { - if (line.charAt(tokenIdx) !== ' ') { - if (line.charAt(tokenIdx).toUpperCase() !== possibleToken.charAt(idx)) { - break; - } - idx++; - } - tokenIdx++; - } - if (idx === possibleToken.length) { - // Found a token - if (possibleToken === 'AT') { - const lookAhead = line.charAt(tokenIdx + 1).toUpperCase(); - // ATN takes precedence over AT - if (lookAhead === 'N') { - foundToken = 'ATN'; - tokenIdx++; - } - // TO takes precedence over AT - if (lookAhead === 'O') { - result.push(lookAhead.charCodeAt(0)); - foundToken = 'TO'; - tokenIdx++; - } - } - foundToken = possibleToken; - } - } - if (foundToken) { - break; - } - } - if (foundToken) { - result.push(TOKENS[foundToken as KnownKeys]); - curChar = tokenIdx; - if (foundToken 
=== 'REM') { - state = STATES.COMMENT; - } - } else { - result.push(character.charCodeAt(0)); - curChar++; - } - } - } else { - curChar++; - } - break; - case STATES.COMMENT: - result.push(character.charCodeAt(0)); - curChar++; - break; - case STATES.STRING: - result.push(character.charCodeAt(0)); - if (character === '"') { - state = STATES.NORMAL; - } - curChar++; - break; - } - } - - if (lineNoStr.length) { - const lineNo = parseInt(lineNoStr, 10); - if (lineNo < 0 || lineNo > 65535) { - throw new Error('Line number out of range'); - } - if (lineNos[lineNoStr]) { - throw new Error('Duplicate line number'); - } - lineNos[lineNoStr] = result; - - // Next line pointer - result.push(0); - const nextLine = offset + result.length; - result[0] = nextLine & 0xff; - result[1] = nextLine >> 8; - - // Line number - result[2] = lineNo & 0xff; - result[3] = lineNo >> 8; - } else { - throw new Error('Missing line number'); - } - - return result; - } - - let compiled: number[] = []; const lines = program.split(/[\r\n]+/g); while (lines.length) { const line = lines.shift(); - const compiledLine = compileLine(line, PROGRAM_START + compiled.length); - compiled = compiled.concat(compiledLine); + this.compileLine(line); } - compiled.push(0, 0); + } - for (let idx = 0; idx < compiled.length; idx++) { - this.writeByte(PROGRAM_START + idx, compiled[idx]); + /** Returns the compiled program at the given start address. */ + program(programStart: word = PROGRAM_START): Uint8Array { + const result: byte[] = []; + + // Lines can be inserted out of order, but they should be in order + // when tokenized. 
+ const lineNumbers = [...this.lines.keys()].sort(); + + for (const lineNo of lineNumbers) { + const lineBytes = this.lines.get(lineNo)!; + const nextLineAddr = programStart + result.length + 4 + + lineBytes.length + 1; // +1 for the zero at end of line + result.push(nextLineAddr & 0xff, nextLineAddr >> 8); + result.push(lineNo & 0xff, lineNo >> 8); + result.push(...lineBytes); + result.push(0x00); } - this.writeWord(LOMEM, PROGRAM_START + compiled.length); - this.writeWord(ARRAY_START, PROGRAM_START + compiled.length); - this.writeWord(ARRAY_END, PROGRAM_START + compiled.length); + result.push(0x00, 0x00); + + return new Uint8Array(result); } } diff --git a/js/applesoft/decompiler.ts b/js/applesoft/decompiler.ts index d06d5e5..a1124c2 100644 --- a/js/applesoft/decompiler.ts +++ b/js/applesoft/decompiler.ts @@ -1,4 +1,6 @@ -import { byte, KnownKeys, Memory, word } from '../types'; +import { byte, word, ReadonlyUint8Array, Memory } from '../types'; +import { TOKEN_TO_STRING, STRING_TO_TOKEN } from './tokens'; +import { TXTTAB, PRGEND } from './zeropage'; const LETTERS = ' ' + @@ -6,166 +8,286 @@ const LETTERS = '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_' + '`abcdefghijklmnopqrstuvwxyz{|}~ '; -const TOKENS = { - 0x80: 'END', - 0x81: 'FOR', - 0x82: 'NEXT', - 0x83: 'DATA', - 0x84: 'INPUT', - 0x85: 'DEL', - 0x86: 'DIM', - 0x87: 'READ', - 0x88: 'GR', - 0x89: 'TEXT', - 0x8a: 'PR#', - 0x8b: 'IN#', - 0x8c: 'CALL', - 0x8d: 'PLOT', - 0x8e: 'HLIN', - 0x8f: 'VLIN', - 0x90: 'HGR2', - 0x91: 'HGR', - 0x92: 'HCOLOR=', - 0x93: 'HPLOT', - 0x94: 'DRAW', - 0x95: 'XDRAW', - 0x96: 'HTAB', - 0x97: 'HOME', - 0x98: 'ROT=', - 0x99: 'SCALE=', - 0x9a: 'SHLOAD', - 0x9b: 'TRACE', - 0x9c: 'NOTRACE', - 0x9d: 'NORMAL', - 0x9e: 'INVERSE', - 0x9f: 'FLASH', - 0xa0: 'COLOR=', - 0xa1: 'POP=', - 0xa2: 'VTAB', - 0xa3: 'HIMEM:', - 0xa4: 'LOMEM:', - 0xa5: 'ONERR', - 0xa6: 'RESUME', - 0xa7: 'RECALL', - 0xa8: 'STORE', - 0xa9: 'SPEED=', - 0xaa: 'LET', - 0xab: 'GOTO', - 0xac: 'RUN', - 0xad: 'IF', - 0xae: 'RESTORE', - 
0xaf: '&', - 0xb0: 'GOSUB', - 0xb1: 'RETURN', - 0xb2: 'REM', - 0xb3: 'STOP', - 0xb4: 'ON', - 0xb5: 'WAIT', - 0xb6: 'LOAD', - 0xb7: 'SAVE', - 0xb8: 'DEF', - 0xb9: 'POKE', - 0xba: 'PRINT', - 0xbb: 'CONT', - 0xbc: 'LIST', - 0xbd: 'CLEAR', - 0xbe: 'GET', - 0xbf: 'NEW', - 0xc0: 'TAB(', - 0xc1: 'TO', - 0xc2: 'FN', - 0xc3: 'SPC(', - 0xc4: 'THEN', - 0xc5: 'AT', - 0xc6: 'NOT', - 0xc7: 'STEP', - 0xc8: '+', - 0xc9: '-', - 0xca: '*', - 0xcb: '/', - 0xcc: '^', - 0xcd: 'AND', - 0xce: 'OR', - 0xcf: '>', - 0xd0: '=', - 0xd1: '<', - 0xd2: 'SGN', - 0xd3: 'INT', - 0xd4: 'ABS', - 0xd5: 'USR', - 0xd6: 'FRE', - 0xd7: 'SCRN(', - 0xd8: 'PDL', - 0xd9: 'POS', - 0xda: 'SQR', - 0xdb: 'RND', - 0xdc: 'LOG', - 0xdd: 'EXP', - 0xde: 'COS', - 0xdf: 'SIN', - 0xe0: 'TAN', - 0xe1: 'ATN', - 0xe2: 'PEEK', - 0xe3: 'LEN', - 0xe4: 'STR$', - 0xe5: 'VAL', - 0xe6: 'ASC', - 0xe7: 'CHR$', - 0xe8: 'LEFT$', - 0xe9: 'RIGHT$', - 0xea: 'MID$' -} as const; - -export default class ApplesoftDump { - constructor(private mem: Memory) { } - - private readByte(addr: word): byte { - const page = addr >> 8; - const off = addr & 0xff; - - return this.mem.read(page, off); - } - - private readWord(addr: word): word { - const lsb = this.readByte(addr); - const msb = this.readByte(addr + 1); - - return (msb << 8) | lsb; - } - - toString() { - let str = ''; - const start = this.readWord(0x67); // Start - const end = this.readWord(0xaf); // End of program - let addr = start; - do { - let line = ''; - const next = this.readWord(addr); - addr += 2; - const lineno = this.readWord(addr); - addr += 2; - - line += lineno; - line += ' '; - let val = 0; - do { - if (addr < start || addr > end) - return str; - - val = this.readByte(addr++); - if (val >= 0x80) { - line += ' '; - line += TOKENS[val as KnownKeys]; - line += ' '; - } - else - line += LETTERS[val]; - } while (val); - line += '\n'; - str += line; - addr = next; - } while (addr && addr >= start && addr < end); - - return str; - } +interface ListOptions { + apple2: 'e' | 'plus'; + 
columns: number; // usually 40 or 80 } + +const DEFAULT_LIST_OPTIONS: ListOptions = { + apple2: 'e', + columns: 40, +}; + +interface DecompileOptions { + style: 'compact' | 'pretty'; +} + +const DEFAULT_DECOMPILE_OPTIONS: DecompileOptions = { + style: 'pretty', +}; + +export default class ApplesoftDecompiler { + + /** + * Returns a decompiler for the program in the given memory. + * + * The memory is assumed to have set `TXTTAB` and `PRGEND` correctly. + */ + static decompilerFromMemory(ram: Memory): ApplesoftDecompiler { + const program: byte[] = []; + + const start = ram.read(0x00, TXTTAB) + (ram.read(0x00, TXTTAB + 1) << 8); + const end = ram.read(0x00, PRGEND) + (ram.read(0x00, PRGEND + 1) << 8); + for (let addr = start; addr <= end; addr++) { + program.push(ram.read(addr >> 8, addr & 0xff)); + } + + return new ApplesoftDecompiler(new Uint8Array(program), start); + } + + /** + * Constructs a decompiler for the given program data. The data is + * assumed to be a dump of memory beginning at `base`. If the data + * does not cover the whole program, attempting to decompile will + * fail. + * + * @param program The program bytes. + * @param base + */ + constructor(private readonly program: ReadonlyUint8Array, + private readonly base: word = 0x801) { + } + + /** Returns the 2-byte word at the given offset. */ + private wordAt(offset: word): word { + return this.program[offset] + (this.program[offset + 1] << 8); + } + + /** + * Iterates through the lines of the given program in the order of + * the linked list of lines, starting from the first line. This + * does _not_ mean that all lines in memory will + * + * @param from First line for which to call the callback. + * @param to Last line for which to call the callback. + * @param callback A function to call for each line. The first parameter + * is the offset of the line number of the line; the tokens follow. 
+ */ + private forEachLine(from: number, to: number, + callback: (offset: word) => void): void { + + let offset = 0; + let nextLineAddr = this.wordAt(offset); + let nextLineNo = this.wordAt(offset + 2); + while (nextLineAddr != 0 && nextLineNo < from) { + offset = nextLineAddr; + nextLineAddr = this.wordAt(offset); + nextLineNo = this.wordAt(offset + 2); + } + while (nextLineAddr != 0 && nextLineNo <= to) { + callback(offset + 2); + offset = nextLineAddr - this.base; + nextLineAddr = this.wordAt(offset); + nextLineNo = this.wordAt(offset + 2); + } + } + + /** Lists a single line like an Apple II. */ + listLine(offset: word, options: ListOptions): string { + const lines: string[] = []; + let line = ''; + + const lineNo = this.wordAt(offset); + // The Apple //e prints a space before each line number to make + // it easier to edit the lines. The change is at the subroutine + // called at D6F9: on the //e it is SPCLIN (F7AA), on the ][+ it + // is LINPRT (ED24). + if (options.apple2 === 'e') { + line += ' '; // D6F9: JSR SPCLIN + } + line += lineNo + ' '; // D6FC, always 1 space after line number + offset += 2; + + // In the original ROM, the line length is checked immediately + // after the line number is printed. For simplicity, this method + // always assumes that there is space for one token—which would + // have been the case on a real Apple. + while (this.program[offset] != 0) { + const token = this.program[offset]; + if (token >= 0x80 && token <= 0xea) { + line += ' '; // D750, always put a space in front of token + line += TOKEN_TO_STRING[token]; + line += ' '; // D762, always put a trailing space + } else { + line += LETTERS[token]; + } + offset++; + + // The Apple //e and ][+ differ in how they choose to break + // long lines. In the ][+, D705 prints a newline if the + // current column (MON_CH, $24) is greater than or equal to + // 33. 
In the //e, control is passed to GETCH (F7B4), which + // uses column 33 in 40-column mode and column 73 in 80-column + // mode. + // + // The ][+ behaves more like a //e when there is an 80-column + // card active (programs wrap at column 73). From what I can + // tell (using Virtual ]['s inspector), the 80-column card + // keeps MON_CH at zero until the actual column is >= 71, when + // it sets it to the actual cursor position - 40. In the + // Videx Videoterm ROM, this fixup happens in BASOUT (CBCD) at + // CBE6 by getting the 80-column horizontal cursor position and + // subtracting 0x47 (71). If the result is less than zero, then + // 0x00 is stored in MON_CH, otherwise 31 is added back and the + // result is stored in MON_CH. (The manual is archived at + // http://www.apple-iigs.info/doc/fichiers/videoterm.pdf, among + // other places.) + // + // For our purposes, we're just going to use the number of + // columns - 7. + if (line.length >= options.columns - 7) { + line += '\n'; + lines.push(line); + line = ' '; + } + } + lines.push(line + '\n'); + return lines.join(''); + } + + /** + * Lists the program in the same format that an Apple II prints to the + * screen. + * + * This method also accepts a starting and ending line number. Like on + * an Apple II, this will print all of the lines between `from` and `to` + * (inclusive) regardless of the actual line numbers between them. + * + * To list a single line, pass the same number for both `from` and `to`. + * + * @param options The options for formatting the output. + * @param from The first line to print (default 0). + * @param to The last line to print (default end of program). 
+ */ + list(options: Partial = {}, + from: number = 0, to: number = 65536): string { + const allOptions = { ...DEFAULT_LIST_OPTIONS, ...options }; + + let result = ''; + this.forEachLine(from, to, offset => { + result += this.listLine(offset, allOptions); + }); + return result; + } + + /** + * Returns a single line for the given compiled line in as little + * space as possible. + */ + compactLine(offset: word): string { + let result = ''; + let spaceIf: (nextToken: string) => boolean = () => false; + + const lineNo = this.wordAt(offset); + result += lineNo; + spaceIf = (nextToken: string) => /^\d/.test(nextToken); + offset += 2; + + while (this.program[offset] != 0) { + const token = this.program[offset]; + let tokenString: string; + if (token >= 0x80 && token <= 0xea) { + tokenString = TOKEN_TO_STRING[token]; + if (tokenString === 'PRINT') { + tokenString = '?'; + } + } else { + tokenString = LETTERS[token]; + } + + if (spaceIf(tokenString)) { + result += ' '; + } + + result += tokenString; + + spaceIf = () => false; + if (token === STRING_TO_TOKEN['AT']) { + spaceIf = (nextToken) => nextToken.toUpperCase().startsWith('N'); + } + + offset++; + } + return result; + } + + /** + * Returns a single line for the compiled line, but with even spacing: + * * space after line number (not before) + * * space before and after colons (`:`) + * * space around equality and assignment operators (`=`, `<=`, etc.) 
+ * * space after tokens, unless it looks like a function call + * * space after commas, but not before + */ + prettyLine(offset: word): string { + let result = ''; + let inString = false; + let spaceIf: (char: byte) => boolean = () => false; + + const lineNo = this.wordAt(offset); + result += lineNo + ' '; + offset += 2; + + while (this.program[offset] != 0) { + const token = this.program[offset]; + let tokenString: string; + if (token >= 0x80 && token <= 0xea) { + tokenString = TOKEN_TO_STRING[token]; + } else { + tokenString = LETTERS[token]; + } + if (tokenString === '"') { + inString = !inString; + } + + if (spaceIf(token) || (!inString && tokenString === ':')) { + result += ' '; + } + + result += tokenString; + + if (!inString && tokenString === ':') { + spaceIf = () => true; + } else if (!inString && tokenString === ',') { + spaceIf = () => true; + } else if (token >= 0xcf && token <= 0xd1) { + // For '<', '=', '>', don't add a space between them. + spaceIf = (token: byte) => token < 0xcf || token > 0xd1; + } else if (token > 0x80 && token < 0xea) { + // By default, if a token is followed by an open paren, don't + // add a space. + spaceIf = (token: byte) => token !== 0x28; + } else { + // By default, if a literal is followed by a token, add a space. + spaceIf = (token: byte) => token >= 0x80 && token <= 0xea; + } + + offset++; + } + return result; + } + + /** + * Decompiles the program based on the given options. + */ + decompile(options: Partial = {}, + from: number = 0, to: number = 65536): string { + const allOptions = { ...DEFAULT_DECOMPILE_OPTIONS, ...options }; + + const results: string[] = []; + this.forEachLine(from, to, offset => { + results.push(allOptions.style === 'compact' ? 
this.compactLine(offset) : this.prettyLine(offset)); + }); + return results.join('\n'); + } +} \ No newline at end of file diff --git a/js/applesoft/tokens.ts b/js/applesoft/tokens.ts new file mode 100644 index 0000000..03115cf --- /dev/null +++ b/js/applesoft/tokens.ts @@ -0,0 +1,223 @@ +import { byte } from 'js/types'; + +/** Map from token to keyword */ +export const TOKEN_TO_STRING: Record = { + 0x80: 'END', + 0x81: 'FOR', + 0x82: 'NEXT', + 0x83: 'DATA', + 0x84: 'INPUT', + 0x85: 'DEL', + 0x86: 'DIM', + 0x87: 'READ', + 0x88: 'GR', + 0x89: 'TEXT', + 0x8a: 'PR#', + 0x8b: 'IN#', + 0x8c: 'CALL', + 0x8d: 'PLOT', + 0x8e: 'HLIN', + 0x8f: 'VLIN', + 0x90: 'HGR2', + 0x91: 'HGR', + 0x92: 'HCOLOR=', + 0x93: 'HPLOT', + 0x94: 'DRAW', + 0x95: 'XDRAW', + 0x96: 'HTAB', + 0x97: 'HOME', + 0x98: 'ROT=', + 0x99: 'SCALE=', + 0x9a: 'SHLOAD', + 0x9b: 'TRACE', + 0x9c: 'NOTRACE', + 0x9d: 'NORMAL', + 0x9e: 'INVERSE', + 0x9f: 'FLASH', + 0xa0: 'COLOR=', + 0xa1: 'POP=', + 0xa2: 'VTAB', + 0xa3: 'HIMEM:', + 0xa4: 'LOMEM:', + 0xa5: 'ONERR', + 0xa6: 'RESUME', + 0xa7: 'RECALL', + 0xa8: 'STORE', + 0xa9: 'SPEED=', + 0xaa: 'LET', + 0xab: 'GOTO', + 0xac: 'RUN', + 0xad: 'IF', + 0xae: 'RESTORE', + 0xaf: '&', + 0xb0: 'GOSUB', + 0xb1: 'RETURN', + 0xb2: 'REM', + 0xb3: 'STOP', + 0xb4: 'ON', + 0xb5: 'WAIT', + 0xb6: 'LOAD', + 0xb7: 'SAVE', + 0xb8: 'DEF', + 0xb9: 'POKE', + 0xba: 'PRINT', + 0xbb: 'CONT', + 0xbc: 'LIST', + 0xbd: 'CLEAR', + 0xbe: 'GET', + 0xbf: 'NEW', + 0xc0: 'TAB(', + 0xc1: 'TO', + 0xc2: 'FN', + 0xc3: 'SPC(', + 0xc4: 'THEN', + 0xc5: 'AT', + 0xc6: 'NOT', + 0xc7: 'STEP', + 0xc8: '+', + 0xc9: '-', + 0xca: '*', + 0xcb: '/', + 0xcc: '^', + 0xcd: 'AND', + 0xce: 'OR', + 0xcf: '>', + 0xd0: '=', + 0xd1: '<', + 0xd2: 'SGN', + 0xd3: 'INT', + 0xd4: 'ABS', + 0xd5: 'USR', + 0xd6: 'FRE', + 0xd7: 'SCRN(', + 0xd8: 'PDL', + 0xd9: 'POS', + 0xda: 'SQR', + 0xdb: 'RND', + 0xdc: 'LOG', + 0xdd: 'EXP', + 0xde: 'COS', + 0xdf: 'SIN', + 0xe0: 'TAN', + 0xe1: 'ATN', + 0xe2: 'PEEK', + 0xe3: 'LEN', + 0xe4: 'STR$', + 0xe5: 
'VAL', + 0xe6: 'ASC', + 0xe7: 'CHR$', + 0xe8: 'LEFT$', + 0xe9: 'RIGHT$', + 0xea: 'MID$' +}; + +/** Map from keyword to token. */ +export const STRING_TO_TOKEN: Record = { + 'END': 0x80, + 'FOR': 0x81, + 'NEXT': 0x82, + 'DATA': 0x83, + 'INPUT': 0x84, + 'DEL': 0x85, + 'DIM': 0x86, + 'READ': 0x87, + 'GR': 0x88, + 'TEXT': 0x89, + 'PR#': 0x8a, + 'IN#': 0x8b, + 'CALL': 0x8c, + 'PLOT': 0x8d, + 'HLIN': 0x8e, + 'VLIN': 0x8f, + 'HGR2': 0x90, + 'HGR': 0x91, + 'HCOLOR=': 0x92, + 'HPLOT': 0x93, + 'DRAW': 0x94, + 'XDRAW': 0x95, + 'HTAB': 0x96, + 'HOME': 0x97, + 'ROT=': 0x98, + 'SCALE=': 0x99, + 'SHLOAD': 0x9a, + 'TRACE': 0x9b, + 'NOTRACE': 0x9c, + 'NORMAL': 0x9d, + 'INVERSE': 0x9e, + 'FLASH': 0x9f, + 'COLOR=': 0xa0, + 'POP=': 0xa1, + 'VTAB': 0xa2, + 'HIMEM:': 0xa3, + 'LOMEM:': 0xa4, + 'ONERR': 0xa5, + 'RESUME': 0xa6, + 'RECALL': 0xa7, + 'STORE': 0xa8, + 'SPEED=': 0xa9, + 'LET': 0xaa, + 'GOTO': 0xab, + 'RUN': 0xac, + 'IF': 0xad, + 'RESTORE': 0xae, + '&': 0xaf, + 'GOSUB': 0xb0, + 'RETURN': 0xb1, + 'REM': 0xb2, + 'STOP': 0xb3, + 'ON': 0xb4, + 'WAIT': 0xb5, + 'LOAD': 0xb6, + 'SAVE': 0xb7, + 'DEF': 0xb8, + 'POKE': 0xb9, + 'PRINT': 0xba, + 'CONT': 0xbb, + 'LIST': 0xbc, + 'CLEAR': 0xbd, + 'GET': 0xbe, + 'NEW': 0xbf, + 'TAB(': 0xc0, + 'TO': 0xc1, + 'FN': 0xc2, + 'SPC(': 0xc3, + 'THEN': 0xc4, + 'AT': 0xc5, + 'NOT': 0xc6, + 'STEP': 0xc7, + '+': 0xc8, + '-': 0xc9, + '*': 0xca, + '/': 0xcb, + '^': 0xcc, + 'AND': 0xcd, + 'OR': 0xce, + '>': 0xcf, + '=': 0xd0, + '<': 0xd1, + 'SGN': 0xd2, + 'INT': 0xd3, + 'ABS': 0xd4, + 'USR': 0xd5, + 'FRE': 0xd6, + 'SCRN(': 0xd7, + 'PDL': 0xd8, + 'POS': 0xd9, + 'SQR': 0xda, + 'RND': 0xdb, + 'LOG': 0xdc, + 'EXP': 0xdd, + 'COS': 0xde, + 'SIN': 0xdf, + 'TAN': 0xe0, + 'ATN': 0xe1, + 'PEEK': 0xe2, + 'LEN': 0xe3, + 'STR$': 0xe4, + 'VAL': 0xe5, + 'ASC': 0xe6, + 'CHR$': 0xe7, + 'LEFT$': 0xe8, + 'RIGHT$': 0xe9, + 'MID$': 0xea +}; diff --git a/js/applesoft/zeropage.ts b/js/applesoft/zeropage.ts new file mode 100644 index 0000000..e347bb4 --- /dev/null +++ 
b/js/applesoft/zeropage.ts @@ -0,0 +1,21 @@ +/* + * Zero page locations used by Applesoft. The names come from + * the commented decompilation produced by the Merlin Pro + * assembler, revision 4/27/84. There is evidence from + * https://www.pagetable.com/?p=774 that the original Microsoft + * BASIC source code used these names as well. + */ + +/** Start of program (word) */ +export const TXTTAB = 0x67; +/** Start of variables (word) */ +export const VARTAB = 0x69; +/** Start of arrays (word) */ +export const ARYTAB = 0x6B; +/** End of strings (word). (Strings are allocated down from HIMEM.) */ +export const STREND = 0x6D; +/** + * End of program (word). This is actually 1 or 2 bytes past the three + * zero bytes that end the program. + */ +export const PRGEND = 0xAF; diff --git a/js/ui/apple2.ts b/js/ui/apple2.ts index e278b2d..431bb4f 100644 --- a/js/ui/apple2.ts +++ b/js/ui/apple2.ts @@ -20,7 +20,7 @@ import { initGamepad } from './gamepad'; import KeyBoard from './keyboard'; import Tape, { TAPE_TYPES } from './tape'; -import ApplesoftDump from '../applesoft/decompiler'; +import ApplesoftDecompiler from '../applesoft/decompiler'; import ApplesoftCompiler from '../applesoft/compiler'; import { debug } from '../util'; @@ -90,14 +90,17 @@ let ready: Promise<[void, void]>; export const driveLights = new DriveLights(); +/** Start of program (word) */ +const TXTTAB = 0x67; + export function dumpAppleSoftProgram() { - const dumper = new ApplesoftDump(cpu); - debug(dumper.toString()); + const decompiler = ApplesoftDecompiler.decompilerFromMemory(cpu); + debug(decompiler.list({apple2: _e ? 
'e' : 'plus'})); } export function compileAppleSoftProgram(program: string) { - const compiler = new ApplesoftCompiler(cpu); - compiler.compile(program); + const start = cpu.read(TXTTAB) + (cpu.read(TXTTAB + 1) << 8); + ApplesoftCompiler.compileToMemory(cpu, program, start); dumpAppleSoftProgram(); } diff --git a/test/js/applesoft/compiler.spec.ts b/test/js/applesoft/compiler.spec.ts new file mode 100644 index 0000000..cae444c --- /dev/null +++ b/test/js/applesoft/compiler.spec.ts @@ -0,0 +1,431 @@ +import ApplesoftCompiler from 'js/applesoft/compiler'; +import RAM from 'js/ram'; +import { Memory } from 'js/types'; + +// Zero page locations used by Applesoft. The names come from +// the commented decompilation produced by the Merlin Pro +// assembler, revision 4/27/84. There is evidence from +// https://www.pagetable.com/?p=774 that the original Microsoft +// BASIC source code used these names as well. +const TXTTAB = 0x67; // start of program, word +const VARTAB = 0x69; // start of variables, word +const ARYTAB = 0x6B; // start of arrays, word +const STREND = 0x6D; // end of strings, word +const PRGEND = 0xAF; // end of program, word + +function compileToMemory(ram: Memory, program: string) { + ApplesoftCompiler.compileToMemory(ram, program); +} + +// Manual decompilation based on "Applesoft Internal Structure" +// by C.K. Mesztenyi/Washington Apple Pi, from Call—A.P.P.L.E., +// January, 1982. Archived at: +// https://archive.org/details/DTCA2DOC-045_applesoft_internal +// Decompilation verified on the emulator by typing in the +// program, then: +// ]CALL -151 +// *800.820 +// and comparing the resulting bytes (starting at 801). 
+ +describe('ApplesoftCompiler', () => { + it('compiles a one-line hello world', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 PRINT "HELLO, WORLD!"'); + expect(compiler.program()).toEqual(new Uint8Array([ + 0x16, 0x08, 0x0a, 0x00, 0xba, 0x22, 0x48, 0x45, + 0x4c, 0x4c, 0x4f, 0x2c, 0x20, 0x57, 0x4f, 0x52, + 0x4c, 0x44, 0x21, 0x22, 0x00, 0x00, 0x00 + ])); + }); + + it('compiles a one-line hello world into memory', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 PRINT "HELLO, WORLD!"'); + expect(ram.read(0x08, 0x01)).toBe(0x16); // pointer to next line low + expect(ram.read(0x08, 0x02)).toBe(0x08); // pointer to next line high + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0xba); // PRINT + expect(ram.read(0x08, 0x06)).toBe(0x22); // " + expect(ram.read(0x08, 0x07)).toBe(0x48); // H + expect(ram.read(0x08, 0x08)).toBe(0x45); // E + expect(ram.read(0x08, 0x09)).toBe(0x4C); // L + expect(ram.read(0x08, 0x0a)).toBe(0x4C); // L + expect(ram.read(0x08, 0x0b)).toBe(0x4F); // O + expect(ram.read(0x08, 0x0c)).toBe(0x2C); // , + expect(ram.read(0x08, 0x0d)).toBe(0x20); // space + expect(ram.read(0x08, 0x0e)).toBe(0x57); // W + expect(ram.read(0x08, 0x0f)).toBe(0x4F); // O + expect(ram.read(0x08, 0x10)).toBe(0x52); // R + expect(ram.read(0x08, 0x11)).toBe(0x4C); // L + expect(ram.read(0x08, 0x12)).toBe(0x44); // D + expect(ram.read(0x08, 0x13)).toBe(0x21); // ! 
+ expect(ram.read(0x08, 0x14)).toBe(0x22); // " + expect(ram.read(0x08, 0x15)).toBe(0x00); // end of line + expect(ram.read(0x08, 0x16)).toBe(0x00); // end of program low + expect(ram.read(0x08, 0x17)).toBe(0x00); // end of program high + + expect(ram.read(0x00, TXTTAB)).toBe(0x01); // start of program low + expect(ram.read(0x00, TXTTAB + 1)).toBe(0x08); // start of program high + expect(ram.read(0x00, VARTAB)).toBe(0x19); // start of variables low + expect(ram.read(0x00, VARTAB + 1)).toBe(0x08); // start of variables high + expect(ram.read(0x00, ARYTAB)).toBe(0x19); // start of arrays low + expect(ram.read(0x00, ARYTAB + 1)).toBe(0x08); // start of arrays high + expect(ram.read(0x00, STREND)).toBe(0x19); // end of strings low + expect(ram.read(0x00, STREND + 1)).toBe(0x08); // end of strings high + expect(ram.read(0x00, PRGEND)).toBe(0x19); // end of program low + expect(ram.read(0x00, PRGEND + 1)).toBe(0x08); // end of program high + }); + + it('uppercases normal-mode text, like variables', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 fori=xtoz'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x81); // FOR + expect(ram.read(0x08, 0x06)).toBe(0x49); // I + expect(ram.read(0x08, 0x07)).toBe(0xd0); // = (token) + expect(ram.read(0x08, 0x08)).toBe(0x58); // X + expect(ram.read(0x08, 0x09)).toBe(0xc1); // TO + expect(ram.read(0x08, 0x0a)).toBe(0x5a); // Z + expect(ram.read(0x08, 0x0b)).toBe(0x00); // end of line + }); + + it('allows lower-case characters in strings', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 PRINT "Hello!"'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0xba); // PRINT + expect(ram.read(0x08, 0x06)).toBe(0x22); // " + expect(ram.read(0x08, 
0x07)).toBe(0x48); // H + expect(ram.read(0x08, 0x08)).toBe(0x65); // e + expect(ram.read(0x08, 0x09)).toBe(0x6C); // l + expect(ram.read(0x08, 0x0a)).toBe(0x6C); // l + expect(ram.read(0x08, 0x0b)).toBe(0x6F); // o + }); + + it('allows lower-case characters in comments', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 REM Hello!'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0xb2); // REM + expect(ram.read(0x08, 0x06)).toBe(0x20); // space + expect(ram.read(0x08, 0x07)).toBe(0x48); // H + expect(ram.read(0x08, 0x08)).toBe(0x65); // e + expect(ram.read(0x08, 0x09)).toBe(0x6C); // l + expect(ram.read(0x08, 0x0a)).toBe(0x6C); // l + expect(ram.read(0x08, 0x0b)).toBe(0x6F); // o + }); + + it('allows lower-case tokens', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 print "Hello!"'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0xba); // PRINT + }); + + it('accepts out-of-order lines', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '20 GOTO 10\n10 PRINT "HELLO'); + expect(ram.read(0x08, 0x01)).toBe(0x0d); // pointer to next line low + expect(ram.read(0x08, 0x02)).toBe(0x08); // pointer to next line high + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0xba); // PRINT + expect(ram.read(0x08, 0x06)).toBe(0x22); // " + expect(ram.read(0x08, 0x07)).toBe(0x48); // H + expect(ram.read(0x08, 0x08)).toBe(0x45); // E + expect(ram.read(0x08, 0x09)).toBe(0x4C); // L + expect(ram.read(0x08, 0x0a)).toBe(0x4C); // L + expect(ram.read(0x08, 0x0b)).toBe(0x4F); // O + expect(ram.read(0x08, 0x0c)).toBe(0x00); // end of line + expect(ram.read(0x08, 
0x0d)).toBe(0x15); // pointer to next line low + expect(ram.read(0x08, 0x0e)).toBe(0x08); // pointer to next line high + expect(ram.read(0x08, 0x0f)).toBe(0x14); // line number low + expect(ram.read(0x08, 0x10)).toBe(0x00); // line number high + expect(ram.read(0x08, 0x11)).toBe(0xab); // GOTO + expect(ram.read(0x08, 0x12)).toBe(0x31); // 1 + expect(ram.read(0x08, 0x13)).toBe(0x30); // 0 + expect(ram.read(0x08, 0x14)).toBe(0x00); // end of line + expect(ram.read(0x08, 0x15)).toBe(0x00); // end of program low + expect(ram.read(0x08, 0x16)).toBe(0x00); // end of program high + }); + + it('prefers ATN to AT', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 X = ATN(20)'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x58); // X + expect(ram.read(0x08, 0x06)).toBe(0xd0); // = (token) + expect(ram.read(0x08, 0x07)).toBe(0xe1); // ATN + expect(ram.read(0x08, 0x08)).toBe(0x28); // ( + expect(ram.read(0x08, 0x09)).toBe(0x32); // 2 + expect(ram.read(0x08, 0x0a)).toBe(0x30); // 0 + expect(ram.read(0x08, 0x0b)).toBe(0x29); // ) + expect(ram.read(0x08, 0x0c)).toBe(0x00); // end of line + }); + + it('prefers TO to AT', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 FORI=ATOZ'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x81); // FOR + expect(ram.read(0x08, 0x06)).toBe(0x49); // I + expect(ram.read(0x08, 0x07)).toBe(0xd0); // = (token) + expect(ram.read(0x08, 0x08)).toBe(0x41); // A + expect(ram.read(0x08, 0x09)).toBe(0xc1); // TO + expect(ram.read(0x08, 0x0a)).toBe(0x5a); // Z + expect(ram.read(0x08, 0x0b)).toBe(0x00); // end of line + }); + + it('parses DATA statements that start with space', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 DATA 1,2,3'); 
+ expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x83); // DATA + expect(ram.read(0x08, 0x06)).toBe(0x20); // space + expect(ram.read(0x08, 0x07)).toBe(0x31); // 1 + expect(ram.read(0x08, 0x08)).toBe(0x2c); // , + expect(ram.read(0x08, 0x09)).toBe(0x32); // 2 + expect(ram.read(0x08, 0x0a)).toBe(0x2c); // , + expect(ram.read(0x08, 0x0b)).toBe(0x33); // 3 + expect(ram.read(0x08, 0x0c)).toBe(0x00); // end of line + }); + + it('parses DATA statements with numbers', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 DATA1,2,3'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x83); // DATA + expect(ram.read(0x08, 0x06)).toBe(0x31); // 1 + expect(ram.read(0x08, 0x07)).toBe(0x2c); // , + expect(ram.read(0x08, 0x08)).toBe(0x32); // 2 + expect(ram.read(0x08, 0x09)).toBe(0x2c); // , + expect(ram.read(0x08, 0x0a)).toBe(0x33); // 3 + expect(ram.read(0x08, 0x0b)).toBe(0x00); // end of line + }); + + it('parses DATA statements with strings including lower-case', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 DATA"abc"'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x83); // DATA + expect(ram.read(0x08, 0x06)).toBe(0x22); // " + expect(ram.read(0x08, 0x07)).toBe(0x61); // a + expect(ram.read(0x08, 0x08)).toBe(0x62); // b + expect(ram.read(0x08, 0x09)).toBe(0x63); // c + expect(ram.read(0x08, 0x0a)).toBe(0x22); // " + expect(ram.read(0x08, 0x0b)).toBe(0x00); // end of line + }); + + it('parses DATA statements with literals', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 DATAHELLO'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + 
expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x83); // DATA + expect(ram.read(0x08, 0x06)).toBe(0x48); // H + expect(ram.read(0x08, 0x07)).toBe(0x45); // E + expect(ram.read(0x08, 0x08)).toBe(0x4C); // L + expect(ram.read(0x08, 0x09)).toBe(0x4C); // L + expect(ram.read(0x08, 0x0a)).toBe(0x4F); // O + expect(ram.read(0x08, 0x0b)).toBe(0x00); // end of line + }); + + it('parses DATA statements with literals including lower-case', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 DATAHello'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x83); // DATA + expect(ram.read(0x08, 0x06)).toBe(0x48); // H + expect(ram.read(0x08, 0x07)).toBe(0x65); // e + expect(ram.read(0x08, 0x08)).toBe(0x6C); // l + expect(ram.read(0x08, 0x09)).toBe(0x6C); // l + expect(ram.read(0x08, 0x0a)).toBe(0x6F); // o + expect(ram.read(0x08, 0x0b)).toBe(0x00); // end of line + }); + + it('parses DATA statements with literals including quotes', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 DATAAA"B'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x83); // DATA + expect(ram.read(0x08, 0x06)).toBe(0x41); // A + expect(ram.read(0x08, 0x07)).toBe(0x41); // A + expect(ram.read(0x08, 0x08)).toBe(0x22); // " + expect(ram.read(0x08, 0x09)).toBe(0x42); // B + expect(ram.read(0x08, 0x0a)).toBe(0x00); // end of line + }); + + it('parses DATA statements with literals including spaces', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 DATAA  B'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x83); // DATA + expect(ram.read(0x08,
0x06)).toBe(0x41); // A + expect(ram.read(0x08, 0x07)).toBe(0x20); // space + expect(ram.read(0x08, 0x08)).toBe(0x20); // space + expect(ram.read(0x08, 0x09)).toBe(0x42); // B + expect(ram.read(0x08, 0x0a)).toBe(0x00); // end of line + }); + + it('terminates DATA statements at colons', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 DATAAA:FORI=1TO1'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x83); // DATA + expect(ram.read(0x08, 0x06)).toBe(0x41); // A + expect(ram.read(0x08, 0x07)).toBe(0x41); // A + expect(ram.read(0x08, 0x08)).toBe(0x3a); // : + expect(ram.read(0x08, 0x09)).toBe(0x81); // FOR + expect(ram.read(0x08, 0x0a)).toBe(0x49); // I + expect(ram.read(0x08, 0x0b)).toBe(0xd0); // = (token) + expect(ram.read(0x08, 0x0c)).toBe(0x31); // 1 + expect(ram.read(0x08, 0x0d)).toBe(0xc1); // TO + expect(ram.read(0x08, 0x0e)).toBe(0x31); // 1 + expect(ram.read(0x08, 0x0f)).toBe(0x00); // end of line + }); + + it('does not terminate DATA statements with a literal with a quote at colon', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 DATAA":FORI=1TO1'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x83); // DATA + expect(ram.read(0x08, 0x06)).toBe(0x41); // A + expect(ram.read(0x08, 0x07)).toBe(0x22); // " + expect(ram.read(0x08, 0x08)).toBe(0x3a); // : + expect(ram.read(0x08, 0x09)).toBe(0x46); // F + expect(ram.read(0x08, 0x0a)).toBe(0x4F); // O + expect(ram.read(0x08, 0x0b)).toBe(0x52); // R + expect(ram.read(0x08, 0x0c)).toBe(0x49); // I + expect(ram.read(0x08, 0x0d)).toBe(0x3D); // = + expect(ram.read(0x08, 0x0e)).toBe(0x31); // 1 + expect(ram.read(0x08, 0x0f)).toBe(0x54); // T + expect(ram.read(0x08, 0x10)).toBe(0x4F); // O + expect(ram.read(0x08, 0x11)).toBe(0x31); // 1 
+ expect(ram.read(0x08, 0x12)).toBe(0x00); // end of line + }); + + it('does terminate DATA statements with a literal with two quotes at colon', () => { + const ram = new RAM(0, 0xff); // 64K of RAM + + compileToMemory(ram, '10 DATAA"":FORI=1TO1'); + expect(ram.read(0x08, 0x03)).toBe(10); // line number low + expect(ram.read(0x08, 0x04)).toBe(0); // line number high + expect(ram.read(0x08, 0x05)).toBe(0x83); // DATA + expect(ram.read(0x08, 0x06)).toBe(0x41); // A + expect(ram.read(0x08, 0x07)).toBe(0x22); // " + expect(ram.read(0x08, 0x08)).toBe(0x22); // " + expect(ram.read(0x08, 0x09)).toBe(0x3a); // : + expect(ram.read(0x08, 0x0a)).toBe(0x81); // FOR + expect(ram.read(0x08, 0x0b)).toBe(0x49); // I + expect(ram.read(0x08, 0x0c)).toBe(0xd0); // = (token) + expect(ram.read(0x08, 0x0d)).toBe(0x31); // 1 + expect(ram.read(0x08, 0x0e)).toBe(0xc1); // TO + expect(ram.read(0x08, 0x0f)).toBe(0x31); // 1 + expect(ram.read(0x08, 0x10)).toBe(0x00); // end of line + }); + + it('does not require a space after line number', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10PRINT"HELLO, WORLD!"'); + expect(compiler.program()).toEqual(new Uint8Array([ + 0x16, 0x08, 0x0a, 0x00, 0xba, 0x22, 0x48, 0x45, + 0x4c, 0x4c, 0x4f, 0x2c, 0x20, 0x57, 0x4f, 0x52, + 0x4c, 0x44, 0x21, 0x22, 0x00, 0x00, 0x00 + ])); + }); + + it('parses ? 
as PRINT', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 ?"HELLO, WORLD!"'); + expect(compiler.program()).toEqual(new Uint8Array([ + 0x16, 0x08, 0x0a, 0x00, 0xba, 0x22, 0x48, 0x45, + 0x4c, 0x4c, 0x4f, 0x2c, 0x20, 0x57, 0x4f, 0x52, + 0x4c, 0x44, 0x21, 0x22, 0x00, 0x00, 0x00 + ])); + }); + + it('skips spaces when reading tokens', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 T H E N'); + expect(compiler.program()).toEqual(new Uint8Array([ + 0x07, 0x08, 0x0a, 0x00, 0xc4, 0x00, 0x00, 0x00, + ])); + }); + + it('skips spaces and ignores case when reading tokens', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 T h E n'); + expect(compiler.program()).toEqual(new Uint8Array([ + 0x07, 0x08, 0x0a, 0x00, 0xc4, 0x00, 0x00, 0x00, + ])); + }); + + it('smashes tokens together', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 NOT RACE A THEN B'); + expect(compiler.program()).toEqual(new Uint8Array([ + 0x0c, 0x08, 0x0a, 0x00, 0x9c, 0xc5, 0x48, 0x45, + 0x4e, 0x42, 0x00, 0x00, 0x00, + ])); + }); + + it('parses 10ATOZ correctly', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10ATOZ'); + expect(compiler.program()).toEqual(new Uint8Array([ + 0x09, 0x08, 0x0a, 0x00, 0x41, 0xc1, 0x5a, 0x00, + 0x00, 0x00, + ])); + }); + + it('parses a bunch of crazy correctly', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile([ + '10 A THEN B', + '30 A TO Z', + '40 AT N', + '50 A TN', + '60 N O T R A C E', + '70 NOT RACE'].join('\n')); + expect(compiler.program()).toEqual(new Uint8Array([ + 0x0b, 0x08, 0x0a, 0x00, 0xc5, 0x48, 0x45, + 0x4e, 0x42, 0x00, 0x13, 0x08, 0x1e, 0x00, 0x41, + 0xc1, 0x5a, 0x00, 0x1a, 0x08, 0x28, 0x00, 0xc5, + 0x4e, 0x00, 0x20, 0x08, 0x32, 0x00, 0xe1, 0x00, + 0x26, 0x08, 0x3c, 0x00, 0x9c, 0x00, 0x2c, 0x08, + 0x46, 0x00, 0x9c, 0x00, 0x00, 0x00, + ])); + }); +}); diff --git a/test/js/applesoft/decompiler.spec.ts 
b/test/js/applesoft/decompiler.spec.ts new file mode 100644 index 0000000..9c18085 --- /dev/null +++ b/test/js/applesoft/decompiler.spec.ts @@ -0,0 +1,222 @@ +import ApplesoftDecompiler from 'js/applesoft/decompiler'; +import ApplesoftCompiler from 'js/applesoft/compiler'; +import RAM from 'js/ram'; +import { Memory } from 'js/types'; + +function decompileFromMemory(ram: Memory): string { + const decompiler = ApplesoftDecompiler.decompilerFromMemory(ram); + return decompiler.list(); +} + +describe('ApplesoftDecompiler', () => { + it('decompiles one-line program from memory', () => { + const ram = new RAM(0x00, 0xff); // 64K + ApplesoftCompiler.compileToMemory(ram, '10 PRINT "Hello, World!"'); + + const program = decompileFromMemory(ram); + expect(program).toEqual(' 10 PRINT "Hello, World!"\n'); + }); + + it('decompiles REM statements correctly', () => { + const ram = new RAM(0x00, 0xff); // 64K + ApplesoftCompiler.compileToMemory(ram, '10 REMNo space before\n20 REM with space'); + + const program = decompileFromMemory(ram); + expect(program).toEqual(' 10 REM No space before\n 20 REM with space\n'); + }); + + it('lists a one-line program', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 PRINT "Hello, World!"'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.list(); + expect(program).toEqual(' 10 PRINT "Hello, World!"\n'); + }); + + it('lists a program with a long line', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 PRINT "Hello, World!"\n' + + '20 PRINT "Hello, again, with a much longer line this time."\n' + + '30 REM1234567890123456789012345678901234567890'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.list(); + expect(program).toEqual(' 10 PRINT "Hello, World!"\n' + + ' 20 PRINT "Hello, again, with a \n' + + ' much longer line this time."\n' + + ' \n' + + ' 30 REM 123456789012345678901234\n' + + ' 
5678901234567890\n'); + }); + + it('lists a program with a long line Apple ][+-style', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 PRINT "Hello, World!"\n' + + '20 PRINT "Hello, again, with a much longer line this time."\n' + + '30 REM1234567890123456789012345678901234567890'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.list({ apple2: 'plus' }); + expect(program).toEqual('10 PRINT "Hello, World!"\n' + + '20 PRINT "Hello, again, with a m\n' + + ' uch longer line this time."\n' + + '30 REM 1234567890123456789012345\n' + + ' 678901234567890\n'); + }); + + it('lists a range of lines', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 PRINT "Hello, World!"\n' + + '20 PRINT "Hello, again, with a much longer line this time."\n' + + '30 REM1234567890123456789012345678901234567890'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.list({}, 10, 20); + expect(program).toEqual(' 10 PRINT "Hello, World!"\n' + + ' 20 PRINT "Hello, again, with a \n' + + ' much longer line this time."\n' + + ' \n'); + }); + + it('lists weird code correctly', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 NOT RACE A THEN B'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.list(); + expect(program).toEqual(' 10 NOTRACE AT HENB\n'); + }); + + it('lists 10ATOZ correctly', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10ATOZ'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.list(); + expect(program).toEqual(' 10 A TO Z\n'); + }); + + it('wraps correctly in 80-column mode', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 ?:?:?:?:?:?:?:?:?:?:?:?:?:?:?:?'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.list({ columns: 80 
}); + expect(program).toEqual(' 10 PRINT : PRINT : PRINT : PRINT : ' + + 'PRINT : PRINT : PRINT : PRINT : PRINT \n' + + ' : PRINT : PRINT : PRINT : PRINT : PRINT : PRINT : ' + + 'PRINT \n'); + }); + + it('decompiles compactly', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 ?:?:?:?:?:?:?:?:?:?:?:?:?:?:?:?'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.decompile({ style: 'compact' }); + expect(program).toEqual('10?:?:?:?:?:?:?:?:?:?:?:?:?:?:?:?'); + }); + + it('when decompiling compactly, adds a space after the line', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 12345'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.decompile({ style: 'compact' }); + expect(program).toEqual('10 12345'); + }); + + it('when decompiling compactly, adds a space after AT for token', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 AT NEXT'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.decompile({ style: 'compact' }); + expect(program).toEqual('10AT NEXT'); + }); + + it('when decompiling compactly, adds a space after AT for literal', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 AT n'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.decompile({ style: 'compact' }); + expect(program).toEqual('10AT N'); + }); + + it('when decompiling compactly, decompiles 10ATOZ correctly', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10ATOZ'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.decompile({ style: 'compact' }); + expect(program).toEqual('10ATOZ'); + }); + + it('when decompiling compactly, adds a space to disambiguate tokens', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile([ + '10 A 
THEN B', + '30 A TO Z', + '40 AT N', + '50 A TN', + '60 N O T R A C E', + '70 NOT RACE'].join('\n')); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.decompile({ style: 'compact' }); + expect(program).toEqual([ + '10ATHENB', + '30ATOZ', + '40AT N', + '50ATN', + '60NOTRACE', + '70NOTRACE'].join('\n')); + }); + + it('when decompiling prettily, formats reasonably well', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 FORI=1TO10:PRINTI:NEXT'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.decompile({ style: 'pretty' }); + expect(program).toEqual('10 FOR I = 1 TO 10 : PRINT I : NEXT'); + }); + + it('when decompiling prettily, formats relations', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 IFA<BORA>=BORB<=AORB=A THEN'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.decompile({ style: 'pretty' }); + expect(program).toEqual('10 IF A < B OR A >= B OR B <= A OR B = AT HEN'); + }); + + it('when decompiling prettily, decompiles 10ATOZ correctly', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10ATOZ'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.decompile({ style: 'pretty' }); + expect(program).toEqual('10 A TO Z'); + }); + + it('when decompiling prettily, does not insert extra spaces in strings', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10A="::::":B=","'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const program = decompiler.decompile({ style: 'pretty' }); + expect(program).toEqual('10 A = "::::" : B = ","'); + }); + + it('when decompiling prettily, inserts space after comma', () => { + const compiler = new ApplesoftCompiler(); + compiler.compile('10 HPLOTX,Y:GOTO10'); + + const decompiler = new ApplesoftDecompiler(compiler.program()); + const
program = decompiler.decompile({ style: 'pretty' }); + expect(program).toEqual('10 HPLOT X, Y : GOTO 10'); + }); +}); \ No newline at end of file