diff --git a/css/ui.css b/css/ui.css index eea55a27..e2fc2540 100644 --- a/css/ui.css +++ b/css/ui.css @@ -444,6 +444,12 @@ div.markdown th { -webkit-background-clip: text; color:rgba(0,0,0,0); } +.logo-gradient:hover { + text-shadow: 0px 0px 0.5em rgba(255,255,255,1); + text-decoration: underline; + text-decoration-color: #ddd; + text-decoration-thickness: 1px; +} .disable-select { user-select: none; } diff --git a/index.html b/index.html index 2d6e0263..70c29e49 100644 --- a/index.html +++ b/index.html @@ -635,12 +635,5 @@ $( ".dropdown-submenu" ).click(function(event) { startUI(); - - diff --git a/src/codemirror/basic.js b/src/codemirror/basic.js index d384f471..bf9fb1fb 100644 --- a/src/codemirror/basic.js +++ b/src/codemirror/basic.js @@ -15,8 +15,9 @@ CodeMirror.defineMode("basic", function(conf, parserConf) { var ERRORCLASS = 'error'; function wordRegexp(words, crunch) { - return new RegExp("^((" + words.join(")|(") + "))", "i"); - //return new RegExp("^((" + words.join(")|(") + "))\\b", "i"); + // for token crunching + //return new RegExp("^((" + words.join(")|(") + "))", "i"); + return new RegExp("^((" + words.join(")|(") + "))\\b", "i"); } var singleOperators = new RegExp("^[\\+\\-\\*/%&\\\\|\\^~<>!]"); @@ -26,16 +27,17 @@ CodeMirror.defineMode("basic", function(conf, parserConf) { var tripleDelimiters = new RegExp("^((//=)|(>>=)|(<<=)|(\\*\\*=))"); var identifiers = new RegExp("^[_A-Za-z][_A-Za-z0-9]*"); - var openingKeywords = ['if','for']; - var middleKeywords = ['to','then']; - var endKeywords = ['next','end']; + var openingKeywords = ['if','for','while']; + var middleKeywords = ['to','then','else']; + var endKeywords = ['next','end','wend']; - var operatorKeywords = ['and', 'or', 'not', 'xor', 'eqv', 'imp']; + var operatorKeywords = ['and', 'or', 'not', 'xor', 'eqv', 'imp', 'mod']; var wordOperators = wordRegexp(operatorKeywords); var commonKeywords = [ 'BASE','DATA','DEF','DIM', 'GO','GOSUB','GOTO','INPUT','LET','ON','OPTION','PRINT', - 
'RANDOMIZE','READ','REM','RESTORE','RETURN','STEP','STOP','SUB' + 'RANDOMIZE','READ','REM','RESTORE','RETURN','STEP','STOP','SUB', + 'CALL','CHANGE','CONVERT','CLEAR','DIALECT','SELECT','CASE' ]; var commontypes = ['xxxxbyte','xxxxword']; @@ -278,6 +280,6 @@ CodeMirror.defineMode("basic", function(conf, parserConf) { return external; }); -CodeMirror.defineMIME("text/x-vb", "vb"); +CodeMirror.defineMIME("text/x-basic", "basic"); }); diff --git a/src/common/baseplatform.ts b/src/common/baseplatform.ts index d037793f..335cd778 100644 --- a/src/common/baseplatform.ts +++ b/src/common/baseplatform.ts @@ -172,7 +172,9 @@ export class BreakpointList { } } } -export type Breakpoint = {cond:DebugCondition}; +export interface Breakpoint { + cond: DebugCondition; +}; export interface EmuRecorder { frameRequested() : boolean; diff --git a/src/common/basic/README.md b/src/common/basic/README.md new file mode 100644 index 00000000..7890d468 --- /dev/null +++ b/src/common/basic/README.md @@ -0,0 +1,80 @@ +# BASIC Compiler Internals + +If you want to know more about the internals of a BASIC compiler written in TypeScript, then read on. + +## Tokenizer + +The tokenizer is powered by one huge gnarly regular expression. +Each token type is a separate capture group, and we just look for the first one that matched. +Here's a sample of the regex: + +~~~ + comment identifier string +... (['].*) | ([A-Z_]\w*[$]?) | (".*?") ... +~~~ + +In some tokenizers, like Microsoft BASIC, each keyword supported by the language is matched individually, +so whitespace is not required around keywords. +For example, `FORI=ATOB` would be matched `FOR I = A TO B`. +This was sometimes called "crunching." +We have a special case in the tokenizer to enable this for these dialects. + +The tokenizer also has special cases for `REM`, `DATA`, and `OPTION` which require tokens be untouched +-- and in the case of `DATA`, whitespace preserved. 
+ +Since BASIC is a line-oriented language, the tokenizer operates on one line at a time, +and each line is then fed to the parser. +For block-oriented languages, we'd probably want to tokenize the entire file before the parsing stage. + + +## Parser + +The parser is a hand-coded recursive descent parser. +Why? +There was no `yacc` nor `bison` when BASIC was invented, so the language was not designed for these tools. +In fact, BASIC is a little gnarly when you get into the details, so having a bit of control is nice, +and error messages can be more informative. +Both clang and gcc use recursive descent parsers, so it can't be that bad, right? + +The program is parsed one line at a time. +After line tokenization, the tokens go into an array. +We can consume tokens (remove from the list), peek at tokens (check the next token without removing), and pushback (return them to the list). +We don't have to check for `null`; we will always get the EOL (end-of-line) empty-string token if we run out. + +Expressions are parsed with an [operator-precedence parser](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pseudocode), which isn't really that complicated. +We also infer types at this stage (number or string). +We have a list of function signatures, and we know that "$" means a string variable, so we can check types. +The tricky part is that `FNA(X)` is a user-defined function, while `INT(X)` is a built-in function, and `I1(X)` could be a dimensioned array. + +Tokens carry their source code location with them, so we can assign a precise source code location to each statement. +This is used for error messages and for debugging. + +The compiler generates an AST (Abstract Syntax Tree) and not any kind of VM bytecode. +The top-level of the AST is a list of statements, and an associated mapping of labels (line numbers) to statements. +AST nodes must refer to other nodes by index, not by reference, as the worker transfers it to the main thread using `JSON.stringify()`. 
+ + +## Runtime + +The runtime interprets the AST generated by the compiler. +It compiles each statement (PRINT, LET, etc.) into JavaScript. +The method `expr2js()` converts expression trees to JavaScript, and `assign2js()` handles assignments like `LET`, `READ` and `INPUT`. + +One statement is executed every time step. +There's a "program counter", which is the index of the next-to-run Statement node in the list. + +Early BASICs were compiled languages, but the most popular BASICs for microcomputers were tokenized and interpreted. +There are subtle differences between the two. +For example, interpreted BASIC supports NEXT statements without a variable, +which always jump back to the most recent FOR even if you GOTO a different NEXT. +This requires the runtime to maintain a stack of FOR loops. +Compiled BASIC dialects will verify loop structures at compile time. + +For INPUT commands, the runtime calls the `input()` method, which returns a Promise. +The IDE overrides this method to show a text field to the user, and resolves the Promise when data is entered. +The runtime might call it multiple times until valid data is entered. + +The compiler and runtime are each about [1300 lines of TypeScript](https://github.com/sehugg/8bitworkshop/tree/master/src/common/basic), +excluding the definitions of the BASIC dialects. +It's tested with a [test suite](https://github.com/sehugg/nbs-ecma55-test) +and with a [coverage-guided fuzzer](https://github.com/fuzzitdev/jsfuzz). diff --git a/src/common/basic/compiler.ts b/src/common/basic/compiler.ts index 8e7e898f..9db99d63 100644 --- a/src/common/basic/compiler.ts +++ b/src/common/basic/compiler.ts @@ -44,6 +44,7 @@ export interface BASICOptions { restoreWithLabel : boolean; // RESTORE