diff --git a/Makefile b/Makefile index e7a2847b..80d0ffd9 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,21 @@ TSC=./node_modules/typescript/bin/tsc --build +LEZER=./node_modules/.bin/lezer-generator TMP=./tmp/dist -buildtsc: +buildgrammars: + mkdir -p gen/parser + $(LEZER) src/parser/lang-6502.grammar -o gen/parser/lang-6502.grammar.js + +watchgrammars: + while true; do \ + if [ src/parser/lang-6502.grammar -nt gen/parser/lang-6502.grammar.js ]; then \ + make buildgrammars; \ + fi; \ + sleep 1; \ + done + +buildtsc: buildgrammars npm run esbuild-clean $(TSC) tsconfig.json npm run esbuild @@ -30,6 +43,7 @@ tsweb: npm run esbuild-clean (ip addr || ifconfig) | grep inet $(TSC) -w --preserveWatchOutput & + make watchgrammars & sleep 9999999 | npm run esbuild-worker -- --watch & sleep 9999999 | npm run esbuild-ui -- --watch & python3 scripts/serveit.py 2>> /dev/null #http.out diff --git a/src/parser/lang-6502.grammar b/src/parser/lang-6502.grammar new file mode 100644 index 00000000..5dbee73e --- /dev/null +++ b/src/parser/lang-6502.grammar @@ -0,0 +1,162 @@ +@top Program { Line* } + +@skip { space | Comment } + +Line { + Label? Statement? eol +} + +Statement { + Instruction | + Directive | + MacroDef | + MacEnd | + ControlOp | + ErrorOp +} + +Label { Identifier ":" | Identifier } + +Instruction { + Opcode Operand? +} + +Register { + @specialize +} + +Directive { + PseudoOp (Expression)* +} + +PseudoOp { + @specialize +} + +Mac { @specialize } +MacEnd { @specialize } + +ControlOp { @specialize } +ErrorOp { @specialize } + +MacroDef { + Mac Identifier +} + +CurrentAddress { + @specialize +} + +Opcode { + @specialize +} + +Expression { + Expression !logic LogicOp Expression | + Expression !bit BitOp Expression | + Expression !compare (CompareOp | BinaryLt | BinaryGt) Expression | + Expression !term (ArithOp | Plus | Minus | Percent) Expression | + UnaryExpression | + Value | + "(" Expression ")" +} + +UnaryExpression { + (Plus | Minus | Not | Tilde | UnaryLt | UnaryGt) Expression +} + +BinaryLt { lt !bin } +BinaryGt { gt !bin } +UnaryLt { lt !un } +UnaryGt { gt !un } + +Value { + Number | + Identifier | + CurrentAddress | + String | + Char +} + +Operand { + "#" Expression | + "(" Expression Comma Register ")" | + Expression (Comma Register)? | + Register +} + +@tokens { + Identifier { $[a-zA-Z_.] $[a-zA-Z0-9_.]* } + + Number { + "$" $[0-9a-fA-F]+ | + "%" $[01]+ | + $[0-9]+ + } + + String { '"' (!["\\\n] | "\\" _)* '"' } + + Char { "'" ![\n] "'"? } + + Comment { ";" ![\n]* } + + space { $[ \t]+ } + eol { $[\n\r]+ } + + Comma { "," } + "#" + "(" ")" + + ArithOp { "*" | "/" } + Percent { "%" } + Plus { "+" } + Minus { "-" } + + BitOp { "&" | "|" | "^" | "<<" | ">>" } + Tilde { "~" } + + LogicOp { "&&" | "||" } + Not { "!" } + + CompareOp { "==" | "!=" | "<=" | ">=" } + lt { "<" } + gt { ">" } + + @precedence { String, Char, Number, Percent, Identifier } +} + +@precedence { + un, + term @left, + compare @left, + bit @left, + logic @left, + bin @left, + PseudoOp, + Opcode, + Label +} + +@detectDelim diff --git a/src/parser/lang-6502.ts b/src/parser/lang-6502.ts index 4791f843..17c8146b 100644 --- a/src/parser/lang-6502.ts +++ b/src/parser/lang-6502.ts @@ -1,96 +1,55 @@ -// CodeMirror 6 language support for 6502 assembly -// Migrated from CodeMirror 5 mode -// Original copyright (c) by Marijn Haverbeke and others -// Distributed under an MIT license: https://codemirror.net/5/LICENSE +import { LRLanguage, LanguageSupport, delimitedIndent, foldInside, foldNodeProp, indentNodeProp } from "@codemirror/language" +import { styleTags, tags as t } from "@lezer/highlight" +import { parser } from "../../gen/parser/lang-6502.grammar.js" -import { StreamLanguage, StreamParser } from "@codemirror/language"; -import { LanguageSupport } from "@codemirror/language"; - -// TODO: Migrate to CodeMirror 6 Lezer parser. -const asm6502Parser: StreamParser<{ context: number }> = { - startState() { - return { - context: 0 - }; - }, - - token(stream, state) { - // Labels at start of line - if (!stream.column()) { - state.context = 0; - if (stream.eatWhile(/[\w.]/)) - return 'labelName'; +export const Lezer6502: LRLanguage = LRLanguage.define({ + parser: parser.configure({ + props: [ + indentNodeProp.add({ + Application: delimitedIndent({ closing: ")", align: false }) + }), + foldNodeProp.add({ + Application: foldInside + }), + styleTags({ + Identifier: t.variableName, + CurrentAddress: t.self, + PseudoOp: t.definition(t.variableName), + Opcode: t.keyword, + Label: t.labelName, + String: t.string, + Char: t.number, + Number: t.number, + Register: t.typeName, + Comment: t.lineComment, + ArithOp: t.arithmeticOperator, + Plus: t.arithmeticOperator, + Minus: t.arithmeticOperator, + Percent: t.arithmeticOperator, + BitOp: t.bitwiseOperator, + Tilde: t.bitwiseOperator, + LogicOp: t.logicOperator, + Not: t.logicOperator, + CompareOp: t.compareOperator, + BinaryLt: t.compareOperator, + BinaryGt: t.compareOperator, + UnaryLt: t.arithmeticOperator, + UnaryGt: t.arithmeticOperator, + Mac: t.definitionKeyword, + MacEnd: t.definitionKeyword, + "MacroDef/Identifier": t.macroName, + ControlOp: t.controlKeyword, + ErrorOp: t.keyword, + Comma: t.separator, + "( )": t.paren + }) + ] + }), + languageData: { + commentTokens: { line: ";" } } +}) - if (stream.eatSpace()) - return null; - - var w; - if (stream.eatWhile(/\w/)) { - w = stream.current(); - var cur = w.toLowerCase(); - - // Check for directives - var style = directives.get(cur); - if (style) - return style; - - // Check for opcodes (3-letter mnemonics) - if (opcodes.test(w)) { - state.context = 4; - return 'keyword'; - } else if (state.context == 4 && numbers.test(w)) { - return 'number'; - } else if (stream.match(numbers)) { - return 'number'; - } else { - return null; - } - } else if (stream.eat(';')) { - stream.skipToEnd(); - return 'comment'; - } else if (stream.eat('"')) { - while (w = stream.next()) { - if (w == '"') - break; - - if (w == '\\') - stream.next(); - } - return 'string'; - } else if (stream.eat('\'')) { - if (stream.match(/\\?.'/) || stream.match(/\\?.'/)) - return 'number'; - } else if (stream.eat('$') || stream.eat('#')) { - if (stream.eatWhile(/[^;]/i)) - return 'number'; - } else if (stream.eat('%')) { - if (stream.eatWhile(/[01]/)) - return 'number'; - } else { - stream.next(); - } - return null; - } -}; - -// Directive keywords -const directives_list = [ - 'processor', - 'byte', 'word', 'long', - 'include', 'seg', 'dc', 'ds', 'dv', 'hex', 'err', 'org', 'rorg', 'echo', 'rend', - 'align', 'subroutine', 'equ', 'eqm', 'set', 'mac', 'endm', 'mexit', 'ifconst', - 'ifnconst', 'if', 'else', 'endif', 'eif', 'repeat', 'repend' -]; -const directives = new Map(); -directives_list.forEach(function (s) { directives.set(s, 'keyword'); }); - -const opcodes = /^[a-z][a-z][a-z]\b/i; -const numbers = /^([\da-f]+h|[0-7]+o|[01]+b|\d+d?)\b/i; - -/** - * Language support for 6502 assembly language - */ export function asm6502(): LanguageSupport { - return new LanguageSupport(StreamLanguage.define(asm6502Parser)); + return new LanguageSupport(Lezer6502) } diff --git a/test/parsers/testparser6502.js b/test/parsers/testparser6502.js new file mode 100644 index 00000000..7513b416 --- /dev/null +++ b/test/parsers/testparser6502.js @@ -0,0 +1,40 @@ + +const assert = require('assert'); +const { EditorState } = require("@codemirror/state"); +const { syntaxTree } = require("@codemirror/language"); +const { asm6502 } = require("../../gen/parser/lang-6502.js"); + + +describe('6502 Parser', function () { + + it('Should parse basic instructions', function () { + const code = ` + lda #$00 + sta $1234 + rts + `; + + // Create an editor state with the new parser + const state = EditorState.create({ + doc: code, + extensions: [asm6502()] + }); + + // Check if the tree is available (basic check that parser didn't crash) + // In a real environment we might traverse the tree to check specific nodes + // but here we just want to ensure it instantiates and runs without throwing. + assert.ok(syntaxTree(state), "Syntax tree should be generated"); + }); + + it('Should handle labels', function () { + const code = ` + start: + jmp start + `; + const state = EditorState.create({ + doc: code, + extensions: [asm6502()] + }); + assert.ok(syntaxTree(state), "Syntax tree should be generated"); + }); +});