1
0
mirror of https://github.com/sehugg/8bitworkshop.git synced 2026-03-11 13:41:43 +00:00

6502 stream parser -> Lezer grammar parser

This commit is contained in:
Fred Sauer
2026-02-15 22:07:03 -08:00
parent 367cf6a09d
commit b4505dacc7
4 changed files with 267 additions and 92 deletions

View File

@@ -1,8 +1,21 @@
# TypeScript compiler from the local node_modules, always invoked in --build mode.
TSC=./node_modules/typescript/bin/tsc --build
# Lezer parser generator CLI from the local node_modules.
LEZER=./node_modules/.bin/lezer-generator
# Scratch output directory (not referenced in the part of the file visible here).
TMP=./tmp/dist
buildtsc:
# Generate the JavaScript parser for the 6502 grammar:
# ensures gen/parser exists, then runs lezer-generator on
# src/parser/lang-6502.grammar, writing gen/parser/lang-6502.grammar.js.
buildgrammars:
mkdir -p gen/parser
$(LEZER) src/parser/lang-6502.grammar -o gen/parser/lang-6502.grammar.js
# Poll once per second and rebuild the generated parser whenever the grammar
# source is newer than the generated file. Runs until interrupted.
# The nested build goes through $(MAKE) rather than a literal `make` so the
# same make program and command-line flags are used for the recursive call.
watchgrammars:
	while true; do \
		if [ src/parser/lang-6502.grammar -nt gen/parser/lang-6502.grammar.js ]; then \
			$(MAKE) buildgrammars; \
		fi; \
		sleep 1; \
	done
# Full TypeScript build. Depends on buildgrammars so the generated parser
# exists before compilation; then cleans the esbuild output, runs tsc against
# tsconfig.json, and runs the esbuild bundling script.
buildtsc: buildgrammars
npm run esbuild-clean
$(TSC) tsconfig.json
npm run esbuild
@@ -30,6 +43,7 @@ tsweb:
npm run esbuild-clean
(ip addr || ifconfig) | grep inet
$(TSC) -w --preserveWatchOutput &
make watchgrammars &
sleep 9999999 | npm run esbuild-worker -- --watch &
sleep 9999999 | npm run esbuild-ui -- --watch &
python3 scripts/serveit.py 2>> /dev/null #http.out

View File

@@ -0,0 +1,162 @@
// Top-level rule: a program is a sequence of zero or more lines.
@top Program { Line* }
// Whitespace (spaces/tabs) and comments may appear between any two tokens and are skipped.
@skip { space | Comment }
// One source line: an optional label, an optional statement, then an end-of-line token.
// A line consisting only of eol (blank line) is therefore valid.
Line {
Label? Statement? eol
}
// The statement part of a line: a CPU instruction, an assembler directive,
// a macro header (`mac NAME`), a macro terminator (`endm`), a conditional
// keyword (`if` / `else` / `endif`), or the `err` keyword.
Statement {
Instruction |
Directive |
MacroDef |
MacEnd |
ControlOp |
ErrorOp
}
// A label is an identifier, optionally followed by a colon.
Label { Identifier ":" | Identifier }
// A CPU instruction: a mnemonic followed by an optional operand.
Instruction {
Opcode Operand?
}
// Register names. Identifier tokens whose text is exactly X, Y or A
// (upper or lower case) are specialized into this node type.
Register {
@specialize<Identifier, "X" | "Y" | "A" | "x" | "y" | "a">
}
// An assembler directive: a pseudo-op keyword followed by zero or more
// argument expressions. Each argument may be preceded by a comma, so the
// common comma-separated form (`.byte 1,2,3`) parses without error nodes;
// plain juxtaposed arguments are still accepted exactly as before.
Directive {
  PseudoOp (Comma? Expression)*
}
// Directive keywords. Identifier tokens whose text exactly matches one of the
// spellings below are specialized into PseudoOp. Matching is by exact text:
// only ORG/EQU/END and the dotted .WORD/.BYTE/.END forms are listed in upper
// case as well as lower case; every other keyword is listed in lower case only.
PseudoOp {
@specialize<Identifier,
"ORG" | "EQU" | "END" | "org" | "equ" | "end" |
"ds" | "ds.b" | "ds.w" | "dc" | "dc.b" | "dc.w" | "seg" | "seg.u" |
"subroutine" |
"echo" | "repeat" | "repend" | "set" |
"processor" |
".WORD" | ".word" | ".BYTE" | ".byte" | ".END" | ".end"
>
}
// Keyword nodes, each specialized from Identifier by exact (lower-case) text.
// `mac` opens a macro definition.
Mac { @specialize<Identifier, "mac"> }
// `endm` closes a macro definition; it is parsed as its own statement.
MacEnd { @specialize<Identifier, "endm"> }
// Conditional-assembly keywords.
ControlOp { @specialize<Identifier, "if" | "else" | "endif"> }
// The `err` keyword.
ErrorOp { @specialize<Identifier, "err"> }
// Macro header only: the `mac` keyword followed by the macro's name.
// The macro body and the closing `endm` are parsed as ordinary lines.
MacroDef {
Mac Identifier
}
// A lone "." used as a value (see Value); specialized from Identifier,
// whose token definition allows a leading dot.
CurrentAddress {
@specialize<Identifier, ".">
}
// Instruction mnemonics. Identifier tokens whose text exactly matches one of
// the 56 mnemonics below are specialized into Opcode. Each mnemonic is listed
// in all-upper-case and all-lower-case; mixed-case spellings are not listed
// and therefore remain plain Identifiers.
Opcode {
@specialize<Identifier,
"ADC" | "AND" | "ASL" | "BCC" | "BCS" | "BEQ" | "BIT" | "BMI" |
"BNE" | "BPL" | "BRK" | "BVC" | "BVS" | "CLC" | "CLD" | "CLI" |
"CLV" | "CMP" | "CPX" | "CPY" | "DEC" | "DEX" | "DEY" | "EOR" |
"INC" | "INX" | "INY" | "JMP" | "JSR" | "LDA" | "LDX" | "LDY" |
"LSR" | "NOP" | "ORA" | "PHA" | "PHP" | "PLA" | "PLP" | "ROL" |
"ROR" | "RTI" | "RTS" | "SBC" | "SEC" | "SED" | "SEI" | "STA" |
"STX" | "STY" | "TAX" | "TAY" | "TSX" | "TXA" | "TXS" | "TYA" |
"adc" | "and" | "asl" | "bcc" | "bcs" | "beq" | "bit" | "bmi" |
"bne" | "bpl" | "brk" | "bvc" | "bvs" | "clc" | "cld" | "cli" |
"clv" | "cmp" | "cpx" | "cpy" | "dec" | "dex" | "dey" | "eor" |
"inc" | "inx" | "iny" | "jmp" | "jsr" | "lda" | "ldx" | "ldy" |
"lsr" | "nop" | "ora" | "pha" | "php" | "pla" | "plp" | "rol" |
"ror" | "rti" | "rts" | "sbc" | "sec" | "sed" | "sei" | "sta" |
"stx" | "sty" | "tax" | "tay" | "tsx" | "txa" | "txs" | "tya"
>
}
// Expressions. Each binary alternative carries a precedence marker
// (!logic, !bit, !compare, !term) whose relative order and associativity are
// declared in the @precedence block at the end of this grammar.
// "<" and ">" are usable both as binary comparison operators (BinaryLt /
// BinaryGt) and as unary prefix operators (UnaryLt / UnaryGt).
Expression {
Expression !logic LogicOp Expression |
Expression !bit BitOp Expression |
Expression !compare (CompareOp | BinaryLt | BinaryGt) Expression |
Expression !term (ArithOp | Plus | Minus | Percent) Expression |
UnaryExpression |
Value |
"(" Expression ")"
}
// A prefix operator applied to an expression.
UnaryExpression {
(Plus | Minus | Not | Tilde | UnaryLt | UnaryGt) Expression
}
// The same `lt` / `gt` tokens wrapped in distinct node types, tagged with the
// `bin` or `un` precedence so binary and unary uses get different nodes.
BinaryLt { lt !bin }
BinaryGt { gt !bin }
UnaryLt { lt !un }
UnaryGt { gt !un }
// Atomic values usable inside an expression.
Value {
Number |
Identifier |
CurrentAddress |
String |
Char
}
// Operand forms accepted after an opcode:
//   # expr            - value prefixed with "#"
//   ( expr , reg )    - parenthesised expression with a register inside the parens
//   expr [, reg]      - expression optionally followed by ", register";
//                       since "(" Expression ")" is itself an Expression, this
//                       also covers the `(expr),reg` form
//   reg               - a bare register name
Operand {
"#" Expression |
"(" Expression Comma Register ")" |
Expression (Comma Register)? |
Register
}
// Lexical tokens.
@tokens {
// Starts with a letter, underscore or dot; continues with letters, digits, underscores or dots.
Identifier { $[a-zA-Z_.] $[a-zA-Z0-9_.]* }
// "$" + hex digits, "%" + binary digits, or plain decimal digits.
Number {
"$" $[0-9a-fA-F]+ |
"%" $[01]+ |
$[0-9]+
}
// Double-quoted string; a backslash escapes the following character; may not span lines.
String { '"' (!["\\\n] | "\\" _)* '"' }
// Single quote, one non-newline character, and an optional closing single quote.
Char { "'" ![\n] "'"? }
// ";" through the end of the line.
Comment { ";" ![\n]* }
// Horizontal whitespace only; newlines are significant and tokenized as eol.
space { $[ \t]+ }
// One or more consecutive newline / carriage-return characters form a single eol token.
eol { $[\n\r]+ }
Comma { "," }
// Literal tokens declared here so they are part of the token set.
"#"
"(" ")"
ArithOp { "*" | "/" }
Percent { "%" }
Plus { "+" }
Minus { "-" }
BitOp { "&" | "|" | "^" | "<<" | ">>" }
Tilde { "~" }
LogicOp { "&&" | "||" }
Not { "!" }
CompareOp { "==" | "!=" | "<=" | ">=" }
lt { "<" }
gt { ">" }
// Tie-break order for tokens that can match the same input, earlier entries
// winning (e.g. "%" followed by binary digits is a Number rather than Percent).
@precedence { String, Char, Number, Percent, Identifier }
}
// Precedence names referenced by the `!name` markers in the rules above,
// listed from highest to lowest; `@left` makes the operator left-associative.
// NOTE(review): PseudoOp, Opcode and Label are declared here but no `!PseudoOp`,
// `!Opcode` or `!Label` marker appears in the grammar shown - confirm whether
// they are still needed.
@precedence {
un,
term @left,
compare @left,
bit @left,
logic @left,
bin @left,
PseudoOp,
Opcode,
Label
}
// Ask the generator to detect delimiter (bracket) tokens automatically.
@detectDelim

View File

@@ -1,96 +1,55 @@
// CodeMirror 6 language support for 6502 assembly
// Migrated from CodeMirror 5 mode
// Original copyright (c) by Marijn Haverbeke and others
// Distributed under an MIT license: https://codemirror.net/5/LICENSE
import { LRLanguage, LanguageSupport, delimitedIndent, foldInside, foldNodeProp, indentNodeProp } from "@codemirror/language"
import { styleTags, tags as t } from "@lezer/highlight"
import { parser } from "../../gen/parser/lang-6502.grammar.js"
import { StreamLanguage, StreamParser } from "@codemirror/language";
import { LanguageSupport } from "@codemirror/language";
// TODO: Migrate to CodeMirror 6 Lezer parser.
const asm6502Parser: StreamParser<{ context: number }> = {
startState() {
return {
context: 0
};
},
token(stream, state) {
// Labels at start of line
if (!stream.column()) {
state.context = 0;
if (stream.eatWhile(/[\w.]/))
return 'labelName';
/**
 * CodeMirror 6 language definition for 6502 assembly, built on the generated
 * Lezer parser. Attaches indentation, folding and highlighting metadata to the
 * grammar's node types and declares ";" as the line-comment token.
 */
export const Lezer6502: LRLanguage = LRLanguage.define({
  parser: parser.configure({
    props: [
      // NOTE(review): the 6502 grammar does not appear to define an
      // `Application` node, so these two props currently match nothing -
      // confirm whether they are leftovers from a template.
      indentNodeProp.add({
        Application: delimitedIndent({ closing: ")", align: false })
      }),
      foldNodeProp.add({
        Application: foldInside
      }),
      styleTags({
        Identifier: t.variableName,
        CurrentAddress: t.self,
        PseudoOp: t.definition(t.variableName),
        Opcode: t.keyword,
        Label: t.labelName,
        // The name inside a Label is an Identifier child node, which would
        // otherwise pick up the generic `Identifier` style above. Target it by
        // path (same approach as "MacroDef/Identifier" below) so label names
        // are actually highlighted as labels.
        "Label/Identifier": t.labelName,
        String: t.string,
        Char: t.number,
        Number: t.number,
        Register: t.typeName,
        Comment: t.lineComment,
        ArithOp: t.arithmeticOperator,
        Plus: t.arithmeticOperator,
        Minus: t.arithmeticOperator,
        Percent: t.arithmeticOperator,
        BitOp: t.bitwiseOperator,
        Tilde: t.bitwiseOperator,
        LogicOp: t.logicOperator,
        Not: t.logicOperator,
        CompareOp: t.compareOperator,
        BinaryLt: t.compareOperator,
        BinaryGt: t.compareOperator,
        UnaryLt: t.arithmeticOperator,
        UnaryGt: t.arithmeticOperator,
        Mac: t.definitionKeyword,
        MacEnd: t.definitionKeyword,
        "MacroDef/Identifier": t.macroName,
        ControlOp: t.controlKeyword,
        ErrorOp: t.keyword,
        Comma: t.separator,
        "( )": t.paren
      })
    ]
  }),
  languageData: {
    // Used by editor comment-toggling commands.
    commentTokens: { line: ";" }
  }
})
if (stream.eatSpace())
return null;
var w;
if (stream.eatWhile(/\w/)) {
w = stream.current();
var cur = w.toLowerCase();
// Check for directives
var style = directives.get(cur);
if (style)
return style;
// Check for opcodes (3-letter mnemonics)
if (opcodes.test(w)) {
state.context = 4;
return 'keyword';
} else if (state.context == 4 && numbers.test(w)) {
return 'number';
} else if (stream.match(numbers)) {
return 'number';
} else {
return null;
}
} else if (stream.eat(';')) {
stream.skipToEnd();
return 'comment';
} else if (stream.eat('"')) {
while (w = stream.next()) {
if (w == '"')
break;
if (w == '\\')
stream.next();
}
return 'string';
} else if (stream.eat('\'')) {
if (stream.match(/\\?.'/) || stream.match(/\\?.'/))
return 'number';
} else if (stream.eat('$') || stream.eat('#')) {
if (stream.eatWhile(/[^;]/i))
return 'number';
} else if (stream.eat('%')) {
if (stream.eatWhile(/[01]/))
return 'number';
} else {
stream.next();
}
return null;
}
};
// Directive keywords
// Lower-case directive names recognised by the stream parser; the parser
// lower-cases each word before looking it up.
const directives_list = [
'processor',
'byte', 'word', 'long',
'include', 'seg', 'dc', 'ds', 'dv', 'hex', 'err', 'org', 'rorg', 'echo', 'rend',
'align', 'subroutine', 'equ', 'eqm', 'set', 'mac', 'endm', 'mexit', 'ifconst',
'ifnconst', 'if', 'else', 'endif', 'eif', 'repeat', 'repend'
];
// Lookup table from directive name to its highlight style (always 'keyword').
const directives = new Map<string, string>();
directives_list.forEach(function (s) { directives.set(s, 'keyword'); });
// Any word of exactly three letters (case-insensitive) is treated as an opcode.
const opcodes = /^[a-z][a-z][a-z]\b/i;
// Suffix-style numeric literals: hex digits + "h", octal digits + "o",
// binary digits + "b", or decimal digits with an optional "d" (case-insensitive).
const numbers = /^([\da-f]+h|[0-7]+o|[01]+b|\d+d?)\b/i;
/**
* Language support for 6502 assembly language.
*
* @returns a CodeMirror `LanguageSupport` extension wrapping the 6502
* assembly language definition.
*/
export function asm6502(): LanguageSupport {
// NOTE(review): two consecutive return statements appear here; only the first
// can execute. This looks like old/new diff lines shown together - confirm
// that the Lezer6502 variant is the one that should remain.
return new LanguageSupport(StreamLanguage.define(asm6502Parser));
return new LanguageSupport(Lezer6502)
}

View File

@@ -0,0 +1,40 @@
const assert = require('assert');
const { EditorState } = require("@codemirror/state");
const { syntaxTree } = require("@codemirror/language");
const { asm6502 } = require("../../gen/parser/lang-6502.js");
describe('6502 Parser', function () {
  // Required inside the suite so this block does not depend on edits to the
  // file-level require lines.
  const { ensureSyntaxTree } = require("@codemirror/language");

  /**
   * Parse `code` with the asm6502() language extension and return the names
   * of all nodes in the resulting syntax tree, in traversal order.
   *
   * syntaxTree() hands back a tree object even when nothing was parsed, so a
   * bare truthiness check cannot fail. ensureSyntaxTree() is used to force the
   * parse to cover the whole document (it returns null on timeout), and the
   * callers then assert on the node types that were actually produced.
   */
  function parseNodeNames(code) {
    const state = EditorState.create({
      doc: code,
      extensions: [asm6502()]
    });
    // Original smoke check: creating the state and reading the tree must not throw.
    assert.ok(syntaxTree(state), "Syntax tree should be generated");
    const tree = ensureSyntaxTree(state, state.doc.length, 5000);
    assert.ok(tree, "Parser should finish parsing the whole document");
    const names = [];
    tree.iterate({
      enter(node) {
        names.push(node.name);
      }
    });
    return names;
  }

  it('Should parse basic instructions', function () {
    const code = `
lda #$00
sta $1234
rts
`;
    const names = parseNodeNames(code);
    assert.ok(names.includes("Opcode"), "Mnemonics should produce Opcode nodes");
    assert.ok(names.includes("Number"), "Numeric operands should produce Number nodes");
  });

  it('Should handle labels', function () {
    const code = `
start:
jmp start
`;
    const names = parseNodeNames(code);
    assert.ok(names.includes("Label"), "A leading identifier with ':' should produce a Label node");
    assert.ok(names.includes("Opcode"), "The jmp mnemonic should produce an Opcode node");
  });
});