1
0
mirror of https://github.com/sehugg/8bitworkshop.git synced 2026-03-10 21:25:31 +00:00

z80 stream parser -> Lezer grammar parser

This commit is contained in:
Fred Sauer
2026-02-16 22:31:18 -08:00
parent b4505dacc7
commit e17da367a3
4 changed files with 258 additions and 127 deletions

View File

@@ -6,10 +6,11 @@ TMP=./tmp/dist
# Regenerate the parser modules: make sure gen/parser exists, then run $(LEZER)
# once per grammar in src/parser, writing each result to gen/parser/<name>.grammar.js.
buildgrammars:
mkdir -p gen/parser
$(LEZER) src/parser/lang-6502.grammar -o gen/parser/lang-6502.grammar.js
$(LEZER) src/parser/lang-z80.grammar -o gen/parser/lang-z80.grammar.js
watchgrammars:
while true; do \
if [ src/parser/lang-6502.grammar -nt gen/parser/lang-6502.grammar.js ]; then \
if [ src/parser/lang-6502.grammar -nt gen/parser/lang-6502.grammar.js ] || [ src/parser/lang-z80.grammar -nt gen/parser/lang-z80.grammar.js ]; then \
make buildgrammars; \
fi; \
sleep 1; \

166
src/parser/lang-z80.grammar Normal file
View File

@@ -0,0 +1,166 @@
// Root of the syntax tree: a program is zero or more source lines.
@top Program { Line* }
// Horizontal whitespace and ';' comments are skipped between tokens.
// Line breaks are NOT skipped; they are the significant `eol` token.
@skip { space | Comment }
// One source line: an optional label, an optional statement, then a mandatory
// end-of-line token. Blank lines match with both optional parts absent.
// NOTE(review): because `eol` is required, a final line without a trailing
// newline presumably only parses through error recovery — confirm.
Line {
Label? Statement? eol
}
// The body of a line: either a machine instruction or an assembler directive.
Statement {
Instruction |
Directive
}
// A label is a plain identifier, written with or without a trailing colon.
// Identifiers that were specialized into Opcode/PseudoOp/Register/Condition
// are different tokens and therefore cannot start a Label.
Label { Identifier ":" | Identifier }
// A machine instruction: a mnemonic optionally followed by its operand list.
Instruction {
Opcode Operand?
}
// An assembler directive: a pseudo-op followed by zero or more expressions.
// The expressions are written back to back; unlike Operand, this rule does
// not accept a Comma between them.
Directive {
PseudoOp (Expression)*
}
// Identifiers re-tagged as assembler directives. Only the all-lower-case and
// all-upper-case spellings are listed; any other casing stays an Identifier.
PseudoOp {
@specialize<Identifier, "org" | "equ" | "end" | "public" | "ORG" | "EQU" | "END" | "PUBLIC">
}
// Identifiers re-tagged as branch/flag conditions (lower- and upper-case).
// "c" is not listed here: that spelling is specialized as a Register instead,
// so a carry condition written as "c" appears in the tree as a Register node.
Condition {
@specialize<Identifier,
"nz" | "z" | "nc" | "po" | "pe" | "p" | "m" |
"NZ" | "Z" | "NC" | "PO" | "PE" | "P" | "M"
>
}
// Identifiers re-tagged as register names: 8-bit registers, register pairs,
// the index registers, sp/pc and the 8080-style "psw", each in all-lower-case
// and all-upper-case form. Any other casing stays a plain Identifier.
Register {
@specialize<Identifier,
"a" | "b" | "c" | "d" | "e" | "h" | "l" | "i" | "r" | "af" | "bc" | "de" | "hl" | "ix" | "iy" | "sp" | "pc" | "psw" |
"A" | "B" | "C" | "D" | "E" | "H" | "L" | "I" | "R" | "AF" | "BC" | "DE" | "HL" | "IX" | "IY" | "SP" | "PC" | "PSW"
>
}
// Identifiers re-tagged as instruction mnemonics. Both the Z80 (Zilog) and the
// 8080 (Intel) mnemonic sets are accepted, each in all-lower-case and
// all-upper-case form; mnemonics shared by both sets (e.g. "daa", "cpi") are
// listed once. Any other casing stays a plain Identifier.
Opcode {
@specialize<Identifier,
// Z80 Instructions
"ld" | "push" | "pop" | "inc" | "dec" | "add" | "adc" | "sub" | "sbc" | "and" | "or" | "xor" |
// "reti"/"retn" were recognised by the previous stream parser (ret[in]?)
// and are kept here so they are still tagged as opcodes.
"cp" | "ret" | "reti" | "retn" | "jp" | "jr" | "call" | "rst" | "nop" | "halt" | "di" | "ei" |
"im" | "ex" | "exx" | "neg" | "cpl" | "ccf" | "scf" | "rlca" | "rla" | "rrca" | "rra" |
"rlc" | "rl" | "rrc" | "rr" | "sla" | "sra" | "srl" | "bit" | "set" | "res" |
"out" | "in" | "djnz" | "rld" | "rrd" | "ldi" | "ldir" | "ldd" | "lddr" | "cpi" | "cpir" | "cpd" | "cpdr" |
"ini" | "inir" | "ind" | "indr" | "outi" | "otir" | "outd" | "otdr" |
"LD" | "PUSH" | "POP" | "INC" | "DEC" | "ADD" | "ADC" | "SUB" | "SBC" | "AND" | "OR" | "XOR" |
"CP" | "RET" | "RETI" | "RETN" | "JP" | "JR" | "CALL" | "RST" | "NOP" | "HALT" | "DI" | "EI" |
"IM" | "EX" | "EXX" | "NEG" | "CPL" | "CCF" | "SCF" | "RLCA" | "RLA" | "RRCA" | "RRA" |
"RLC" | "RL" | "RRC" | "RR" | "SLA" | "SRA" | "SRL" | "BIT" | "SET" | "RES" |
"OUT" | "IN" | "DJNZ" | "RLD" | "RRD" | "LDI" | "LDIR" | "LDD" | "LDDR" | "CPI" | "CPIR" | "CPD" | "CPDR" |
"INI" | "INIR" | "IND" | "INDR" | "OUTI" | "OTIR" | "OUTD" | "OTDR" |
// 8080 Instructions
"mov" | "mvi" | "lxi" | "lda" | "sta" | "lhld" | "shld" | "ldax" | "stax" |
"adi" | "aci" | "sui" | "sbi" | "sbb" | "ana" | "ani" | "xra" | "xri" | "ora" | "ori" | "cmp" |
"inr" | "dcr" | "inx" | "dcx" | "dad" |
"daa" | "cma" | "stc" | "cmc" | "ral" | "rar" |
"jmp" | "jnz" | "jz" | "jnc" | "jc" | "jpo" | "jpe" | "jm" |
"cnz" | "cz" | "cnc" | "cc" | "cpo" | "cpe" | "cm" |
"rnz" | "rz" | "rnc" | "rc" | "rpo" | "rpe" | "rp" | "rm" |
"pchl" | "sphl" | "xthl" | "xchg" | "hlt" |
"MOV" | "MVI" | "LXI" | "LDA" | "STA" | "LHLD" | "SHLD" | "LDAX" | "STAX" |
"ADI" | "ACI" | "SUI" | "SBI" | "SBB" | "ANA" | "ANI" | "XRA" | "XRI" | "ORA" | "ORI" | "CMP" |
"INR" | "DCR" | "INX" | "DCX" | "DAD" |
"DAA" | "CMA" | "STC" | "CMC" | "RAL" | "RAR" |
"JMP" | "JNZ" | "JZ" | "JNC" | "JC" | "JPO" | "JPE" | "JM" |
"CNZ" | "CZ" | "CNC" | "CC" | "CPO" | "CPE" | "CM" |
"RNZ" | "RZ" | "RNC" | "RC" | "RPO" | "RPE" | "RP" | "RM" |
"PCHL" | "SPHL" | "XTHL" | "XCHG" | "HLT"
>
}
// Operand/directive expressions. The four binary alternatives are
// left-recursive; the !logic / !bit / !compare / !term markers name the
// precedence level (declared in the @precedence block) used to resolve the
// resulting ambiguity. The remaining alternatives are a prefix-operator
// expression, a single value, or a parenthesised sub-expression.
Expression {
Expression !logic LogicOp Expression |
Expression !bit BitOp Expression |
// "<" and ">" used between two expressions are wrapped as BinaryLt/BinaryGt.
Expression !compare (CompareOp | BinaryLt | BinaryGt) Expression |
Expression !term (ArithOp | Plus | Minus | Percent) Expression |
UnaryExpression |
Value |
"(" Expression ")"
}
// A prefix operator applied to an expression. "<" and ">" in prefix position
// are wrapped as UnaryLt/UnaryGt.
// NOTE(review): these are presumably the low-byte / high-byte operators of the
// target assembler — confirm.
UnaryExpression {
(Plus | Minus | Not | Tilde | UnaryLt | UnaryGt) Expression
}
// The raw `lt` / `gt` tokens are each wrapped in two named nodes so the
// highlighter can style the infix use and the prefix use differently. The
// !bin and !un markers attach the corresponding precedence level from the
// @precedence block to each wrapper.
BinaryLt { lt !bin }
BinaryGt { gt !bin }
UnaryLt { lt !un }
UnaryGt { gt !un }
// A leaf of an expression: a numeric literal, a symbol name, a register or
// condition name, a string literal or a character literal.
Value {
Number |
Identifier |
Register |
Condition |
String |
Char
}
// The operand list of an instruction: one or more expressions separated by
// commas.
Operand {
Expression (Comma Expression)*
}
// Lexical tokens.
@tokens {
// Names: a letter or underscore followed by letters, digits or underscores.
Identifier { $[a-zA-Z_] $[a-zA-Z0-9_]* }
// Numeric literal forms, combined into the single Number token below.
// Radix markers are accepted in either case ("0FFh" and "0FFH", "0x1F" and
// "0X1F"), matching the case-insensitive keyword lists and the behaviour of
// the previous stream parser, whose number regex used the /i flag.
Hex { ("0x" | "0X" | "$") $[0-9a-fA-F]+ | $[0-9] $[0-9a-fA-F]* $[hH] }
Bin { "%" $[01]+ | $[01]+ $[bB] }
Oct { ("0o" | "0O") $[0-7]+ | $[0-7]+ $[oO] }
Dec { $[0-9]+ }
Number { Hex | Bin | Oct | Dec }
// Double-quoted string on a single line; a backslash escapes the next character.
String { '"' (!["\\\n] | "\\" _)* '"' }
// Character literal: a quote, then one ordinary character or a backslash
// escape (e.g. '\n'), then an optional closing quote. The escape form was
// accepted by the previous stream parser (\\?.') and is kept here; an escape
// never consumes a line break.
Char { "'" (!['\\\n] | "\\" ![\n]) "'"? }
// A ';' starts a comment that runs to the end of the line.
Comment { ";" ![\n]* }
space { $[ \t]+ }
// One or more consecutive line-break characters form a single eol token.
eol { $[\n\r]+ }
Comma { "," }
":"
"#"
"(" ")"
ArithOp { "*" | "/" }
Percent { "%" }
Plus { "+" }
Minus { "-" }
BitOp { "&" | "|" | "^" | "<<" | ">>" }
Tilde { "~" }
LogicOp { "&&" | "||" }
Not { "!" }
CompareOp { "==" | "!=" | "<=" | ">=" }
lt { "<" }
gt { ">" }
// Tie-break order when more than one token could match at the same position
// (for example "%101" as a Number versus "%" as the Percent operator).
@precedence { String, Char, Number, Percent, Identifier }
}
// Precedence levels for resolving grammar ambiguities, listed from highest to
// lowest. `un`, `term`, `compare`, `bit`, `logic` and `bin` are referenced by
// the ! markers in Expression and in the Lt/Gt wrapper rules; @left makes a
// level left-associative.
// NOTE(review): PseudoOp, Opcode and Label are declared here but no
// !PseudoOp / !Opcode / !Label marker appears in the rules of this grammar —
// confirm whether these entries are still needed.
@precedence {
un,
term @left,
compare @left,
bit @left,
logic @left,
bin @left,
PseudoOp,
Opcode,
Label
}
// Let the parser generator detect delimiter tokens (here the "(" ")" pair)
// and attach bracket metadata to them for the editor.
@detectDelim

View File

@@ -1,131 +1,44 @@
// CodeMirror 6 language support for Z80 assembly
// Migrated from CodeMirror 5 mode
// Original copyright (c) by Marijn Haverbeke and others
// Distributed under an MIT license: https://codemirror.net/5/LICENSE
import { LRLanguage, LanguageSupport } from "@codemirror/language"
import { styleTags, tags as t } from "@lezer/highlight"
import { parser } from "../../gen/parser/lang-z80.grammar.js"
import { StreamLanguage, StreamParser } from "@codemirror/language";
import { LanguageSupport } from "@codemirror/language";
/**
 * Per-line tokenizer state for the stream parser.
 *
 * `context` records what kind of token was seen last on the current line:
 * 0 at the start of a line, 1 after a keywords1 mnemonic, 2 after a
 * keywords2 mnemonic (call/jump/return family), 4 after a register or
 * condition operand, and 5 after a '.' or '#' directive prefix.
 */
interface Z80State {
context: number;
}
/**
 * Options for createZ80Parser.
 */
interface Z80Config {
// When true, the eZ80 keyword patterns are used and a '.' suffix is accepted
// as part of a word. Treated as false when omitted.
ez80?: boolean;
}
// TODO: Migrate to CodeMirror 6 Lezer parser.
function createZ80Parser(config: Z80Config = {}): StreamParser<Z80State> {
const ez80 = config.ez80 || false;
let keywords1: RegExp, keywords2: RegExp;
if (ez80) {
keywords1 = /^(exx?|(ld|cp)([di]r?)?|[lp]ea|pop|push|ad[cd]|cpl|daa|dec|inc|neg|sbc|sub|and|bit|[cs]cf|x?or|res|set|r[lr]c?a?|r[lr]d|s[lr]a|srl|djnz|nop|[de]i|halt|im|in([di]mr?|ir?|irx|2r?)|ot(dmr?|[id]rx|imr?)|out(0?|[di]r?|[di]2r?)|tst(io)?|slp)(\.([sl]?i)?[sl])?\b/i;
keywords2 = /^(((call|j[pr]|rst|ret[in]?)(\.([sl]?i)?[sl])?)|(rs|st)mix)\b/i;
} else {
keywords1 = /^(exx?|(ld|cp|in)([di]r?)?|pop|push|ad[cd]|cpl|daa|dec|inc|neg|sbc|sub|and|bit|[cs]cf|x?or|res|set|r[lr]c?a?|r[lr]d|s[lr]a|srl|djnz|nop|rst|[de]i|halt|im|ot[di]r|out[di]?)\b/i;
keywords2 = /^(call|j[pr]|ret[in]?|b_?(call|jump))\b/i;
/**
 * Lezer-based Z80 language definition: the generated parser configured with
 * highlighting tags for each grammar node, plus editor language data.
 */
export const LezerZ80: LRLanguage = LRLanguage.define({
parser: parser.configure({
props: [
// Map grammar node names to highlight tags.
styleTags({
Identifier: t.variableName,
PseudoOp: t.definition(t.variableName),
Opcode: t.keyword,
Register: t.typeName,
Condition: t.className,
Label: t.labelName,
String: t.string,
// Character literals are styled like numbers.
Char: t.number,
Number: t.number,
Comment: t.lineComment,
ArithOp: t.arithmeticOperator,
Plus: t.arithmeticOperator,
Minus: t.arithmeticOperator,
Percent: t.arithmeticOperator,
BitOp: t.bitwiseOperator,
Tilde: t.bitwiseOperator,
LogicOp: t.logicOperator,
Not: t.logicOperator,
CompareOp: t.compareOperator,
// "<" / ">" between two expressions are comparisons...
BinaryLt: t.compareOperator,
BinaryGt: t.compareOperator,
// ...while in prefix position they are styled as arithmetic operators.
UnaryLt: t.arithmeticOperator,
UnaryGt: t.arithmeticOperator,
Comma: t.separator,
"( )": t.paren
})
]
}),
languageData: {
// ';' is the line-comment token reported to the editor.
commentTokens: { line: ";" }
}
})
const variables1 = /^(af?|bc?|c|de?|e|hl?|l|i[xy]?|r|sp)\b/i;
const variables2 = /^(n?[zc]|p[oe]?|m)\b/i;
const errors = /^([hl][xy]|i[xy][hl]|slia|sll)\b/i;
const numbers = /^([\da-f]+h|[0-7]+o|[01]+b|\d+d?)\b/i;
return {
startState(): Z80State {
return {
context: 0
};
},
token(stream, state) {
if (!stream.column())
state.context = 0;
if (stream.eatSpace())
return null;
var w;
if (stream.eatWhile(/\w/)) {
if (ez80 && stream.eat('.')) {
stream.eatWhile(/\w/);
}
w = stream.current();
if (stream.indentation()) {
if ((state.context == 1 || state.context == 4) && variables1.test(w)) {
state.context = 4;
return 'variableName.special';
}
if (state.context == 2 && variables2.test(w)) {
state.context = 4;
return 'variableName.constant';
}
if (keywords1.test(w)) {
state.context = 1;
return 'keyword';
} else if (keywords2.test(w)) {
state.context = 2;
return 'keyword';
} else if (state.context == 4 && numbers.test(w)) {
return 'number';
}
if (errors.test(w))
return 'invalid';
} else if (stream.match(numbers)) {
return 'number';
} else {
return null;
}
} else if (stream.eat(';')) {
stream.skipToEnd();
return 'comment';
} else if (stream.eat('"')) {
while (w = stream.next()) {
if (w == '"')
break;
if (w == '\\')
stream.next();
}
return 'string';
} else if (stream.eat('\'')) {
if (stream.match(/\\?.'/))
return 'number';
} else if (stream.eat('.') || stream.sol() && stream.eat('#')) {
state.context = 5;
if (stream.eatWhile(/\w/))
return 'keyword.control';
} else if (stream.eat('$')) {
if (stream.eatWhile(/[\da-f]/i))
return 'number';
} else if (stream.eat('%')) {
if (stream.eatWhile(/[01]/))
return 'number';
} else {
stream.next();
}
return null;
}
};
}
/**
 * Language support for Z80 assembly language.
 *
 * Wraps a StreamLanguage built from createZ80Parser() with its default
 * configuration (eZ80 extensions disabled) in a LanguageSupport extension.
 */
export function asmZ80(): LanguageSupport {
return new LanguageSupport(StreamLanguage.define(createZ80Parser()));
}
/**
* Language support for eZ80 assembly language
*/
export function asmEZ80(): LanguageSupport {
return new LanguageSupport(StreamLanguage.define(createZ80Parser({ ez80: true })));
return new LanguageSupport(LezerZ80)
}

View File

@@ -0,0 +1,51 @@
const assert = require('assert');
const { EditorState } = require("@codemirror/state");
const { syntaxTree } = require("@codemirror/language");
const { asmZ80 } = require("../../gen/parser/lang-z80.js");
describe('Z80 Parser', function () {
  /**
   * Builds an editor state for `code` with the Z80 language extension and
   * returns its syntax tree after checking that a real parse took place.
   *
   * syntaxTree() always returns a Tree object (an empty, zero-length one when
   * no language is attached), so a bare assert.ok(tree) can never fail. The
   * extra checks make the test fail if the language extension is not wired up
   * or the grammar's top rule is not the one producing the tree.
   */
  function parseZ80(code) {
    const state = EditorState.create({
      doc: code,
      extensions: [asmZ80()]
    });
    const tree = syntaxTree(state);
    assert.ok(tree, "Syntax tree should be generated");
    assert.ok(tree.length > 0, "Syntax tree should cover the parsed document");
    assert.strictEqual(tree.type.name, "Program", "Top node should be Program");
    return tree;
  }

  it('Should parse basic instructions', function () {
    const code = ['', 'ld a, 0', 'ld hl, $1234', 'ret', ''].join('\n');
    parseZ80(code);
  });

  it('Should handle labels', function () {
    const code = ['', 'start:', 'jp start', ''].join('\n');
    parseZ80(code);
  });

  it('Should handle 8080 instructions', function () {
    const code = [
      '',
      'mvi a, 0',
      'lxi h, $1234',
      'mov a, b',
      'inx h',
      'jmp start',
      ''
    ].join('\n');
    parseZ80(code);
  });
});