mirror of https://github.com/sehugg/8bitworkshop.git
tokenizer is no longer line-based

@@ -38,7 +38,7 @@
var numbers = /^(0x[\da-f]+|[\da-f]+h|[0-7]+o|[01]+b|\d+d?)\b/i;
var tags = /^\{\{.*\}\}/;
var comment = /\/\/.*/;
var mlcomment = /^---.+?---\b/i;
var mlcomment = /\/\*.*?\*\//s; // TODO

return {
startState: function () {
@@ -56,6 +56,9 @@
if (stream.match(comment)) {
return 'comment';
}
if (stream.match(mlcomment)) {
return 'comment';
}

var w;
if (stream.eatWhile(/\w/)) {
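
Note (illustration, not part of the diff): the replacement mlcomment pattern relies on the s (dotAll) flag, so a /* ... */ comment spanning several lines matches as one unit, and the lazy .*? stops it at the first closing */. A minimal standalone check:

// illustration only -- the dotAll flag lets '.' cross newlines
const mlcomment = /\/\*.*?\*\//s;
const src = "a = 1; /* spans\ntwo lines */ b = 2; /* second */";
console.log(src.match(mlcomment)![0]);   // "/* spans\ntwo lines */" (stops at the first */)
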
@@ -8,6 +8,7 @@ export enum ECSTokenType {
Operator = 'delimiter',
QuotedString = 'quoted-string',
Integer = 'integer',
CodeFragment = 'code-fragment',
}

export class ECSCompiler extends Tokenizer {
@@ -19,15 +20,16 @@ export class ECSCompiler extends Tokenizer {
super();
//this.includeEOL = true;
this.setTokenRules([
{ type: TokenType.CodeFragment, regex: /---/ },
{ type: ECSTokenType.Ellipsis, regex: /\.\./ },
{ type: ECSTokenType.Operator, regex: /[#=,:(){}\[\]]/ },
{ type: ECSTokenType.QuotedString, regex: /".*?"/ },
{ type: ECSTokenType.CodeFragment, regex: /---.*?---/ },
{ type: ECSTokenType.Integer, regex: /[-]?0x[A-Fa-f0-9]+/ },
{ type: ECSTokenType.Integer, regex: /[-]?\$[A-Fa-f0-9]+/ },
{ type: ECSTokenType.Integer, regex: /[-]?\d+/ },
{ type: TokenType.Ident, regex: /[A-Za-z_][A-Za-z0-9_]*/ },
{ type: TokenType.Ignore, regex: /\/\/.*/ },
{ type: TokenType.Ignore, regex: /\/\/.*?[\n\r]/ },
{ type: TokenType.Ignore, regex: /\/\*.*?\*\// },
{ type: TokenType.Ignore, regex: /\s+/ },
]);
this.errorOnCatchAll = true;
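
Note (illustration, not from the diff): because these rules end up in a single combined regex compiled with the gs flags (see the TokenizerRuleSet change below), a bare //.* comment rule would swallow the rest of the file once '.' matches newlines; bounding it with .*?[\n\r] keeps the comment to one line, and the separate /\*.*?\*\/ rule covers block comments. A quick standalone comparison:

// dotAll makes the unbounded form over-match
const unbounded = /\/\/.*/s;
const bounded = /\/\/.*?[\n\r]/s;
const text = "// first comment\nlet x = 1; // second\nlet y = 2;\n";
console.log(text.match(unbounded)![0] === text);       // true -- everything from the first // onward
console.log(JSON.stringify(text.match(bounded)![0]));  // "// first comment\n"
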
@@ -43,7 +45,11 @@ export class ECSCompiler extends Tokenizer {
parseFile(text: string, path: string) {
this.tokenizeFile(text, path);
while (!this.isEOF()) {
this.annotate(() => this.parseTopLevel());
let top = this.parseTopLevel();
if (top) {
let t = top;
this.annotate(() => t); // TODO? typescript bug?
}
}
}

@@ -63,7 +69,7 @@ export class ECSCompiler extends Tokenizer {
return this.em.defineSystem(this.parseResource());
}
if (tok.str == 'comment') {
this.expectTokenTypes([TokenType.CodeFragment]);
this.expectTokenTypes([ECSTokenType.CodeFragment]);
return;
}
this.compileError(`Unexpected top-level keyword: ${tok.str}`);
@@ -97,7 +103,7 @@ export class ECSCompiler extends Tokenizer {
return { dtype: 'ref', query: this.parseQuery() } as RefType;
}
if (this.peekToken().str == 'array') {
let index : IntType;
let index : IntType | undefined = undefined;
this.expectToken('array');
if (this.peekToken().type == ECSTokenType.Integer) {
index = this.parseDataType() as IntType;
@@ -105,7 +111,7 @@ export class ECSCompiler extends Tokenizer {
this.expectToken('of');
return { dtype: 'array', index, elem: this.parseDataType() } as ArrayType;
}
this.compileError(`Unknown data type`); // TODO
this.internalError(); throw new Error();
}

parseDataValue(field: DataField) : DataValue {
@@ -123,6 +129,9 @@ export class ECSCompiler extends Tokenizer {
let id = e.id;
if (reftype) {
// TODO: make this a function? elo ehi etc?
if (!this.currentScope) {
this.compileError("This type can only exist inside of a scope."); throw new Error()
};
let atypes = this.em.archetypesMatching(reftype.query);
let entities = this.currentScope.entitiesMatching(atypes);
if (entities.length == 0) this.compileError(`This entitiy doesn't seem to fit the reference type.`);
@@ -130,7 +139,7 @@ export class ECSCompiler extends Tokenizer {
}
return id;
}
this.compileError(`Unknown data value`); // TODO
this.internalError(); throw new Error();
}

parseDataArray() {
@@ -186,7 +195,8 @@ export class ECSCompiler extends Tokenizer {
this.expectToken('do');
let select = this.expectTokens(['once', 'foreach', 'source', 'join']).str as SelectType; // TODO: type check?
let query = this.parseQuery();
let join = select == 'join' && this.parseQuery();
let join = undefined;
if (select == 'join') join = this.parseQuery();
let emits;
let limit;
if (this.peekToken().str == 'limit') {
@@ -225,8 +235,8 @@ export class ECSCompiler extends Tokenizer {

parseCode(): string {
// TODO: add $loc
let tok = this.expectTokenTypes([TokenType.CodeFragment]);
let code = tok.str;
let tok = this.expectTokenTypes([ECSTokenType.CodeFragment]);
let code = tok.str.substring(3, tok.str.length-3);
let lines = code.split('\n');
for (let i=0; i<lines.length; i++) {
lines[i] = ` .dbg line, "${this.path}", ${tok.$loc.line+i}\n` + lines[i];
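
A hedged sketch of what the new parseCode body above does (the helper name and inputs here are hypothetical): the surrounding --- delimiters are cut off with substring(3, length-3), and every line of the fragment gets a .dbg line directive in front of it so the generated assembly can be traced back to the ECS source line it came from.

// illustration only, mirroring the substring/split/prefix steps above
function expandFragment(tokenStr: string, path: string, startLine: number): string {
  let code = tokenStr.substring(3, tokenStr.length - 3);   // strip leading/trailing ---
  let lines = code.split('\n');
  for (let i = 0; i < lines.length; i++) {
    lines[i] = ` .dbg line, "${path}", ${startLine + i}\n` + lines[i];
  }
  return lines.join('\n');
}
// expandFragment('---\n lda #0\n sta $80\n---', 'game.ecs', 10)
// prefixes the fragment's lines with .dbg line, "game.ecs", 10 / 11 / 12 ...
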
@@ -236,7 +246,7 @@ export class ECSCompiler extends Tokenizer {

parseScope() : EntityScope {
let name = this.expectIdent().str;
let scope = this.em.newScope(name, this.currentScope);
let scope = this.em.newScope(name, this.currentScope || undefined);
this.currentScope = scope;
let cmd;
while ((cmd = this.expectTokens(['entity', 'comment', 'end']).str) != 'end') {
@@ -244,14 +254,15 @@ export class ECSCompiler extends Tokenizer {
this.annotate(() => this.parseEntity());
}
if (cmd == 'comment') {
this.expectTokenTypes([TokenType.CodeFragment]);
this.expectTokenTypes([ECSTokenType.CodeFragment]);
}
}
this.currentScope = scope.parent;
this.currentScope = scope.parent || null;
return scope;
}

parseEntity() : Entity {
if (!this.currentScope) { this.internalError(); throw new Error(); }
let name = '';
if (this.peekToken().type == TokenType.Ident) {
name = this.expectIdent().str;
@@ -267,7 +278,7 @@ export class ECSCompiler extends Tokenizer {
if (comps.length == 0) this.compileError(`I couldn't find a field named "${name}" for this entity.`)
if (comps.length > 1) this.compileError(`I found more than one field named "${name}" for this entity.`)
let field = comps[0].fields.find(f => f.name == name);
if (!field) this.internalError();
if (!field) { this.internalError(); throw new Error(); }
this.expectToken('=');
let value = this.parseDataValue(field);
if (cmd == 'const') this.currentScope.setConstValue(e, comps[0], name, value);
@@ -291,10 +302,14 @@ export class ECSCompiler extends Tokenizer {
}

parseEntityRef(reftype?: RefType) : Entity {
if (!this.currentScope) { this.internalError(); throw new Error(); }
this.expectToken('#');
let name = this.expectIdent().str;
let eref = this.currentScope.entities.find(e => e.name == name);
if (!eref) this.compileError(`I couldn't find an entity named "${name}" in this scope.`)
if (!eref) {
this.compileError(`I couldn't find an entity named "${name}" in this scope.`)
throw new Error();
}
return eref;
}

@@ -26,7 +26,6 @@ export enum TokenType {
Ident = 'ident',
Comment = 'comment',
Ignore = 'ignore',
CodeFragment = 'code-fragment',
CatchAll = 'catch-all',
}

@@ -49,79 +48,78 @@ function re_escape(rule: TokenRule): string {
return `(${rule.regex.source})`;
}

export class Tokenizer {
export class TokenizerRuleSet {
rules: TokenRule[];
regex: RegExp;
constructor(rules: TokenRule[]) {
this.rules = rules.concat(CATCH_ALL_RULES);
var pattern = this.rules.map(re_escape).join('|');
this.regex = new RegExp(pattern, "gs"); // global, dotall
}
}

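Illustration (not from the diff) of how the TokenizerRuleSet above is meant to work: each rule's source is wrapped in a capture group by re_escape, the groups are joined with |, and the combined pattern is compiled with g and s so one exec loop walks the whole file, newlines included; the index of the non-null capture group tells you which rule matched.

// standalone sketch of the combined-regex idea
const rules = [
  { type: 'code-fragment', regex: /---.*?---/ },
  { type: 'integer', regex: /[-]?\d+/ },
  { type: 'ident', regex: /[A-Za-z_][A-Za-z0-9_]*/ },
  { type: 'ignore', regex: /\s+/ },
];
const combined = new RegExp(rules.map(r => `(${r.regex.source})`).join('|'), 'gs');
const text = 'x 123 ---\nlda #0\n--- y';
let m: RegExpExecArray | null;
while ((m = combined.exec(text))) {
  const i = m.slice(1).findIndex(g => g != null);    // which alternative fired
  console.log(rules[i].type, JSON.stringify(m[0]));
}
// ident "x", ignore " ", integer "123", ignore " ",
// code-fragment "---\nlda #0\n---", ignore " ", ident "y"
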
export class Tokenizer {
ruleset: TokenizerRuleSet;
lineindex: number[];
path: string;
lineno: number;
tokens: Token[];
lasttoken: Token;
errors: WorkerError[];
curlabel: string;
eol: Token;
includeEOL = false;
eof: Token;
errorOnCatchAll = false;
codeFragment : string | null = null;
codeFragmentStart : SourceLocation | null = null;

constructor() {
this.lineno = 0;
this.errors = [];
this.lineno = 0;
this.lineindex = [];
this.tokens = [];
}
setTokenRuleSet(ruleset: TokenizerRuleSet) {
this.ruleset = ruleset;
}
setTokenRules(rules: TokenRule[]) {
this.rules = rules.concat(CATCH_ALL_RULES);
var pattern = this.rules.map(re_escape).join('|');
this.regex = new RegExp(pattern, "g");
this.setTokenRuleSet(new TokenizerRuleSet(rules));
}
tokenizeFile(contents: string, path: string) {
this.path = path;
this.tokens = []; // can't have errors until this is set
let txtlines = contents.split(/\n|\r\n?/);
txtlines.forEach((line) => this._tokenize(line));
this._pushToken({ type: TokenType.EOF, str: "", $loc: { path: this.path, line: this.lineno } });
let m;
let re = /\n|\r\n?/g;
this.lineindex.push(0);
while (m = re.exec(contents)) {
this.lineindex.push(m.index);
}
tokenizeLine(line: string) : void {
this.lineno++;
this._tokenize(line);
this._tokenize(contents);
this.eof = { type: TokenType.EOF, str: "", $loc: { path: this.path, line: this.lineno } };
this.pushToken(this.eof);
}
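
The lineindex built in tokenizeFile above is what replaces per-line tokenization: the offset of every newline is recorded once, and _tokenize only has to advance lineno while a match offset passes those positions (matches arrive in increasing order, so the cursor never moves backwards). A small standalone sketch of the same lookup:

// illustration only: offset -> line number via a precomputed newline index
function buildLineIndex(contents: string): number[] {
  const lineindex: number[] = [0];
  const re = /\n|\r\n?/g;
  let m: RegExpExecArray | null;
  while ((m = re.exec(contents))) lineindex.push(m.index);
  return lineindex;
}
const index = buildLineIndex("one\ntwo\nthree");   // [0, 3, 7]
let lineno = 0;
for (const offset of [0, 4, 8]) {                  // start of "one", "two", "three"
  while (offset >= index[lineno]) lineno++;
  console.log(offset, '-> line', lineno);          // 0 -> line 1, 4 -> line 2, 8 -> line 3
}
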
_tokenize(line: string): void {
this.lineno++;
this.eol = { type: TokenType.EOL, str: "", $loc: { path: this.path, line: this.lineno, start: line.length } };
_tokenize(text: string): void {
// iterate over each token via re_toks regex
let m: RegExpMatchArray;
while (m = this.regex.exec(line)) {
this.lineno = 0;
while (m = this.ruleset.regex.exec(text)) {
let found = false;
// find line #
while (m.index >= this.lineindex[this.lineno]) {
this.lineno++;
}
// find out which capture group was matched, and thus token type
for (let i = 0; i < this.rules.length; i++) {
let rules = this.ruleset.rules;
for (let i = 0; i < rules.length; i++) {
let s: string = m[i + 1];
if (s != null) {
found = true;
let loc = { path: this.path, line: this.lineno, start: m.index, end: m.index + s.length };
let rule = this.rules[i];
let rule = rules[i];
// add token to list
switch (rule.type) {
case TokenType.CodeFragment:
// TODO: empty code fragment doesn't work
if (this.codeFragment != null) {
let codeLoc = mergeLocs(this.codeFragmentStart, loc);
this._pushToken({ str: this.codeFragment, type: rule.type, $loc: codeLoc });
this.codeFragmentStart = null;
this.codeFragment = null;
} else {
loc.line++;
this.codeFragmentStart = loc;
this.codeFragment = '';
return; // don't add any more tokens (TODO: check for trash?)
}
break;
case TokenType.CatchAll:
if (this.errorOnCatchAll && this.codeFragment == null) {
if (this.errorOnCatchAll) {
this.compileError(`I didn't expect the character "${m[0]}" here.`);
}
default:
if (this.codeFragment == null) {
this._pushToken({ str: s, type: rule.type, $loc: loc });
}
this.pushToken({ str: s, type: rule.type, $loc: loc });
case TokenType.Comment:
case TokenType.Ignore:
break;
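
One detail of the switch above that is easy to misread: the case labels fall through on purpose. CatchAll falls into default, so (when errorOnCatchAll is off) an unknown character is still pushed as a catch-all token, while Comment and Ignore jump straight to break and never reach pushToken. A reduced sketch of that control flow (hypothetical helper, not in the source):

function shouldPush(type: string): boolean {
  switch (type) {
    case 'catch-all':   // falls through: catch-all tokens are still pushed
    default:
      return true;
    case 'comment':
    case 'ignore':
      return false;     // dropped from the token stream
  }
}
console.log(shouldPush('ident'), shouldPush('catch-all'), shouldPush('ignore'));  // true true false
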
@@ -133,14 +131,8 @@ export class Tokenizer {
this.compileError(`Could not parse token: <<${m[0]}>>`)
}
}
if (this.includeEOL) {
this._pushToken(this.eol);
}
if (this.codeFragment != null) {
this.codeFragment += line + '\n';
}
}
_pushToken(token: Token) {
pushToken(token: Token) {
this.tokens.push(token);
}
addError(msg: string, loc?: SourceLocation) {
@@ -161,10 +153,10 @@ export class Tokenizer {
}
peekToken(lookahead?: number): Token {
let tok = this.tokens[lookahead || 0];
return tok ? tok : this.eol;
return tok ? tok : this.eof;
}
consumeToken(): Token {
let tok = this.lasttoken = (this.tokens.shift() || this.eol);
let tok = this.lasttoken = (this.tokens.shift() || this.eof);
return tok;
}
expectToken(str: string, msg?: string): Token {

@@ -293,7 +293,10 @@ function testCompiler() {
let c = new ECSCompiler();
try {
c.parseFile(`

// comment
/*
mju,fjeqowfjqewiofjqe
*/
component Kernel
lines: 0..255
bgcolor: 0..255

@@ -144,26 +144,29 @@ describe('Tokenizer', function() {
{ type: 'qstring', regex: /".*?"/ },
{ type: 'integer', regex: /[-]?\d+/ },
{ type: 'ignore', regex: /\s+/ },
{ type: TokenType.CodeFragment, regex: /---/ },
]);
t.tokenizeFile("\n{\"key\" value\n \"number\" 531\n \"f\" (fn [x] (+ x 2))}\n", "test.file");
t.tokenizeFile("a\n{\"key\" value\n \"number\" 531\n\n \"f\" (fn [x] (+ x 2))}\n", "test.file");
assert.strictEqual(t.tokens.map(t => t.type).join(' '),
'delim qstring ident qstring integer qstring delim ident delim ident delim delim catch-all ident integer delim delim delim eof');
'ident delim qstring ident qstring integer qstring delim ident delim ident delim delim catch-all ident integer delim delim delim eof');
assert.strictEqual(t.tokens.map(t => t.str).join(' '),
'{ "key" value "number" 531 "f" ( fn [ x ] ( + x 2 ) ) } ');
assert.strictEqual(19, t.tokens.length);
assert.strictEqual('{', t.peekToken().str);
assert.strictEqual('{', t.expectToken('{').str);
'a { "key" value "number" 531 "f" ( fn [ x ] ( + x 2 ) ) } ');
assert.strictEqual(t.tokens.map(t => t.$loc.line).join(' '),
'1 2 2 2 3 3 5 5 5 5 5 5 5 5 5 5 5 5 5 6');
assert.strictEqual(20, t.tokens.length);
assert.strictEqual('a', t.peekToken().str);
assert.strictEqual('a', t.expectToken('a').str);
t.expectTokens(['foo', '{']);
t.pushbackToken(t.consumeToken());
assert.strictEqual('"key"', t.consumeToken().str);
assert.deepStrictEqual({ 'value': true }, t.parseModifiers(['foo', 'value', 'bar']));
assert.deepStrictEqual([], t.errors);
t.includeEOL = true;
t.tokenizeFile("\n{\"key\" value\n \"number\" 531\n \"f\" (fn [x] (+ x 2))}\n", "test.file");
assert.strictEqual(24, t.tokens.length);
assert.strictEqual(t.tokens.map(t => t.type).join(' '),
'eol delim qstring ident eol qstring integer eol qstring delim ident delim ident delim delim catch-all ident integer delim delim delim eol eol eof');
t.includeEOL = false;
t = new Tokenizer();
t.setTokenRules([
{ type: 'ident', regex: /[$A-Za-z_][A-Za-z0-9_-]*/ },
{ type: 'delim', regex: /[\(\)\{\}\[\]]/ },
{ type: 'code-fragment', regex: /---[^-]*---/ },
{ type: 'ignore', regex: /\s+/ },
]);
t.tokenizeFile("key value ---\nthis is\na fragment\n--- foo", "test.file");
assert.strictEqual(t.tokens.map(t => t.type).join(' '),
'ident ident code-fragment ident eof');
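
The new $loc.line assertion above is the observable effect of the commit: the input is matched as one string, but each token still records the line it starts on. A minimal usage sketch in the same style as the test (assumes the same Tokenizer import the test file uses):

const t2 = new Tokenizer();
t2.setTokenRules([
  { type: 'ident', regex: /[A-Za-z_][A-Za-z0-9_]*/ },
  { type: 'ignore', regex: /\s+/ },
]);
t2.tokenizeFile("alpha\nbeta gamma\n", "demo.file");
console.log(t2.tokens.map(tok => `${tok.str}@${tok.$loc.line}`).join(' '));
// expected roughly: alpha@1 beta@2 gamma@2 @3   (the trailing entry is the eof token)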