1
0
mirror of https://github.com/sehugg/8bitworkshop.git synced 2026-03-14 17:16:35 +00:00
Files
8bitworkshop/gen/worker/assembler.js

431 lines
16 KiB
JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Assembler = void 0;
/**
 * Tells whether a result object carries an `error` field.
 * Only `undefined` counts as "no error"; any other value (including null)
 * marks the object as an error result.
 */
const isError = (result) => {
    return result.error !== undefined;
};
/**
 * Formats a value as upper-case hexadecimal, left-padded with zeros.
 *
 * @param {number} v  Value to format.
 * @param {number} [nd] Minimum number of digits; a falsy value means 2.
 *   A width of 8 is rendered as two 16-bit halves so that negative 32-bit
 *   values print as their unsigned bit pattern.
 * @returns {string} The hex text, or `v` coerced to a string when `v`
 *   cannot be formatted (e.g. it is undefined or null).
 */
function hex(v, nd) {
    const digits = nd ? nd : 2;
    try {
        if (digits == 8) {
            const upperHalf = (v >> 16) & 0xffff;
            const lowerHalf = v & 0xffff;
            return hex(upperHalf, 4) + hex(lowerHalf, 4);
        }
        let text = v.toString(16).toUpperCase();
        // Prepend zeros one at a time until the requested width is reached.
        for (; text.length < digits; text = "0" + text) {
            // padding happens in the loop update expression
        }
        return text;
    }
    catch (e) {
        return v + "";
    }
}
/**
 * Converts a string into an array of its UTF-16 code units.
 *
 * @param {string} s Text to convert.
 * @returns {number[]} One `charCodeAt` value per index of `s`.
 */
function stringToData(s) {
    return Array.from({ length: s.length }, (_unused, index) => s.charCodeAt(index));
}
/**
 * Table-driven assembler.
 *
 * An architecture "spec" supplies `vars` (operand definitions) and `rules`
 * (instruction formats plus the bit layout each one emits). Source lines are
 * matched against the rules in order; symbolic operands are recorded as
 * fixups and patched into the output by finish().
 *
 * The hooks `loadJSON`, `loadInclude` and `loadModule` are called by the
 * `.arch`, `.include` and `.module` directives but are not defined in this
 * class; presumably the host assigns them on the instance before assembling.
 */
class Assembler {
    /**
     * @param {object} [spec] Architecture description. When given, its rules
     *   are compiled to regular expressions immediately.
     */
    constructor(spec) {
        this.ip = 0; // address of the next word to emit
        this.origin = 0; // address corresponding to outwords[0]
        this.linenum = 0; // number of source lines passed to assemble()
        this.symbols = {}; // symbol name -> { value }
        this.errors = []; // collected { msg, line } diagnostics
        this.outwords = []; // emitted words, indexed by (address - origin)
        this.asmlines = []; // per-line listing records
        this.fixups = []; // operands waiting for symbol resolution
        this.width = 8; // bits per output word
        this.codelen = 0; // value of the .len directive
        this.aborted = false; // set by fatal(); stops assembleFile()
        this.spec = spec;
        if (spec) {
            this.preprocessRules();
        }
    }
    /**
     * Compiles one rule's `fmt` string into a case-insensitive regular
     * expression and stores `re`, `prefix` and `varlist` on the rule.
     *
     * @param {object} rule Rule with a `fmt` string and a `bits` array.
     * @param {object} vars Variable definitions keyed by name (without `~`).
     * @returns {object} The same rule object, mutated in place.
     * @throws {Error} When `fmt`/`bits` are missing or malformed, a `~name`
     *   has no definition, or the generated pattern is not a valid regex.
     */
    rule2regex(rule, vars) {
        let s = rule.fmt;
        if (!s || !(typeof s === 'string'))
            throw Error('Each rule must have a "fmt" string field');
        if (!rule.bits || !(rule.bits instanceof Array))
            throw Error('Each rule must have a "bits" array field');
        const varlist = [];
        rule.prefix = s.split(/\s+/)[0];
        // Escape the punctuation that is literal in assembly syntax. The order
        // matters: '+' must be escaped before whitespace is turned into '\s+'.
        s = s.replace(/\+/g, '\\+');
        s = s.replace(/\*/g, '\\*');
        s = s.replace(/\s+/g, '\\s+');
        s = s.replace(/\[/g, '\\[');
        s = s.replace(/\]/g, '\\]');
        s = s.replace(/\(/g, '\\(');
        s = s.replace(/\)/g, '\\)');
        s = s.replace(/\./g, '\\.');
        s = s.replace(/\,/g, '\\s*,\\s*'); // TODO?
        // TODO: more escapes?
        // Each ~name placeholder becomes one capture group, in order.
        s = s.replace(/~\w+/g, (placeholder) => {
            const varname = placeholder.slice(1);
            const v = vars[varname];
            varlist.push(varname);
            if (!v)
                throw Error('Could not find variable definition for "~' + varname + '"');
            else if (v.toks)
                return '(\\w+)'; // enumerated keyword
            else
                return '([0-9]+|[$][0-9a-f]+|\\w+)'; // decimal, $hex, or symbol
        });
        try {
            rule.re = new RegExp('^' + s + '$', 'i');
        }
        catch (e) {
            throw Error("Bad regex for rule \"" + rule.fmt + "\": /" + s + "/ -- " + e);
        }
        rule.varlist = varlist;
        // TODO: check rule constraints
        return rule;
    }
    /**
     * Applies the spec's word width (if any) and compiles every rule.
     */
    preprocessRules() {
        if (this.spec.width) {
            this.width = this.spec.width;
        }
        for (const rule of this.spec.rules) {
            this.rule2regex(rule, this.spec.vars);
        }
    }
    /**
     * Records a diagnostic.
     *
     * @param {string} msg Message text.
     * @param {number} [line] Source line; a falsy value means the current line.
     */
    warning(msg, line) {
        this.errors.push({ msg: msg, line: line ? line : this.linenum });
    }
    /**
     * Records a diagnostic and marks the assembly as aborted.
     *
     * @param {string} msg Message text.
     * @param {number} [line] Source line; a falsy value means the current line.
     */
    fatal(msg, line) {
        this.warning(msg, line);
        this.aborted = true;
    }
    /**
     * Calls fatal() only when `msg` is truthy; used with loaders that return
     * an error string on failure and nothing on success.
     */
    fatalIf(msg, line) {
        if (msg)
            this.fatal(msg, line);
    }
    /**
     * Emits an encoded instruction, most significant word first, and adds a
     * listing record for the current line.
     *
     * @param {{opcode: number, nbits: number}} result Output of buildInstruction().
     */
    addBytes(result) {
        this.asmlines.push({
            line: this.linenum,
            offset: this.ip,
            nbits: result.nbits
        });
        const op = result.opcode;
        const nb = result.nbits / this.width;
        for (let i = 0; i < nb; i++) {
            if (this.width < 32) {
                const shift = (nb - 1 - i) * this.width;
                this.outwords[this.ip++ - this.origin] = (op >> shift) & ((1 << this.width) - 1);
            }
            else {
                // A 32-bit mask cannot be built with (1 << 32), so store as-is.
                this.outwords[this.ip++ - this.origin] = op;
            }
        }
    }
    /**
     * Emits raw data words (masked to the word width when it is below 32 bits)
     * and adds a listing record for the current line.
     *
     * @param {number[]} data Words to append at the current address.
     */
    addWords(data) {
        this.asmlines.push({
            line: this.linenum,
            offset: this.ip,
            nbits: this.width * data.length
        });
        for (let i = 0; i < data.length; i++) {
            if (this.width < 32)
                this.outwords[this.ip++ - this.origin] = data[i] & ((1 << this.width) - 1);
            else
                this.outwords[this.ip++ - this.origin] = data[i];
        }
    }
    /**
     * Parses each token with parseConst().
     *
     * @param {string[]} toks Operand tokens.
     * @returns {number[]} Parsed values (NaN for tokens that are not numeric).
     */
    parseData(toks) {
        const data = [];
        for (let i = 0; i < toks.length; i++) {
            data[i] = this.parseConst(toks[i]);
        }
        return data;
    }
    /**
     * Rounds the current address up to a multiple of `align`.
     *
     * The alignment must be a number in [1, codelen]; anything else, including
     * NaN from a missing or non-numeric operand, is reported as a fatal error
     * and leaves the address unchanged.
     *
     * @param {number} align Alignment in words.
     */
    alignIP(align) {
        // `!(align >= 1)` also rejects NaN, which `align < 1` would let through
        // and which would otherwise turn this.ip into NaN.
        if (!(align >= 1) || align > this.codelen)
            this.fatal("Invalid alignment value");
        else
            this.ip = Math.floor((this.ip + align - 1) / align) * align;
    }
    /**
     * Parses a numeric literal: `$` prefix means hexadecimal, anything else
     * goes through parseInt without an explicit radix (so a `0x` prefix is
     * also accepted as hexadecimal).
     *
     * @param {string} s Literal text.
     * @param {number} [nbits] Currently unused.
     * @returns {number} Parsed value, or NaN when `s` is not numeric.
     */
    parseConst(s, nbits) {
        // TODO: check bit length
        if (s && s[0] == '$')
            return parseInt(s.slice(1), 16);
        else
            return parseInt(s);
    }
    /**
     * Reverses the order of the word-sized chunks within an `nbits`-wide value.
     *
     * @param {number} x Value to reorder.
     * @param {number} nbits Total width of `x` in bits.
     * @returns {number} Value with its chunks in the opposite order.
     */
    swapEndian(x, nbits) {
        let y = 0;
        while (nbits > 0) {
            const n = Math.min(nbits, this.width);
            const mask = (1 << n) - 1;
            y <<= n;
            y |= (x & mask);
            x >>>= n;
            nbits -= n;
        }
        return y;
    }
    /**
     * Encodes one instruction from a matched rule.
     *
     * Each entry of `rule.bits` is either a string of binary digits (a literal
     * bit field), a number (index of a captured operand), or a slice object
     * `{a, b, n}` (operand index `a`, starting bit `b`, `n` bits). Operands
     * that are not numeric are queued as fixups and encoded as zero for now.
     *
     * @param {object} rule Rule already processed by rule2regex().
     * @param {Array} m Regex match; `m[i + 1]` is the text of operand `i`.
     * @returns {{opcode: number, nbits: number}|{error: string}} The encoded
     *   instruction, or an error object when an operand is not acceptable.
     */
    buildInstruction(rule, m) {
        let opcode = 0;
        let oplen = 0;
        // iterate over each component of the rule output ("bits")
        for (let b of rule.bits) {
            let nbits;
            let value;
            // is a string? then it's a bit constant
            // TODO
            if (typeof b === "string") {
                nbits = b.length;
                value = parseInt(b, 2);
            }
            else {
                // is it a slice {a,b,n} or just a number?
                const index = typeof b === "number" ? b : b.a;
                // it's an indexed variable, look up its variable
                const id = m[index + 1];
                const v = this.spec.vars[rule.varlist[index]];
                if (!v) {
                    return { error: `Could not find matching identifier for '${m[0]}' index ${index}` };
                }
                nbits = v.bits;
                let shift = 0;
                if (typeof b !== "number") {
                    nbits = b.n;
                    shift = b.b;
                }
                // is it an enumerated type? look up the index of its keyword
                if (v.toks) {
                    value = v.toks.indexOf(id);
                    if (value < 0)
                        return { error: "Can't use '" + id + "' here, only one of: " + v.toks.join(', ') };
                }
                else {
                    // otherwise, parse it as a constant
                    value = this.parseConst(id, nbits);
                    // is it a label? add fixup
                    if (isNaN(value)) {
                        this.fixups.push({
                            sym: id, ofs: this.ip, size: v.bits, line: this.linenum,
                            dstlen: nbits, dstofs: oplen, srcofs: shift,
                            endian: v.endian,
                            iprel: !!v.iprel, ipofs: (v.ipofs || 0), ipmul: v.ipmul || 1,
                            rule, m
                        });
                        //console.log(id, shift, oplen, nbits, v.bits);
                        value = 0;
                    }
                    else {
                        // NOTE(review): shift counts wrap modulo 32, so for
                        // v.bits == 32 this mask is 0 and every non-zero
                        // constant is rejected. Left as-is.
                        const mask = (1 << v.bits) - 1;
                        if ((value & mask) != value)
                            return { error: "Value " + value + " does not fit in " + v.bits + " bits" };
                    }
                }
                // if little endian, we need to swap ordering
                if (v.endian == 'little')
                    value = this.swapEndian(value, v.bits);
                // is it an array slice? slice the bits
                if (typeof b !== "number") {
                    value = (value >>> shift) & ((1 << b.n) - 1);
                }
            }
            opcode = (opcode << nbits) | value;
            oplen += nbits;
        }
        if (oplen == 0)
            this.warning("Opcode had zero length");
        else if (oplen > 32)
            this.warning("Opcodes > 32 bits not supported");
        else if ((oplen % this.width) != 0)
            this.warning("Opcode was not word-aligned (" + oplen + " bits)");
        return { opcode: opcode, nbits: oplen };
    }
    /**
     * Loads `<arch>.json` through the `loadJSON` hook and installs it as the
     * active spec. Does nothing when no `loadJSON` hook is present.
     *
     * @param {string} arch Architecture name (file name without extension).
     * @returns {string|undefined} An error message when the file is missing or
     *   lacks `vars`/`rules`; otherwise undefined.
     */
    loadArch(arch) {
        if (this.loadJSON) {
            const json = this.loadJSON(arch + ".json");
            if (json && json.vars && json.rules) {
                this.spec = json;
                this.preprocessRules();
            }
            else {
                return ("Could not load arch file '" + arch + ".json'");
            }
        }
    }
    /**
     * Executes one directive line that has already been split on whitespace.
     *
     * @param {string[]} tokens Directive name followed by its operands.
     */
    parseDirective(tokens) {
        const cmd = tokens[0].toLowerCase();
        switch (cmd) {
            case '.define':
                this.symbols[tokens[1].toLowerCase()] = { value: tokens[2] };
                break;
            case '.org':
                // No radix on purpose: parseInt then also accepts a 0x prefix.
                this.ip = this.origin = parseInt(tokens[1]);
                break;
            case '.len':
                this.codelen = parseInt(tokens[1]);
                break;
            case '.width':
                this.width = parseInt(tokens[1]);
                break;
            case '.arch':
                this.fatalIf(this.loadArch(tokens[1]));
                break;
            case '.include':
                this.fatalIf(this.loadInclude(tokens[1]));
                break;
            case '.module':
                this.fatalIf(this.loadModule(tokens[1]));
                break;
            case '.data':
                this.addWords(this.parseData(tokens.slice(1)));
                break;
            case '.string':
                this.addWords(stringToData(tokens.slice(1).join(' ')));
                break;
            case '.align':
                this.alignIP(this.parseConst(tokens[1]));
                break;
            default:
                this.warning("Unrecognized directive: " + tokens);
        }
    }
    /**
     * Assembles a single source line: strips `;` comments, runs directives,
     * records a leading `label:` and encodes the instruction with the first
     * rule that both matches and builds without error.
     *
     * @param {string} line Raw source line.
     * @returns {{opcode: number, nbits: number}|undefined} The encoded
     *   instruction, or undefined for directives, empty lines and failures.
     */
    assemble(line) {
        this.linenum++;
        // remove comments
        line = line.replace(/[;].*/g, '').trim();
        // is it a directive?
        if (line[0] == '.') {
            const tokens = line.split(/\s+/);
            this.parseDirective(tokens);
            return;
        }
        // make it lowercase
        line = line.toLowerCase();
        // find labels
        line = line.replace(/(\w+):/, (_label, label) => {
            this.symbols[label] = { value: this.ip };
            return ''; // replace label with blank
        });
        line = line.trim();
        if (line == '')
            return; // empty line
        // look at each rule in order
        if (!this.spec) {
            this.fatal("Need to load .arch first");
            return;
        }
        let lastError;
        for (const rule of this.spec.rules) {
            const m = rule.re.exec(line);
            if (m) {
                const result = this.buildInstruction(rule, m);
                if (!isError(result)) {
                    this.addBytes(result);
                    return result;
                }
                else {
                    // Keep trying later rules; report the last failure if none fit.
                    lastError = result.error;
                }
            }
        }
        this.warning(lastError ? lastError : ("Could not decode instruction: " + line));
    }
    /**
     * Patches a resolved symbol value into the already-emitted output words.
     *
     * @param {object} fix Fixup record created by buildInstruction().
     * @param {{value: *}} sym Symbol table entry for `fix.sym`.
     */
    applyFixup(fix, sym) {
        // Calculate the word offset where we'll apply this fixup
        // fix.ofs is the instruction address, fix.dstofs is bit position within instruction
        let ofs = fix.ofs + Math.floor(fix.dstofs / this.width);
        // Create mask for the full symbol size (used for range checking)
        const mask = ((1 << fix.size) - 1);
        // Get the symbol's value (e.g., target address for a branch/jump)
        let value = this.parseConst(sym.value + "", fix.dstlen);
        // Handle PC-relative addressing (branches, relative jumps)
        // Converts absolute address to relative offset from current instruction
        // value = (target - current_pc) * ipmul - ipofs
        // - ipmul: multiplier for instruction units vs byte units (e.g., 4 for word-addressed)
        // - ipofs: additional offset adjustment (e.g., for architectures with PC+offset)
        if (fix.iprel)
            value = (value - fix.ofs) * fix.ipmul - fix.ipofs;
        // Range check: ensure value fits in the destination field
        // Only check when not extracting a slice (srcofs == 0)
        if (fix.srcofs == 0 && (value > mask || value < -mask))
            this.warning("Symbol " + fix.sym + " (" + value + ") does not fit in " + fix.dstlen + " bits", fix.line);
        //console.log(hex(value,8), fix.srcofs, fix.dstofs, fix.dstlen);
        // Extract bit slice if needed (e.g., bits [12:5] from a 13-bit immediate)
        // srcofs is the starting bit position to extract from the value
        if (fix.srcofs > 0)
            value >>>= fix.srcofs;
        // Mask to only the bits we want to insert (dstlen bits)
        value &= (1 << fix.dstlen) - 1;
        // Position the value within the instruction word
        // For 32-bit width: shift value left to align with destination bit position
        // dstofs is counted from MSB, so we shift to put our bits in the right place
        // TODO: make it work for all widths
        if (this.width == 32) {
            const shift = 32 - fix.dstofs - fix.dstlen;
            value <<= shift;
        }
        // Apply the fixup to the output
        // TODO: check range
        if (fix.size <= this.width) {
            // Simple case: fixup fits in one word, just XOR it in
            this.outwords[ofs - this.origin] ^= value;
        }
        else {
            // Complex case: multi-byte fixup (e.g., 32-bit immediate in 8-bit words)
            // swap if we want big endian (we'll apply in LSB first order)
            if (fix.endian == 'big')
                value = this.swapEndian(value, fix.size);
            // Apply fixup across multiple words
            while (value) {
                // Extract the low bits for this word
                const v = value & ((1 << this.width) - 1);
                // Check for overlap (trying to set bits that are already set)
                // TODO: check against mask
                if (v & this.outwords[ofs - this.origin]) {
                    this.warning(`Instruction bits overlapped at bits ${fix.dstofs}:${fix.dstofs + fix.dstlen - 1}: ${fix.rule.fmt} -> "${fix.sym}" ${hex(this.outwords[ofs - this.origin], 8)} & ${hex(v, 8)}`, fix.line);
                }
                else {
                    this.outwords[ofs - this.origin] ^= v;
                }
                // Move to next word
                value >>>= this.width;
                ofs++;
            }
        }
    }
    /**
     * Resolves all pending fixups, fills in the hex text of every listing
     * record, pads the output with zeros up to `codelen` and returns state().
     *
     * @returns {object} Snapshot produced by state().
     */
    finish() {
        // apply fixups
        for (let i = 0; i < this.fixups.length; i++) {
            const fix = this.fixups[i];
            // Own-property lookup only: a plain read would resolve names such
            // as "constructor" through Object.prototype and hide a missing label.
            const isDefined = Object.prototype.hasOwnProperty.call(this.symbols, fix.sym);
            const sym = isDefined ? this.symbols[fix.sym] : undefined;
            if (sym) {
                this.applyFixup(fix, sym);
            }
            else {
                // Report on the line that referenced the symbol, not the last
                // line assembled.
                this.warning("Symbol '" + fix.sym + "' not found", fix.line);
            }
        }
        // update asmlines
        for (let i = 0; i < this.asmlines.length; i++) {
            const al = this.asmlines[i];
            al.insns = '';
            for (let j = 0; j < al.nbits / this.width; j++) {
                const word = this.outwords[al.offset + j - this.origin];
                if (j > 0)
                    al.insns += ' ';
                al.insns += hex(word, this.width / 4);
            }
        }
        while (this.outwords.length < this.codelen) {
            this.outwords.push(0);
        }
        this.fixups = [];
        return this.state();
    }
    /**
     * Assembles a whole source text line by line, stopping early once a fatal
     * error has been recorded, then finishes the assembly.
     *
     * @param {string} text Source text with `\n` line separators.
     * @returns {object} Snapshot produced by state().
     */
    assembleFile(text) {
        const lines = text.split(/\n/g);
        for (let i = 0; i < lines.length && !this.aborted; i++) {
            try {
                this.assemble(lines[i]);
            }
            catch (e) {
                // Any exception from a line is logged and turned into a fatal
                // diagnostic instead of propagating to the caller.
                console.log(e);
                this.fatal("Exception during assembly: " + e);
            }
        }
        return this.finish();
    }
    /**
     * @returns {object} Current assembler state: addresses, output words,
     *   listing records, diagnostics and pending fixups. The arrays are the
     *   live instances, not copies.
     */
    state() {
        return { ip: this.ip, line: this.linenum, origin: this.origin, codelen: this.codelen,
            intermediate: {}, // TODO: listing, symbols?
            output: this.outwords,
            lines: this.asmlines,
            errors: this.errors,
            fixups: this.fixups };
    }
}
exports.Assembler = Assembler;
//# sourceMappingURL=assembler.js.map