diff --git a/.travis.yml b/.travis.yml index a093733..7ab0a36 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,19 @@ dist: bionic osx_image: xcode10 language: cpp + +addons: + apt: + packages: re2c + homebrew: + packages: re2c + os: - osx - linux + compiler: - clang - gcc script: make all - - diff --git a/Makefile b/Makefile index 04d1d73..da5749f 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ clean: o: mkdir o -merlin-link: o/link.o o/mapped_file.o o/omf.o o/set_file_type.o afp/libafp.a +merlin-link: o/main.o o/link.o o/script.o o/mapped_file.o o/omf.o o/set_file_type.o afp/libafp.a $(LINK.o) $^ $(LDLIBS) -o $@ o/mapped_file.o : mapped_file.cpp mapped_file.h unique_resource.h diff --git a/link.cpp b/link.cpp index 883fbc5..d953a98 100644 --- a/link.cpp +++ b/link.cpp @@ -8,17 +8,13 @@ #include #include -/* old version of stdlib have this stuff in utility */ -#if __has_include() -#define HAVE_CHARCONV -#include -#endif #include #include #include #include -#include +#include +#include #include #include @@ -31,6 +27,8 @@ #include "omf.h" #include "rel.h" +#include "link.h" +#include "script.h" void save_omf(const std::string &path, std::vector &segments, bool compress, bool expressload); int set_file_type(const std::string &path, uint16_t file_type, uint32_t aux_type, std::error_code &ec); @@ -40,35 +38,148 @@ void set_file_type(const std::string &path, uint16_t file_type, uint32_t aux_typ typedef std::basic_string_view byte_view; -struct symbol { - std::string name; - std::string file; - uint32_t value = 0; - unsigned id = 0; - unsigned count = 0; - - bool absolute = false; - bool defined = false; -}; - - -std::unordered_map symbol_map; -std::vector symbol_table; struct pending_reloc : public omf::reloc { unsigned id = 0; }; -std::vector relocations; -std::vector segments; +struct cookie { + std::string file; + std::vector remap; + + uint32_t begin = 0; + uint32_t end = 0; +}; + + +namespace { + + + std::unordered_map symbol_map; + std::vector symbol_table; + + std::vector segments; + std::vector> relocations; + + std::unordered_map file_types = { + + { "NON", 0x00 }, + { "BAD", 0x01 }, + { "BIN", 0x06 }, + { "TXT", 0x04 }, + { "DIR", 0x0f }, + { "ADB", 0x19 }, + { "AWP", 0x1a }, + { "ASP", 0x1b }, + { "GSB", 0xab }, + { "TDF", 0xac }, + { "BDF", 0xad }, + { "SRC", 0xb0 }, + { "OBJ", 0xb1 }, + { "LIB", 0xb2 }, + { "S16", 0xb3 }, + { "RTL", 0xb4 }, + { "EXE", 0xb5 }, + { "PIF", 0xb6 }, + { "TIF", 0xb7 }, + { "NDA", 0xb8 }, + { "CDA", 0xb9 }, + { "TOL", 0xba }, + { "DRV", 0xbb }, + { "DOC", 0xbf }, + { "PNT", 0xc0 }, + { "PIC", 0xc1 }, + { "FON", 0xcb }, + { "PAS", 0xef }, + { "CMD", 0xf0 }, + { "LNK", 0xf8 }, + { "BAS", 0xfc }, + { "VAR", 0xfd }, + { "REL", 0xfe }, + { "SYS", 0xff }, + + }; + +} + + +/* + Variable types: + + linker symbol table includes =, EQU, GEQ, and KBD + + + GEQ - global absolute label, in effect for all subsequent asms. + inhibits KBD, otherwise causes duplicate symbol errors during assembly. + + KBD - same as GEQ + + EQU - same as GEQ BUT symbol is discarded after ASM (ie, only in effect for 1 assembly) + + = - internal to link script (DO, etc). not passed to assembler. not passed to linker. + + + POS - current offset + LEN - length of last linked file + +a = assembler +l = linker +c = command file + + a l c + EQU y n n + = n n y + GEQ y y y + KBD y y y + POS n y n + LEN n y n + +seems like it might be nice for POS and LEN to be available in the command file, eg + + POS xxx + DO xxx>4096 + ERR too big + ELS + DS 4096-xxx + FIN + + */ + + +namespace { + /* script related */ + + unsigned lkv = 1; + unsigned ver = 2; + unsigned ftype = 0xb3; + unsigned atype = 0x0000; + unsigned kind = 0x0000; + unsigned sav = 0; + unsigned lnk = 0; + bool end = false; + bool fas = false; + int ovr = OVR_OFF; + + size_t pos_offset = 0; + size_t len_offset = 0; + + /* do/els/fin stuff */ + uint32_t active_bits = 1; + bool active = true; + + std::unordered_map local_symbol_table; + +} + /* nb - pointer may be invalidated by next call */ -symbol *find_symbol(const std::string &name) { +symbol *find_symbol(const std::string &name, bool insert) { auto iter = symbol_map.find(name); if (iter != symbol_map.end()) return &symbol_table[iter->second]; + if (!insert) return nullptr; unsigned id = symbol_table.size(); symbol_map.emplace(name, id); @@ -79,18 +190,39 @@ symbol *find_symbol(const std::string &name) { return &rv; } +void define(std::string name, uint32_t value, int type) { + + bool warn = false; + if (type & 4) { + /* command script */ + auto iter = local_symbol_table.find(name); + if (iter == local_symbol_table.end()) { + local_symbol_table.emplace(std::make_pair(name, value)); + } else if (iter->second != value) { + warn = true; + } + } + if (type & 2) { + /* linker */ + auto e = find_symbol(name, true); + if (e->defined) { + if (!e->absolute || e->value != value) { + warn = true; + } + } else { + e->absolute = true; + e->defined = true; + e->file = "-D"; + e->value = value; + } + } + if (warn) warnx("duplicate symbol %s", name.c_str()); + +} -struct cookie { - std::string file; - std::vector remap; - //std::vector> zero; - uint32_t begin = 0; - uint32_t end = 0; -}; - -void process_labels(byte_view &data, cookie &cookie) { +static void process_labels(byte_view &data, cookie &cookie) { for(;;) { assert(data.size()); @@ -128,6 +260,7 @@ void process_labels(byte_view &data, cookie &cookie) { } e->defined = true; e->file = cookie.file; + e->segment = segments.size() - 1; /* 1-based */ if (flag & SYMBOL_ABSOLUTE) { e->absolute = true; e->value = value; @@ -144,9 +277,10 @@ void process_labels(byte_view &data, cookie &cookie) { } -void process_reloc(byte_view &data, cookie &cookie) { +static void process_reloc(byte_view &data, cookie &cookie) { auto &seg = segments.back(); + auto &pending = relocations.back(); for(;;) { assert(data.size()); @@ -236,7 +370,7 @@ void process_reloc(byte_view &data, cookie &cookie) { r.shift = shift; symbol_table[r.id].count += 1; - relocations.emplace_back(r); + pending.emplace_back(r); } else { omf::reloc r; r.size = size; @@ -247,28 +381,15 @@ void process_reloc(byte_view &data, cookie &cookie) { seg.relocs.emplace_back(r); } /* clear out the inline relocation data */ - for(unsigned i = 0; i < size; ++i) { + for (unsigned i = 0; i < size; ++i) { seg.data[offset + i] = 0; } //cookie.zero.emplace_back(std::make_pair(offset, size)); } } -/* -void add_libraries() { - auto iter = libs.begin(); - auto end = libs.end(); - for(;;) { - - - - } -} -*/ - - -void process_unit(const std::string &path) { +static void process_unit(const std::string &path) { cookie cookie; /* skip over relocs, do symbols first */ @@ -298,7 +419,7 @@ void process_unit(const std::string &path) { errx(1, "Invalid aux type %s", path.c_str()); } - omf::segment &seg = segments.back(); + auto &seg = segments.back(); cookie.begin = seg.data.size(); cookie.end = cookie.begin + offset; @@ -324,47 +445,99 @@ void process_unit(const std::string &path) { /* now relocations */ process_reloc(rr, cookie); + + // LEN support + len_offset = offset; } +static void import(const std::string &path, const std::string &name) { -void resolve(void) { + std::error_code ec; + mapped_file mf(path, mapped_file::readonly, ec); + if (ec) { + errx(1, "Unable to open %s: %s", path.c_str(), ec.message().c_str()); + } - /* this needs to be updated if supporting multiple segments */ auto &seg = segments.back(); - for (auto &r : relocations) { - assert(r.id < symbol_map.size()); - const auto &e = symbol_table[r.id]; - - /* if this is an absolute value, do the math */ - if (!e.defined) { - warnx("%s is not defined", e.name.c_str()); - continue; - } - - if (e.absolute) { - uint32_t value = e.value + r.value; - /* shift is a uint8_t so negating doesn't work right */ - value >>= -(int8_t)r.shift; - - unsigned offset = r.offset; - unsigned size = r.size; - while (size--) { - seg.data[offset++] = value & 0xff; - value >>= 8; - } - continue; - } - - r.value += e.value; - seg.relocs.emplace_back(r); + // check for duplicate label. + auto e = find_symbol(name); + if (e->defined) { + warnx("Duplicate symbol %s", name.c_str()); + return; } - relocations.clear(); - /* sort them */ - std::sort(seg.relocs.begin(), seg.relocs.end(), [](const omf::reloc &a, const omf::reloc &b){ - return a.offset < b.offset; - }); + e->file = path; + e->defined = true; + e->value = seg.data.size(); + e->segment = segments.size()-1; + + seg.data.insert(seg.data.end(), mf.data(), mf.data() + mf.size()); + + // LEN support + len_offset = mf.size(); +} + +static void resolve(void) { + + for (unsigned seg_num = 0; seg_num < segments.size(); ++seg_num) { + + auto &seg = segments[seg_num]; + auto &pending = relocations[seg_num]; + + seg.segnum = seg_num + 1; + + for (auto &r : pending) { + assert(r.id < symbol_map.size()); + const auto &e = symbol_table[r.id]; + + /* if this is an absolute value, do the math */ + if (!e.defined) { + warnx("%s is not defined", e.name.c_str()); + continue; + } + + if (e.absolute) { + uint32_t value = e.value + r.value; + /* shift is a uint8_t so negating doesn't work right */ + value >>= -(int8_t)r.shift; + + unsigned offset = r.offset; + unsigned size = r.size; + while (size--) { + seg.data[offset++] = value & 0xff; + value >>= 8; + } + continue; + } + + /* e.segment is 0-based */ + if (e.segment == seg_num) { + r.value += e.value; + seg.relocs.emplace_back(r); + continue; + } + + omf::interseg inter; + inter.size = r.size; + inter.shift = r.shift; + inter.offset = r.offset; + inter.segment = e.segment + 1; /* 1-based */ + inter.segment_offset = r.value + e.value; + + seg.intersegs.emplace_back(inter); + } + pending.clear(); + + /* sort them */ + std::sort(seg.relocs.begin(), seg.relocs.end(), [](const auto &a, const auto &b){ + return a.offset < b.offset; + }); + + std::sort(seg.intersegs.begin(), seg.intersegs.end(), [](const auto &a, const auto &b){ + return a.offset < b.offset; + }); + } } static void print_symbols2(void) { @@ -377,7 +550,7 @@ static void print_symbols2(void) { } } -void print_symbols(void) { +static void print_symbols(void) { if (symbol_table.empty()) return; @@ -404,113 +577,420 @@ void print_symbols(void) { -void usage(int ex) { - fputs("merlin-link [-o outfile] infile...\n", stderr); - exit(ex); -} +void finish(void) { -/* older std libraries lack charconv and std::from_chars */ -bool parse_number(const char *begin, const char *end, uint32_t &value, int base = 10) { + resolve(); + print_symbols(); -#if defined(HAVE_CHARCONV) - auto r = std::from_chars(begin, end, value, base); - if (r.ec != std::errc() || r.ptr != end) return false; -#else - auto xerrno = errno; - errno = 0; - char *ptr = nullptr; - value = std::strtoul(begin, &ptr, base); - std::swap(errno, xerrno); - if (xerrno || ptr != end) { - return false; + if (save_file.empty()) save_file = "gs.out"; + try { + save_omf(save_file, segments, compress, express); + set_file_type(save_file, ftype, atype); + } catch (std::exception &ex) { + errx(EX_OSERR, "%s: %s", save_file.c_str(), ex.what()); } -#endif - return true; } -static void add_define(std::string str) { - /* -D key[=value] - value = 0x, $, % or base 10 */ - uint32_t value = 0; +static bool op_needs_label(opcode_t op) { + switch (op) { + case OP_KBD: + case OP_EQ: + case OP_EQU: + case OP_GEQ: + return true; + default: + return false; + } +} - auto ix = str.find('='); - if (ix == 0) usage(EX_USAGE); - if (ix == str.npos) { - value = 1; - } else { +static bool op_after_end(opcode_t op) { + switch(op) { + case OP_END: + case OP_CMD: + return true; + default: + return false; + } +} - int base = 10; - auto pos = ++ix; - char c = str[pos]; /* returns 0 if == size */ +extern uint32_t number_operand(const char *cursor, int flags = OP_REQUIRED); +extern uint32_t number_operand(const char *cursor, const std::unordered_map &, int flags = OP_REQUIRED); +extern int ovr_operand(const char *cursor); +extern std::string label_operand(const char *cursor, int flags = OP_REQUIRED); +extern std::string string_operand(const char *cursor, int flags = OP_REQUIRED); +extern std::string path_operand(const char *cursor, int flags = OP_REQUIRED); - switch(c) { - case '%': - base = 2; ++pos; break; - case '$': - base = 16; ++pos; break; - case '0': - c = str[pos+1]; - if (c == 'x' || c == 'X') { - base = 16; pos += 2; - } - break; +extern void no_operand(const char *cursor); + +static std::string basename(const std::string &str) { + + auto ix = str.find_last_of("/:"); + if (ix == str.npos) return str; + return str.substr(0, ix); +} + +/* fixup GS/OS strings. */ +static void fix_path(std::string &s) { + for (char &c : s) + if (c == ':') c = '/'; +} + +/* + SEG name -> undocumented? command to set the OMF segment name (linker 3 only) + + */ +void evaluate(label_t label, opcode_t opcode, const char *cursor) { + + // todo - should move operand parsing to here. + + switch(opcode) { + case OP_DO: + if (active_bits & 0x80000000) throw std::runtime_error("too much do do"); + active_bits <<= 1; + if (active) { + uint32_t value = number_operand(cursor, local_symbol_table); + active_bits |= value ? 1 : 0; + active = (active_bits & (active_bits + 1)) == 0; + } + return; + break; + + case OP_ELS: + if (active_bits < 2) + throw std::runtime_error("els without do"); + + active_bits ^= 0x01; + active = (active_bits & (active_bits + 1)) == 0; + return; + break; + + case OP_FIN: + active_bits >>= 1; + if (!active_bits) { + active = 1; + throw std::runtime_error("fin without do"); + } + active = (active_bits & (active_bits + 1)) == 0; + return; + break; + default: + break; + } + if (!active) return; + + if (label.empty() && op_needs_label(opcode)) + throw std::runtime_error("Bad label"); + + if (end && !op_after_end(opcode)) return; + + switch(opcode) { + + case OP_END: + if (!end && lkv == 2) { + /* finish up */ + segments.pop_back(); + relocations.pop_back(); + if (!segments.empty()) + finish(); + } + end = true; + break; + + case OP_DAT: { + /* 29-DEC-88 4:18:37 PM */ + time_t t = time(nullptr); + struct tm *tm = localtime(&t); + char buffer[32]; + + strftime(buffer, sizeof(buffer), "%d-%b-%y %l:%M:%S %p", tm); + for (char &c : buffer) c = std::toupper(c); + + fprintf(stdout, "%s\n", buffer); + break; } - if (!parse_number(str.data() + pos, str.data() + str.length(), value, base)) - usage(EX_USAGE); - str.resize(ix-1); + case OP_PFX: { + + std::string path = path_operand(cursor); + fix_path(path); + + int ok = chdir(path.c_str()); + if (ok < 0) + warn("PFX %s", path.c_str()); + break; + } + + + case OP_TYP: + ftype = number_operand(cursor, file_types, OP_REQUIRED | OP_INSENSITIVE); + break; + case OP_ADR: + atype = number_operand(cursor, local_symbol_table); + break; + case OP_KND: { + uint32_t kind = number_operand(cursor, local_symbol_table); + if (!segments.empty()) + segments.back().kind = kind; + break; + } + case OP_ALI: { + uint32_t align = number_operand(cursor, local_symbol_table); + // must be power of 2 or 0 + if ((align & (align-1)) == 0) { + // not yet supported. + } else { + throw std::runtime_error("Bad alignment"); + } + break; + } + + + case OP_LKV: { + /* specify linker version */ + /* 0 = binary, 1 = Linker.GS, 2 = Linker.XL, 3 = convert to OMF object file */ + + uint32_t value = number_operand(cursor, local_symbol_table); + switch (value) { + case 0: throw std::runtime_error("binary linker not supported"); + case 3: throw std::runtime_error("object file linker not supported"); + case 1: + case 2: + lkv = value; + break; + default: + throw std::runtime_error("bad linker version"); + } + break; + } + + case OP_VER: { + /* OMF version, 1 or 2 */ + uint32_t value = number_operand(cursor, local_symbol_table); + + if (value != 2) + throw std::runtime_error("bad OMF version"); + ver = value; + break; + } + + case OP_LNK: { + if (end) throw std::runtime_error("link after end"); + + std::string path = path_operand(cursor); + process_unit(path); + ++lnk; + break; + } + + case OP_IMP: { + + /* qasm addition. import binary file. entry name is filename w/ . converted to _ */ + std::string path = path_operand(cursor); + std::string name = basename(path); + for (char &c : name) { + c = std::isalnum(c) ? std::toupper(c) : '_'; + } + import(path, name); + ++lnk; + break; + } + + case OP_SAV: { + if (end) throw std::runtime_error("save after end"); + + std::string path = path_operand(cursor); + + /* use 1st SAV as the path */ + if (save_file.empty()) save_file = path; + + /* + lkv 0 = binary linker (unsupported) + lkv 1 = 1 segment GS linker + lkv 2 = multi-segment GS linker + lkv 3 = convert REL to OMF object file (unsupported) + */ + + if (lkv == 1 || lkv == 2 || lkv == 3) { + auto &seg = segments.back(); + std::string base = basename(path); + /* merlin link uses a 10-char fixed label */ + //base.resize(10, ' '); + seg.segname = base; + seg.loadname = base; + // seg.kind = kind; + } + if (lkv == 1) { + finish(); + end = true; + } + if (lkv == 2) { + /* add a new segment */ + segments.emplace_back(); + relocations.emplace_back(); + pos_offset = 0; // POS support + } + ++sav; + break; + } + + case OP_KBD: { + char buffer[256]; + + if (!isatty(STDIN_FILENO)) return; + + /* todo if already defined (via -D) don't prompt */ + if (local_symbol_table.find(label) != local_symbol_table.end()) + return; + + std::string prompt = string_operand(cursor, OP_OPTIONAL); + + if (prompt.empty()) prompt = "Give value for " + label; + prompt += ": "; + fputs(prompt.c_str(), stdout); + fflush(stdout); + + char *cp = fgets(buffer, sizeof(buffer), stdin); + + if (!cp) throw std::runtime_error("Bad input"); + + uint32_t value = number_operand(cp, local_symbol_table, true); + + define(label, value, LBL_KBD); + break; + } + + case OP_POS: { + // POS label << sets label = current segment offset + // POS << resets pos byte counter. + + std::string label = label_operand(cursor, OP_OPTIONAL); + if (label.empty()) { + pos_offset = segments.back().data.size(); + } else { + uint32_t value = segments.back().data.size() - pos_offset; + + define(label, value, LBL_POS); + } + break; + } + case OP_LEN: { + // LEN label + // sets label = length of most recent file linked + + std::string label = label_operand(cursor); + uint32_t value = len_offset; + define(label, value, LBL_LEN); + break; + } + + case OP_EQ: + define(label, number_operand(cursor, local_symbol_table), LBL_EQ); + break; + case OP_EQU: + define(label, number_operand(cursor, local_symbol_table), LBL_EQU); + break; + case OP_GEQ: + define(label, number_operand(cursor, local_symbol_table), LBL_GEQ); + break; + + case OP_SEG: { + /* OMF object file linker - set the object file seg name */ + std::string name = label_operand(cursor); + break; + } + + case OP_FAS: + /* fast linker, only 1 file allowed */ + fas = true; + break; + + case OP_OVR: + ovr = ovr_operand(cursor); + break; + + case OP_PUT: { + std::string path = path_operand(cursor); + break; + } + case OP_IF: { + std::string path = path_operand(cursor); + break; + } + + case OP_ASM: { + std::string path = path_operand(cursor); + break; + } + default: + throw std::runtime_error("opcode not yet supported"); } - - symbol *e = find_symbol(str); - if (e->defined && e->absolute && e->value == value) return; - - if (e->defined) { - warnx("%s previously defined", str.c_str()); - return; - } - - e->defined = true; - e->absolute = true; - e->file = "-D"; - e->value = value; } +void process_script(const char *path) { -int main(int argc, char **argv) { + extern void parse_line(const char *); - int c; - std::string gs_out = "gs.out"; - bool express = true; - bool compress = true; + FILE *fp = nullptr; - - while ((c = getopt(argc, argv, "o:D:XC")) != -1) { - switch(c) { - case 'o': - gs_out = optarg; - break; - case 'X': express = false; break; - case 'C': compress = false; break; - case 'D': add_define(optarg); break; - case ':': - case '?': - default: - usage(EX_USAGE); - break; + if (!path || !strcmp(path, "-")) fp = stdin; + else { + fp = fopen(path, "r"); + if (!fp) { + err(1, "Unable to open %s", path); } } - argv += optind; - argc -= optind; - - if (!argc) usage(EX_USAGE); segments.emplace_back(); + relocations.emplace_back(); + + int no = 1; + int errors = 0; + char *line = NULL; + size_t cap = 0; + for(;; ++no) { + + ssize_t len = getline(&line, &cap, fp); + if (len == 0) break; + if (len < 0) break; + + /* strip trailing ws */ + while (len && isspace(line[len-1])) --len; + line[len] = 0; + if (len == 0) continue; + + try { + parse_line(line); + } catch (std::exception &ex) { + if (!active) continue; + + fprintf(stderr, "%s in line: %d\n", ex.what(), no); + fprintf(stderr, "%s\n", line); + if (++errors >= 10) { + fputs("Too many errors, aborting\n", stderr); + break; + } + } + } + if (fp != stdin) + fclose(fp); + free(line); + exit(errors ? EX_DATAERR : 0); +} + + + + +void process_files(int argc, char **argv) { + + segments.emplace_back(); + relocations.emplace_back(); + for (int i = 0; i < argc; ++i) { char *path = argv[i]; try { @@ -519,15 +999,7 @@ int main(int argc, char **argv) { errx(EX_DATAERR, "%s: %s", path, ex.what()); } } - - resolve(); - print_symbols(); - - try { - save_omf(gs_out, segments, compress, express); - set_file_type(gs_out, 0xb3, 0x0000); - exit(0); - } catch (std::exception &ex) { - errx(EX_OSERR, "%s: %s", gs_out.c_str(), ex.what()); - } + finish(); + exit(0); } + diff --git a/link.h b/link.h new file mode 100644 index 0000000..298acfa --- /dev/null +++ b/link.h @@ -0,0 +1,59 @@ +#ifndef link_h +#define link_h + +#include +#include + +extern bool verbose; +extern bool compress; +extern bool express; +extern std::string save_file; + + +struct symbol { + std::string name; + std::string file; + uint32_t value = 0; + unsigned id = 0; + unsigned segment = 0; + unsigned count = 0; + + bool absolute = false; + bool defined = false; +}; + +/* + +a = assembler +l = linker +c = command file + + a l c + EQU y n n + = n n y + GEQ y y y + KBD y y y + POS n y n + LEN n y n + +*/ +enum { + LBL_EQU = (1 << 0), + LBL_GEQ = (1 << 0) | (1 << 1) | (1 << 2), + LBL_KBD = (1 << 0) | (1 << 1) | (1 << 2), + LBL_D = (1 << 0) | (1 << 1) | (1 << 2), + LBL_EQ = (1 << 2), + LBL_POS = (1 << 1), + LBL_LEN = (1 << 1), +}; + +void process_script(const char *argv); +void process_files(int argc, char **argv); + + +symbol *find_symbol(const std::string &name, bool insert = true); + +void define(std::string name, uint32_t value, int type); + + +#endif \ No newline at end of file diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..e23f7e2 --- /dev/null +++ b/main.cpp @@ -0,0 +1,146 @@ + +#include +#include +#include + +#include +#include +#include +#include +#include + +/* old version of stdlib have this stuff in utility */ +#if __has_include() +#define HAVE_CHARCONV +#include +#endif + +#include +#include +#include + +#include "link.h" + +static void usage(int ex) { + + fputs( + "merlin-link [options] infile...\n" + "\noptions:\n" + "-C inhibit SUPER compression\n" + "-D symbol=value define symbol\n" + "-X inhibit expressload segment\n" + "-o outfile specify output file (default gs.out)\n" + "-v be verbose\n" + "\n", + stderr); + + exit(ex); +} + +/* older std libraries lack charconv and std::from_chars */ +static bool parse_number(const char *begin, const char *end, uint32_t &value, int base = 10) { + +#if defined(HAVE_CHARCONV) + auto r = std::from_chars(begin, end, value, base); + if (r.ec != std::errc() || r.ptr != end) return false; +#else + auto xerrno = errno; + errno = 0; + char *ptr = nullptr; + value = std::strtoul(begin, &ptr, base); + std::swap(errno, xerrno); + if (xerrno || ptr != end) { + return false; + } +#endif + + return true; +} + +static void add_define(std::string str) { + /* -D key[=value] + value = 0x, $, % or base 10 */ + + uint32_t value = 0; + + auto ix = str.find('='); + if (ix == 0) usage(EX_USAGE); + if (ix == str.npos) { + value = 1; + } else { + + int base = 10; + auto pos = ++ix; + + char c = str[pos]; /* returns 0 if == size */ + + switch(c) { + case '%': + base = 2; ++pos; break; + case '$': + base = 16; ++pos; break; + case '0': + c = str[pos+1]; + if (c == 'x' || c == 'X') { + base = 16; pos += 2; + } + break; + } + if (!parse_number(str.data() + pos, str.data() + str.length(), value, base)) + usage(EX_USAGE); + + str.resize(ix-1); + } + + + define(str, value, LBL_D); +} + +/* .ends_with() is c++20 */ +static bool is_S(std::string_view sv) { + size_t s = sv.size(); + // && is a sequence point. + return s >= 2 && std::toupper(sv[--s]) == 'S' && sv[--s] == '.'; +} + + +bool verbose = false; +std::string save_file; +bool express = true; +bool compress = true; + +int main(int argc, char **argv) { + + int c; + bool script = false; + + while ((c = getopt(argc, argv, "o:D:XCSv")) != -1) { + switch(c) { + case 'o': + save_file = optarg; + break; + case 'X': express = false; break; + case 'C': compress = false; break; + case 'D': add_define(optarg); break; + case 'v': verbose = true; break; + case 'S': script = true; break; + case ':': + case '?': + default: + usage(EX_USAGE); + break; + } + } + + argv += optind; + argc -= optind; + + if (!script && !argc) usage(EX_USAGE); + if (script && argc > 1) usage(EX_USAGE); + if (argc == 1 && is_S(*argv)) script = true; + + if (script) process_script(argc ? *argv : nullptr); + else process_files(argc, argv); + + exit(0); +} \ No newline at end of file diff --git a/omf.cpp b/omf.cpp index 3d8dc90..6ab1ba5 100644 --- a/omf.cpp +++ b/omf.cpp @@ -63,6 +63,9 @@ struct omf_express_header { #pragma pack(pop) +static_assert(sizeof(omf_header) == 44, "OMF Header not packed"); +static_assert(sizeof(omf_express_header) == 48, "OMF Express Header not packed"); + void push(std::vector &v, uint8_t x) { v.push_back(x); } @@ -201,9 +204,9 @@ uint32_t add_relocs(std::vector &data, size_t data_offset, omf::segment auto &sr = ss[n]; if (!sr) sr.emplace(); + sr->append(r.offset); uint32_t value = r.value; - sr->append(r.offset); for (int i = 0; i < 2; ++i, value >>= 8) data[data_offset + r.offset + i] = value; continue; @@ -217,9 +220,9 @@ uint32_t add_relocs(std::vector &data, size_t data_offset, omf::segment auto &sr = ss[n]; if (!sr) sr.emplace(); + sr->append(r.offset); uint32_t value = r.value; - sr->append(r.offset); for (int i = 0; i < 3; ++i, value >>= 8) data[data_offset + r.offset + i] = value; continue; @@ -231,9 +234,9 @@ uint32_t add_relocs(std::vector &data, size_t data_offset, omf::segment int n = SUPER_INTERSEG24 + seg.segnum; auto &sr = ss[n]; if (!sr) sr.emplace(); + sr->append(r.offset); uint32_t value = r.value; - sr->append(r.offset); for (int i = 0; i < 2; ++i, value >>= 8) data[data_offset + r.offset + i] = value; continue; @@ -265,8 +268,10 @@ uint32_t add_relocs(std::vector &data, size_t data_offset, omf::segment constexpr int n = SUPER_INTERSEG1; auto &sr = ss[n]; if (!sr) sr.emplace(); + sr->append(r.offset); uint32_t value = r.segment_offset; + data[data_offset + r.offset + 0] = value; value >>= 8; data[data_offset + r.offset + 1] = value; value >>= 8; data[data_offset + r.offset + 2] = r.segment; @@ -279,9 +284,9 @@ uint32_t add_relocs(std::vector &data, size_t data_offset, omf::segment int n = SUPER_INTERSEG12 + r.segment; auto &sr = ss[n]; if (!sr) sr.emplace(); + sr->append(r.offset); uint32_t value = r.segment_offset; - sr->append(r.offset); for (int i = 0; i < 2; ++i, value >>= 8) data[data_offset + r.offset + i] = value; continue; @@ -292,9 +297,9 @@ uint32_t add_relocs(std::vector &data, size_t data_offset, omf::segment int n = SUPER_INTERSEG24 + r.segment; auto &sr = ss[n]; if (!sr) sr.emplace(); + sr->append(r.offset); uint32_t value = r.segment_offset; - sr->append(r.offset); for (int i = 0; i < 2; ++i, value >>= 8) data[data_offset + r.offset + i] = value; continue; diff --git a/ops.h b/ops.h index 737a558..de5517a 100644 --- a/ops.h +++ b/ops.h @@ -36,3 +36,4 @@ x(CMD) x(ZIP) x(IMP) x(PFX) +x(SEG) diff --git a/script.h b/script.h new file mode 100644 index 0000000..2beae7e --- /dev/null +++ b/script.h @@ -0,0 +1,30 @@ +#ifndef script_h +#define script_h + +#include +#include + +typedef std::string label_t; + +enum opcode_t { + + OP_NONE = 0, + #define x(op) OP_##op, + #include "ops.h" + #undef x + OP_EQ +}; + +enum { + OVR_NONE = 1, + OVR_ALL = -1, + OVR_OFF = 0 +}; + +enum { + OP_OPTIONAL = 0, + OP_REQUIRED = 1, + OP_INSENSITIVE = 2 +}; + +#endif diff --git a/script.re2c b/script.re2c index d27e5fc..c274d9a 100644 --- a/script.re2c +++ b/script.re2c @@ -18,6 +18,8 @@ #include +#include "script.h" + /*!re2c re2c:define:YYCTYPE = char; @@ -27,185 +29,35 @@ ident = [:<-~][0-~]*; ws = [ \t]; eof = "\x00"; + + number_prefix = [%$0-9]; + ident_prefix = [:-~]; + string_prefix = ['"]; */ -enum { - CMD_NONE = 0, - #define x(op) CMD_##op, - #include "ops.h" - #undef x - CMD_EQ -}; +namespace { + std::unordered_map opcodes = { + #define x(op) { #op, OP_##op }, -static std::unordered_map commands = { - #define x(op) { #op, CMD_##op }, + #include "ops.h" + #undef x - #include "ops.h" - #undef x + /* aliases */ + { "AUX", OP_ADR }, + { "REZ", OP_RES }, + { "LIN", OP_LNK }, + { "KIN", OP_KND }, + { "=", OP_EQ } + }; - /* aliases */ - { "AUX", CMD_ADR }, - { "REZ", CMD_RES }, - { "LIN", CMD_LNK }, - { "KIN", CMD_KND }, - { "=", CMD_EQ } -}; - - -static std::unordered_map types = { - - { "NON", 0x00 }, - { "BAD", 0x01 }, - { "BIN", 0x06 }, - { "TXT", 0x04 }, - { "DIR", 0x0f }, - { "ADB", 0x19 }, - { "AWP", 0x1a }, - { "ASP", 0x1b }, - { "GSB", 0xab }, - { "TDF", 0xac }, - { "BDF", 0xad }, - { "SRC", 0xb0 }, - { "OBJ", 0xb1 }, - { "LIB", 0xb2 }, - { "S16", 0xb3 }, - { "RTL", 0xb4 }, - { "EXE", 0xb5 }, - { "PIF", 0xb6 }, - { "TIF", 0xb7 }, - { "NDA", 0xb8 }, - { "CDA", 0xb9 }, - { "TOL", 0xba }, - { "DRV", 0xbb }, - { "DOC", 0xbf }, - { "PNT", 0xc0 }, - { "PIC", 0xc1 }, - { "FON", 0xcb }, - { "PAS", 0xef }, - { "CMD", 0xf0 }, - { "LNK", 0xf8 }, - { "BAS", 0xfc }, - { "VAR", 0xfd }, - { "REL", 0xfe }, - { "SYS", 0xff }, - -}; - - - -static uint32_t number_operand(const char *YYCURSOR, bool required = true) { - - char *iter = YYCURSOR; - uint32_t rv = 0; - /*!re2c - * { throw std::invalid_argument("bad operand"); } - [;*] | eof { - if (!required) return rv; - throw std::invalid_argument("missing operand"); - } - '%' [01]+ { - ++iter; - for(;iter < YYCURSOR; ++iter) { - rv <<= 1; - rv |= *iter - '0'; - } - goto exit; - } - - '$' [A-Fa-f0-9]+ { - ++iter; - for(;iter < YYCURSOR; ++iter) { - char c = *iter | 0x20; - rv <<= 4; - if (c <= '9') rv |= c - '0'; - else rv |= c - 'a' + 10; - } - goto exit; - } - - [0-9]+ { - for(;iter < YYCURSOR; ++iter) { - rv *= 10; - rv += *iter - '0'; - } - goto exit; - - } - - ident { - std::string s(iter, YYCURSOR); - //look up symbol, verify it's an absolute value, etc - } - */ -exit: - char c = *YYCURSOR; - if (isspace(c) || c == 0) return rv; - - throw std::invalid_argument("bad operand"); } -static uint32_t type_operand(const char *YYCURSOR, bool required = true) { - - char *iter = YYCURSOR; - uint32_t rv = 0; - /*!re2c - * { throw std::invalid_argument("bad operand"); } - - [;*] | eof { - if (!required) return rv; - throw std::invalid_argument("missing operand"); - } - - '%' [01]+ { - ++iter; - for(;iter < YYCURSOR; ++iter) { - rv <<= 1; - rv |= *iter - '0'; - } - goto exit; - } - - '$' [A-Fa-f0-9]+ { - ++iter; - for(;iter < YYCURSOR; ++iter) { - char c = *iter | 0x20; - rv <<= 4; - if (c <= '9') rv |= c - '0'; - else rv |= c - 'a' + 10; - } - goto exit; - } - - [0-9]+ { - for(;iter < YYCURSOR; ++iter) { - rv *= 10; - rv += *iter - '0'; - } - goto exit; - - } - - [A-Za-z][A-Za-z0-9]{2} { - std::string s(iter, YYCURSOR); - for(char &c : s) c = std::toupper(c); - auto iter = types.find(s); - if (iter == types.end) { - throw std::invalid_argument("bad operand"); - } - rv = *iter; - } - */ -exit: - char c = *YYCURSOR; - if (isspace(c) || c == 0) return rv; - - throw std::invalid_argument("bad operand"); -} static int x_number_operand(const char *YYCURSOR) { - char *iter = YYCURSOR; + const char *iter = YYCURSOR; + // const char *YYMARKER = nullptr; uint32_t rv = 0; /*!re2c * { throw std::invalid_argument("bad operand"); } @@ -246,25 +98,31 @@ exit: throw std::invalid_argument("bad operand"); } -static std::string x_label_operand(const char *YYCURSOR) { - char *iter = YYCURSOR; +static std::string x_label_operand(const char *YYCURSOR, bool insensitive) { + const char *iter = YYCURSOR; + // const char *YYMARKER = nullptr; std::string rv; /*!re2c * { throw std::invalid_argument("bad operand"); } ident { - std::string s(iter, YYCURSOR); - //look up symbol, verify it's an absolute value, etc + rv = std::string(iter, YYCURSOR); + if (insensitive) + for (char &c : rv) rv = std::toupper(c); + goto exit; } */ exit: char c = *YYCURSOR; - if (isspace(c) || c == 0) return rv; + if (isspace(c) || c == 0) { + //look up symbol, verify it's an absolute value, etc + return rv; + } throw std::invalid_argument("bad operand"); } static std::string x_string_operand(const char *YYCURSOR) { - char *iter = YYCURSOR; + const char *iter = YYCURSOR; std::string rv; /*!re2c * { throw std::invalid_argument("bad operand"); } @@ -281,21 +139,46 @@ exit: throw std::invalid_argument("bad operand"); } +uint32_t number_operand(const char *YYCURSOR, const std::unordered_map &map, int flags) { + + const char *cp = YYCURSOR; + // const char *YYMARKER = nullptr; + /*!re2c + * { throw std::invalid_argument("bad operand"); } + [;*] | eof { + if (flags & OP_REQUIRED) + throw std::invalid_argument("missing operand"); + return 0; + } + + number_prefix { + return x_number_operand(cp); + } + + ident_prefix { + std::string s = x_label_operand(cp, flags & OP_INSENSITIVE); + auto iter = map.find(s); + if (iter == map.end()) throw std::runtime_error("Bad symbol"); + return iter->second; + } + */ +} -static int ovr_operand(const char *YYCURSOR) { +int ovr_operand(const char *YYCURSOR) { int rv = 0; + const char *YYMARKER = nullptr; /*!re2c * { throw std::invalid_argument("bad operand"); } [;*] | eof { - return 1; + return OVR_NONE; } 'ALL' { - rv = -1; + rv = OVR_ALL; } 'OFF' { - rv = 0; + rv = OVR_OFF; } */ @@ -305,102 +188,99 @@ static int ovr_operand(const char *YYCURSOR) { throw std::invalid_argument("bad operand"); } -static std::string label_operand(const char *YYCURSOR, bool required = true) { - std::string rv; - char *iter = YYCURSOR; +std::string label_operand(const char *YYCURSOR, int flags) { + const char *cp = YYCURSOR; + // const char *YYMARKER = nullptr; /*!re2c * { throw std::invalid_argument("bad operand"); } [;*] | eof { - if (!required) return rv; - throw std::invalid_argument("missing operand"); + if (flags & OP_REQUIRED) + throw std::invalid_argument("missing operand"); + return std::string(); } - ident { - rv = std::string(iter, YYCURSOR); - goto exit; + ident_prefix { + return x_label_operand(cp, flags & OP_INSENSITIVE); } */ - - char c = *YYCURSOR; - if (isspace(c) || c == 0) return rv; - - throw std::invalid_argument("bad operand"); } -static std::string path_operand(const char *YYCURSOR, bool required = true) { +std::string path_operand(const char *YYCURSOR, int flags) { std::string rv; - char *iter = YYCURSOR; + const char *iter = YYCURSOR; + // const char *YYMARKER = nullptr; /*!re2c * { throw std::invalid_argument("bad operand"); } + [;*] | eof { - if (!required) return rv; - throw std::invalid_argument("missing operand"); + if (flags & OP_REQUIRED) + throw std::invalid_argument("missing operand"); + return rv; } + string_prefix { + return x_string_operand(iter); + } + // don't allow leading quotes, eof, or comment chars - [^;*\x00'"][^ \t]* { + [^ \t\x00;*'"][^ \t\x00]* { rv = std::string(iter, YYCURSOR); goto exit; } - ['] [^']* ['] | ["] [^"]* ["] { - rv = std::string(iter+1, YYCURSOR-1); - goto exit; - } */ +exit: char c = *YYCURSOR; if (isspace(c) || c == 0) return rv; throw std::invalid_argument("bad operand"); } - -static void no_operand(const char *YYCURSOR) { +std::string string_operand(const char *YYCURSOR, int flags) { + const char *cp = YYCURSOR; + // const char *YYMARKER = nullptr; /*!re2c * { throw std::invalid_argument("bad operand"); } - [;*] | eof { return } - */ -} - -static std::string string_operand(const char *YYCURSOR, bool required = true) { - - std::string rv; - char *iter = YYCURSOR; - /*!re2c - * { throw std::invalid_argument("bad operand"); } [;*] | eof { - if (!required) return rv; - throw std::invalid_argument("missing operand"); + if (flags & OP_REQUIRED) + throw std::invalid_argument("missing operand"); + return std::string(); } - ['] [^']* ['] | ["] [^"]* ["] { - rv = std::string(iter+1, YYCURSOR-1); - goto exit; + string_prefix { + return x_string_operand(cp); } - */ - char c = *YYCURSOR; - if (isspace(c) || c == 0) return rv; - throw std::invalid_argument("bad operand"); + */ } -static void parse_line(const char *YYCURSOR) { +void no_operand(const char *YYCURSOR) { + /*!re2c + * { throw std::invalid_argument("bad operand"); } + [;*] | eof { return; } + */ +} - unsigned cmd = 0; - std::string label; + +void parse_line(const char *YYCURSOR) { + + label_t label; + opcode_t opcode = OP_NONE; const char *iter = YYCURSOR; + const char *YYMARKER = nullptr; + /*!re2c - * { throw std::invalid_argument("bad label") } + * { throw std::invalid_argument("bad label"); } [;*] | eof { return; } - ws { goto opcode } + ws { goto opcode; } ident / (ws|eof) { - label(iter, YYCURSOR); + label = std::string(iter, YYCURSOR); goto opcode; } @@ -409,27 +289,26 @@ static void parse_line(const char *YYCURSOR) { opcode: - while (isspace(*YYCURSOR)) ++YYCURSOR; iter = YYCURSOR; /*!re2c * { throw std::invalid_argument("bad opcode"); } - [;*]|eof { return 0; } + [;*]|eof { return; } - '=' / (ws|eof) { cmd = CMD_EQ; goto operand; } + '=' / (ws|eof) { opcode = OP_EQ; goto operand; } [A-Za-z]+ / (ws|eof) { - size_t = l YYCURSOR - iter; + size_t l = YYCURSOR - iter; if (l > 3) l = 3; std::string s(iter, iter + l); - for (char &c in s): c = std::toupper(c); - auto iter = commands.find(s); - if (!iter == commands.end()) { + for (char &c : s) c = std::toupper(c); + auto iter = opcodes.find(s); + if (iter == opcodes.end()) { throw std::invalid_argument("bad opcode"); } - cmd = *iter; + opcode = iter->second; goto operand; } */ @@ -439,195 +318,10 @@ operand: while (isspace(*YYCURSOR)) ++YYCURSOR; iter = YYCURSOR; - std::string str_operand; - long int_operand; - - switch(cmd) { - case CMD_LNK: - case CMD_PUT: - case CMD_ASM: - case CMD_SAV: - case CMD_LIB: - case CMD_IF: - case CMD_PFX: - case CMD_IMP: - case CMD_RES: - case CMD_FIL: - str_operand = path_operand(YYCURSOR); - break; - case CMD_ORG: - case CMD_ADR: - case CMD_DS: - case CMD_KND: - case CMD_VER: - case CMD_ALI: - case CMD_LKV: - case CMD_DO: - case CMD_EQU: - case CMD_EQ: - case CMD_GEQ: - int_operand = number_operand(YYCURSOR); - break; - case CMD_TYP: - int_operand = type_operand(YYCURSOR); - break; - case CMD_OVR: - int_operand = ovr_operand(YYCURSOR); - break; - case CMD_POS: - case CMD_LEN: - str_operand = label_operand(YYCURSOR, false); - break; - case CMD_KBD: - str_operand = string_operand(YYCURSOR, false); - break; - - case CMD_CMD: - str_operand = string(YYCURSOR); - break; - - default: - no_operand(YYCURSOR); - break; - } - - switch(cmd) { - - case CMD_NONE: - case CMD_NOL: /* asm: default list off */ - case CMD_PUT: /* check if source file is outdated */ - case CMD_OVR: /* override outdated check and force assembly */ - case CMD_FAS: /* fast linker. only one SAV allowed */ - /* nop */ - break; - - case CMD_LNK: - /* link file ... */ - break; - case CMD_ORG: - /* set the link origin. also used as aux type */ - break; - case CMD_ADR: - /* set the file aux type */ - break; - - case CMD_SAV: - /* save linked file. for xl linker, specifies segment name */ - break; - - case CMD_TYP: - /* set the file type */ - break; - - case CMD_EXT: - /* print addresses of all resolved externals (not just errors) - disabled after each SAV - */ - break; - case CMD_ENT: - /* print the entry list */ - /* flag symbol w ? if unused */ - break; - case CMD_DAT: - /* print the current date and time */ - break; - - case CMD_END: - /* end of command file (optional) */ - break; - - case CMD_LIB: - /* search directory for unresolved symbols */ - break; - - case CMD_LKV: - /* specify linker version */ - /* 0 = binary, 1 = Linker.GS, 2 = Linker.XL, 3 = convert to OMF object file */ - switch (int_operand) { - case 0: throw std::runtime_error("binary linker not supported"); - case 3: throw std::runtime_error("object file linker not supported"); - case 1: - case 2: - /* default file type = S16 */ - break; - default: - throw std::runtime_error("bad linker version"); - } - break; - - case CMD_VER: - /*specify OMF version. 1 or 2 */ - break; - - case CMD_KND: - /* set the OMF kind flag */ - /* 8-bit for v 1, 16-bit for v2 */ - break; - - case CMD_ALI: - /* OMF align field. default = 0 */ - break; - - case CMD_DS: - /* OMF RESSPC field */ - break; - - - case CMD_LEN: - /* Puts "LABEL" in symbol dictionary a san ENTry whose value is equal to the number of bytes of the last linked file. */ - break; - case CMD_POS: - - - - case CMD_ASM: - default: - throw std::runtime_error("opcode not supported"); - } - - + void evaluate(label_t label, opcode_t opcode, const char *); + evaluate(label, opcode, YYCURSOR); } -int process_link_file(const std::string &path) { - FILE *fp; - fp = fopen(path, "r"); - if (!fp) { - warn("Unable to open %s", path.c_str()); - return -1; - } - int no = 1; - int errors = 0; - const char *line = NULL; - size_t cap = 0; - for(;; ++no) { - - ssize_t len = getline(&line, &cap, fp); - if (len == 0) break; - if (len < 0) { - warn("read error"); - ++errors; - break; - } - /* strip trailing ws */ - while (len && isspace(line[len-1])) --len; - line[len] = 0; - if (len == 0) continue; - - try { - parse_line(line); - } catch (std::exception &ex) { - fprintf(stderr, "%s in line: %d\n", ex.what(), no); - fprintf(stderr, "%s\n", line); - if (++errors >= 10) { - fputs("Too many errors, aborting\n", stderr); - break; - } - } - } - fclose(fp); - free(line); - return errors ? -1 : 0; -}