//
//  x65.cpp
//
//
//  Created by Carl-Henrik Skårstedt on 9/23/15.
//
//
//	A simple 6502 assembler
//
//
// The MIT License (MIT)
//
// Copyright (c) 2015 Carl-Henrik Skårstedt
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Details, source and documentation at https://github.com/Sakrac/x65.
//
// "struse.h" can be found at https://github.com/Sakrac/struse, only the header file is required.
//

#define _CRT_SECURE_NO_WARNINGS		// Windows shenanigans
#define STRUSE_IMPLEMENTATION		// include implementation of struse in this file
#include "struse.h"					// https://github.com/Sakrac/struse/blob/master/struse.h
// NOTE(review): the four system header names below were lost in this copy of the file and
// are reconstructed from what the code uses (std::vector, malloc/free, uint32_t) - confirm.
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

// if the number of resolved labels exceed this in one late eval then skip
// checking for relevance and just eval all unresolved expressions.
#define MAX_LABELS_EVAL_ALL 16	// above this many resolved labels a late eval checks every unresolved expression

// Max number of nested scopes (within { and })
#define MAX_SCOPE_DEPTH 32

// Max number of nested conditional expressions
#define MAX_CONDITIONAL_DEPTH 64

// The maximum complexity of expressions to be evaluated
#define MAX_EVAL_VALUES 32
#define MAX_EVAL_OPER 64

// Max capacity of each label pool
#define MAX_POOL_RANGES 4
#define MAX_POOL_BYTES 128

// Max number of exported binary files from a single source
#define MAX_EXPORT_FILES 64

// Maximum number of opcodes, aliases and directives
#define MAX_OPCODES_DIRECTIVES 320

// minor variation of 6502
#define NUM_ILLEGAL_6502_OPS 21

// minor variation of 65C02
#define NUM_WDC_65C02_SPECIFIC_OPS 18

// To simplify some syntax disambiguation the preferred
// ruleset can be specified on the command line.
enum AsmSyntax {
	SYNTAX_SANE,
	SYNTAX_MERLIN
};

// Internal status and error type.
// Values below FIRST_ERROR are non-error statuses; values from FIRST_ERROR up are errors.
// The enumerator order is load-bearing: the status string table that follows this enum
// is indexed by these values and must list one string per enumerator, in the same order.
enum StatusCode {
	STATUS_OK,								// everything is fine
	STATUS_RELATIVE_SECTION,				// value is relative to a single section
	STATUS_NOT_READY,						// label could not be evaluated at this time
	STATUS_XREF_DEPENDENT,					// evaluated but relied on an XREF label to do so
	STATUS_NOT_STRUCT,						// return is not a struct.
	STATUS_EXPORT_NO_CODE_OR_DATA_SECTION,

	FIRST_ERROR,
	ERROR_UNDEFINED_CODE = FIRST_ERROR,
	ERROR_UNEXPECTED_CHARACTER_IN_EXPRESSION,
	ERROR_TOO_MANY_VALUES_IN_EXPRESSION,
	ERROR_TOO_MANY_OPERATORS_IN_EXPRESSION,
	ERROR_UNBALANCED_RIGHT_PARENTHESIS,
	ERROR_EXPRESSION_OPERATION,
	ERROR_EXPRESSION_MISSING_VALUES,
	ERROR_INSTRUCTION_NOT_ZP,
	ERROR_INVALID_ADDRESSING_MODE,
	ERROR_BRANCH_OUT_OF_RANGE,
	ERROR_LABEL_MISPLACED_INTERNAL,
	ERROR_BAD_ADDRESSING_MODE,
	ERROR_UNEXPECTED_CHARACTER_IN_ADDRESSING_MODE,
	ERROR_UNEXPECTED_LABEL_ASSIGMENT_FORMAT,
	ERROR_MODIFYING_CONST_LABEL,
	ERROR_OUT_OF_LABELS_IN_POOL,
	ERROR_INTERNAL_LABEL_POOL_ERROR,
	ERROR_POOL_RANGE_EXPRESSION_EVAL,
	ERROR_LABEL_POOL_REDECLARATION,
	ERROR_POOL_LABEL_ALREADY_DEFINED,
	ERROR_STRUCT_ALREADY_DEFINED,
	ERROR_REFERENCED_STRUCT_NOT_FOUND,
	ERROR_BAD_TYPE_FOR_DECLARE_CONSTANT,
	ERROR_REPT_COUNT_EXPRESSION,
	ERROR_HEX_WITH_ODD_NIBBLE_COUNT,
	ERROR_DS_MUST_EVALUATE_IMMEDIATELY,
	ERROR_NOT_AN_X65_OBJECT_FILE,
	ERROR_COULD_NOT_INCLUDE_FILE,

	ERROR_STOP_PROCESSING_ON_HIGHER,		// errors greater than this will stop execution

	ERROR_TARGET_ADDRESS_MUST_EVALUATE_IMMEDIATELY,
	ERROR_TOO_DEEP_SCOPE,
	ERROR_UNBALANCED_SCOPE_CLOSURE,
	ERROR_BAD_MACRO_FORMAT,
	ERROR_ALIGN_MUST_EVALUATE_IMMEDIATELY,
	ERROR_OUT_OF_MEMORY_FOR_MACRO_EXPANSION,
	ERROR_MACRO_ARGUMENT,
	ERROR_CONDITION_COULD_NOT_BE_RESOLVED,
	ERROR_ENDIF_WITHOUT_CONDITION,
	ERROR_ELSE_WITHOUT_IF,
	ERROR_STRUCT_CANT_BE_ASSEMBLED,
	ERROR_ENUM_CANT_BE_ASSEMBLED,
	ERROR_UNTERMINATED_CONDITION,
	ERROR_REPT_MISSING_SCOPE,
	ERROR_LINKER_MUST_BE_IN_FIXED_ADDRESS_SECTION,
	ERROR_LINKER_CANT_LINK_TO_DUMMY_SECTION,
	ERROR_UNABLE_TO_PROCESS,
	ERROR_SECTION_TARGET_OFFSET_OUT_OF_RANGE,
	ERROR_CPU_NOT_SUPPORTED,
	ERROR_CANT_APPEND_SECTION_TO_TARGET,
	ERROR_ZEROPAGE_SECTION_OUT_OF_RANGE,
	ERROR_NOT_A_SECTION,
	ERROR_CANT_REASSIGN_FIXED_SECTION,
	ERROR_CANT_LINK_ZP_AND_NON_ZP,
	ERROR_OUT_OF_MEMORY,
	ERROR_CANT_WRITE_TO_FILE,
	ERROR_ABORTED,
	ERROR_CONDITION_TOO_NESTED,

	STATUSCODE_COUNT
};

// The following strings are in the same
order as StatusCode const char *aStatusStrings[STATUSCODE_COUNT] = { "ok", "relative section", "not ready", "XREF dependent result", "name is not a struct", "Exporting binary without code or data section", "Undefined code", "Unexpected character in expression", "Too many values in expression", "Too many operators in expression", "Unbalanced right parenthesis in expression", "Expression operation", "Expression missing values", "Instruction can not be zero page", "Invalid addressing mode for instruction", "Branch out of range", "Internal label organization mishap", "Bad addressing mode", "Unexpected character in addressing mode", "Unexpected label assignment format", "Changing value of label that is constant", "Out of labels in pool", "Internal label pool release confusion", "Label pool range evaluation failed", "Label pool was redeclared within its scope", "Pool label already defined", "Struct already defined", "Referenced struct not found", "Declare constant type not recognized (dc.?)", "rept count expression could not be evaluated", "hex must be followed by an even number of hex numbers", "DS directive failed to evaluate immediately", "File is not a valid x65 object file", "Failed to read include file", "Errors after this point will stop execution", "Target address must evaluate immediately for this operation", "Scoping is too deep", "Unbalanced scope closure", "Unexpected macro formatting", "Align must evaluate immediately", "Out of memory for macro expansion", "Problem with macro argument", "Conditional could not be resolved", "#endif encountered outside conditional block", "#else or #elif outside conditional block", "Struct can not be assembled as is", "Enum can not be assembled as is", "Conditional assembly (#if/#ifdef) was not terminated in file or macro", "rept is missing a scope ('{ ... 
}')", "Link can only be used in a fixed address section", "Link can not be used in dummy sections", "Can not process this line", "Unexpected target offset for reloc or late evaluation", "CPU is not supported", "Can't append sections", "Zero page / Direct page section out of range", "Attempting to assign an address to a non-existent section", "Attempting to assign an address to a fixed address section", "Can not link a zero page section with a non-zp section", "Out of memory while building", "Can not write to file", "Assembly aborted", "Condition too deeply nested", }; // Assembler directives enum AssemblerDirective { AD_CPU, // CPU: Assemble for this target, AD_ORG, // ORG: Assemble as if loaded at this address AD_EXPORT, // EXPORT: export this section or disable export AD_LOAD, // LOAD: If applicable, instruct to load at this address AD_SECTION, // SECTION: Enable code that will be assigned a start address during a link step AD_LINK, // LINK: Put sections with this name at this address (must be ORG / fixed address section) AD_XDEF, // XDEF: Externally declare a symbol AD_XREF, // XREF: Reference an external symbol AD_INCOBJ, // INCOBJ: Read in an object file saved from a previous build AD_ALIGN, // ALIGN: Add to address to make it evenly divisible by this AD_MACRO, // MACRO: Create a macro AD_EVAL, // EVAL: Print expression to stdout during assemble AD_BYTES, // BYTES: Add 8 bit values to output AD_WORDS, // WORDS: Add 16 bit values to output AD_DC, // DC.B/DC.W: Declare constant (same as BYTES/WORDS) AD_TEXT, // TEXT: Add text to output AD_INCLUDE, // INCLUDE: Load and assemble another file at this address AD_INCBIN, // INCBIN: Load and directly insert another file at this address AD_IMPORT, // IMPORT: Include or Incbin or Incobj or Incsym AD_CONST, // CONST: Prevent a label from mutating during assemble AD_LABEL, // LABEL: Create a mutable label (optional) AD_STRING, // STRING: Declare a string symbol AD_UNDEF, // UNDEF: remove a string or a label AD_INCSYM, // 
INCSYM: Reference labels from another assemble AD_LABPOOL, // POOL: Create a pool of addresses to assign as labels dynamically AD_IF, // #IF: Conditional assembly follows based on expression AD_IFDEF, // #IFDEF: Conditional assembly follows based on label defined or not AD_ELSE, // #ELSE: Otherwise assembly AD_ELIF, // #ELIF: Otherwise conditional assembly follows AD_ENDIF, // #ENDIF: End a block of #IF/#IFDEF AD_STRUCT, // STRUCT: Declare a set of labels offset from a base address AD_ENUM, // ENUM: Declare a set of incremental labels AD_REPT, // REPT: Repeat the assembly of the bracketed code a number of times AD_INCDIR, // INCDIR: Add a folder to search for include files AD_A16, // A16: Set 16 bit accumulator mode AD_A8, // A8: Set 8 bit accumulator mode AD_XY16, // A16: Set 16 bit index register mode AD_XY8, // A8: Set 8 bit index register mode AD_HEX, // HEX: LISA assembler data block AD_ABORT, // ABORT: stop assembler and error AD_EJECT, // EJECT: Page break for printing assembler code, ignore AD_LST, // LST: Controls symbol listing AD_DUMMY, // DUM: Start a dummy section (increment address but don't write anything???) AD_DUMMY_END, // DEND: End a dummy section AD_DS, // DS: Define section, zero out # bytes or rewind the address if negative AD_USR, // USR: MERLIN user defined pseudo op, runs some code at a hard coded address on apple II, on PC does nothing. 
AD_SAV, // SAV: MERLIN version of export but contains full filename, not an appendable name AD_XC, // XC: MERLIN version of setting CPU AD_MX, // MX: MERLIN control accumulator 16 bit mode AD_LNK, // LNK: MERLIN load object and link AD_ADR, // ADR: MERLIN store 3 byte word AD_ADRL, // ADRL: MERLIN store 4 byte word AD_ENT, // ENT: MERLIN extern this address label AD_EXT, // EXT: MERLIN reference this address label from a different file AD_CYC, // CYC: MERLIN start / stop cycle timer }; // Operators are either instructions or directives enum OperationType { OT_NONE, OT_MNEMONIC, OT_DIRECTIVE }; // These are expression tokens in order of precedence (last is highest precedence) enum EvalOperator { EVOP_NONE, EVOP_VAL = 'a', // a, value => read from value queue EVOP_EQU, // b, 1 if left equal to right otherwise 0 EVOP_LT, // c, 1 if left less than right otherwise 0 EVOP_GT, // d, 1 if left greater than right otherwise 0 EVOP_LTE, // e, 1 if left less than or equal to right otherwise 0 EVOP_GTE, // f, 1 if left greater than or equal to right otherwise 0 EVOP_LOB, // g, low byte of 16 bit value EVOP_HIB, // h, high byte of 16 bit value EVOP_BAB, // i, bank byte of 24 bit value EVOP_LPR, // j, left parenthesis EVOP_RPR, // k, right parenthesis EVOP_ADD, // l, + EVOP_SUB, // m, - EVOP_MUL, // n, * (note: if not preceded by value or right paren this is current PC) EVOP_DIV, // o, / EVOP_AND, // p, & EVOP_OR, // q, | EVOP_EOR, // r, ^ EVOP_SHL, // s, << EVOP_SHR, // t, >> EVOP_NEG, // u, negate value EVOP_STP, // v, Unexpected input, should stop and evaluate what we have EVOP_NRY, // w, Not ready yet EVOP_XRF, // x, value from XREF label EVOP_EXP, // y, sub expression EVOP_ERR, // z, Error }; // Opcode encoding typedef struct sOPLookup { uint32_t op_hash; uint8_t index; // ground index uint8_t type; // mnemonic or } OPLookup; enum AddrMode { // address mode bit index // 6502 AMB_ZP_REL_X, // 0 ($12,x) AMB_ZP, // 1 $12 AMB_IMM, // 2 #$12 AMB_ABS, // 3 $1234 AMB_ZP_Y_REL, // 4 
($12),y AMB_ZP_X, // 5 $12,x AMB_ABS_Y, // 6 $1234,y AMB_ABS_X, // 7 $1234,x AMB_REL, // 8 ($1234) AMB_ACC, // 9 A AMB_NON, // a // 65C02 AMB_ZP_REL, // b ($12) AMB_REL_X, // c ($1234,x) AMB_ZP_ABS, // d $12, *+$12 // 65816 AMB_ZP_REL_L, // e [$02] AMB_ZP_REL_Y_L, // f [$00],y AMB_ABS_L, // 10 $bahilo AMB_ABS_L_X, // 11 $123456,x AMB_STK, // 12 $12,s AMB_STK_REL_Y, // 13 ($12,s),y AMB_REL_L, // 14 [$1234] AMB_BLK_MOV, // 15 $12,$34 AMB_COUNT, AMB_FLIPXY = AMB_COUNT, // 16 (indexing index using y treat as x address mode) AMB_BRANCH, // 17 (relative address 8 bit) AMB_BRANCH_L, // 18 (relative address 16 bit) AMB_IMM_DBL_A, // 19 (immediate mode can be doubled in 16 bit mode) AMB_IMM_DBL_XY, // 1a (immediate mode can be doubled in 16 bit mode) AMB_ILL, // 1b illegal address mode // address mode masks AMM_NON = 1<read) first = index+1; else count = index; } if (counthash) count--; return count; } // // // ASSEMBLER STATE // // // pairArray is basically two vectors sharing a size without constructors on growth or insert template class pairArray { protected: H *keys; V *values; uint32_t _count; uint32_t _capacity; public: pairArray() : keys(nullptr), values(nullptr), _count(0), _capacity(0) {} void reserve(uint32_t size) { if (size>_capacity) { H *new_keys = (H*)malloc(sizeof(H) * size); if (!new_keys) { return; } V *new_values = (V*)malloc(sizeof(V) * size); if (!new_values) { free(new_keys); return; } if (keys && values) { memcpy(new_keys, keys, sizeof(H) * _count); memcpy(new_values, values, sizeof(V) * _count); free(keys); free(values); } keys = new_keys; values = new_values; _capacity = size; } } bool insert(uint32_t pos) { if (pos>_count) return false; if (_count==_capacity) reserve(_capacity+64); if (pos<_count) { memmove(keys+pos+1, keys+pos, sizeof(H) * (_count-pos)); memmove(values+pos+1, values+pos, sizeof(V) * (_count-pos)); } memset(keys+pos, 0, sizeof(H)); memset(values+pos, 0, sizeof(V)); _count++; return true; } bool insert(uint32_t pos, H key) { if 
(insert(pos) && keys) { keys[pos] = key; return true; } return false; } void remove(uint32_t pos) { if (pos<_count) { _count--; if (pos<_count) { memmove(keys+pos, keys+pos+1, sizeof(H) * (_count-pos)); memmove(values+pos, values+pos+1, sizeof(V) * (_count-pos)); } } } H* getKeys() { return keys; } H& getKey(uint32_t pos) { return keys[pos]; } V* getValues() { return values; } V& getValue(uint32_t pos) { return values[pos]; } uint32_t count() const { return _count; } uint32_t capacity() const { return _capacity; } void clear() { if (keys!=nullptr) free(keys); keys = nullptr; if (values!=nullptr) free(values); values = nullptr; _capacity = 0; _count = 0; } }; // relocs are cheaper than full expressions and work with // local labels for relative sections which would otherwise // be out of scope at link time. struct Reloc { int base_value; int section_offset; // offset into this section int target_section; // which section does this reloc target? int8_t bytes; // number of bytes to write int8_t shift; // number of bits to shift to get value Reloc() : base_value(0), section_offset(-1), target_section(-1), bytes(0), shift(0) {} Reloc(int base, int offs, int sect, int8_t num_bytes, int8_t bit_shift) : base_value(base), section_offset(offs), target_section(sect), bytes(num_bytes), shift(bit_shift) {} }; typedef std::vector relocList; // For assembly listing this remembers the location of each line struct ListLine { enum Flags { MNEMONIC = 0x01, KEYWORD = 0x02, CYCLES_START = 0x04, CYCLES_STOP = 0x08, }; strref source_name; // source file index name strref code; // line of code this represents int address; // start address of this line int size; // number of bytes generated for this line int line_offs; // offset into code int flags; // only output code if generated by code bool wasMnemonic() const { return !!(flags & MNEMONIC); } bool startClock() const { return !!(flags & CYCLES_START); } bool stopClock() const { return !!(flags & CYCLES_STOP); } }; typedef std::vector 
Listing; enum SectionType : int8_t { // enum order indicates fixed address linking priority ST_UNDEFINED, // not set ST_CODE, // default type ST_DATA, // data section (matters for GS/OS OMF) ST_BSS, // uninitialized data section ST_ZEROPAGE, // uninitialized data section in zero page / direct page ST_REMOVED // removed, don't export to object file }; // start of data section support // Default is a relative section // Whenever org or dum with address is encountered => new section // If org is fixed and < $200 then it is a dummy section Otherwise clear dummy section typedef struct Section { // section name, same named section => append strref name; // name of section for comparison strref export_append; // append this name to export of file strref include_from; // which file did this section originate from? // generated address status int load_address; // if assigned a load address int start_address; int address; // relative or absolute PC int align_address; // for relative sections that needs alignment // merged sections int merged_offset; // -1 if not merged int merged_section; // which section merged with // data output uint8_t *output; // memory for this section uint8_t *curr; // current pointer for this section size_t output_capacity; // current output capacity // reloc data relocList *pRelocs; // link time resolve (not all sections need this) Listing *pListing; // if list output // grouped sections int next_group; // next section of a group of relative sections or -1 int first_group; // >=0 if another section is grouped with this section bool address_assigned; // address is absolute if assigned bool dummySection; // true if section does not generate data, only labels SectionType type; // distinguishing section type for relocatable output void reset() { // explicitly cleaning up sections, not called from Section destructor name.clear(); export_append.clear(); include_from.clear(); start_address = address = load_address = 0x0; type = ST_CODE; address_assigned = 
false; output = nullptr; curr = nullptr; dummySection = false; output_capacity = 0; merged_offset = -1; merged_section = -1; align_address = 1; if (pRelocs) delete pRelocs; next_group = first_group = -1; pRelocs = nullptr; if (pListing) delete pListing; pListing = nullptr; } void Cleanup() { if (output) free(output); reset(); } bool empty() const { return merged_offset<0 && curr==output; } bool unused() const { return !address_assigned && address == start_address; } int DataOffset() const { return int(curr - output); } int size() const { return (int)(curr - output); } int addr_size() const { return address - start_address; } const uint8_t *get() { return output; } int GetPC() const { return address; } void AddAddress(int value) { address += value; } void SetLoadAddress(int addr) { load_address = addr; } int GetLoadAddress() const { return load_address; } void SetDummySection(bool enable) { dummySection = enable; type = ST_BSS; } bool IsDummySection() const { return dummySection; } bool IsRelativeSection() const { return address_assigned == false; } bool IsMergedSection() const { return merged_offset >= 0; } void AddReloc(int base, int offset, int section, int8_t bytes, int8_t shift); Section() : pRelocs(nullptr), pListing(nullptr) { reset(); } Section(strref _name, int _address) : pRelocs(nullptr), pListing(nullptr) { reset(); name = _name; start_address = load_address = address = _address; address_assigned = true; } Section(strref _name) : pRelocs(nullptr), pListing(nullptr) { reset(); name = _name; start_address = load_address = address = 0; address_assigned = false; } ~Section() { } // Append data to a section StatusCode CheckOutputCapacity(uint32_t addSize); void AddByte(int b); void AddWord(int w); void AddTriple(int l); void AddBin(const uint8_t *p, int size); void AddText(strref line, strref text_prefix); void SetByte(size_t offs, int b) { output[offs] = (uint8_t)b; } void SetWord(size_t offs, int w) { output[offs] = (uint8_t)w; output[offs+1] = 
uint8_t(w>>8); } void SetTriple(size_t offs, int w) { output[offs] = (uint8_t)w; output[offs+1] = uint8_t(w>>8); output[offs+2] = uint8_t(w>>16); } void SetQuad(size_t offs, int w) { output[offs] = (uint8_t)w; output[offs+1] = uint8_t(w>>8); output[offs+2] = uint8_t(w>>16); output[offs+3] = uint8_t(w>>24); } } Section; // Symbol list entry (in order of parsing) struct MapSymbol { strref name; // string name int value; int16_t section; bool local; // local variables }; typedef std::vector MapSymbolArray; // Data related to a label typedef struct sLabel { public: strref label_name; // the name of this label strref pool_name; // name of the pool that this label is related to int value; int section; // rel section address labels belong to a section, -1 if fixed address or assigned int mapIndex; // index into map symbols in case of late resolve bool evaluated; // a value may not yet be evaluated bool pc_relative; // this is an inline label describing a point in the code bool constant; // the value of this label can not change bool external; // this label is globally accessible bool reference; // this label is accessed from external and can't be used for evaluation locally } Label; // String data typedef struct sStringSymbols { public: strref string_name; // name of the string strref string_const; // string contents if source reference strovl string_value; // string contents if modified, initialized to null string StatusCode Append(strref append); StatusCode ParseLine(strref line); strref get() { return string_value.valid() ? string_value.get_strref() : string_const; } void clear() { if (string_value.cap()) { free(string_value.charstr()); string_value.invalidate(); string_value.clear(); } string_const.clear(); } } StringSymbol; // If an expression can't be evaluated immediately, this is required // to reconstruct the result when it can be. 
typedef struct sLateEval { enum Type { // When an expression is evaluated late, determine how to encode the result LET_LABEL, // this evaluation applies to a label and not memory LET_ABS_REF, // calculate an absolute address and store at 0, +1 LET_ABS_L_REF, // calculate a bank + absolute address and store at 0, +1, +2 LET_ABS_4_REF, // calculate a 32 bit number LET_BRANCH, // calculate a branch offset and store at this address LET_BRANCH_16, // calculate a branch offset of 16 bits and store at this address LET_BYTE, // calculate a byte and store at this address }; int target; // offset into output buffer int address; // current pc int scope; // scope pc int scope_depth; // relevant for scope end int16_t section; // which section to apply to. int16_t rept; // value of rept int file_ref; // -1 if current or xdef'd otherwise index of file for label strref label; // valid if this is not a target but another label strref expression; strref source_file; Type type; } LateEval; // A macro is a text reference to where it was defined typedef struct sMacro { strref name; strref macro; strref source_name; // source file name (error output) strref source_file; // entire source file (req. 
for line #) bool params_first_line; // the first line of this macro are parameters } Macro; // All local labels are removed when a global label is defined but some when a scope ends typedef struct sLocalLabelRecord { strref label; int scope_depth; bool scope_reserve; // not released for global label, only scope } LocalLabelRecord; // Label pools allows C like stack frame label allocation typedef struct sLabelPool { strref pool_name; int16_t numRanges; // normally 1 range, support multiple for ease of use int16_t scopeDepth; // Required for scope closure cleanup uint16_t ranges[MAX_POOL_RANGES*2]; // 2 shorts per range uint32_t usedMap[(MAX_POOL_BYTES+15)>>4]; // 2 bits per byte to store byte count of label StatusCode Reserve(int numBytes, uint32_t &addr); StatusCode Release(uint32_t addr); } LabelPool; // One member of a label struct struct MemberOffset { uint16_t offset; uint32_t name_hash; strref name; strref sub_struct; }; // Label struct typedef struct sLabelStruct { strref name; uint16_t first_member; uint16_t numMembers; uint16_t size; } LabelStruct; // object file labels that are not xdef'd end up here struct ExtLabels { pairArray labels; }; // EvalExpression needs a location reference to work out some addresses struct EvalContext { int pc; // current address at point of eval int scope_pc; // current scope open at point of eval int scope_end_pc; // late scope closure after eval int scope_depth; // scope depth for eval (must match current for scope_end_pc to eval) int relative_section; // return can be relative to this section int file_ref; // can access private label from this file or -1 int rept_cnt; // current repeat counter EvalContext() {} EvalContext(int _pc, int _scope, int _close, int _sect, int _rept_cnt) : pc(_pc), scope_pc(_scope), scope_end_pc(_close), scope_depth(-1), relative_section(_sect), file_ref(-1), rept_cnt(_rept_cnt) {} }; // Source context is current file (include file, etc.) or current macro. 
typedef struct sSourceContext { strref source_name; // source file name (error output) strref source_file; // entire source file (req. for line #) strref code_segment; // the segment of the file for this context strref read_source; // current position/length in source file strref next_source; // next position/length in source file int16_t repeat; // how many times to repeat this code segment int16_t repeat_total; // initial number of repeats for this code segment int16_t conditional_ctx; // conditional depth at root of this context void restart() { read_source = code_segment; } bool complete() { repeat--; return repeat <= 0; } } SourceContext; // Context stack is a stack of currently processing text class ContextStack { private: std::vector stack; // stack of contexts SourceContext *currContext; // current context public: ContextStack() : currContext(nullptr) { stack.reserve(32); } SourceContext& curr() { return *currContext; } const SourceContext& curr() const { return *currContext; } void push(strref src_name, strref src_file, strref code_seg, int rept = 1) { if (currContext) currContext->read_source = currContext->next_source; SourceContext context; context.source_name = src_name; context.source_file = src_file; context.code_segment = code_seg; context.read_source = code_seg; context.next_source = code_seg; context.repeat = (int16_t)rept; context.repeat_total = (int16_t)rept; stack.push_back(context); currContext = &stack[stack.size()-1]; } void pop() { stack.pop_back(); currContext = stack.size() ? 
&stack[stack.size()-1] : nullptr; } bool has_work() { return currContext!=nullptr; } }; // The state of the assembler class Asm { public: pairArray labels; pairArray strings; pairArray macros; pairArray labelPools; pairArray labelStructs; pairArray xdefs; // labels matching xdef names will be marked as external std::vector lateEval; std::vector localLabels; std::vector loadedData; // free when assembler is completed std::vector structMembers; // labelStructs refer to sets of structMembers std::vector includePaths; std::vector
allSections; std::vector externals; // external labels organized by object file MapSymbolArray map; // CPU target struct mnem *opcode_table; int opcode_count; CPUIndex cpu, list_cpu; OPLookup aInstructions[MAX_OPCODES_DIRECTIVES]; int num_instructions; int default_org; // context for macros / include files ContextStack contextStack; // Current section Section *current_section; // Special syntax rules AsmSyntax syntax; // Conditional assembly vars int conditional_depth; // conditional depth / base depth for context strref conditional_source[MAX_CONDITIONAL_DEPTH]; // start of conditional for error report int8_t conditional_nesting[MAX_CONDITIONAL_DEPTH]; bool conditional_consumed[MAX_CONDITIONAL_DEPTH]; // Scope info int scope_address[MAX_SCOPE_DEPTH]; int scope_depth; // Eval relative result (only valid if EvalExpression returns STATUS_RELATIVE_SECTION) int lastEvalSection; int lastEvalValue; int8_t lastEvalShift; strref export_base_name; // binary output name if available strref last_label; // most recently defined label for Merlin macro int8_t list_flags; // listing flags accumulating for each line bool accumulator_16bit; // 65816 specific software dependent immediate mode bool index_reg_16bit; // -"- int8_t cycle_counter_level; // merlin toggles the cycle counter rather than hierarchically evals bool error_encountered; // if any error encountered, don't export binary bool list_assembly; // generate assembler listing bool end_macro_directive; // whether to use { } or macro / endmacro for macro scope // Convert source to binary void Assemble(strref source, strref filename, bool obj_target); // Push a new context and handle enter / exit of context StatusCode PushContext(strref src_name, strref src_file, strref code_seg, int rept = 1); StatusCode PopContext(); // Generate assembler listing if requested bool List(strref filename); // Generate source for all valid instructions and addressing modes for current CPU bool AllOpcodes(strref filename); // Clean up memory 
allocations, reset assembler state void Cleanup(); // Make sure there is room to write more code StatusCode CheckOutputCapacity(uint32_t addSize); // Operations on current section void SetSection(strref name, int address); // fixed address section void SetSection(strref name); // relative address section void LinkLabelsToAddress(int section_id, int section_new, int section_address); StatusCode LinkRelocs(int section_id, int section_new, int section_address); StatusCode AssignAddressToSection(int section_id, int address); StatusCode LinkSections(strref name); // link relative address sections with this name here StatusCode MergeSections(int section_id, int section_merge); // Combine the result of a section onto another StatusCode MergeSectionsByName(int first_section); StatusCode MergeAllSections(int first_section); void DummySection(int address); // non-data section (fixed) void DummySection(); // non-data section (relative) void EndSection(); // pop current section Section& CurrSection() { return *current_section; } void AssignAddressToGroup(); // Merlin LNK support uint8_t* BuildExport(strref append, int &file_size, int &addr); int GetExportNames(strref *aNames, int maxNames); StatusCode LinkZP(); int SectionId() { return int(current_section - &allSections[0]); } int SectionId(Section &s) { return (int)(&s - &allSections[0]); } void AddByte(int b) { CurrSection().AddByte(b); } void AddWord(int w) { CurrSection().AddWord(w); } void AddTriple(int l) { CurrSection().AddTriple(l); } void AddBin(const uint8_t *p, int size) { CurrSection().AddBin(p, size); } // Object file handling StatusCode WriteObjectFile(strref filename); // write x65 object file StatusCode ReadObjectFile(strref filename, int link_to_section = -1); // read x65 object file // Apple II GS OMF StatusCode WriteA2GS_OMF(strref filename, bool full_collapse); // Scope management StatusCode EnterScope(); StatusCode ExitScope(); // Macro management StatusCode AddMacro(strref macro, strref source_name, 
strref source_file, strref &left); StatusCode BuildMacro(Macro &m, strref arg_list); // Structs StatusCode BuildStruct(strref name, strref declaration); StatusCode EvalStruct(strref name, int &value); StatusCode BuildEnum(strref name, strref declaration); // Calculate a value based on an expression. EvalOperator RPNToken_Merlin(strref &expression, const struct EvalContext &etx, EvalOperator prev_op, int16_t §ion, int &value); EvalOperator RPNToken(strref &expression, const struct EvalContext &etx, EvalOperator prev_op, int16_t §ion, int &value, strref &subexp); StatusCode EvalExpression(strref expression, const struct EvalContext &etx, int &result); void SetEvalCtxDefaults(struct EvalContext &etx); int ReptCnt() const; // Access labels Label* GetLabel(strref label); Label* GetLabel(strref label, int file_ref); Label* AddLabel(uint32_t hash); bool MatchXDEF(strref label); StatusCode AssignLabel(strref label, strref line, bool make_constant = false); StatusCode AddressLabel(strref label); void LabelAdded(Label *pLabel, bool local = false); StatusCode IncludeSymbols(strref line); // Strings StringSymbol *GetString(strref string_name); StringSymbol *AddString(strref string_name, strref string_value); StatusCode StringAction(StringSymbol *pStr, strref line); StatusCode ParseStringOp(StringSymbol *pStr, strref line); // Manage locals void MarkLabelLocal(strref label, bool scope_label = false); StatusCode FlushLocalLabels(int scope_exit = -1); // Label pools LabelPool* GetLabelPool(strref pool_name); StatusCode AddLabelPool(strref name, strref args); StatusCode AssignPoolLabel(LabelPool &pool, strref args); // Late expression evaluation void AddLateEval(int target, int pc, int scope_pc, strref expression, strref source_file, LateEval::Type type); void AddLateEval(strref label, int pc, int scope_pc, strref expression, LateEval::Type type); StatusCode CheckLateEval(strref added_label = strref(), int scope_end = -1, bool missing_is_error = false); // Assembler Directives 
StatusCode ApplyDirective(AssemblerDirective dir, strref line, strref source_file); StatusCode Directive_Rept(strref line); StatusCode Directive_Macro(strref line); StatusCode Directive_String(strref line); StatusCode Directive_Undef(strref line); StatusCode Directive_Include(strref line); StatusCode Directive_Incbin(strref line, int skip=0, int len=0); StatusCode Directive_Import(strref line); StatusCode Directive_ORG(strref line); StatusCode Directive_LOAD(strref line); StatusCode Directive_LNK(strref line); StatusCode Directive_XDEF(strref line); StatusCode Directive_XREF(strref label); StatusCode Directive_DC(strref line, int width, strref source_file); StatusCode Directive_DS(strref line); StatusCode Directive_ALIGN(strref line); StatusCode Directive_EVAL(strref line); StatusCode Directive_HEX(strref line); StatusCode Directive_ENUM_STRUCT(strref line, AssemblerDirective dir); // Assembler steps StatusCode GetAddressMode(strref line, bool flipXY, uint32_t validModes, AddrMode &addrMode, int &len, strref &expression); StatusCode AddOpcode(strref line, int index, strref source_file); StatusCode BuildLine(strref line); StatusCode BuildSegment(); // Display error in stderr void PrintError(strref line, StatusCode error); // Conditional Status bool ConditionalAsm(); // Assembly is currently enabled bool NewConditional(); // Start a new conditional block void CloseConditional(); // Close a conditional block void CheckConditionalDepth(); // Check if this conditional will nest the assembly (a conditional is already consumed) void ConsumeConditional(); // This conditional block is going to be assembled, mark it as consumed bool ConditionalConsumed(); // Has a block of this conditional already been assembled? 
void SetConditional(); // This conditional block is not going to be assembled so mark that it is nesting bool ConditionalAvail(); // Returns true if this conditional can be consumed void ConditionalElse(); // Conditional else that does not enable block void EnableConditional(bool enable); // This conditional block is enabled and the prior wasn't // Conditional statement evaluation (A==B? A?) StatusCode EvalStatement(strref line, bool &result); // Add include folder void AddIncludeFolder(strref path); char* LoadText(strref filename, size_t &size); char* LoadBinary(strref filename, size_t &size); // Change CPU void SetCPU(CPUIndex CPU); // Syntax bool Merlin() const { return syntax == SYNTAX_MERLIN; } // constructor Asm() : opcode_table(opcodes_6502), opcode_count(num_opcodes_6502), num_instructions(0), cpu(CPU_6502), list_cpu(CPU_6502) { Cleanup(); localLabels.reserve(256); loadedData.reserve(16); lateEval.reserve(64); } }; // Clean up work allocations void Asm::Cleanup() { for (std::vector::iterator i = loadedData.begin(); i != loadedData.end(); ++i) { if (char *data = *i) free(data); } map.clear(); labelPools.clear(); loadedData.clear(); labels.clear(); macros.clear(); allSections.clear(); for (uint32_t i = 0; i < strings.count(); ++i) { StringSymbol &str = strings.getValue(i); if (str.string_value.cap()) free(str.string_value.charstr()); } strings.clear(); for (std::vector::iterator exti = externals.begin(); exti !=externals.end(); ++exti) exti->labels.clear(); externals.clear(); // this section is relocatable but is assigned address $1000 if exporting without directives SetSection(strref("default,code")); current_section = &allSections[0]; syntax = SYNTAX_SANE; default_org = 0x1000; scope_depth = 0; conditional_depth = 0; conditional_nesting[0] = 0; conditional_consumed[0] = false; error_encountered = false; list_assembly = false; end_macro_directive = false; accumulator_16bit = false; // default 65816 8 bit immediate mode index_reg_16bit = false; // other CPUs 
won't be affected. cycle_counter_level = 0; } int sortHashLookup(const void *A, const void *B) { const OPLookup *_A = (const OPLookup*)A; const OPLookup *_B = (const OPLookup*)B; return _A->op_hash > _B->op_hash ? 1 : -1; } int BuildInstructionTable(OPLookup *pInstr, struct mnem *opcodes, int count, const char **aliases, bool merlin) { // create an instruction table (mnemonic hash lookup) int numInstructions = 0; for (int i = 0; i < count; i++) { OPLookup &op = pInstr[numInstructions++]; op.op_hash = strref(opcodes[i].instr).fnv1a_lower(); op.index = (uint8_t)i; op.type = OT_MNEMONIC; } // add instruction aliases if (aliases) { while (*aliases) { strref orig(*aliases++); strref alias(*aliases++); for (int o=0; o list_cpu) list_cpu = cpu; opcode_table = aCPUs[CPU].opcodes; opcode_count = aCPUs[CPU].num_opcodes; num_instructions = BuildInstructionTable(aInstructions, opcode_table, opcode_count, aCPUs[CPU].aliases, Merlin()); } // Read in text data (main source, include, etc.) char* Asm::LoadText(strref filename, size_t &size) { strown<512> file(filename); std::vector::iterator i = includePaths.begin(); for (;;) { if (FILE *f = fopen(file.c_str(), "rb")) { // rb is intended here since OS fseek(f, 0, SEEK_END); // eol conversion can do ugly things size_t _size = ftell(f); fseek(f, 0, SEEK_SET); if (char *buf = (char*)calloc(_size, 1)) { fread(buf, _size, 1, f); fclose(f); size = _size; return buf; } fclose(f); } if (i==includePaths.end()) break; file.copy(*i); if (file.get_last()!='/' && file.get_last()!='\\') file.append('/'); file.append(filename); ++i; } size = 0; return nullptr; } // Read in binary data (incbin) char* Asm::LoadBinary(strref filename, size_t &size) { strown<512> file(filename); std::vector::iterator i = includePaths.begin(); for (;;) { if (FILE *f = fopen(file.c_str(), "rb")) { fseek(f, 0, SEEK_END); size_t _size = ftell(f); fseek(f, 0, SEEK_SET); if (char *buf = (char*)malloc(_size)) { fread(buf, _size, 1, f); fclose(f); size = _size; return buf; } 
fclose(f); } if (i==includePaths.end()) break; file.copy(*i); if (file.get_last()!='/' && file.get_last()!='\\') file.append('/'); file.append(filename); #ifdef WIN32 file.replace('/', '\\'); #endif ++i; } size = 0; return nullptr; } // Create a new section with a fixed address void Asm::SetSection(strref name, int address) { if (name) { for (std::vector
::iterator i = allSections.begin(); i!=allSections.end(); ++i) { if (i->name && name.same_str(i->name)) { current_section = &*i; return; } } } if (allSections.size()==allSections.capacity()) allSections.reserve(allSections.size() + 16); Section newSection(name, address); if (address < 0x200) // don't compile over zero page and stack frame (may be bad assumption) newSection.SetDummySection(true); allSections.push_back(newSection); current_section = &allSections[allSections.size()-1]; } void Asm::SetSection(strref line) { if (allSections.size() && CurrSection().unused()) allSections.erase(allSections.begin() + SectionId()); if (allSections.size() == allSections.capacity()) allSections.reserve(allSections.size() + 16); SectionType type = ST_UNDEFINED; // SEG.U etc. if (line.get_first() == '.') { ++line; switch (strref::tolower(line.get_first())) { case 'u': type = ST_BSS; break; case 'z': type = ST_ZEROPAGE; break; case 'd': type = ST_DATA; break; case 'c': type = ST_CODE; break; } } line.trim_whitespace(); int align = 1; strref name; while (strref arg = line.split_token_any_trim(",:")) { if (arg.get_first() == '$') { ++arg; align = (int)arg.ahextoui(); } else if (arg.is_number()) align = (int)arg.atoi(); else if (arg.get_first() == '"') name = (arg + 1).before_or_full('"'); else if (!name) name = arg; else if (arg.same_str("code")) type = ST_CODE; else if (arg.same_str("data")) type = ST_DATA; else if (arg.same_str("bss")) type = ST_BSS; else if (arg.same_str("zp") || arg.same_str("dp") || arg.same_str("zeropage") || arg.same_str("direct")) type = ST_ZEROPAGE; } if (type == ST_UNDEFINED) { if (name.find("code") >= 0) type = ST_CODE; else if (name.find("data") >= 0) type = ST_DATA; else if (name.find("bss") >= 0 || name.same_str("directpage_stack")) type = ST_BSS; else if (name.find("zp") >= 0 || name.find("zeropage") >= 0 || name.find("direct") >= 0) type = ST_ZEROPAGE; else type = ST_CODE; } Section newSection(name); newSection.align_address = align; newSection.type 
= type; allSections.push_back(newSection); current_section = &allSections[allSections.size()-1]; } // Fixed address dummy section void Asm::DummySection(int address) { if (allSections.size()==allSections.capacity()) allSections.reserve(allSections.size() + 16); Section newSection(strref(), address); newSection.SetDummySection(true); allSections.push_back(newSection); current_section = &allSections[allSections.size()-1]; } // Current address dummy section void Asm::DummySection() { DummySection(CurrSection().GetPC()); } void Asm::EndSection() { int section = SectionId(); if (section) current_section = &allSections[section-1]; } // Iterate through the current group of sections and assign addresses if this section is fixed // This is to handle the special linking of Merlin where sections are brought together pre-export void Asm::AssignAddressToGroup() { Section &curr = CurrSection(); if (!curr.address_assigned) return; // Put in all the sections cared about into either the fixed sections or the relative sections std::vector FixedExport; std::vector RelativeExport; int seg = SectionId(); while (seg>=0) { Section &s = allSections[seg]; if (s.address_assigned && s.type != ST_ZEROPAGE && s.start_address >= curr.start_address) { bool inserted = false; for (std::vector::iterator i = FixedExport.begin(); i!=FixedExport.end(); ++i) { if (s.start_address < (*i)->start_address) { FixedExport.insert(i, &s); inserted = true; break; } } if (!inserted) FixedExport.push_back(&s); } else if (!s.address_assigned && s.type != ST_ZEROPAGE) { RelativeExport.push_back(&s); s.export_append = curr.export_append; } seg = allSections[seg].next_group; } // in this case each block should be added individually in order of code / data / bss for (int type = ST_CODE; type <= ST_BSS; type++) { std::vector::iterator i = RelativeExport.begin(); while (i!=RelativeExport.end()) { Section *pSec = *i; if (pSec->type == type) { int bytes = pSec->address - pSec->start_address; size_t insert_after = 
FixedExport.size()-1; for (size_t p = 0; paddress; int start_next = FixedExport[p+1]->start_address; int avail = start_next - end_prev; if (avail >= bytes) { int addr = end_prev; addr += pSec->align_address <= 1 ? 0 : (pSec->align_address - (addr % pSec->align_address)) % pSec->align_address; if ((addr + bytes) <= start_next) { insert_after = p; break; } } } int address = FixedExport[insert_after]->address; address += pSec->align_address <= 1 ? 0 : (pSec->align_address - (address % pSec->align_address)) % pSec->align_address; AssignAddressToSection(SectionId(*pSec), address); FixedExport.insert((FixedExport.begin() + insert_after + 1), pSec); i = RelativeExport.erase(i); } else ++i; } } } // list all export append names // for each valid export append name build a binary fixed address code // - find lowest and highest address // - alloc & 0 memory // - any matching relative sections gets linked in after // - go through all section that matches export_append in order and copy over memory uint8_t* Asm::BuildExport(strref append, int &file_size, int &addr) { int start_address = 0x7fffffff; int end_address = 0; bool has_relative_section = false; bool has_fixed_section = false; int first_link_section = -1; std::vector FixedExport; // find address range while (!has_relative_section && !has_fixed_section) { int section_id = 0; for (std::vector
::iterator i = allSections.begin(); i != allSections.end(); ++i) { if (((!append && !i->export_append) || append.same_str_case(i->export_append)) && i->type != ST_ZEROPAGE) { if (!i->IsMergedSection()) { if (i->IsRelativeSection()) { // prioritize code over data, local code over included code for initial binary segment if ((i->type == ST_CODE || i->type == ST_DATA) && i->first_group < 0 && (first_link_section < 0 || (i->type == ST_CODE && (allSections[first_link_section].type == ST_DATA || (!i->include_from && allSections[first_link_section].include_from))))) first_link_section = SectionId(*i); has_relative_section = true; } else if (i->start_address >= 0x100 && i->size() > 0) { has_fixed_section = true; bool inserted = false; for (std::vector::iterator f = FixedExport.begin(); f != FixedExport.end(); ++f) { if ((*f)->start_address > i->start_address) { FixedExport.insert(f, &*i); inserted = true; break; } } if (!inserted) FixedExport.push_back(&*i); if (i->start_address < start_address) start_address = i->start_address; if ((i->start_address + (int)i->size()) > end_address) { end_address = i->start_address + (int)i->size(); } } } } section_id++; } if (!has_relative_section && !has_fixed_section) return nullptr; if (has_relative_section) { if (!has_fixed_section) { // there is not a fixed section so go through and assign addresses to all sections // starting with the first reasonable section start_address = default_org; if (first_link_section < 0) return nullptr; while (first_link_section >= 0) { FixedExport.push_back(&allSections[first_link_section]); AssignAddressToSection(first_link_section, start_address); start_address = allSections[first_link_section].address; first_link_section = allSections[first_link_section].next_group; } } // First link code sections, then data sections, then BSS sections for (int sectype = ST_CODE; sectype <= ST_BSS; sectype++) { // there are fixed sections so fit all relative sections after or inbetween fixed sections in export group 
for (std::vector
::iterator i = allSections.begin(); i != allSections.end(); ++i) { if (sectype == i->type && ((!append && !i->export_append) || append.same_str_case(i->export_append))) { int id = (int)(&*i - &allSections[0]); if (i->IsRelativeSection() && i->first_group < 0) { // try to fit this section in between existing sections if possible int insert_after = (int)FixedExport.size()-1; for (int f = 0; f < insert_after; f++) { int start_block = FixedExport[f]->address; int end_block = FixedExport[f + 1]->start_address; if ((end_block - start_block) >= (i->address - i->start_address)) { int addr_block = start_block; int sec = id; while (sec >= 0) { Section &s = allSections[sec]; addr_block += s.align_address <= 1 ? 0 : (s.align_address - (addr_block % s.align_address)) % s.align_address; addr_block += s.address - s.start_address; sec = s.next_group; } if (addr_block <= end_block) { insert_after = f; break; } } } int sec = id; start_address = FixedExport[insert_after]->address; while (sec >= 0) { insert_after++; if (insert_after<(int)FixedExport.size()) FixedExport.insert(FixedExport.begin() + insert_after, &allSections[sec]); else FixedExport.push_back(&allSections[sec]); AssignAddressToSection(sec, start_address); start_address = allSections[sec].address; sec = allSections[sec].next_group; } } } } } } } // get memory for output buffer start_address = FixedExport[0]->start_address; int last_data_export = (int)(FixedExport.size() - 1); while (last_data_export>0 && FixedExport[last_data_export]->type == ST_BSS) last_data_export--; end_address = FixedExport[last_data_export]->address; uint8_t *output = (uint8_t*)calloc(1, end_address - start_address); // copy over in order for (std::vector
::iterator i = allSections.begin(); i != allSections.end(); ++i) { if (((!append && !i->export_append) || append.same_str_case(i->export_append)) && i->type != ST_ZEROPAGE) { if (i->merged_offset == -1 && i->start_address >= 0x200 && i->size() > 0) memcpy(output + i->start_address - start_address, i->output, i->size()); } } printf("Linker export + \"" STRREF_FMT "\" summary:\n", STRREF_ARG(append)); for (std::vector::iterator f = FixedExport.begin(); f != FixedExport.end(); ++f) { if ((*f)->include_from) { printf("* $%04x-$%04x: " STRREF_FMT " (%d) included from " STRREF_FMT "\n", (*f)->start_address, (*f)->address, STRREF_ARG((*f)->name), (int)(*f - &allSections[0]), STRREF_ARG((*f)->include_from)); } else { printf("* $%04x-$%04x: " STRREF_FMT " (%d)\n", (*f)->start_address, (*f)->address, STRREF_ARG((*f)->name), (int)(*f - &allSections[0])); } } // return the result file_size = end_address - start_address; addr = start_address; return output; } // Collect all the export names int Asm::GetExportNames(strref *aNames, int maxNames) { int count = 0; for (std::vector
::iterator i = allSections.begin(); i != allSections.end(); ++i) { if (!i->IsMergedSection()) { bool found = false; uint32_t hash = i->export_append.fnv1a_lower(); for (int n = 0; n < count; n++) { if (aNames[n].fnv1a_lower() == hash) { found = true; break; } } if (!found && count < maxNames) aNames[count++] = i->export_append; } } return count; } // Collect all unassigned ZP sections and link them StatusCode Asm::LinkZP() { uint8_t min_addr = 0xff, max_addr = 0x00; int num_addr = 0; bool has_assigned = false, has_unassigned = false; int first_unassigned = -1; // determine if any zeropage section has been asseigned for (std::vector
::iterator s = allSections.begin(); s != allSections.end(); ++s) { if (s->type == ST_ZEROPAGE && !s->IsMergedSection()) { if (s->address_assigned) { has_assigned = true; if (s->start_address < (int)min_addr) min_addr = (uint8_t)s->start_address; else if ((int)s->address > max_addr) max_addr = (uint8_t)s->address; } else { has_unassigned = true; first_unassigned = first_unassigned >=0 ? first_unassigned : (int)(&*s - &allSections[0]); } num_addr += s->address - s->start_address; } } if (num_addr > 0x100) return ERROR_ZEROPAGE_SECTION_OUT_OF_RANGE; // no unassigned zp section, nothing to fix if (!has_unassigned) return STATUS_OK; StatusCode status = STATUS_OK; // no section assigned => fit together at end if (!has_assigned) { int address = 0x100 - num_addr; for (std::vector
::iterator s = allSections.begin(); status==STATUS_OK && s != allSections.end(); ++s) { if (s->type == ST_ZEROPAGE && !s->IsMergedSection()) { status = AssignAddressToSection((int)(&*s - &allSections[0]), address); address = s->address; } } } else { // find first fit neighbouring an address assigned zero page section for (std::vector
::iterator s = allSections.begin(); s != allSections.end(); ++s) { if (s->type == ST_ZEROPAGE && !s->IsMergedSection() && !s->address_assigned) { int size = s->address - s->start_address; bool found = false; // find any assigned address section and try to place before or after for (std::vector
::iterator sa = allSections.begin(); sa != allSections.end(); ++sa) { if (sa->type == ST_ZEROPAGE && !sa->IsMergedSection() && sa->address_assigned) { for (int e = 0; e < 2; ++e) { int start = e ? sa->start_address - size : sa->address; int align_size = s->align_address <= 1 ? 0 : (s->align_address - (start % s->align_address)) % s->align_address; start += align_size; int end = start + size; if (start >= 0 && end <= 0x100) { for (std::vector
::iterator sc = allSections.begin(); !found && sc != allSections.end(); ++sc) { found = true; if (&*sa != &*sc && sc->type == ST_ZEROPAGE && !sc->IsMergedSection() && sc->address_assigned) { if (start <= sc->address && sc->start_address <= end) found = false; } } } if (found) AssignAddressToSection((int)(&*s - &allSections[0]), start); } } } if (!found) return ERROR_ZEROPAGE_SECTION_OUT_OF_RANGE; } } } return status; } // Apply labels assigned to addresses in a relative section a fixed address or as part of another section void Asm::LinkLabelsToAddress(int section_id, int section_new, int section_address) { Label *pLabels = labels.getValues(); int numLabels = labels.count(); for (int l = 0; l < numLabels; l++) { if (pLabels->section == section_id) { pLabels->value += section_address; pLabels->section = section_new; if (pLabels->mapIndex>=0 && pLabels->mapIndex<(int)map.size()) { struct MapSymbol &msym = map[pLabels->mapIndex]; msym.value = pLabels->value; msym.section = (int16_t)section_new; } CheckLateEval(pLabels->label_name); } ++pLabels; } } // go through relocs in all sections to see if any targets this section // relocate section to address! StatusCode Asm::LinkRelocs(int section_id, int section_new, int section_address) { for (std::vector
::iterator j = allSections.begin(); j != allSections.end(); ++j) { Section &s2 = *j; if (s2.pRelocs) { relocList *pList = s2.pRelocs; relocList::iterator i = pList->end(); while (i != pList->begin()) { --i; if (i->target_section == section_id) { Section *trg_sect = &s2; size_t output_offs = 0; while (trg_sect->merged_offset>=0) { output_offs += trg_sect->merged_offset; trg_sect = &allSections[trg_sect->merged_section]; } // only finalize the target value if fixed address if (section_new == -1 || allSections[section_new].address_assigned) { uint8_t *trg = trg_sect->output + output_offs + i->section_offset; int value = i->base_value + section_address; if (i->shift < 0) value >>= -i->shift; else if (i->shift) value <<= i->shift; for (int b = 0; b < i->bytes; b++) *trg++ = (uint8_t)(value >> (b * 8)); i = pList->erase(i); if (i != pList->end()) ++i; } } } if (pList->empty()) { free(pList); s2.pRelocs = nullptr; } } } return STATUS_OK; } // Append one section to the end of another StatusCode Asm::AssignAddressToSection(int section_id, int address) { if (section_id < 0 || section_id >= (int)allSections.size()) return ERROR_NOT_A_SECTION; Section &s = allSections[section_id]; if (s.address_assigned) return ERROR_CANT_REASSIGN_FIXED_SECTION; // fix up the alignment of the address int align_size = s.align_address <= 1 ? 
0 : (s.align_address - (address % s.align_address)) % s.align_address; address += align_size; s.start_address = address; s.address += address; s.address_assigned = true; LinkLabelsToAddress(section_id, -1, s.start_address); return LinkRelocs(section_id, -1, s.start_address); } // Link sections with a specific name at this point // Relative sections will just be appeneded to a grouping list // Fixed address sections will be merged together StatusCode Asm::LinkSections(strref name) { if (CurrSection().IsDummySection()) return ERROR_LINKER_CANT_LINK_TO_DUMMY_SECTION; int last_section_group = CurrSection().next_group; while (last_section_group > -1 && allSections[last_section_group].next_group > -1) last_section_group = allSections[last_section_group].next_group; for (std::vector
::iterator i = allSections.begin(); i != allSections.end(); ++i) { if ((!name || i->name.same_str_case(name)) && i->IsRelativeSection() && !i->IsMergedSection()) { // it is ok to link other sections with the same name to this section if (&*i == &CurrSection()) continue; // Zero page sections can only be linked with zero page sections if (i->type != ST_ZEROPAGE || CurrSection().type == ST_ZEROPAGE) { i->export_append = CurrSection().export_append; if (!i->address_assigned) { if (i->first_group < 0) { int prev = last_section_group >= 0 ? last_section_group : SectionId(); int curr = (int)(&*i - &allSections[0]); allSections[prev].next_group = curr; i->first_group = CurrSection().first_group ? CurrSection().first_group : SectionId(); last_section_group = curr; } } } else return ERROR_CANT_LINK_ZP_AND_NON_ZP; } } return STATUS_OK; } StatusCode Asm::MergeSections(int section_id, int section_merge) { if (section_id == section_merge || section_id<0 || section_merge<0) return STATUS_OK; Section &s = allSections[section_id]; Section &m = allSections[section_merge]; // merging section needs to be relative to be appended if (!m.IsRelativeSection()) return ERROR_CANT_APPEND_SECTION_TO_TARGET; // if merging section is aligned and target section is not aligned to that or multiple of then can't merge if (m.align_address>1 && (!s.IsRelativeSection() || (s.align_address%m.align_address)!=0)) return ERROR_CANT_APPEND_SECTION_TO_TARGET; // append the binary to the target.. int addr_start = s.address; int align = m.align_address <= 1 ? 
0 : (m.align_address - (addr_start % m.align_address)) % m.align_address; if (m.size()) { if (s.CheckOutputCapacity(m.size() + align) == STATUS_OK) { for (int a = 0; a < align; a++) s.AddByte(0); s.AddBin(m.output, m.size()); } } else if (m.addr_size() && s.type != ST_BSS && s.type != ST_ZEROPAGE && !s.dummySection) { if (s.CheckOutputCapacity(m.address - m.start_address) == STATUS_OK) { for (int a = (m.start_address-align); a < m.address; a++) s.AddByte(0); } } else if (m.addr_size()) s.AddAddress(align + m.addr_size()); addr_start += align; // move the relocs from the merge section to the keep section if (m.pRelocs) { if (!s.pRelocs) s.pRelocs = new relocList; if (s.pRelocs->capacity() < (s.pRelocs->size() + m.pRelocs->size())) s.pRelocs->reserve(s.pRelocs->size() + m.pRelocs->size()); for (relocList::iterator r = m.pRelocs->begin(); r != m.pRelocs->end(); ++r) { struct Reloc rel = *r; rel.section_offset += addr_start; s.pRelocs->push_back(rel); } delete m.pRelocs; m.pRelocs = nullptr; } // go through all the relocs referring to merging section and replace for (std::vector
::iterator i = allSections.begin(); i!=allSections.end(); ++i) { if (relocList *pReloc = i->pRelocs) { for (relocList::iterator r = pReloc->begin(); r!=pReloc->end(); ++r) { if (r->target_section == section_merge) { r->base_value += addr_start; r->target_section = section_id; } } } } // go through all labels referencing merging section for (uint32_t i = 0; isection == section_merge) { i->value += addr_start; i->section = (int16_t)section_id; } } // go through all late evals referencing this section for (std::vector::iterator i = lateEval.begin(); i!=lateEval.end(); ++i) { if (i->section == section_merge) { i->section = (int16_t)section_id; if (i->target >= 0) i->target += addr_start; i->address += addr_start; if (i->scope >= 0) i->scope += addr_start; } } // go through listing if (m.pListing) { if (!s.pListing) s.pListing = new Listing; if (s.pListing->capacity() < (m.pListing->size() + s.pListing->size())) s.pListing->reserve((m.pListing->size() + s.pListing->size())); for (Listing::iterator i = m.pListing->begin(); i!=m.pListing->end(); ++i) { ListLine l = *i; l.address += addr_start; s.pListing->push_back(l); } delete m.pListing; m.pListing = nullptr; } printf("merged section %d into section %d at $%x offset\n", section_merge, section_id, addr_start); m.type = ST_REMOVED; return STATUS_OK; } // Go through sections and merge same name sections together StatusCode Asm::MergeSectionsByName(int first_section) { int first_code_seg = -1; StatusCode status = STATUS_OK; for (std::vector
::iterator i = allSections.begin(); i != allSections.end(); ++i) { if (i->type != ST_REMOVED) { if (first_code_seg<0 && i->type==ST_CODE) first_code_seg = (int)(&*i-&allSections[0]); std::vector
::iterator n = i; ++n; while (n != allSections.end()) { if (n->name.same_str_case(i->name) && n->type == i->type) { int sk = (int)(&*i - &allSections[0]); int sm = (int)(&*n - &allSections[0]); if (sm == first_section || (n->align_address > i->align_address)) { if (n->align_addressalign_address) n->align_address = i->align_address; status = MergeSections(sm, sk); } else status = MergeSections(sk, sm); if (status != STATUS_OK) return status; } ++n; } } } return STATUS_OK; } // Merge all sections in order of code, data, bss and make sure a specific section remains first #define MERGE_ORDER_CNT (ST_BSS - ST_CODE+1) StatusCode Asm::MergeAllSections(int first_section) { StatusCode status = STATUS_OK; // combine all sections by type first for (int t = ST_CODE; t allSections[j].align_address) allSections[i].align_address = allSections[j].align_address; status = MergeSections(j, i); } else status = MergeSections(i, j); } } } } } // then combine by category except zero page int merge_order[MERGE_ORDER_CNT] = { -1 }; for (int t = ST_CODE; t <= ST_BSS; t++) { for (int i = 0; i<(int)allSections.size(); ++i) { if (allSections[i].type == t) { merge_order[t - ST_CODE] = i; break; } } } for (int n = 1; n < MERGE_ORDER_CNT; n++) { if (merge_order[n] == -1) { for (int m = n + 1; m < MERGE_ORDER_CNT; m++) merge_order[m - 1] = merge_order[m]; } } if (merge_order[0] == -1) return ERROR_NOT_A_SECTION; for (int o = 1; o < MERGE_ORDER_CNT; o++) { if (merge_order[o] != -1 && status == STATUS_OK) { if (allSections[merge_order[0]].align_address < allSections[merge_order[o]].align_address) allSections[merge_order[0]].align_address = allSections[merge_order[o]].align_address; status = MergeSections(merge_order[0], merge_order[o]); } } return status; } // Section based output capacity // Make sure there is room to assemble in StatusCode Section::CheckOutputCapacity(uint32_t addSize) { if (dummySection || type == ST_ZEROPAGE || type == ST_BSS) return STATUS_OK; size_t currSize = curr - output; 
if ((addSize + currSize) >= output_capacity) { size_t newSize = currSize * 2; if (newSize < 64*1024) newSize = 64*1024; if ((addSize+currSize) > newSize) newSize += newSize; if (uint8_t *new_output = (uint8_t*)malloc(newSize)) { memcpy(new_output, output, size()); curr = new_output + (curr - output); free(output); output = new_output; output_capacity = newSize; } else return ERROR_OUT_OF_MEMORY; } return STATUS_OK; } // Add one byte to a section void Section::AddByte(int b) { if (!dummySection && type != ST_ZEROPAGE && type != ST_BSS) { if (CheckOutputCapacity(1) == STATUS_OK) *curr++ = (uint8_t)b; } address++; } // Add a 16 bit word to a section void Section::AddWord(int w) { if (!dummySection && type != ST_ZEROPAGE && type != ST_BSS) { if (CheckOutputCapacity(2) == STATUS_OK) { *curr++ = (uint8_t)(w & 0xff); *curr++ = (uint8_t)(w >> 8); } } address += 2; } // Add a 24 bit word to a section void Section::AddTriple(int l) { if (!dummySection && type != ST_ZEROPAGE && type != ST_BSS) { if (CheckOutputCapacity(3) == STATUS_OK) { *curr++ = (uint8_t)(l & 0xff); *curr++ = (uint8_t)(l >> 8); *curr++ = (uint8_t)(l >> 16); } } address += 3; } // Add arbitrary length data to a section void Section::AddBin(const uint8_t *p, int size) { if (!dummySection && type != ST_ZEROPAGE && type != ST_BSS) { if (CheckOutputCapacity(size) == STATUS_OK) { memcpy(curr, p, size); curr += size; } } address += size; } // Add text data to a section void Section::AddText(strref line, strref text_prefix) { // https://en.wikipedia.org/wiki/PETSCII // ascii: no change // shifted: a-z => $41.. A-Z => $61.. // unshifted: a-z, A-Z => $41 if (CheckOutputCapacity((uint32_t)line.get_len()) == STATUS_OK) { if (!text_prefix || text_prefix.same_str("ascii")) { AddBin((const uint8_t*)line.get(), (int)line.get_len()); } else if (text_prefix.same_str("petscii")) { while (line) { char c = line[0]; AddByte((c >= 'a' && c <= 'z') ? (c - 'a' + 'A') : (c > 0x60 ? 
' ' : line[0])); ++line; } } else if (text_prefix.same_str("petscii_shifted")) { while (line) { char c = line[0]; AddByte((c >= 'a' && c <= 'z') ? (c - 'a' + 0x61) : ((c >= 'A' && c <= 'Z') ? (c - 'A' + 0x61) : (c > 0x60 ? ' ' : line[0]))); ++line; } } } } // Add a relocation marker to a section void Section::AddReloc(int base, int offset, int section, int8_t bytes, int8_t shift) { if (!pRelocs) pRelocs = new relocList; if (pRelocs->size() == pRelocs->capacity()) pRelocs->reserve(pRelocs->size() + 32); pRelocs->push_back(Reloc(base, offset, section, bytes, shift)); } // Make sure there is room to assemble in StatusCode Asm::CheckOutputCapacity(uint32_t addSize) { return CurrSection().CheckOutputCapacity(addSize); } // // // SCOPE MANAGEMENT // // StatusCode Asm::EnterScope() { if (scope_depth >= (MAX_SCOPE_DEPTH - 1)) return ERROR_TOO_DEEP_SCOPE; scope_address[++scope_depth] = CurrSection().GetPC(); return STATUS_OK; } StatusCode Asm::ExitScope() { CheckLateEval(strref(), CurrSection().GetPC()); StatusCode error = FlushLocalLabels(scope_depth); if (error >= FIRST_ERROR) return error; --scope_depth; if (scope_depth<0) return ERROR_UNBALANCED_SCOPE_CLOSURE; return STATUS_OK; } // // // CONTEXT ISOLATION // // StatusCode Asm::PushContext(strref src_name, strref src_file, strref code_seg, int rept) { if (conditional_depth>=(MAX_CONDITIONAL_DEPTH-1)) return ERROR_CONDITION_TOO_NESTED; conditional_depth++; conditional_nesting[conditional_depth] = 0; conditional_consumed[conditional_depth] = false; contextStack.push(src_name, src_file, code_seg, rept); contextStack.curr().conditional_ctx = (int16_t)conditional_depth; if (scope_depth >= (MAX_SCOPE_DEPTH - 1)) return ERROR_TOO_DEEP_SCOPE; else scope_address[++scope_depth] = CurrSection().GetPC(); return STATUS_OK; } StatusCode Asm::PopContext() { if (scope_depth) { StatusCode ret = ExitScope(); if (ret != STATUS_OK) return ret; } if (!ConditionalAsm() || ConditionalConsumed() || 
conditional_depth!=contextStack.curr().conditional_ctx) return ERROR_UNTERMINATED_CONDITION; conditional_depth = contextStack.curr().conditional_ctx-1; contextStack.pop(); return STATUS_OK; } // // // MACROS // // // add a custom macro StatusCode Asm::AddMacro(strref macro, strref source_name, strref source_file, strref &left) { // // Recommended macro syntax: // macro name(optional params) { actual macro } // // -endm option macro syntax: // macro name arg // actual macro // endmacro // // Merlin macro syntax: (TODO: ignore arguments and use ]1, ]2, etc.) // name mac arg1 arg2 // actual macro // [<<<]/[EOM] // strref name; bool params_first_line = false; if (Merlin()) { if (Label *pLastLabel = GetLabel(last_label)) { labels.remove((uint32_t)(pLastLabel - labels.getValues())); name = last_label; last_label.clear(); macro.skip_whitespace(); if (macro.get_first()==';' || macro.has_prefix(c_comment)) macro.line(); else params_first_line = true; } else return ERROR_BAD_MACRO_FORMAT; } else { name = macro.split_range(label_end_char_range); macro.skip_whitespace(); strref left_line = macro.get_line(); left_line.skip_whitespace(); left_line = left_line.before_or_full(';').before_or_full(c_comment); if (left_line && left_line[0] != '(' && left_line[0] != '{') params_first_line = true; } uint32_t hash = name.fnv1a(); uint32_t ins = FindLabelIndex(hash, macros.getKeys(), macros.count()); Macro *pMacro = nullptr; while (ins < macros.count() && macros.getKey(ins)==hash) { if (name.same_str_case(macros.getValue(ins).name)) { pMacro = macros.getValues() + ins; break; } ++ins; } if (!pMacro) { macros.insert(ins, hash); pMacro = macros.getValues() + ins; } pMacro->name = name; if (Merlin()) { strref source = macro; while (strref next_line = macro.line()) { next_line = next_line.before_or_full(';'); next_line = next_line.before_or_full(c_comment); int term = next_line.find("<<<"); if (term < 0) term = next_line.find("EOM"); if (term >= 0) { strl_t macro_len = strl_t(next_line.get() 
+ term - source.get()); source = source.get_substr(0, macro_len); break; } } left = macro; pMacro->macro = source; source.skip_whitespace(); } else if (end_macro_directive) { int f = -1; const strref endm("endm"); for (;;) { f = macro.find(endm, f+1); if (f<0) return ERROR_BAD_MACRO_FORMAT; if (f == 0 || strref::is_ws(macro[f - 1])) break; } pMacro->macro = macro.get_substr(0, f); macro += f; macro.line(); left = macro; } else { int pos_bracket = macro.find('{'); if (pos_bracket < 0) { pMacro->macro = strref(); return ERROR_BAD_MACRO_FORMAT; } strref source = macro + pos_bracket; strref macro_body = source.scoped_block_skip(); pMacro->macro = strref(macro.get(), pos_bracket + macro_body.get_len() + 2); source.skip_whitespace(); left = source; } pMacro->source_name = source_name; pMacro->source_file = source_file; pMacro->params_first_line = params_first_line; return STATUS_OK; } // Compile in a macro StatusCode Asm::BuildMacro(Macro &m, strref arg_list) { strref macro_src = m.macro, params; if (m.params_first_line) { if (end_macro_directive || Merlin()) params = macro_src.line(); else { params = macro_src.before('{'); macro_src += params.get_len(); } } else params = (macro_src[0] == '(' ? 
macro_src.scoped_block_skip() : strref()); params.trim_whitespace(); arg_list.trim_whitespace(); if (Merlin()) { // need to include comment field because separator is ; if (contextStack.curr().read_source.is_substr(arg_list.get())) arg_list = (contextStack.curr().read_source + strl_t(arg_list.get()-contextStack.curr().read_source.get()) ).line(); arg_list = arg_list.before_or_full(c_comment).get_trimmed_ws(); strref arg = arg_list; strown<16> tag; int t_max = 16; int dSize = 0; for (int t=1; t 0) { for (int skip = 1; skip < t; skip++) args.split_token_trim(';'); strref a = args.split_token_trim(';'); macexp.exchange(offs, tag_mac.get_len(), a); pos += a.get_len(); success = true; } } if (!success) return ERROR_MACRO_ARGUMENT; } } PushContext(m.source_name, macexp.get_strref(), macexp.get_strref()); return STATUS_OK; } else return ERROR_OUT_OF_MEMORY_FOR_MACRO_EXPANSION; } else if (params) { if (arg_list[0]=='(') arg_list = arg_list.scoped_block_skip(); strref pchk = params; strref arg = arg_list; int dSize = 0; char token = arg_list.find(',')>=0 ? ',' : ' '; char token_macro = m.params_first_line && params.find(',') < 0 ? 
' ' : ','; while (strref param = pchk.split_token_trim(token_macro)) { strref a = arg.split_token_trim(token); if (param.get_len() < a.get_len()) { int count = macro_src.substr_case_count(param); dSize += count * ((int)a.get_len() - (int)param.get_len()); } } int mac_size = (int)macro_src.get_len() + dSize + 32; if (char *buffer = (char*)malloc(mac_size)) { loadedData.push_back(buffer); strovl macexp(buffer, mac_size); macexp.copy(macro_src); while (strref param = params.split_token_trim(token_macro)) { strref a = arg_list.split_token_trim(token); macexp.replace_bookend(param, a, label_end_char_range); } PushContext(m.source_name, macexp.get_strref(), macexp.get_strref()); return STATUS_OK; } else return ERROR_OUT_OF_MEMORY_FOR_MACRO_EXPANSION; } PushContext(m.source_name, m.source_file, macro_src); return STATUS_OK; } // // // STRUCTS AND ENUMS // // // Enums are Structs in disguise StatusCode Asm::BuildEnum(strref name, strref declaration) { uint32_t hash = name.fnv1a(); uint32_t ins = FindLabelIndex(hash, labelStructs.getKeys(), labelStructs.count()); LabelStruct *pEnum = nullptr; while (ins < labelStructs.count() && labelStructs.getKey(ins)==hash) { if (name.same_str_case(labelStructs.getValue(ins).name)) { pEnum = labelStructs.getValues() + ins; break; } ++ins; } if (pEnum) return ERROR_STRUCT_ALREADY_DEFINED; labelStructs.insert(ins, hash); pEnum = labelStructs.getValues() + ins; pEnum->name = name; pEnum->first_member = (uint16_t)structMembers.size(); pEnum->numMembers = 0; pEnum->size = 0; // enums are 0 sized int value = 0; struct EvalContext etx; SetEvalCtxDefaults(etx); while (strref line = declaration.line()) { line = line.before_or_full(','); line.trim_whitespace(); strref member_name = line.split_token_trim('='); line = line.before_or_full(';').before_or_full(c_comment).get_trimmed_ws(); if (line) { StatusCode error = EvalExpression(line, etx, value); if (error == STATUS_NOT_READY || error == STATUS_XREF_DEPENDENT) return ERROR_ENUM_CANT_BE_ASSEMBLED; 
else if (error != STATUS_OK) return error; } struct MemberOffset member; member.offset = (uint16_t)value; member.name = member_name; member.name_hash = member.name.fnv1a(); member.sub_struct = strref(); structMembers.push_back(member); ++value; pEnum->numMembers++; } return STATUS_OK; } StatusCode Asm::BuildStruct(strref name, strref declaration) { uint32_t hash = name.fnv1a(); uint32_t ins = FindLabelIndex(hash, labelStructs.getKeys(), labelStructs.count()); LabelStruct *pStruct = nullptr; while (ins < labelStructs.count() && labelStructs.getKey(ins)==hash) { if (name.same_str_case(labelStructs.getValue(ins).name)) { pStruct = labelStructs.getValues() + ins; break; } ++ins; } if (pStruct) return ERROR_STRUCT_ALREADY_DEFINED; labelStructs.insert(ins, hash); pStruct = labelStructs.getValues() + ins; pStruct->name = name; pStruct->first_member = (uint16_t)structMembers.size(); uint32_t byte_hash = struct_byte.fnv1a(); uint32_t word_hash = struct_word.fnv1a(); uint16_t size = 0; uint16_t member_count = 0; while (strref line = declaration.line()) { line.trim_whitespace(); strref type = line.split_label(); if (!type) continue; line.skip_whitespace(); uint32_t type_hash = type.fnv1a(); uint16_t type_size = 0; LabelStruct *pSubStruct = nullptr; if (type_hash==byte_hash && struct_byte.same_str_case(type)) type_size = 1; else if (type_hash==word_hash && struct_word.same_str_case(type)) type_size = 2; else { uint32_t index = FindLabelIndex(type_hash, labelStructs.getKeys(), labelStructs.count()); while (index < labelStructs.count() && labelStructs.getKey(index)==type_hash) { if (type.same_str_case(labelStructs.getValue(index).name)) { pSubStruct = labelStructs.getValues() + index; break; } ++index; } if (!pSubStruct) { labelStructs.remove(ins); return ERROR_REFERENCED_STRUCT_NOT_FOUND; } type_size = pSubStruct->size; } // add the new member, don't grow vectors one at a time. 
if (structMembers.size() == structMembers.capacity()) structMembers.reserve(structMembers.size() + 64); struct MemberOffset member; member.offset = size; member.name = line.get_label(); member.name_hash = member.name.fnv1a(); member.sub_struct = pSubStruct ? pSubStruct->name : strref(); structMembers.push_back(member); size += type_size; member_count++; } // add a trailing member of 0 bytes to access the size of the structure { struct MemberOffset bytes_member; bytes_member.offset = size; bytes_member.name = "bytes"; bytes_member.name_hash = bytes_member.name.fnv1a(); bytes_member.sub_struct = strref(); structMembers.push_back(bytes_member); member_count++; } pStruct->numMembers = member_count; pStruct->size = size; return STATUS_OK; } // Evaluate a struct offset as if it was a label StatusCode Asm::EvalStruct(strref name, int &value) { LabelStruct *pStruct = nullptr; uint16_t offset = 0; while (strref struct_seg = name.split_token('.')) { strref sub_struct = struct_seg; uint32_t seg_hash = struct_seg.fnv1a(); if (pStruct) { struct MemberOffset *member = &structMembers[pStruct->first_member]; bool found = false; for (int i = 0; inumMembers; i++) { if (member->name_hash == seg_hash && member->name.same_str_case(struct_seg)) { offset += member->offset; sub_struct = member->sub_struct; found = true; break; } ++member; } if (!found) return ERROR_REFERENCED_STRUCT_NOT_FOUND; } if (sub_struct) { uint32_t hash = sub_struct.fnv1a(); uint32_t index = FindLabelIndex(hash, labelStructs.getKeys(), labelStructs.count()); while (index < labelStructs.count() && labelStructs.getKey(index)==hash) { if (sub_struct.same_str_case(labelStructs.getValue(index).name)) { pStruct = labelStructs.getValues() + index; break; } ++index; } } else if (name) return STATUS_NOT_STRUCT; } if (pStruct == nullptr) return STATUS_NOT_STRUCT; value = offset; return STATUS_OK; } // // // EXPRESSIONS AND LATE EVALUATION // // int Asm::ReptCnt() const { return contextStack.curr().repeat_total - 
contextStack.curr().repeat; } void Asm::SetEvalCtxDefaults(struct EvalContext &etx) { etx.pc = CurrSection().GetPC(); // current address at point of eval etx.scope_pc = scope_address[scope_depth]; // current scope open at point of eval etx.scope_end_pc = -1; // late scope closure after eval etx.scope_depth = scope_depth; // scope depth for eval (must match current for scope_end_pc to eval) etx.relative_section = -1; // return can be relative to this section etx.file_ref = -1; // can access private label from this file or -1 etx.rept_cnt = ReptCnt(); // current repeat counter } // Get a single token from a merlin expression EvalOperator Asm::RPNToken_Merlin(strref &expression, const struct EvalContext &etx, EvalOperator prev_op, int16_t §ion, int &value) { char c = expression.get_first(); switch (c) { case '$': ++expression; value = (int)expression.ahextoui_skip(); return EVOP_VAL; case '-': ++expression; return EVOP_SUB; case '+': ++expression; return EVOP_ADD; case '*': // asterisk means both multiply and current PC, disambiguate! ++expression; if (expression[0] == '*') return EVOP_STP; // double asterisks indicates comment else if (prev_op==EVOP_VAL || prev_op==EVOP_RPR) return EVOP_MUL; value = etx.pc; section = int16_t(CurrSection().IsRelativeSection() ? SectionId() : -1); return EVOP_VAL; case '/': ++expression; return EVOP_DIV; case '>': if (expression.get_len() >= 2 && expression[1] == '>') { expression += 2; return EVOP_SHR; } ++expression; return EVOP_HIB; case '<': if (expression.get_len() >= 2 && expression[1] == '<') { expression += 2; return EVOP_SHL; } ++expression; return EVOP_LOB; case '%': // % means both binary and scope closure, disambiguate! if (expression[1]=='0' || expression[1]=='1') { ++expression; value = (int)expression.abinarytoui_skip(); return EVOP_VAL; } if (etx.scope_end_pc<0 || scope_depth != etx.scope_depth) return EVOP_NRY; ++expression; value = etx.scope_end_pc; section = int16_t(CurrSection().IsRelativeSection() ? 
SectionId() : -1); return EVOP_VAL; case '|': case '.': ++expression; return EVOP_OR; // MERLIN: . is or, | is not used case '^': if (prev_op == EVOP_VAL || prev_op == EVOP_RPR) { ++expression; return EVOP_EOR; } ++expression; return EVOP_BAB; case '&': ++expression; return EVOP_AND; case '(': if (prev_op!=EVOP_VAL) { ++expression; return EVOP_LPR; } return EVOP_STP; case ')': ++expression; return EVOP_RPR; case '"': if (expression[2] == '"') { value = expression[1]; expression += 3; return EVOP_VAL; } return EVOP_STP; case '\'': if (expression[2] == '\'') { value = expression[1]; expression += 3; return EVOP_VAL; } return EVOP_STP; case ',': case '?': default: { // MERLIN: ! is eor if (c == '!' && (prev_op == EVOP_VAL || prev_op == EVOP_RPR)) { ++expression; return EVOP_EOR; } else if (c == '!' && !(expression + 1).len_label()) { if (etx.scope_pc < 0) return EVOP_NRY; // ! by itself is current scope, !+label char is a local label ++expression; value = etx.scope_pc; section = int16_t(CurrSection().IsRelativeSection() ? SectionId() : -1); return EVOP_VAL; } else if (expression.match_chars_str("0-9", "!a-zA-Z_")) { if (prev_op == EVOP_VAL) return EVOP_STP; // value followed by value doesn't make sense, stop value = expression.atoi_skip(); return EVOP_VAL; } else if (c == '!' || c == ']' || c==':' || strref::is_valid_label(c)) { if (prev_op == EVOP_VAL) return EVOP_STP; // a value followed by a value does not make sense, probably start of a comment (ORCA/LISA?) char e0 = expression[0]; int start_pos = (e0==']' || e0==':' || e0=='!' || e0=='.') ? 
1 : 0; strref label = expression.split_range_trim(label_end_char_range_merlin, start_pos); Label *pLabel = pLabel = GetLabel(label, etx.file_ref); if (!pLabel) { StatusCode ret = EvalStruct(label, value); if (ret == STATUS_OK) return EVOP_VAL; if (ret != STATUS_NOT_STRUCT) return EVOP_ERR; // partial struct } if (!pLabel && label.same_str("rept")) { value = etx.rept_cnt; return EVOP_VAL; } if (!pLabel || !pLabel->evaluated) return EVOP_NRY; // this label could not be found (yet) value = pLabel->value; section = int16_t(pLabel->section); return EVOP_VAL; } else return EVOP_ERR; break; } } return EVOP_NONE; // shouldn't get here normally } // Get a single token from most non-apple II assemblers EvalOperator Asm::RPNToken(strref &exp, const struct EvalContext &etx, EvalOperator prev_op, int16_t §ion, int &value, strref &subexp) { char c = exp.get_first(); switch (c) { case '$': ++exp; value = (int)exp.ahextoui_skip(); return EVOP_VAL; case '-': ++exp; return EVOP_SUB; case '+': ++exp; return EVOP_ADD; case '*': // asterisk means both multiply and current PC, disambiguate! ++exp; if (exp[0] == '*') return EVOP_STP; // double asterisks indicates comment else if (prev_op == EVOP_VAL || prev_op == EVOP_RPR) return EVOP_MUL; value = etx.pc; section = int16_t(CurrSection().IsRelativeSection() ? SectionId() : -1); return EVOP_VAL; case '/': ++exp; return EVOP_DIV; case '=': if (exp[1] == '=') { exp += 2; return EVOP_EQU; } return EVOP_STP; case '>': if (exp.get_len() >= 2 && exp[1] == '>') { exp += 2; return EVOP_SHR; } if (prev_op == EVOP_VAL || prev_op == EVOP_RPR) { ++exp; if (exp[0] == '=') { ++exp; return EVOP_GTE; } return EVOP_GT; } ++exp; return EVOP_HIB; case '<': if (exp.get_len() >= 2 && exp[1] == '<') { exp += 2; return EVOP_SHL; } if (prev_op == EVOP_VAL || prev_op == EVOP_RPR) { ++exp; if (exp[0] == '=') { ++exp; return EVOP_LTE; } return EVOP_LT; } ++exp; return EVOP_LOB; case '%': // % means both binary and scope closure, disambiguate! 
if (exp[1] == '0' || exp[1] == '1') { ++exp; value = (int)exp.abinarytoui_skip(); return EVOP_VAL; } if (etx.scope_end_pc<0 || scope_depth != etx.scope_depth) return EVOP_NRY; ++exp; value = etx.scope_end_pc; section = int16_t(CurrSection().IsRelativeSection() ? SectionId() : -1); return EVOP_VAL; case '|': ++exp; return EVOP_OR; case '^': if (prev_op == EVOP_VAL || prev_op == EVOP_RPR) { ++exp; return EVOP_EOR; } ++exp; return EVOP_BAB; case '&': ++exp; return EVOP_AND; case '(': if (prev_op != EVOP_VAL) { ++exp; return EVOP_LPR; } return EVOP_STP; case ')': ++exp; return EVOP_RPR; case ',': case '?': case '\'': return EVOP_STP; default: { // ! by itself is current scope, !+label char is a local label if (c == '!' && !(exp + 1).len_label()) { if (etx.scope_pc < 0) return EVOP_NRY; ++exp; value = etx.scope_pc; section = int16_t(CurrSection().IsRelativeSection() ? SectionId() : -1); return EVOP_VAL; } else if (exp.match_chars_str("0-9", "!a-zA-Z_")) { if (prev_op == EVOP_VAL) return EVOP_STP; // value followed by value doesn't make sense, stop value = exp.atoi_skip(); return EVOP_VAL; } else if (c == '!' || c == ':' || c=='.' || c=='@' || strref::is_valid_label(c)) { if (prev_op == EVOP_VAL) return EVOP_STP; // a value followed by a value does not make sense, probably start of a comment (ORCA/LISA?) char e0 = exp[0]; int start_pos = (e0 == ':' || e0 == '!' || e0 == '.') ? 
1 : 0; strref label = exp.split_range_trim(label_end_char_range, start_pos); Label *pLabel = pLabel = GetLabel(label, etx.file_ref); if (!pLabel) { StatusCode ret = EvalStruct(label, value); if (ret == STATUS_OK) return EVOP_VAL; if (ret != STATUS_NOT_STRUCT) return EVOP_ERR; // partial struct } if (!pLabel && label.same_str("rept")) { value = etx.rept_cnt; return EVOP_VAL; } if (!pLabel) { if (StringSymbol *pStr = GetString(label)) { subexp = pStr->get(); return EVOP_EXP; } } if (!pLabel || !pLabel->evaluated) return EVOP_NRY; // this label could not be found (yet) value = pLabel->value; section = int16_t(pLabel->section); return pLabel->reference ? EVOP_XRF : EVOP_VAL; } return EVOP_ERR; } } return EVOP_NONE; // shouldn't get here normally } // // EvalExpression // Uses the Shunting Yard algorithm to convert to RPN first // which makes the actual calculation trivial and avoids recursion. // https://en.wikipedia.org/wiki/Shunting-yard_algorithm // // Return: // STATUS_OK means value is completely evaluated // STATUS_NOT_READY means value could not be evaluated right now // ERROR_* means there is an error in the expression // // Max number of unresolved sections to evaluate in a single expression #define MAX_EVAL_SECTIONS 4 // determine if a scalar can be a shift static int mul_as_shift(int scalar) { int shift = 0; while (scalar > 1 && (scalar & 1) == 0) { shift++; scalar >>= 1; } return scalar == 1 ? 
shift : 0; } #define MAX_EXPR_STACK 2 StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, int &result) { int numValues = 0; int numOps = 0; strref expression_stack[MAX_EXPR_STACK]; int exp_sp = 0; char ops[MAX_EVAL_OPER]; // RPN expression int values[MAX_EVAL_VALUES]; // RPN values (in order of RPN EVOP_VAL operations) int16_t section_ids[MAX_EVAL_SECTIONS]; // local index of each referenced section int16_t section_val[MAX_EVAL_VALUES] = { 0 }; // each value can be assigned to one section, or -1 if fixed int16_t num_sections = 0; // number of sections in section_ids (normally 0 or 1, can be up to MAX_EVAL_SECTIONS) bool xrefd = false; values[0] = 0; // Initialize RPN if no expression { int sp = 0; char op_stack[MAX_EVAL_OPER]; EvalOperator prev_op = EVOP_NONE; expression.trim_whitespace(); while (expression || exp_sp) { int value = 0; int16_t section = -1, index_section = -1; EvalOperator op = EVOP_NONE; strref subexp; if (!expression && exp_sp) { expression = expression_stack[--exp_sp]; op = EVOP_RPR; } else if (Merlin()) op = RPNToken_Merlin(expression, etx, prev_op, section, value); else op = RPNToken(expression, etx, prev_op, section, value, subexp); if (op == EVOP_ERR) return ERROR_UNEXPECTED_CHARACTER_IN_EXPRESSION; else if (op == EVOP_NRY) return STATUS_NOT_READY; else if (op == EVOP_EXP) { if (exp_sp >= MAX_EXPR_STACK) return ERROR_TOO_MANY_VALUES_IN_EXPRESSION; expression_stack[exp_sp++] = expression; expression = subexp; op = EVOP_LPR; } else if (op == EVOP_XRF) { xrefd = true; op = EVOP_VAL; } if (section >= 0) { for (int s = 0; s= EVOP_EQU && prev_op <= EVOP_GTE) || (prev_op==EVOP_HIB || prev_op==EVOP_LOB)) { if (op==EVOP_SUB) op = EVOP_NEG; else if (op == EVOP_ADD) skip = true; } if (op == EVOP_SUB && sp && prev_op == EVOP_SUB) sp--; else { while (sp && !skip) { EvalOperator p = (EvalOperator)op_stack[sp-1]; if (p==EVOP_LPR || op>p) break; ops[numOps++] = (char)p; sp--; } op_stack[sp++] = (char)op; } } // check for out of 
bounds or unexpected input if (numValues==MAX_EVAL_VALUES) return ERROR_TOO_MANY_VALUES_IN_EXPRESSION; else if (numOps==MAX_EVAL_OPER || sp==MAX_EVAL_OPER) return ERROR_TOO_MANY_OPERATORS_IN_EXPRESSION; prev_op = op; expression.skip_whitespace(); } while (sp) { sp--; ops[numOps++] = op_stack[sp]; } } // Check if dependent on XREF'd symbol if (xrefd) return STATUS_XREF_DEPENDENT; // processing the result RPN will put the completed expression into values[0]. // values is used as both the queue and the stack of values since reads/writes won't // exceed itself. { int valIdx = 0; int ri = 0; // RPN index (value) int prev_val = values[0]; int shift_bits = 0; // special case for relative reference to low byte / high byte int16_t section_counts[MAX_EVAL_SECTIONS][MAX_EVAL_VALUES] = { 0 }; for (int o = 0; o ri--; values[ri - 1] = values[ri - 1] > values[ri]; break; case EVOP_LT: // < ri--; values[ri - 1] = values[ri - 1] < values[ri]; break; case EVOP_GTE: // >= ri--; values[ri - 1] = values[ri - 1] >= values[ri]; break; case EVOP_LTE: // >= ri--; values[ri - 1] = values[ri - 1] <= values[ri]; break; case EVOP_ADD: // + ri--; for (int i = 0; i1) { ri--; for (int i = 0; i=1) values[ri-1] = -values[ri-1]; break; case EVOP_MUL: // * ri--; for (int i = 0; i> ri--; for (int i = 0; i>= values[ri]; break; case EVOP_LOB: // low byte if (ri) values[ri-1] &= 0xff; break; case EVOP_HIB: if (ri) { shift_bits = -8; values[ri - 1] = values[ri - 1] >> 8; } break; case EVOP_BAB: if (ri) { shift_bits = -16; values[ri - 1] = (values[ri - 1] >> 16); } break; default: return ERROR_EXPRESSION_OPERATION; break; } if (shift_bits==0 && ri) prev_val = values[ri-1]; } int section_index = -1; bool curr_relative = false; // If relative to any section unless specifically interested in a relative value then return not ready for (int i = 0; i=0) return STATUS_NOT_READY; else if (etx.relative_section==section_ids[i]) curr_relative = true; else if (etx.relative_section>=0) return STATUS_NOT_READY; 
section_index = i; } } result = values[0]; if (section_index>=0 && !curr_relative) { lastEvalSection = section_ids[section_index]; lastEvalValue = prev_val; lastEvalShift = (int8_t)shift_bits; return STATUS_RELATIVE_SECTION; } } return STATUS_OK; } // if an expression could not be evaluated, add it along with // the action to perform if it can be evaluated later. void Asm::AddLateEval(int target, int pc, int scope_pc, strref expression, strref source_file, LateEval::Type type) { LateEval le; le.address = pc; le.scope = scope_pc; le.scope_depth = scope_depth; le.target = target; le.section = (int16_t)(&CurrSection() - &allSections[0]); le.rept = contextStack.curr().repeat_total - contextStack.curr().repeat; le.file_ref = -1; // current or xdef'd le.label.clear(); le.expression = expression; le.source_file = source_file; le.type = type; lateEval.push_back(le); } void Asm::AddLateEval(strref label, int pc, int scope_pc, strref expression, LateEval::Type type) { LateEval le; le.address = pc; le.scope = scope_pc; le.scope_depth = scope_depth; le.target = -1; le.label = label; le.section = (int16_t)(&CurrSection() - &allSections[0]); le.rept = contextStack.curr().repeat_total - contextStack.curr().repeat; le.file_ref = -1; // current or xdef'd le.expression = expression; le.source_file.clear(); le.type = type; lateEval.push_back(le); } // When a label is defined or a scope ends check if there are // any related late label evaluators that can now be evaluated. 
StatusCode Asm::CheckLateEval(strref added_label, int scope_end, bool print_missing_reference_errors) { bool evaluated_label = true; strref new_labels[MAX_LABELS_EVAL_ALL]; int num_new_labels = 0; if (added_label) new_labels[num_new_labels++] = added_label; bool all = !added_label; while (evaluated_label) { evaluated_label = false; std::vector::iterator i = lateEval.begin(); while (i != lateEval.end()) { int value = 0; // check if this expression is related to the late change (new label or end of scope) bool check = all || num_new_labels==MAX_LABELS_EVAL_ALL; for (int l = 0; lexpression.find(new_labels[l]) >= 0; if (!check && scope_end>0) { int gt_pos = 0; while (gt_pos>=0 && !check) { gt_pos = i->expression.find_at('%', gt_pos); if (gt_pos>=0) { if (i->expression[gt_pos+1]=='%') gt_pos++; else check = true; gt_pos++; } } } if (check) { struct EvalContext etx(i->address, i->scope, scope_end, i->type == LateEval::LET_BRANCH ? SectionId() : -1, i->rept); etx.scope_depth = i->scope_depth; etx.file_ref = i->file_ref; StatusCode ret = EvalExpression(i->expression, etx, value); if (ret == STATUS_OK || ret==STATUS_RELATIVE_SECTION) { // Check if target section merged with another section int trg = i->target; int sec = i->section; if (i->type != LateEval::LET_LABEL) { if (allSections[sec].IsMergedSection()) { trg += allSections[sec].merged_offset; sec = allSections[sec].merged_section; } } bool resolved = true; switch (i->type) { case LateEval::LET_BYTE: if (ret==STATUS_RELATIVE_SECTION) { if (i->section<0) resolved = false; else { allSections[sec].AddReloc(lastEvalValue, trg, lastEvalSection, 1, lastEvalShift); value = 0; } } if (trg >= allSections[sec].size()) return ERROR_SECTION_TARGET_OFFSET_OUT_OF_RANGE; allSections[sec].SetByte(trg, value); break; case LateEval::LET_ABS_REF: if (ret==STATUS_RELATIVE_SECTION) { if (i->section<0) resolved = false; else { allSections[sec].AddReloc(lastEvalValue, trg, lastEvalSection, 2, lastEvalShift); value = 0; } } if ((trg+1) >= 
allSections[sec].size()) return ERROR_SECTION_TARGET_OFFSET_OUT_OF_RANGE; allSections[sec].SetWord(trg, value); break; case LateEval::LET_ABS_L_REF: if (ret==STATUS_RELATIVE_SECTION) { if (i->section<0) resolved = false; else { allSections[sec].AddReloc(lastEvalValue, trg, lastEvalSection, 3, lastEvalShift); value = 0; } } if ((trg+2) >= allSections[sec].size()) return ERROR_SECTION_TARGET_OFFSET_OUT_OF_RANGE; allSections[sec].SetTriple(trg, value); break; case LateEval::LET_ABS_4_REF: if (ret==STATUS_RELATIVE_SECTION) { if (i->section<0) resolved = false; else { allSections[sec].AddReloc(lastEvalValue, trg, lastEvalSection, 4, lastEvalShift); value = 0; } } if ((trg+3) >= allSections[sec].size()) return ERROR_SECTION_TARGET_OFFSET_OUT_OF_RANGE; allSections[sec].SetQuad(trg, value); break; case LateEval::LET_BRANCH: value -= i->address+1; if (value<-128 || value>127) { i = lateEval.erase(i); return ERROR_BRANCH_OUT_OF_RANGE; } if (trg >= allSections[sec].size()) return ERROR_SECTION_TARGET_OFFSET_OUT_OF_RANGE; allSections[sec].SetByte(trg, value); break; case LateEval::LET_BRANCH_16: value -= i->address+2; if (trg >= allSections[sec].size()) return ERROR_SECTION_TARGET_OFFSET_OUT_OF_RANGE; allSections[sec].SetWord(trg, value); break; case LateEval::LET_LABEL: { Label *label = GetLabel(i->label, i->file_ref); if (!label) return ERROR_LABEL_MISPLACED_INTERNAL; label->value = value; label->evaluated = true; label->section = ret==STATUS_RELATIVE_SECTION ? i->section : -1; if (num_new_labelslabel_name; evaluated_label = true; char f = i->label[0], l = i->label.get_last(); LabelAdded(label, f=='.' || f=='!' 
|| f=='@' || f==':' || l=='$'); break; } default: break; } if (resolved) i = lateEval.erase(i); } else { if (print_missing_reference_errors && ret!=STATUS_XREF_DEPENDENT) { PrintError(i->expression, ret); error_encountered = true; } ++i; } } else ++i; } all = false; added_label.clear(); } return STATUS_OK; } // // // LABELS // // // Get a label record if it exists Label *Asm::GetLabel(strref label) { uint32_t label_hash = label.fnv1a(); uint32_t index = FindLabelIndex(label_hash, labels.getKeys(), labels.count()); while (index < labels.count() && label_hash == labels.getKey(index)) { if (label.same_str(labels.getValue(index).label_name)) return labels.getValues() + index; index++; } return nullptr; } // Get a protected label record from a file if it exists Label *Asm::GetLabel(strref label, int file_ref) { if (file_ref>=0 && file_ref<(int)externals.size()) { ExtLabels &labs = externals[file_ref]; uint32_t label_hash = label.fnv1a(); uint32_t index = FindLabelIndex(label_hash, labs.labels.getKeys(), labs.labels.count()); while (index < labs.labels.count() && label_hash == labs.labels.getKey(index)) { if (label.same_str(labs.labels.getValue(index).label_name)) return labs.labels.getValues() + index; index++; } } return GetLabel(label); } // If exporting labels, append this label to the list void Asm::LabelAdded(Label *pLabel, bool local) { if (pLabel && pLabel->evaluated) { if (map.size() == map.capacity()) map.reserve(map.size() + 256); MapSymbol sym; sym.name = pLabel->label_name; sym.section = (int16_t)(pLabel->section); sym.value = pLabel->value; sym.local = local; pLabel->mapIndex = pLabel->evaluated ? 
-1 : (int)map.size(); map.push_back(sym); } } // Add a label entry Label* Asm::AddLabel(uint32_t hash) { uint32_t index = FindLabelIndex(hash, labels.getKeys(), labels.count()); labels.insert(index, hash); return labels.getValues() + index; } // mark a label as a local label void Asm::MarkLabelLocal(strref label, bool scope_reserve) { LocalLabelRecord rec; rec.label = label; rec.scope_depth = scope_depth; rec.scope_reserve = scope_reserve; localLabels.push_back(rec); } // find all local labels or up to given scope level and remove them StatusCode Asm::FlushLocalLabels(int scope_exit) { StatusCode status = STATUS_OK; // iterate from end of local label records and early out if the label scope is lower than the current. std::vector::iterator i = localLabels.end(); while (i!=localLabels.begin()) { --i; if (i->scope_depth < scope_depth) break; strref label = i->label; StatusCode this_status = CheckLateEval(label); if (this_status>FIRST_ERROR) status = this_status; if (!i->scope_reserve || i->scope_depth<=scope_exit) { uint32_t index = FindLabelIndex(label.fnv1a(), labels.getKeys(), labels.count()); while (indexscope_reserve) { if (LabelPool *pool = GetLabelPool(labels.getValue(index).pool_name)) { pool->Release(labels.getValue(index).value); break; } } labels.remove(index); break; } ++index; } i = localLabels.erase(i); } } return status; } // Get a label pool by name LabelPool* Asm::GetLabelPool(strref pool_name) { uint32_t pool_hash = pool_name.fnv1a(); uint32_t ins = FindLabelIndex(pool_hash, labelPools.getKeys(), labelPools.count()); while (ins < labelPools.count() && pool_hash == labelPools.getKey(ins)) { if (pool_name.same_str(labelPools.getValue(ins).pool_name)) { return &labelPools.getValue(ins); } ins++; } return nullptr; } // Add a label pool StatusCode Asm::AddLabelPool(strref name, strref args) { uint32_t pool_hash = name.fnv1a(); uint32_t ins = FindLabelIndex(pool_hash, labelPools.getKeys(), labelPools.count()); uint32_t index = ins; while (index < 
labelPools.count() && pool_hash == labelPools.getKey(index)) { if (name.same_str(labelPools.getValue(index).pool_name)) return ERROR_LABEL_POOL_REDECLARATION; index++; } // check that there is at least one valid address int ranges = 0; int num32 = 0; uint16_t aRng[256]; struct EvalContext etx; SetEvalCtxDefaults(etx); while (strref arg = args.split_token_trim(',')) { strref start = arg[0]=='(' ? arg.scoped_block_skip() : arg.split_token_trim('-'); int addr0 = 0, addr1 = 0; if (STATUS_OK != EvalExpression(start, etx, addr0)) return ERROR_POOL_RANGE_EXPRESSION_EVAL; if (STATUS_OK != EvalExpression(arg, etx, addr1)) return ERROR_POOL_RANGE_EXPRESSION_EVAL; if (addr1<=addr0 || addr0<0) return ERROR_POOL_RANGE_EXPRESSION_EVAL; aRng[ranges++] = (uint16_t)addr0; aRng[ranges++] = (uint16_t)addr1; num32 += (addr1-addr0+15)>>4; if (ranges >(MAX_POOL_RANGES*2) || num32 > ((MAX_POOL_BYTES+15)>>4)) return ERROR_POOL_RANGE_EXPRESSION_EVAL; } if (!ranges) return ERROR_POOL_RANGE_EXPRESSION_EVAL; LabelPool pool; pool.pool_name = name; pool.numRanges = (int16_t)(ranges>>1); pool.scopeDepth = (int16_t)scope_depth; memset(pool.usedMap, 0, sizeof(uint32_t) * num32); for (int r = 0; r 0) { label = type.split( sz ); ++type; switch (strref::tolower(type.get_first())) { case 'l': bytes = 4; break; case 't': bytes = 3; break; case 'd': case 'w': bytes = 2; break; } } if (GetLabel(label)) { return ERROR_POOL_LABEL_ALREADY_DEFINED; } uint32_t addr; StatusCode error = pool.Reserve(bytes, addr); if (error != STATUS_OK) return error; Label *pLabel = AddLabel(label.fnv1a()); pLabel->label_name = label; pLabel->pool_name = pool.pool_name; pLabel->evaluated = true; pLabel->section = -1; // pool labels are section-less pLabel->value = addr; pLabel->pc_relative = true; pLabel->constant = true; pLabel->external = false; pLabel->reference = false; if (label[ 0 ] == '.' || label[ 0 ] == '@' || label[ 0 ] == '!' 
|| label[ 0 ] == ':' || label.get_last() == '$') { MarkLabelLocal( label, true ); } LabelAdded(pLabel, !!pool.scopeDepth); return error; } // Request a label from a pool StatusCode LabelPool::Reserve(int numBytes, uint32_t &ret_addr) { uint32_t *map = usedMap; uint16_t *pRanges = ranges; for (int r = 0; r=a0 && sequence= a0) { if ((m & chk)==0) { sequence++; if (sequence == numBytes) break; } else sequence = 0; --addr; m <<= 2; } } if (sequence == numBytes) { uint32_t index = (a1-addr-numBytes); uint32_t *addr_map = range_map + (index>>4); uint32_t m = numBytes << (index << 1); for (int b = 0; b>= 30; addr_map++; } else { m = _m; } } ret_addr = addr; return STATUS_OK; } } return ERROR_OUT_OF_LABELS_IN_POOL; } // Release a label from a pool (at scope closure) StatusCode LabelPool::Release(uint32_t addr) { uint32_t *map = usedMap; uint16_t *pRanges = ranges; for (int r = 0; r=a0 && addr>4; index &= 0xf; uint32_t u = *map, m = 3 << (index << 1); uint32_t b = u & m, bytes = b >> (index << 1); if (bytes) { for (uint32_t f = 0; f>2; if (!_m) { m <<= 30; *map-- = u; } else { m = _m; } } *map = u; return STATUS_OK; } else return ERROR_INTERNAL_LABEL_POOL_ERROR; } else map += (a1-a0+15)>>4; } return STATUS_OK; } // Check if a label is marked as an xdef bool Asm::MatchXDEF(strref label) { uint32_t hash = label.fnv1a(); uint32_t pos = FindLabelIndex(hash, xdefs.getKeys(), xdefs.count()); while (pos < xdefs.count() && xdefs.getKey(pos) == hash) { if (label.same_str_case(xdefs.getValue(pos))) return true; ++pos; } return false; } // assignment of label (