//
//  asm6502.cpp
//
//
//  Created by Carl-Henrik Skårstedt on 9/23/15.
//
//
//  A simple 6502 assembler
//
//
// The MIT License (MIT)
//
// Copyright (c) 2015 Carl-Henrik Skårstedt
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software
// and associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Details, source and documentation at https://github.com/Sakrac/Asm6502.
//
// "struse.h" can be found at https://github.com/Sakrac/struse, only the header file is required.
//

#define _CRT_SECURE_NO_WARNINGS // Windows shenanigans
#define STRUSE_IMPLEMENTATION // include implementation of struse in this file
#include "struse.h" // https://github.com/Sakrac/struse/blob/master/struse.h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vector>

// if the number of resolved labels exceed this in one late eval then skip
// checking for relevance and just eval all unresolved expressions.
#define MAX_LABELS_EVAL_ALL 16 // Max number of nested scopes (within { and }) #define MAX_SCOPE_DEPTH 32 // Max number of nested conditional expressions #define MAX_CONDITIONAL_DEPTH 64 // The maximum complexity of expressions to be evaluated #define MAX_EVAL_VALUES 32 #define MAX_EVAL_OPER 64 // Max capacity of each label pool #define MAX_POOL_RANGES 4 #define MAX_POOL_BYTES 128 // Internal status and error type enum StatusCode { STATUS_OK, // everything is fine STATUS_NOT_READY, // label could not be evaluated at this time STATUS_NOT_STRUCT, // return is not a struct. ERROR_UNEXPECTED_CHARACTER_IN_EXPRESSION, ERROR_TOO_MANY_VALUES_IN_EXPRESSION, ERROR_TOO_MANY_OPERATORS_IN_EXPRESSION, ERROR_UNBALANCED_RIGHT_PARENTHESIS, ERROR_EXPRESSION_OPERATION, ERROR_EXPRESSION_MISSING_VALUES, ERROR_INSTRUCTION_NOT_ZP, ERROR_INVALID_ADDRESSING_MODE_FOR_BRANCH, ERROR_BRANCH_OUT_OF_RANGE, ERROR_LABEL_MISPLACED_INTERNAL, ERROR_BAD_ADDRESSING_MODE, ERROR_UNEXPECTED_CHARACTER_IN_ADDRESSING_MODE, ERROR_UNEXPECTED_LABEL_ASSIGMENT_FORMAT, ERROR_MODIFYING_CONST_LABEL, ERROR_OUT_OF_LABELS_IN_POOL, ERROR_INTERNAL_LABEL_POOL_ERROR, ERROR_POOL_RANGE_EXPRESSION_EVAL, ERROR_LABEL_POOL_REDECLARATION, ERROR_POOL_LABEL_ALREADY_DEFINED, ERROR_STRUCT_ALREADY_DEFINED, ERROR_REFERENCED_STRUCT_NOT_FOUND, ERROR_BAD_TYPE_FOR_DECLARE_CONSTANT, ERROR_REPT_COUNT_EXPRESSION, ERROR_STOP_PROCESSING_ON_HIGHER, // errors greater than this will stop execution ERROR_TARGET_ADDRESS_MUST_EVALUATE_IMMEDIATELY, ERROR_TOO_DEEP_SCOPE, ERROR_UNBALANCED_SCOPE_CLOSURE, ERROR_BAD_MACRO_FORMAT, ERROR_ALIGN_MUST_EVALUATE_IMMEDIATELY, ERROR_OUT_OF_MEMORY_FOR_MACRO_EXPANSION, ERROR_CONDITION_COULD_NOT_BE_RESOLVED, ERROR_ENDIF_WITHOUT_CONDITION, ERROR_ELSE_WITHOUT_IF, ERROR_STRUCT_CANT_BE_ASSEMBLED, ERROR_UNTERMINATED_CONDITION, ERROR_REPT_MISSING_SCOPE, STATUSCODE_COUNT }; // The following strings are in the same order as StatusCode const char *aStatusStrings[STATUSCODE_COUNT] = { "ok", "not ready", "name is not a struct", 
"Unexpected character in expression", "Too many values in expression", "Too many operators in expression", "Unbalanced right parenthesis in expression", "Expression operation", "Expression missing values", "Instruction can not be zero page", "Invalid addressing mode for branch instruction", "Branch out of range", "Internal label organization mishap", "Bad addressing mode", "Unexpected character in addressing mode", "Unexpected label assignment format", "Changing value of label that is constant", "Out of labels in pool", "Internal label pool release confusion", "Label pool range evaluation failed", "Label pool was redeclared within its scope", "Pool label already defined", "Struct already defined", "Referenced struct not found", "Declare constant type not recognized (dc.?)", "rept count expression could not be evaluated", "Errors after this point will stop execution", "Target address must evaluate immediately for this operation", "Scoping is too deep", "Unbalanced scope closure", "Unexpected macro formatting", "Align must evaluate immediately", "Out of memory for macro expansion", "Conditional could not be resolved", "#endif encountered outside conditional block", "#else or #elif outside conditional block", "Struct can not be assembled as is", "Conditional assembly (#if/#ifdef) was not terminated in file or macro", "rept is missing a scope ('{ ... 
}')", }; // Assembler directives enum AssemblerDirective { AD_ORG, // ORG: Assemble as if loaded at this address AD_LOAD, // LOAD: If applicable, instruct to load at this address AD_ALIGN, // ALIGN: Add to address to make it evenly divisible by this AD_MACRO, // MACRO: Create a macro AD_EVAL, // EVAL: Print expression to stdout during assemble AD_BYTES, // BYTES: Add 8 bit values to output AD_WORDS, // WORDS: Add 16 bit values to output AD_DC, // DC.B/DC.W: Declare constant (same as BYTES/WORDS) AD_TEXT, // TEXT: Add text to output AD_INCLUDE, // INCLUDE: Load and assemble another file at this address AD_INCBIN, // INCBIN: Load and directly insert another file at this address AD_CONST, // CONST: Prevent a label from mutating during assemble AD_LABEL, // LABEL: Create a mutable label (optional) AD_INCSYM, // INCSYM: Reference labels from another assemble AD_LABPOOL, // POOL: Create a pool of addresses to assign as labels dynamically AD_IF, // #IF: Conditional assembly follows based on expression AD_IFDEF, // #IFDEF: Conditional assembly follows based on label defined or not AD_ELSE, // #ELSE: Otherwise assembly AD_ELIF, // #ELIF: Otherwise conditional assembly follows AD_ENDIF, // #ENDIF: End a block of #IF/#IFDEF AD_STRUCT, // STRUCT: Declare a set of labels offset from a base address AD_REPT, // REPT: Repeat the assembly of the bracketed code a number of times AD_INCDIR, // INCDIR: Add a folder to search for include files }; // Operators are either instructions or directives enum OperationType { OT_NONE, OT_MNEMONIC, OT_DIRECTIVE }; // Opcode encoding typedef struct { unsigned int op_hash; unsigned char group; // group # unsigned char index; // ground index unsigned char type; // mnemonic or } OP_ID; // // 6502 instruction encoding according to this page // http://www.llx.com/~nparker/a2/opcodes.html // decoded instruction: // XXY10000 for branches // AAABBBCC for CC=00, 01, 10 // and some custom ops // enum AddressingMode { AM_REL_ZP_X, // 0 (zp,x) AM_ZP, // 1 zp 
AM_IMMEDIATE, // 2 #$hh AM_ABSOLUTE, // 3 $hhhh AM_REL_ZP_Y, // 4 (zp),y AM_ZP_X, // 5 zp,x AM_ABSOLUTE_Y, // 6 $hhhh,y AM_ABSOLUTE_X, // 7 $hhhh,x AM_RELATIVE, // 8 ($xxxx) AM_ACCUMULATOR, // 9 A AM_NONE, // 10 AM_INVALID, // 11 }; // How instruction argument is encoded enum CODE_ARG { CA_NONE, // single byte instruction CA_ONE_BYTE, // instruction carries one byte CA_TWO_BYTES, // instruction carries two bytes CA_BRANCH // instruction carries a relative address }; // opcode groups enum OP_GROUP { OPG_SUBROUT, OPG_CC01, OPG_CC10, OPG_STACK, OPG_BRANCH, OPG_FLAG, OPG_CC00, OPG_TRANS }; // opcode exception indices enum OP_INDICES { OPI_JSR = 1, OPI_LDX = 5, OPI_STX = 4, OPI_STA = 4, OPI_JMP = 1, }; #define RELATIVE_JMP_DELTA 0x20 // opcode names in groups (prefix by group size) const char aInstr[] = { "BRK,JSR,RTI,RTS\n" "ORA,AND,EOR,ADC,STA,LDA,CMP,SBC\n" "ASL,ROL,LSR,ROR,STX,LDX,DEC,INC\n" "PHP,PLP,PHA,PLA,DEY,TAY,INY,INX\n" "BPL,BMI,BVC,BVS,BCC,BCS,BNE,BEQ\n" "CLC,SEC,CLI,SEI,TYA,CLV,CLD,SED\n" "BIT,JMP,,STY,LDY,CPY,CPX\n" "TXA,TXS,TAX,TSX,DEX,,NOP" }; // group # + index => base opcode const unsigned char aMulAddGroup[][2] = { { 0x20,0x00 }, { 0x20,0x01 }, { 0x20,0x02 }, { 0x20,0x08 }, { 0x20,0x10 }, { 0x20,0x18 }, { 0x20,0x20 }, { 0x10,0x8a } }; char aCC00Modes[] = { AM_IMMEDIATE, AM_ZP, AM_INVALID, AM_ABSOLUTE, AM_INVALID, AM_ZP_X, AM_INVALID, AM_ABSOLUTE_X }; char aCC01Modes[] = { AM_REL_ZP_X, AM_ZP, AM_IMMEDIATE, AM_ABSOLUTE, AM_REL_ZP_Y, AM_ZP_X, AM_ABSOLUTE_X, AM_ABSOLUTE_Y }; char aCC10Modes[] = { AM_IMMEDIATE, AM_ZP, AM_NONE, AM_ABSOLUTE, AM_INVALID, AM_ZP_X, AM_INVALID, AM_ABSOLUTE_X }; unsigned char CC00ModeAdd[] = { 0xff, 4, 0, 12, 0xff, 20, 0xff, 28 }; unsigned char CC00Mask[] = { 0x0a, 0x08, 0x08, 0x2a, 0xae, 0x0e, 0x0e }; unsigned char CC10ModeAdd[] = { 0xff, 4, 0, 12, 0xff, 20, 0xff, 28 }; unsigned char CC10Mask[] = { 0xaa, 0xaa, 0xaa, 0xaa, 0x2a, 0xae, 0xaa, 0xaa }; // hardtexted strings static const strref c_comment("//"); static const strref 
word_char_range("!0-9a-zA-Z_@$!#"); static const strref label_char_range("!0-9a-zA-Z_@$!."); static const strref keyword_equ("equ"); static const strref str_label("label"); static const strref str_const("const"); static const strref struct_byte("byte"); static const strref struct_word("word"); // Binary search over an array of unsigned integers, may contain multiple instances of same key unsigned int FindLabelIndex(unsigned int hash, unsigned int *table, unsigned int count) { unsigned int max = count; unsigned int first = 0; while (count!=first) { int index = (first+count)/2; unsigned int read = table[index]; if (hash==read) { while (index && table[index-1]==hash) index--; // guarantee first identical index returned on match return index; } else if (hash>read) first = index+1; else count = index; } if (counthash) count--; return count; } // // // ASSEMBLER STATE // // // pairArray is basically two vectors sharing a size without constructors on growth or insert template class pairArray { protected: H *keys; V *values; unsigned int _count; unsigned int _capacity; public: pairArray() : keys(nullptr), values(nullptr), _count(0), _capacity(0) {} void reserve(unsigned int size) { if (size>_capacity) { H *new_keys = (H*)malloc(sizeof(H) * size); if (!new_keys) { return; } V *new_values = (V*)malloc(sizeof(V) * size); if (!new_values) { free(new_keys); return; } if (keys && values) { memcpy(new_keys, keys, sizeof(H) * _count); memcpy(new_values, values, sizeof(V) * _count); free(keys); free(values); } keys = new_keys; values = new_values; _capacity = size; } } bool insert(unsigned int pos) { if (pos>_count) return false; if (_count==_capacity) reserve(_capacity+64); if (pos<_count) { memmove(keys+pos+1, keys+pos, sizeof(H) * (_count-pos)); memmove(values+pos+1, values+pos, sizeof(V) * (_count-pos)); } memset(keys+pos, 0, sizeof(H)); memset(values+pos, 0, sizeof(V)); _count++; return true; } bool insert(unsigned int pos, H key) { if (insert(pos)) { keys[pos] = key; return 
true; } return false; } void remove(unsigned int pos) { if (pos<_count) { _count--; if (pos<_count) { memmove(keys+pos, keys+pos+1, sizeof(H) * (_count-pos)); memmove(values+pos, values+pos+1, sizeof(V) * (_count-pos)); } } } H* getKeys() { return keys; } H& getKey(unsigned int pos) { return keys[pos]; } V* getValues() { return values; } V& getValue(unsigned int pos) { return values[pos]; } unsigned int count() const { return _count; } unsigned int capacity() const { return _capacity; } void clear() { if (keys!=nullptr) free(keys); keys = nullptr; if (values!=nullptr) free(values); values = nullptr; _capacity = 0; _count = 0; } ~pairArray() { clear(); } }; // Data related to a label typedef struct { public: strref label_name; // the name of this label strref expression; // the expression of this label (optional, if not possible to evaluate yet) int value; bool evaluated; // a value may not yet be evaluated bool zero_page; // addresses known to be zero page bool pc_relative; // this is an inline label describing a point in the code bool constant; // the value of this label can not change } Label; // If an expression can't be evaluated immediately, this is required // to reconstruct the result when it can be. 
typedef struct { enum Type { // When an expression is evaluated late, determine how to encode the result LET_LABEL, // this evaluation applies to a label and not memory LET_ABS_REF, // calculate an absolute address and store at 0, +1 LET_BRANCH, // calculate a branch offset and store at this address LET_BYTE, // calculate a byte and store at this address }; unsigned char* target; // offset into output buffer int address; // current pc int scope; // scope pc strref label; // valid if this is not a target but another label strref expression; strref source_file; Type type; } LateEval; // A macro is a text reference to where it was defined typedef struct { strref name; strref macro; strref source_name; // source file name (error output) strref source_file; // entire source file (req. for line #) } Macro; // All local labels are removed when a global label is defined but some when a scope ends typedef struct { strref label; int scope_depth; bool scope_reserve; // not released for global label, only scope } LocalLabelRecord; // Label pools allows C like stack frame label allocation typedef struct { strref pool_name; short numRanges; // normally 1 range, support multiple for ease of use short scopeDepth; // Required for scope closure cleanup unsigned short ranges[MAX_POOL_RANGES*2]; // 2 shorts per range unsigned int usedMap[(MAX_POOL_BYTES+15)>>4]; // 2 bits per byte to store byte count of label StatusCode Reserve(int numBytes, unsigned int &addr); StatusCode Release(unsigned int addr); } LabelPool; // One member of a label struct struct MemberOffset { unsigned short offset; unsigned int name_hash; strref name; strref sub_struct; }; // Label struct typedef struct { strref name; unsigned short first_member; unsigned short numMembers; unsigned short size; } LabelStruct; // Source context is current file (include file, etc.) or current macro. typedef struct { strref source_name; // source file name (error output) strref source_file; // entire source file (req. 
for line #) strref code_segment; // the segment of the file for this context strref read_source; // current position/length in source file strref next_source; // next position/length in source file int repeat; // how many times to repeat this code segment void restart() { read_source = code_segment; } bool complete() { repeat--; return repeat <= 0; } } SourceContext; // Context stack is a stack of currently processing text class ContextStack { private: std::vector stack; SourceContext *currContext; public: ContextStack() : currContext(nullptr) { stack.reserve(32); } SourceContext& curr() { return *currContext; } void push(strref src_name, strref src_file, strref code_seg, int rept=1) { if (currContext) currContext->read_source = currContext->next_source; SourceContext context; context.source_name = src_name; context.source_file = src_file; context.code_segment = code_seg; context.read_source = code_seg; context.next_source = code_seg; context.repeat = rept; stack.push_back(context); currContext = &stack[stack.size()-1]; } void pop() { stack.pop_back(); currContext = stack.size() ? 
&stack[stack.size()-1] : nullptr; } bool has_work() { return currContext!=nullptr; } }; // The state of the assembler class Asm { public: pairArray labels; pairArray macros; pairArray labelPools; pairArray labelStructs; std::vector lateEval; std::vector localLabels; std::vector loadedData; // free when assembler is completed std::vector structMembers; // labelStructs refer to sets of structMembers std::vector includePaths; strovl symbols; // for building a symbol output file // context for macros / include files ContextStack contextStack; // target output memory unsigned char *output, *curr; size_t output_capacity; unsigned int address; unsigned int load_address; int scope_address[MAX_SCOPE_DEPTH]; int scope_depth; int conditional_depth; char conditional_nesting[MAX_CONDITIONAL_DEPTH]; bool conditional_consumed[MAX_CONDITIONAL_DEPTH]; bool set_load_address; bool symbol_export, last_label_local; bool errorEncountered; // Convert source to binary void Assemble(strref source, strref filename); // Clean up memory allocations, reset assembler state void Cleanup(); // Make sure there is room to write more code void CheckOutputCapacity(unsigned int addSize); // Macro management StatusCode AddMacro(strref macro, strref source_name, strref source_file, strref &left); StatusCode BuildMacro(Macro &m, strref arg_list); // Structs StatusCode BuildStruct(strref name, strref declaration); StatusCode EvalStruct(strref name, int &value); // Calculate a value based on an expression. 
StatusCode EvalExpression(strref expression, int pc, int scope_pc, int scope_end_pc, int &result); // Access labels Label* GetLabel(strref label); Label* AddLabel(unsigned int hash); StatusCode AssignLabel(strref label, strref line, bool make_constant = false); StatusCode AddressLabel(strref label); void LabelAdded(Label *pLabel, bool local=false); void IncludeSymbols(strref line); // Manage locals void MarkLabelLocal(strref label, bool scope_label = false); void FlushLocalLabels(int scope_exit = -1); // Label pools LabelPool* GetLabelPool(strref pool_name); StatusCode AddLabelPool(strref name, strref args); StatusCode AssignPoolLabel(LabelPool &pool, strref args); void FlushLabelPools(int scope_exit); // Late expression evaluation void AddLateEval(int pc, int scope_pc, unsigned char *target, strref expression, strref source_file, LateEval::Type type); void AddLateEval(strref label, int pc, int scope_pc, strref expression, LateEval::Type type); StatusCode CheckLateEval(strref added_label=strref(), int scope_end = -1); // Assembler steps StatusCode ApplyDirective(AssemblerDirective dir, strref line, strref source_file); AddressingMode GetAddressMode(strref line, bool flipXY, StatusCode &error, strref &expression); StatusCode AddOpcode(strref line, int group, int index, strref source_file); StatusCode BuildLine(OP_ID *pInstr, int numInstructions, strref line); StatusCode BuildSegment(OP_ID *pInstr, int numInstructions); // Display error in stderr void PrintError(strref line, StatusCode error); // Conditional Status bool ConditionalAsm(); // Assembly is currently enabled bool NewConditional(); // Start a new conditional block void CloseConditional(); // Close a conditional block void CheckConditionalDepth(); // Check if this conditional will nest the assembly (a conditional is already consumed) void ConsumeConditional(); // This conditional block is going to be assembled, mark it as consumed bool ConditionalConsumed(); // Has a block of this conditional already been 
assembled? void SetConditional(); // This conditional block is not going to be assembled so mark that it is nesting bool ConditionalAvail(); // Returns true if this conditional can be consumed void ConditionalElse(); // Conditional else that does not enable block void EnableConditional(bool enable); // This conditional block is enabled and the prior wasn't // Conditional statement evaluation (A==B? A?) StatusCode EvalStatement(strref line, bool &result); // Add include folder void AddIncludeFolder(strref path); char* LoadText(strref filename, size_t &size); char* LoadBinary(strref filename, size_t &size); // constructor Asm() : output(nullptr) { Cleanup(); localLabels.reserve(256); loadedData.reserve(16); lateEval.reserve(64); } }; // Clean up work allocations void Asm::Cleanup() { for (std::vector::iterator i = loadedData.begin(); i!=loadedData.end(); ++i) { char *data = *i; free(data); } if (symbols.get()) { free(symbols.charstr()); symbols.set_overlay(nullptr,0); } labelPools.clear(); loadedData.clear(); labels.clear(); macros.clear(); if (output) free(output); output = nullptr; curr = nullptr; output_capacity = 0; address = 0x1000; load_address = 0x1000; scope_depth = 0; conditional_depth = 0; conditional_nesting[0] = 0; conditional_consumed[0] = false; set_load_address = false; output_capacity = false; symbol_export = false; last_label_local = false; errorEncountered = false; } // Read in text data (main source, include, etc.) 
char* Asm::LoadText(strref filename, size_t &size) { strown<512> file(filename); std::vector::iterator i = includePaths.begin(); for(;;) { if (FILE *f = fopen(file.c_str(), "r")) { fseek(f, 0, SEEK_END); size_t _size = ftell(f); fseek(f, 0, SEEK_SET); if (char *buf = (char*)calloc(_size, 1)) { fread(buf, 1, _size, f); fclose(f); size = _size; return buf; } fclose(f); } if (i==includePaths.end()) break; file.copy(*i); if (file.get_last()!='/' && file.get_last()!='\\') file.append('/'); file.append(filename); ++i; } size = 0; return nullptr; } // Read in binary data (incbin) char* Asm::LoadBinary(strref filename, size_t &size) { strown<512> file(filename); std::vector::iterator i = includePaths.begin(); for(;;) { if (FILE *f = fopen(file.c_str(), "rb")) { fseek(f, 0, SEEK_END); size_t _size = ftell(f); fseek(f, 0, SEEK_SET); if (char *buf = (char*)malloc(_size)) { fread(buf, _size, 1, f); fclose(f); size = _size; return buf; } fclose(f); } if (i==includePaths.end()) break; file.copy(*i); if (file.get_last()!='/' && file.get_last()!='\\') file.append('/'); file.append(filename); ++i; } size = 0; return nullptr; } // Make sure there is room to assemble in void Asm::CheckOutputCapacity(unsigned int addSize) { size_t currSize = curr - output; if ((addSize + currSize) >= output_capacity) { size_t newSize = currSize * 2; if (newSize < 64*1024) newSize = 64*1024; if ((addSize+currSize) > newSize) newSize += newSize; unsigned char *new_output = (unsigned char*)malloc(newSize); curr = new_output + (curr-output); free(output); output = new_output; output_capacity = newSize; } } // // // MACROS // // // add a custom macro StatusCode Asm::AddMacro(strref macro, strref source_name, strref source_file, strref &left) { // name(optional params) { actual macro } strref name = macro.split_label(); macro.skip_whitespace(); if (macro[0]!='(' && macro[0]!='{') return ERROR_BAD_MACRO_FORMAT; unsigned int hash = name.fnv1a(); unsigned int ins = FindLabelIndex(hash, macros.getKeys(), 
macros.count()); Macro *pMacro = nullptr; while (ins < macros.count() && macros.getKey(ins)==hash) { if (name.same_str_case(macros.getValue(ins).name)) { pMacro = macros.getValues() + ins; break; } ++ins; } if (!pMacro) { macros.insert(ins, hash); pMacro = macros.getValues() + ins; } pMacro->name = name; int pos_bracket = macro.find('{'); if (pos_bracket < 0) { pMacro->macro = strref(); return ERROR_BAD_MACRO_FORMAT; } strref source = macro + pos_bracket; strref macro_body = source.scoped_block_skip(); pMacro->macro = strref(macro.get(), pos_bracket + macro_body.get_len() + 2); pMacro->source_name = source_name; pMacro->source_file = source_file; source.skip_whitespace(); left = source; return STATUS_OK; } // Compile in a macro StatusCode Asm::BuildMacro(Macro &m, strref arg_list) { strref macro_src = m.macro; strref params = macro_src[0]=='(' ? macro_src.scoped_block_skip() : strref(); params.trim_whitespace(); arg_list.trim_whitespace(); macro_src.skip_whitespace(); if (params) { arg_list = arg_list.scoped_block_skip(); strref pchk = params; strref arg = arg_list; int dSize = 0; while (strref param = pchk.split_token_trim(',')) { strref a = arg.split_token_trim(','); if (param.get_len() < a.get_len()) { int count = macro_src.substr_case_count(param); dSize += count * ((int)a.get_len() - (int)param.get_len()); } } int mac_size = macro_src.get_len() + dSize + 32; if (char *buffer = (char*)malloc(mac_size)) { loadedData.push_back(buffer); strovl macexp(buffer, mac_size); macexp.copy(macro_src); while (strref param = params.split_token_trim(',')) { strref a = arg_list.split_token_trim(','); macexp.replace(param, a); } contextStack.push(m.source_name, macexp.get_strref(), macexp.get_strref()); FlushLocalLabels(); return STATUS_OK; } else return ERROR_OUT_OF_MEMORY_FOR_MACRO_EXPANSION; } contextStack.push(m.source_name, m.source_file, macro_src); FlushLocalLabels(); return STATUS_OK; } // // // STRUCTS AND ENUMS // // StatusCode Asm::BuildStruct(strref name, strref 
declaration) { unsigned int hash = name.fnv1a(); unsigned int ins = FindLabelIndex(hash, labelStructs.getKeys(), labelStructs.count()); LabelStruct *pStruct = nullptr; while (ins < labelStructs.count() && labelStructs.getKey(ins)==hash) { if (name.same_str_case(labelStructs.getValue(ins).name)) { pStruct = labelStructs.getValues() + ins; break; } ++ins; } if (pStruct) return ERROR_STRUCT_ALREADY_DEFINED; labelStructs.insert(ins, hash); pStruct = labelStructs.getValues() + ins; pStruct->name = name; pStruct->first_member = (unsigned short)structMembers.size(); unsigned int byte_hash = struct_byte.fnv1a(); unsigned int word_hash = struct_word.fnv1a(); unsigned short size = 0; unsigned short member_count = 0; while (strref line = declaration.line()) { line.trim_whitespace(); strref type = line.split_label(); line.skip_whitespace(); unsigned int type_hash = type.fnv1a(); unsigned short type_size = 0; LabelStruct *pSubStruct = nullptr; if (type_hash==byte_hash && struct_byte.same_str_case(type)) type_size = 1; else if (type_hash==word_hash && struct_word.same_str_case(type)) type_size = 2; else { unsigned int index = FindLabelIndex(type_hash, labelStructs.getKeys(), labelStructs.count()); while (index < labelStructs.count() && labelStructs.getKey(index)==type_hash) { if (type.same_str_case(labelStructs.getValue(index).name)) { pSubStruct = labelStructs.getValues() + index; break; } ++index; } if (!pSubStruct) { labelStructs.remove(ins); return ERROR_REFERENCED_STRUCT_NOT_FOUND; } type_size = pSubStruct->size; } // add the new member, don't grow vectors one at a time. if (structMembers.size() == structMembers.capacity()) structMembers.reserve(structMembers.size() + 64); struct MemberOffset member; member.offset = size; member.name = line.get_label(); member.name_hash = member.name.fnv1a(); member.sub_struct = pSubStruct ? 
pSubStruct->name : strref(); structMembers.push_back(member); size += type_size; member_count++; } pStruct->numMembers = member_count; pStruct->size = size; return STATUS_OK; } // Evaluate a struct offset as if it was a label StatusCode Asm::EvalStruct(strref name, int &value) { LabelStruct *pStruct = nullptr; unsigned short offset = 0; while (strref struct_seg = name.split_token('.')) { strref sub_struct = struct_seg; unsigned int seg_hash = struct_seg.fnv1a(); if (pStruct) { struct MemberOffset *member = &structMembers[pStruct->first_member]; bool found = false; for (int i=0; inumMembers; i++) { if (member->name_hash == seg_hash && member->name.same_str_case(struct_seg)) { offset += member->offset; sub_struct = member->sub_struct; found = true; break; } ++member; } if (!found) return ERROR_REFERENCED_STRUCT_NOT_FOUND; } if (sub_struct) { unsigned int hash = sub_struct.fnv1a(); unsigned int index = FindLabelIndex(hash, labelStructs.getKeys(), labelStructs.count()); while (index < labelStructs.count() && labelStructs.getKey(index)==hash) { if (sub_struct.same_str_case(labelStructs.getValue(index).name)) { pStruct = labelStructs.getValues() + index; break; } ++index; } } else if (name) return STATUS_NOT_STRUCT; } if (pStruct == nullptr) return STATUS_NOT_STRUCT; value = offset; return STATUS_OK; } // // // EXPRESSIONS AND LATE EVALUATION // // // These are expression tokens in order of precedence (last is highest precedence) enum EvalOperator { EVOP_NONE, EVOP_VAL, // value => read from value queue EVOP_LPR, // left parenthesis EVOP_RPR, // right parenthesis EVOP_ADD, // + EVOP_SUB, // - EVOP_MUL, // * (note: if not preceded by value or right paren this is current PC) EVOP_DIV, // / EVOP_AND, // & EVOP_OR, // | EVOP_EOR, // ^ EVOP_SHL, // << EVOP_SHR, // >> EVOP_LOB, // low byte of 16 bit value EVOP_HIB, // high byte of 16 bit value }; // // EvalExpression // Uses the Shunting Yard algorithm to convert to RPN first // which makes the actual calculation trivial and 
avoids recursion. // https://en.wikipedia.org/wiki/Shunting-yard_algorithm // // Return: // STATUS_OK means value is completely evaluated // STATUS_NOT_READY means value could not be evaluated right now // ERROR_* means there is an error in the expression // StatusCode Asm::EvalExpression(strref expression, int pc, int scope_pc, int scope_end_pc, int &result) { int sp = 0; int numValues = 0; int numOps = 0; char op_stack[MAX_EVAL_OPER]; char ops[MAX_EVAL_OPER]; // RPN expression int values[MAX_EVAL_VALUES]; // RPN values (in order of RPN EVOP_VAL operations) values[0] = 0; // Initialize RPN if no expression EvalOperator prev_op = EVOP_NONE; while (expression) { int value = 0; expression.skip_whitespace(); // Read a token from the expression (op) EvalOperator op = EVOP_NONE; char c = expression.get_first(); switch (c) { case '$': ++expression; value = expression.ahextoui_skip(); op = EVOP_VAL; break; case '-': ++expression; op = EVOP_SUB; break; case '+': ++expression; op = EVOP_ADD; break; case '*': // asterisk means both multiply and current PC, disambiguate! if (prev_op==EVOP_VAL || prev_op==EVOP_RPR) op = EVOP_MUL; else { op = EVOP_VAL; value = pc; } ++expression; break; case '/': ++expression; op = EVOP_DIV; break; case '>': if (expression.get_len()>=2 && expression[1]=='>') { expression += 2; op = EVOP_SHR; } else { ++expression; op = EVOP_HIB; } break; case '<': if (expression.get_len()>=2 && expression[1]=='<') { expression += 2; op = EVOP_SHL; } else { ++expression; op = EVOP_LOB; } break; case '%': // % means both binary and scope closure, disambiguate! 
if (expression[1]=='0' || expression[1]=='1') { ++expression; value = expression.abinarytoui_skip(); op = EVOP_VAL; break; } if (scope_end_pc<0) return STATUS_NOT_READY; ++expression; op = EVOP_VAL; value = scope_end_pc; break; case '|': ++expression; op = EVOP_OR; break; case '&': ++expression; op = EVOP_AND; break; case '(': ++expression; op = EVOP_LPR; break; case ')': ++expression; op = EVOP_RPR; break; default: { if (c=='!' && !(expression+1).len_label()) { if (scope_pc<0) // ! by itself is current scope, !+label char is a local label return STATUS_NOT_READY; ++expression; op = EVOP_VAL; value = scope_pc; break; } else if (strref::is_number(c)) { value = expression.atoi_skip(); op = EVOP_VAL; } else if (c=='!' || strref::is_valid_label(c)) { strref label = expression.split_range_trim(label_char_range);//.split_label(); Label *pValue = GetLabel(label); if (!pValue) { StatusCode ret = EvalStruct(label, value); if (ret == STATUS_OK) { op = EVOP_VAL; break; } else if (ret != STATUS_NOT_STRUCT) return ret; // partial struct } if (!pValue || !pValue->evaluated) // this label could not be found (yet) return STATUS_NOT_READY; value = pValue->value; op = EVOP_VAL; } else return ERROR_UNEXPECTED_CHARACTER_IN_EXPRESSION; break; } } // this is the body of the shunting yard algorithm if (op == EVOP_VAL) { values[numValues++] = value; ops[numOps++] = op; } else if (op == EVOP_LPR) { op_stack[sp++] = op; } else if (op == EVOP_RPR) { while (sp && op_stack[sp-1]!=EVOP_LPR) { sp--; ops[numOps++] = op_stack[sp]; } // check that there actually was a left parenthesis if (!sp || op_stack[sp-1]!=EVOP_LPR) return ERROR_UNBALANCED_RIGHT_PARENTHESIS; sp--; // skip open paren } else { while (sp) { EvalOperator p = (EvalOperator)op_stack[sp-1]; if (p==EVOP_LPR || op>p) break; ops[numOps++] = p; sp--; } op_stack[sp++] = op; } // check for out of bounds or unexpected input if (numValues==MAX_EVAL_VALUES) return ERROR_TOO_MANY_VALUES_IN_EXPRESSION; else if (numOps==MAX_EVAL_OPER || 
sp==MAX_EVAL_OPER) return ERROR_TOO_MANY_OPERATORS_IN_EXPRESSION; prev_op = op; } while (sp) { sp--; ops[numOps++] = op_stack[sp]; } // processing the result RPN will put the completed expression into values[0]. // values is used as both the queue and the stack of values since reads/writes won't // exceed itself. int valIdx = 0; for (int o = 0; o> sp--; values[sp-1] >>= values[sp]; break; case EVOP_LOB: // low byte values[sp-1] &= 0xff; break; case EVOP_HIB: values[sp-1] = (values[sp-1]>>8)&0xff; break; default: return ERROR_EXPRESSION_OPERATION; break; } } result = values[0]; return STATUS_OK; } // if an expression could not be evaluated, add it along with // the action to perform if it can be evaluated later. void Asm::AddLateEval(int pc, int scope_pc, unsigned char *target, strref expression, strref source_file, LateEval::Type type) { LateEval le; le.address = pc; le.scope = scope_pc; le.target = target; le.label.clear(); le.expression = expression; le.source_file = source_file; le.type = type; lateEval.push_back(le); } void Asm::AddLateEval(strref label, int pc, int scope_pc, strref expression, LateEval::Type type) { LateEval le; le.address = pc; le.scope = scope_pc; le.target = 0; le.label = label; le.expression = expression; le.source_file.clear(); le.type = type; lateEval.push_back(le); } // When a label is defined or a scope ends check if there are // any related late label evaluators that can now be evaluated. 
StatusCode Asm::CheckLateEval(strref added_label, int scope_end) { std::vector::iterator i = lateEval.begin(); bool evaluated_label = true; strref new_labels[MAX_LABELS_EVAL_ALL]; int num_new_labels = 0; if (added_label) new_labels[num_new_labels++] = added_label; while (evaluated_label) { evaluated_label = false; while (i != lateEval.end()) { int value = 0; // check if this expression is related to the late change (new label or end of scope) bool check = num_new_labels==MAX_LABELS_EVAL_ALL; for (int l=0; lexpression.find(new_labels[l]) >= 0; if (!check && scope_end>0) { int gt_pos = 0; while (gt_pos>=0 && !check) { gt_pos = i->expression.find_at('%', gt_pos); if (gt_pos>=0) { if (i->expression[gt_pos+1]=='%') gt_pos++; else check = true; gt_pos++; } } } if (check) { int ret = EvalExpression(i->expression, i->address, i->scope, scope_end, value); if (ret == STATUS_OK) { switch (i->type) { case LateEval::LET_BRANCH: value -= i->address; if (value<-128 || value>127) return ERROR_BRANCH_OUT_OF_RANGE; *i->target = (unsigned char)value; break; case LateEval::LET_BYTE: i->target[0] = value&0xff; break; case LateEval::LET_ABS_REF: i->target[0] = value&0xff; i->target[1] = (value>>8)&0xff; break; case LateEval::LET_LABEL: { Label *label = GetLabel(i->label); if (!label) return ERROR_LABEL_MISPLACED_INTERNAL; label->value = value; label->evaluated = true; if (num_new_labelslabel_name; evaluated_label = true; char f = i->label[0], l = i->label.get_last(); LabelAdded(label, f=='.' || f=='!' 
|| f=='@' || l=='$'); break; } default: break; } i = lateEval.erase(i); } else ++i; } else ++i; } added_label.clear(); } return STATUS_OK; } // // // LABELS // // // Get a labelc record if it exists Label *Asm::GetLabel(strref label) { unsigned int label_hash = label.fnv1a(); unsigned int index = FindLabelIndex(label_hash, labels.getKeys(), labels.count()); while (index < labels.count() && label_hash == labels.getKey(index)) { if (label.same_str(labels.getValue(index).label_name)) return labels.getValues() + index; index++; } return nullptr; } // If exporting labels, append this label to the list void Asm::LabelAdded(Label *pLabel, bool local) { if (pLabel && pLabel->evaluated && symbol_export) { int space = 1 + str_label.get_len() + 1 + pLabel->label_name.get_len() + 1 + 9 + 2; if ((symbols.get_len()+space) > symbols.cap()) { strl_t new_size = ((symbols.get_len()+space)+8*1024); if (char *new_charstr = (char*)malloc(new_size)) { if (symbols.charstr()) { memcpy(new_charstr, symbols.charstr(), symbols.get_len()); free(symbols.charstr()); } symbols.set_overlay(new_charstr, new_size, symbols.get_len()); } } if (local && !last_label_local) symbols.append("{\n"); else if (!local && last_label_local) symbols.append("}\n"); symbols.append(local ? " ." : "."); symbols.append(pLabel->constant ? 
str_const : str_label); symbols.append(' '); symbols.append(pLabel->label_name); symbols.sprintf_append("=$%04x\n", pLabel->value); last_label_local = local; } } // Add a label entry Label* Asm::AddLabel(unsigned int hash) { unsigned int index = FindLabelIndex(hash, labels.getKeys(), labels.count()); labels.insert(index, hash); return labels.getValues() + index; } // mark a label as a local label void Asm::MarkLabelLocal(strref label, bool scope_reserve) { LocalLabelRecord rec; rec.label = label; rec.scope_depth = scope_depth; rec.scope_reserve = scope_reserve; localLabels.push_back(rec); } // find all local labels or up to given scope level and remove them void Asm::FlushLocalLabels(int scope_exit) { // iterate from end of local label records and early out if the label scope is lower than the current. std::vector::iterator i = localLabels.end(); while (i!=localLabels.begin()) { --i; if (i->scope_depth < scope_depth) break; strref label = i->label; if (!i->scope_reserve || i->scope_depth<=scope_exit) { unsigned int index = FindLabelIndex(label.fnv1a(), labels.getKeys(), labels.count()); while (indexscope_reserve) { if (LabelPool *pool = GetLabelPool(labels.getValue(index).expression)) { pool->Release(labels.getValue(index).value); break; } } labels.remove(index); break; } ++index; } i = localLabels.erase(i); } } } // Get a label pool by name LabelPool* Asm::GetLabelPool(strref pool_name) { unsigned int pool_hash = pool_name.fnv1a(); unsigned int ins = FindLabelIndex(pool_hash, labelPools.getKeys(), labelPools.count()); while (ins < labelPools.count() && pool_hash == labelPools.getKey(ins)) { if (pool_name.same_str(labelPools.getValue(ins).pool_name)) { return &labelPools.getValue(ins); } ins++; } return nullptr; } // When going out of scope, label pools are deleted. 
// scope_exit: scope level being left; the surviving ">= scope_exit" test
// decides which pools are removed.
void Asm::FlushLabelPools(int scope_exit)
{
    unsigned int i = 0;
    // NOTE(review): the loop condition below is damaged in this copy of the
    // file (the text between a '<' and a later '>' is missing). Presumably it
    // iterated over labelPools and compared each pool's scope depth with
    // scope_exit - restore it from a pristine copy before building.
    while (i= scope_exit)
            labelPools.remove(i);
        else
            ++i;
    }
}

// Add a label pool
// name: name of the new pool; ERROR_LABEL_POOL_REDECLARATION is returned if a
//       pool with the same hash and name is already stored
// args: comma separated entries, each providing a start and an end expression
//       that must both evaluate right away
// Every other failure reported by the visible code is
// ERROR_POOL_RANGE_EXPRESSION_EVAL.
StatusCode Asm::AddLabelPool(strref name, strref args)
{
    unsigned int pool_hash = name.fnv1a();
    unsigned int ins = FindLabelIndex(pool_hash, labelPools.getKeys(), labelPools.count());
    unsigned int index = ins;
    // several pools may share a hash, compare names while the key still matches
    while (index < labelPools.count() && pool_hash == labelPools.getKey(index)) {
        if (name.same_str(labelPools.getValue(index).pool_name))
            return ERROR_LABEL_POOL_REDECLARATION;
        index++;
    }

    // check that there is at least one valid address
    int ranges = 0;
    int num32 = 0;
    unsigned short aRng[256];
    while (strref arg = args.split_token_trim(',')) {
        // start comes from a leading parenthesized block or from the text up to '-';
        // arg itself is then evaluated as the end address (presumably the split
        // calls consume the start portion from arg - confirm in struse.h)
        strref start = arg[0]=='(' ? arg.scoped_block_skip() : arg.split_token_trim('-');
        int addr0 = 0, addr1 = 0;
        if (STATUS_OK != EvalExpression(start, address, scope_address[scope_depth], -1, addr0))
            return ERROR_POOL_RANGE_EXPRESSION_EVAL;
        if (STATUS_OK != EvalExpression(arg, address, scope_address[scope_depth], -1, addr1))
            return ERROR_POOL_RANGE_EXPRESSION_EVAL;
        // the end must lie above the start and the start must not be negative
        if (addr1<=addr0 || addr0<0)
            return ERROR_POOL_RANGE_EXPRESSION_EVAL;

        // ranges are stored as consecutive start/end pairs
        aRng[ranges++] = addr0;
        aRng[ranges++] = addr1;
        // one map word is counted per 16 addresses, rounded up
        num32 += (addr1-addr0+15)>>4;

        if (ranges >(MAX_POOL_RANGES*2) || num32 > ((MAX_POOL_BYTES+15)>>4))
            return ERROR_POOL_RANGE_EXPRESSION_EVAL;
    }

    if (!ranges)
        return ERROR_POOL_RANGE_EXPRESSION_EVAL;

    LabelPool pool;
    pool.pool_name = name;
    pool.numRanges = ranges>>1;   // two entries (start, end) per range
    pool.scopeDepth = scope_depth;

    memset(pool.usedMap, 0, sizeof(unsigned int) * num32);
    // NOTE(review): the text is damaged from the loop condition below onward.
    // The rest of AddLabelPool and the header and opening statements of a
    // following routine are missing; the pLabel assignments that remain
    // appear to belong to that other routine (it fills in a label record and
    // marks it local). Restore from a pristine copy.
    for (int r = 0; rlabel_name = label;
    pLabel->expression = pool.pool_name;
    pLabel->evaluated = true;
    pLabel->value = addr;
    pLabel->zero_page = addr<0x100;
    pLabel->pc_relative = true;
    pLabel->constant = true;

    MarkLabelLocal(label, true);
    return error;
}

// Request a label from a pool
// numBytes: the scan counts consecutive free slots in `sequence` and stops
//           once the count equals numBytes
// ret_addr: presumably receives the reserved address; the statement that
//           would set it is not present in this damaged copy - TODO confirm
StatusCode LabelPool::Reserve(int numBytes, unsigned int &ret_addr)
{
    unsigned int *map = usedMap;
    unsigned short *pRanges = ranges;
    // NOTE(review): the loop headers below are damaged (text between '<' and
    // '>' is missing), including the declarations of a0, a1, addr, sequence,
    // range_map, chk and m that the surviving statements rely on.
    for (int r = 0; r=a0 && sequence= a0) {
                if ((m & chk)==0)
                {
                    // slot is free, extend the current run
                    sequence++;
                    if (sequence == numBytes)
                        break;
                } else
                    sequence = 0;   // slot in use, the run starts over
                --addr;
                m <<= 2;            // two map bits per address
            }
        }
        if (sequence == numBytes) {
            unsigned int index = (a1-addr-numBytes);
            unsigned int *addr_map = range_map + (index>>4);
            unsigned int m = numBytes << (index << 1);
            // NOTE(review): damaged again from this loop condition onward. The
            // end of Reserve and the header of the following routine are
            // missing; the statements that remain clear map bits and appear
            // to belong to a release routine. Restore from a pristine copy.
            for (int b = 0; b=a0 && addr>4;
            index &= 0xf;
            unsigned int u = *map, m = 3 << (index << 1);
            // the two bit field for this slot holds a byte count, zero means the slot is not in use
            unsigned int b = u & m, bytes = b >> (index << 1);
            if (bytes) {
                // NOTE(review): loop header damaged, the declaration of _m is missing
                for (unsigned int f = 0; f>2;
                    if (!_m) { m <<= 30; *map-- = u; }
                    else { m = _m; }
                }
                *map = u;
                return STATUS_OK;
            } else
                return ERROR_INTERNAL_LABEL_POOL_ERROR;
        } else
            map += (a1-a0+15)>>4;   // skip the map words covering this range
    }
    return STATUS_OK;
}

// assignment of label (