From 5d9572f0ee1749c997af84adf05754ec514be826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl-Henrik=20Sk=C3=A5rstedt?= Date: Tue, 29 Sep 2015 22:07:04 -0700 Subject: [PATCH] Adding the file to github project --- README.md | 320 ++++++++++ asm6502.cpp | 1765 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2085 insertions(+) create mode 100644 asm6502.cpp diff --git a/README.md b/README.md index 918e6e4..7c8dcc8 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,322 @@ # Asm6502 + 6502 Macro Assembler in a single c++ file using the struse single file text parsing library. Supports most syntaxes. + +Asm6502 is a struse example that implements a simple 6502 macro assembler. + +Every assembler seems to add or change its own quirks to the 6502 syntax. This implementation aims to support all of them at once as long as there is no contradiction. + +To keep up with this trend Asm6502 is adding the following features to the mix: + +1. Full expression evaluation everywhere values are used: [Expressions](#expressions) +2. C style scoping within '{' and '}': [Scopes](#scopes) +3. Reassignment of labels. This means there is no error if you declare the same label twice, but on the other hand you can do things like label = label + 2. +4. [Local labels](#labels) can be defined in a number of ways, such as leading period (.label) or leading at-sign (@label) or terminating dollar sign (label$). +5. [Directives](#directives) support both with and without leading period. +6. Labels don't need to end with colon, but they can. +7. No indentation required for instructions, meaning that labels can't be mnemonics, macros or directives. +8. As far as achievable, support the syntax of other 6502 assemblers. + +In summary, if you are familiar with any 6502 assembler syntax you should feel at home with Asm6502. If you're familiar with C programming expressions you should be familiar with '{', '}' scoping and complex expressions. 
+ +There are no hard limits on binary size so if the address exceeds $ffff it will just wrap around to $0000. I'm not sure about the best way to handle that or if it really is a problem. + +## Prerequisite + +Asm6502.cpp requires struse.h which is a single file text parsing library that can be retrieved from https://github.com/Sakrac/struse. + +### References + +* [6502 opcodes](http://www.6502.org/tutorials/6502opcodes.html) +* [6502 opcode grid](http://www.llx.com/~nparker/a2/opcodes.html) +* [Codebase64 CPU section](http://codebase64.org/doku.php?id=base:6502_6510_coding) + +## Features + +* **Code** +* **Comments** +* **Labels** +* **Directives** +* **Macros** +* **Expressions** + +### Code + +Code is any valid mnemonic/opcode and addressing mode. At the moment only one opcode per line is assembled. + +### Comments + +Comments are currently line based and both ';' and '//' are accepted as delimiters. + +### Expressions + +Anywhere a number can be entered it can also be interpreted as a full expression, for example: + +``` +Get123: + bytes Get1-*, Get2-*, Get3-* +Get1: + lda #1 + rts +Get2: + lda #2 + rts +Get3: + lda #3 + rts +``` + +Would yield 3 bytes where the address of a label can be calculated by taking the address of the byte plus the value of the byte. + +### Labels + +Labels come in two flavors: **Addresses** (PC based) or **Values** (Evaluated from an expression). An address label is simply placed somewhere in code and a value label is followed by '**=**' and an expression. All labels are rewritable so it is fine to do things like NumInstance = NumInstance+1. Value assignments can be prefixed with '.const' or '.label' but are not required to be prefixed by anything. + +*Local labels* exist in between *global labels* and get discarded whenever a new global label is added. The syntax for local labels is one of: prefix with period, at-sign, exclamation mark or suffix with $, as in: **.local** or **!local** or **@local** or **local$**. 
Both value labels and address labels can be local labels. + +``` +Function: ; global label + ldx #32 +.local_label ; local label + dex + bpl .local_label + rts + +Next_Function: ; next global label, the local label above is now erased. + rts +``` + +### Directives + +Directives are assembler commands that control the code generation but do not generate code themselves. Some assemblers prefix directives with a period (.org instead of org) so a leading period is accepted but not required for directives. + +* **ORG** (same as **PC**): Set the current compiling address. +* **LOAD** Set the load address for binary formats that support it. +* **ALIGN** Align the address to a multiple by filling with 0s +* **MACRO** Declare a macro +* **EVAL** Log an expression during assembly. +* **BYTES** Insert comma separated bytes at this address (same as **BYTE**) +* **WORDS** Insert comma separated 16 bit values at this address (same as **WORD**) +* **TEXT** Insert text at this address +* **INCLUDE** Include another source file and assemble at this address +* **INCBIN** Include a binary file at this address +* **CONST** Assign a value to a label and make it constant (error if reassigned with other value) +* **LABEL** Decorative directive to assign an expression to a label +* **INCSYM** Include a symbol file with an optional set of wanted symbols. + +**ORG** + +``` +org $2000 +(or pc $2000) +``` + +Sets the current assembler address to this address + +**LOAD** + +``` +load $2000 +``` + +For c64 .prg files this prefixes the binary file with this address. + +**ALIGN** + +``` +align $100 +``` + +Add bytes of 0 up to the next address divisible by the alignment + +**MACRO** + +See the 'Macro' section below + +**EVAL** + +Example: +``` +eval Current PC: * +``` +Might yield the following in stdout: +``` +Eval (15): Current PC : "*" = $2010 +``` + +When eval is encountered on a line print out "EVAL (\<line number\>) \<message\>: \<expression\> = \<value\>" to stdout. 
This can be useful to see the size of things or for debugging expressions. + +**BYTES** + +Adds the comma separated values on the current line to the assembled output, for example + +``` +RandomBytes: + bytes NumRandomBytes + { + bytes 13,1,7,19,32 + NumRandomBytes = RandomBytes - ! + } +``` + +byte is also recognized + +**WORDS** + +Adds comma separated 16 bit values similar to how **BYTES** works + +**TEXT** + +Copies the string in quotes on the same line. The plan is to do a petscii conversion step. Use the modifier 'petscii' or 'petscii_shifted' to convert alphabetic characters to range. + +Example: + +``` +text petscii_shifted "This might work" +``` + +**INCLUDE** + +Include another source file. This should also work with .sym files to import labels from another build. The plan is for Asm6502 to export .sym files as well. + +Example: + +``` +include "wizfx.s" +``` + + +**INCBIN** + +Include binary data from a file, this inserts the binary data at the current address. + +Example: + +``` +incbin "wizfx.gfx" +``` + +**CONST** + +Prefix a label assignment with 'const' or '.const' to cause an error if the label gets reassigned. + +``` +const zpData = $fe +``` + +**LABEL** + +Decorative directive to assign an expression to a label, label assignments are followed by '=' and an expression. + +These two assignments do the same thing (with different values): +``` +label zpDest = $fc +zpDest = $fa +``` + +**INCSYM** Include a symbol file with an optional set of wanted symbols. + +Open a symbol file and extract a set of symbols, or all symbols if no set was specified. + +``` +incsym Part1_Init, Part1_Update, Part1_Exit "part1.sym" +``` + +## Expression syntax + +Expressions contain values, such as labels or raw numbers and operators including +, -, \*, /, & (and), | (or), ^ (eor), << (shift left), >> (shift right) similar to how expressions work in C. Parentheses are supported for managing order of operations where C style precedence needs to be overridden. 
In addition there are some special characters supported: + +* \*: Current address (PC). This conflicts with the use of \* as multiply so multiply will be interpreted only after a value or right parenthesis +* <: If less than is the first character in an expression this evaluates to the low byte (and $ff) +* >: If greater than is the first character in an expression this evaluates to the high byte (>>8) +* !: Start of scope (use like an address label in expression) +* %: First address after scope (use like an address label in expression) +* $: Precedes hexadecimal value +* %: If immediately followed by '0' or '1' this is a binary value and not scope closure address + +## Macros + +A macro can be defined by using the directive macro and includes the lines within the following scope: + +Example: +``` +macro ShiftLeftA(Source) { + rol Source + rol A +} +``` + +The macro will be instantiated anytime the macro name is encountered: +``` +lda #0 +ShiftLeftA($a0) +``` + +The parameter field is optional for both the macro declaration and instantiation, if there is a parameter in the declaration but not in the instantiation the parameter will be removed from the macro. If there are no parameters in the declaration the parentheses can be omitted and will be slightly more efficient to assemble, as in: + +``` +.macro GetBit { + asl + bne % + jsr GetByte +} +``` + +Currently macros with parameters use search and replace without checking if the parameter is a whole word, the plan is to fix this. + +## Scopes + +Scopes are lines in between '{' and '}' including macros. The purpose of scopes is to reduce the need for local labels and the scopes nest just like C code to support function level and loops and inner loop scoping. '!' is a label that is the first address of the scope and '%' the first address after the scope. + +This means you can write +``` +lda #0 +ldx #8 +{ + sta Label,x + dex + bpl ! 
+} +``` +(where ; represents line breaks) to construct a loop without adding a label. + +##Examples + +Using scoping to avoid local labels + +``` +; set zpTextPtr to a memory location with text +; return: y is the offset to the first space. +; (y==0 means either first is space or not found.) +FindFirstSpace + ldy #0 + { + lda (zpTextPtr),y + cmp #$20 + beq % ; found, exit + iny + bne ! ; not found, keep searching + } + rts +``` + +### Development Status + +Currently the assembler is in the first public revision and while features are tested individually it is fairly certain that untested combinations of features will indicate flaws and certain features are not in a complete state (such as the TEXT directive not bothering to convert ascii to petscii for example). + +**TODO** +* Bracket scoping closure ('}') should clean up local variables within that scope (better handling of local variables within macros). +* Macro parameters should replace only whole words instead of any substring +* Add 'import' directive as a catch-all include/incbin/etc. alternative +* ifdef / if / elif / else / endif conditional code generation directives +* rept / irp macro helpers (repeat, indefinite repeat) + +**FIXED** +* Context stack cleanup +* % in expressions is interpreted as binary value if immediately followed by 0 or 1 +* Add a const directive for labels that shouldn't be allowed to change (currently ignoring const) +* TEXT directive converts ascii to petscii (respect uppercase or lowercase petscii) (simplistic) + +Revisions: +* 2015-09-29 Moved Asm6502 out of Struse Samples. +* 2015-09-28 First commit \ No newline at end of file diff --git a/asm6502.cpp b/asm6502.cpp new file mode 100644 index 0000000..09d1e90 --- /dev/null +++ b/asm6502.cpp @@ -0,0 +1,1765 @@ +// +// asm6502.cpp +// +// +// Created by Carl-Henrik Skårstedt on 9/23/15. 
+// +// +// A simple 6502 assembler +// +// +// The MIT License (MIT) +// +// Copyright (c) 2015 Carl-Henrik Skårstedt +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software +// and associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE +// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// +// https://github.com/Sakrac/struse/wiki/Asm6502-Syntax +// + +#define _CRT_SECURE_NO_WARNINGS // Windows shenanigans +#define STRUSE_IMPLEMENTATION // include implementation of struse in this file +#include "struse.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <vector> + +// if the number of resolved labels exceed this in one late eval then skip +// checking for relevance and just eval all unresolved expressions. 
+#define MAX_LABELS_EVAL_ALL 16 + +// Max number of nested scopes (within { and }) +#define MAX_SCOPE_DEPTH 32 + +// The maximum complexity of expressions to be evaluated +#define MAX_EVAL_VALUES 32 +#define MAX_EVAL_OPER 64 + +// Internal status and error type +enum StatusCode { + STATUS_OK, // everything is fine + STATUS_NOT_READY, // label could not be evaluated at this time + ERROR_UNEXPECTED_CHARACTER_IN_EXPRESSION, + ERROR_TOO_MANY_VALUES_IN_EXPRESSION, + ERROR_TOO_MANY_OPERATORS_IN_EXPRESSION, + ERROR_UNBALANCED_RIGHT_PARENTHESIS, + ERROR_EXPRESSION_OPERATION, + ERROR_EXPRESSION_MISSING_VALUES, + ERROR_INSTRUCTION_NOT_ZP, + ERROR_INVALID_ADDRESSING_MODE_FOR_BRANCH, + ERROR_BRANCH_OUT_OF_RANGE, + ERROR_LABEL_MISPLACED_INTERNAL, + ERROR_BAD_ADDRESSING_MODE, + ERROR_UNEXPECTED_CHARACTER_IN_ADDRESSING_MODE, + ERROR_UNEXPECTED_LABEL_ASSIGMENT_FORMAT, + ERROR_MODIFYING_CONST_LABEL, + + ERROR_STOP_PROCESSING_ON_HIGHER, // errors greater than this will stop execution + + ERROR_TARGET_ADDRESS_MUST_EVALUATE_IMMEDIATELY, + ERROR_TOO_DEEP_SCOPE, + ERROR_UNBALANCED_SCOPE_CLOSURE, + ERROR_BAD_MACRO_FORMAT, + ERROR_ALIGN_MUST_EVALUATE_IMMEDIATELY, + ERROR_OUT_OF_MEMORY_FOR_MACRO_EXPANSION, +}; + +// The following strings are in the same order as StatusCode +const char *aStatusStrings[] = { + "ok", + "not ready", + "Unexpected character in expression", + "Too many values in expression", + "Too many operators in expression", + "Unbalanced right parenthesis in expression", + "Expression operation", + "Expression missing values", + "Instruction can not be zero page", + "Invalid addressing mode for branch instruction", + "Branch out of range", + "Internal label organization mishap", + "Bad addressing mode", + "Unexpected character in addressing mode", + "Unexpected label assignment format", + "Changing value of label that is constant", + "Errors after this point will stop execution", + "Target address must evaluate immediately for this operation", + "Scoping is too deep", + 
"Unbalanced scope closure", + "Unexpected macro formatting", + "Align must evaluate immediately", + "Out of memory for macro expansion", +}; + +// Operators are either instructions or directives +enum OperationType { + OT_NONE, + OT_MNEMONIC, + OT_DIRECTIVE +}; + +// Opcode encoding +typedef struct { + unsigned int op_hash; + unsigned char group; // group # + unsigned char index; // ground index + unsigned char type; // mnemonic or +} OP_ID; + +// +// 6502 instruction encoding according to this page +// http://www.llx.com/~nparker/a2/opcodes.html +// decoded instruction: +// XXY10000 for branches +// AAABBBCC for CC=00, 01, 10 +// and some custom ops +// + +enum AddressingMode { + AM_REL_ZP_X, // 0 (zp,x) + AM_ZP, // 1 zp + AM_IMMEDIATE, // 2 #$hh + AM_ABSOLUTE, // 3 $hhhh + AM_REL_ZP_Y, // 4 (zp),y + AM_ZP_X, // 5 zp,x + AM_ABSOLUTE_Y, // 6 $hhhh,y + AM_ABSOLUTE_X, // 7 $hhhh,x + AM_RELATIVE, // 8 ($xxxx) + AM_ACCUMULATOR, // 9 A + AM_NONE, // 10 + AM_INVALID, // 11 +}; + +// How instruction argument is encoded +enum CODE_ARG { + CA_NONE, // single byte instruction + CA_ONE_BYTE, // instruction carries one byte + CA_TWO_BYTES, // instruction carries two bytes + CA_BRANCH // instruction carries a relative address +}; + +// opcode groups +enum OP_GROUP { + OPG_SUBROUT, + OPG_CC01, + OPG_CC10, + OPG_STACK, + OPG_BRANCH, + OPG_FLAG, + OPG_CC00, + OPG_TRANS +}; + +// opcode exception indices +enum OP_INDICES { + OPI_JSR = 1, + OPI_LDX = 5, + OPI_STX = 4, + OPI_STA = 4, + OPI_JMP = 1, +}; + +// opcode names in groups (prefix by group size) +const char aInstr[] = { + "BRK,JSR,RTI,RTS\n" + "ORA,AND,EOR,ADC,STA,LDA,CMP,SBC\n" + "ASL,ROL,LSR,ROR,STX,LDX,DEC,INC\n" + "PHP,PLP,PHA,PLA,DEY,TAY,INY,INX\n" + "BPL,BMI,BVC,BVS,BCC,BCS,BNE,BEQ\n" + "CLC,SEC,CLI,SEI,TYA,CLV,CLD,SED\n" + "BIT,JMP,,STY,LDY,CPY,CPX\n" + "TXA,TXS,TAX,TSX,DEX,,NOP" +}; + +// group # + index => base opcode +const unsigned char aMulAddGroup[][2] = { + { 0x20,0x00 }, + { 0x20,0x01 }, + { 0x20,0x02 }, + { 
0x20,0x08 }, + { 0x20,0x10 }, + { 0x20,0x18 }, + { 0x20,0x20 }, + { 0x10,0x8a } +}; + +char aCC00Modes[] = { AM_IMMEDIATE, AM_ZP, AM_INVALID, AM_ABSOLUTE, AM_INVALID, AM_ZP_X, AM_INVALID, AM_ABSOLUTE_X }; +char aCC01Modes[] = { AM_REL_ZP_X, AM_ZP, AM_IMMEDIATE, AM_ABSOLUTE, AM_REL_ZP_Y, AM_ZP_X, AM_ABSOLUTE_X, AM_ABSOLUTE_Y }; +char aCC10Modes[] = { AM_IMMEDIATE, AM_ZP, AM_NONE, AM_ABSOLUTE, AM_INVALID, AM_ZP_X, AM_INVALID, AM_ABSOLUTE_X }; + +unsigned char CC00ModeAdd[] = { 0xff, 4, 0, 12, 0xff, 20, 0xff, 28 }; +unsigned char CC00Mask[] = { 0x0a, 0x08, 0x08, 0x2a, 0xae, 0x0e, 0x0e }; +unsigned char CC10ModeAdd[] = { 0xff, 4, 0, 12, 0xff, 20, 0xff, 28 }; +unsigned char CC10Mask[] = { 0xaa, 0xaa, 0xaa, 0xaa, 0x2a, 0xae, 0xaa, 0xaa }; + +static const strref c_comment("//"); +static const strref word_char_range("!0-9a-zA-Z_@$!"); +static const strref label_char_range("!0-9a-zA-Z_@$!."); +static const strref keyword_equ("equ"); + +// pairArray is basically two vectors sharing a size without using constructors +template class pairArray { +protected: + H *keys; + V *values; + unsigned int _count; + unsigned int _capacity; +public: + pairArray() : keys(nullptr), values(nullptr), _count(0), _capacity(0) {} + void reserve(unsigned int size) { + if (size>_capacity) { + H *new_keys = (H*)malloc(sizeof(H) * size); if (!new_keys) { return; } + V *new_values = (V*)malloc(sizeof(V) * size); if (!new_values) { free(new_keys); return; } + if (keys && values) { + memcpy(new_keys, keys, sizeof(H) * _count); + memcpy(new_values, values, sizeof(V) * _count); + free(keys); free(values); + } + keys = new_keys; + values = new_values; + _capacity = size; + } + } + bool insert(unsigned int pos) { + if (pos>_count) + return false; + if (_count==_capacity) + reserve(_capacity+64); + if (pos<_count) { + memmove(keys+pos+1, keys+pos, sizeof(H) * (_count-pos)); + memmove(values+pos+1, values+pos, sizeof(V) * (_count-pos)); + } + memset(keys+pos, 0, sizeof(H)); + memset(values+pos, 0, sizeof(V)); 
+ _count++; + return true; + } + bool insert(unsigned int pos, H key) { + if (insert(pos)) { + keys[pos] = key; + return true; + } + return false; + } + void remove(unsigned int pos) { + if (pos<_count) { + _count--; + if (pos<_count) { + memmove(keys+pos, keys+pos+1, sizeof(H) * (_count-pos)); + memmove(values+pos, values+pos+1, sizeof(V) * (_count-pos)); + } + } + } + H* getKeys() { return keys; } + H& getKey(unsigned int pos) { return keys[pos]; } + V* getValues() { return values; } + V& getValue(unsigned int pos) { return values[pos]; } + unsigned int count() const { return _count; } + unsigned int capacity() const { return _capacity; } + void clear() { + if (keys!=nullptr) + free(keys); + keys = nullptr; + if (values!=nullptr) + free(values); + values = nullptr; + _capacity = 0; + _count = 0; + } + ~pairArray() { clear(); } +}; + +// Data related to a label +typedef struct { +public: + strref label_name; // the name of this label + strref expression; // the expression of this label (optional, if not possible to evaluate yet) + int value; + bool evaluated; // a value may not yet be evaluated + bool zero_page; // addresses known to be zero page + bool pc_relative; // this is an inline label describing a point in the code + bool constant; // the value of this label can not change +} Label; + +// When an expression is evaluated late, determine how to encode the result +enum LateEvalType { + LET_LABEL, // this evaluation applies to a label and not memory + LET_ABS_REF, // calculate an absolute address and store at 0, +1 + LET_BRANCH, // calculate a branch offset and store at this address + LET_BYTE, // calculate a byte and store at this address +}; + +// If an expression can't be evaluated immediately, this is required +// to reconstruct the result when it can be. 
+typedef struct { + unsigned char* target; // offset into output buffer + int address; // current pc + int scope; // scope pc + strref label; // valid if this is not a target but another label + strref expression; + strref source_file; + LateEvalType type; +} LateEval; + +// A macro is a text reference to where it was defined +typedef struct { + strref name; + strref macro; + strref source_name; // source file name (error output) + strref source_file; // entire source file (req. for line #) +} Macro; + +// Source context is current file (include file, etc.) or current macro. +typedef struct { + strref source_name; // source file name (error output) + strref source_file; // entire source file (req. for line #) + strref code_segment; // the segment of the file for this context + strref read_source; // current position/length in source file +} SourceContext; + +class ContextStack { +private: + std::vector stack; + SourceContext *currContext; +public: + ContextStack() : currContext(nullptr) { stack.reserve(32); } + SourceContext& curr() { return *currContext; } + void push(strref src_name, strref src_file, strref code_seg) { + SourceContext context; + context.source_name = src_name; + context.source_file = src_file; + context.code_segment = code_seg; + context.read_source = code_seg; + stack.push_back(context); + currContext = &stack[stack.size()-1]; + } + void pop() { stack.pop_back(); currContext = stack.size() ? &stack[stack.size()-1] : nullptr; } + bool has_work() { return currContext!=nullptr; } +}; + +// Assembler directives such as org / pc / load / etc. 
+enum AssemblerDirective { + AD_ORG, + AD_LOAD, + AD_ALIGN, + AD_MACRO, + AD_EVAL, + AD_BYTES, + AD_WORDS, + AD_TEXT, + AD_INCLUDE, + AD_INCBIN, + AD_CONST, + AD_LABEL, + AD_INCSYM, +}; + +// The state of the assembly +class Asm { +public: + pairArray labels; + pairArray macros; + std::vector lateEval; + std::vector localLabels; // remove these labels when a global pc label is added + std::vector loadedData; // free when + strovl symbols; + + // context for macros / include files + ContextStack contextStack; + + // target output memory + unsigned char *output, *curr; + size_t output_capacity; + + unsigned int address; + unsigned int load_address; + int scope_address[MAX_SCOPE_DEPTH]; + int scope_depth; + bool set_load_address; + bool symbol_export; + + // Convert source to binary + void Assemble(strref source, strref filename); + + // Clean up memory allocations + void Cleanup(); + + // Make sure there is room to write more code + void CheckOutputCapacity(unsigned int addSize); + + // Add and build a macro + StatusCode AddMacro(strref macro, strref source_name, strref source_file); + StatusCode BuildMacro(Macro &m, strref arg_list); + + // Calculate a value based on an expression. 
+ StatusCode EvalExpression(strref expression, int pc, int scope_pc, + int scope_end_pc, int &result); + + // Access labels + Label* GetLabel(strref label); + Label* AddLabel(unsigned int hash); + StatusCode AssignLabel(strref label, strref line, bool make_constant = false); + StatusCode AddressLabel(strref label); + void LabelAdded(Label *pLabel); + void IncSym(strref line); + + + // Late expression evaluation + void AddLateEval(int pc, int scope_pc, unsigned char *target, + strref expression, strref source_file, LateEvalType type); + void AddLateEval(strref label, int pc, int scope_pc, + strref expression, LateEvalType type); + StatusCode CheckLateEval(strref added_label=strref(), int scope_end = -1); + + // Manage locals + void MarkLabelLocal(strref label); + void FlushLocalLabels(); + + // Assembler steps + StatusCode ApplyDirective(AssemblerDirective dir, strref line, strref source_file); + AddressingMode GetAddressMode(strref line, bool flipXY, + StatusCode &error, strref &expression); + StatusCode AddOpcode(strref line, int group, int index, strref source_file); + StatusCode BuildSegment(OP_ID *pInstr, int numInstructions); + + // constructor + Asm() : address(0x1000), load_address(0x1000), scope_depth(0), set_load_address(false), + output(nullptr), curr(nullptr), output_capacity(0), symbol_export(false) + { localLabels.reserve(256); } +}; + +// Binary search over an array of unsigned integers, may contain multiple instances of same key +unsigned int FindLabelIndex(unsigned int hash, unsigned int *table, unsigned int count) +{ + unsigned int max = count; + unsigned int first = 0; + while (count!=first) { + int index = (first+count)/2; + unsigned int read = table[index]; + if (hash==read) { + while (index && table[index-1]==hash) + index--; // guarantee first identical index returned on match + return index; + } else if (hash>read) + first = index+1; + else + count = index; + } + if (counthash) + count--; + return count; +} + +// Read in text data (main 
source, include, etc.) +char* LoadText(strref filename, size_t &size) { + strown<512> file(filename); + if (FILE *f = fopen(file.c_str(), "r")) { + fseek(f, 0, SEEK_END); + size_t _size = ftell(f); + fseek(f, 0, SEEK_SET); + if (char *buf = (char*)calloc(_size, 1)) { + fread(buf, 1, _size, f); + fclose(f); + size = _size; + return buf; + } + fclose(f); + } + size = 0; + return nullptr; +} + +// Read in binary data (incbin) +char* LoadBinary(strref filename, size_t &size) { + strown<512> file(filename); + if (FILE *f = fopen(file.c_str(), "rb")) { + fseek(f, 0, SEEK_END); + size_t _size = ftell(f); + fseek(f, 0, SEEK_SET); + if (char *buf = (char*)malloc(_size)) { + fread(buf, _size, 1, f); + fclose(f); + size = _size; + return buf; + } + fclose(f); + } + size = 0; + return nullptr; +} + +// Clean up work allocations +void Asm::Cleanup() { + for (std::vector::iterator i = loadedData.begin(); i!=loadedData.end(); ++i) { + char *data = *i; + free(data); + } + if (symbols.get()) { + free(symbols.charstr()); + symbols.set_overlay(nullptr,0); + } + loadedData.clear(); + labels.clear(); + macros.clear(); + if (output) + free(output); + output = nullptr; + curr = nullptr; + output_capacity = 0; +} + +// Make sure there is room to assemble in +void Asm::CheckOutputCapacity(unsigned int addSize) { + size_t currSize = curr - output; + if ((addSize + currSize) >= output_capacity) { + size_t newSize = currSize * 2; + if (newSize < 64*1024) + newSize = 64*1024; + if ((addSize+currSize) > newSize) + newSize += newSize; + unsigned char *new_output = (unsigned char*)malloc(newSize); + curr = new_output + (curr-output); + free(output); + output = new_output; + output_capacity = newSize; + } +} + +// add a custom macro +StatusCode Asm::AddMacro(strref macro, strref source_name, strref source_file) +{ + // name(optional params) { actual macro } + strref name = macro.split_label(); + macro.skip_whitespace(); + if (macro[0]!='(' && macro[0]!='{') + return ERROR_BAD_MACRO_FORMAT; + 
unsigned int hash = name.fnv1a(); + unsigned int ins = FindLabelIndex(hash, macros.getKeys(), macros.count()); + Macro *pMacro = nullptr; + while (ins < macros.count() && macros.getKey(ins)==hash) { + if (name.same_str_case(macros.getValue(ins).name)) { + pMacro = macros.getValues() + ins; + break; + } + ++ins; + } + if (!pMacro) { + macros.insert(ins, hash); + pMacro = macros.getValues() + ins; + } + pMacro->name = name; + int pos_bracket = macro.find('{'); + if (pos_bracket < 0) { + pMacro->macro = strref(); + return ERROR_BAD_MACRO_FORMAT; + } + strref macro_body = (macro + pos_bracket).scoped_block_skip(); + pMacro->macro = strref(macro.get(), pos_bracket + macro_body.get_len() + 2); + pMacro->source_name = source_name; + pMacro->source_file = source_file; + return STATUS_OK; +} + + +// mark a label as a local label +void Asm::MarkLabelLocal(strref label) +{ + localLabels.push_back(label); +} + +// find all local labels and remove them +void Asm::FlushLocalLabels() +{ + std::vector::iterator i = localLabels.begin(); + while (i!=localLabels.end()) { + unsigned int index = FindLabelIndex(i->fnv1a(), labels.getKeys(), labels.count()); + while (indexsame_str_case(labels.getValue(index).label_name)) { + labels.remove(index); + break; + } + } + i = localLabels.erase(i); + } +} + +// if an expression could not be evaluated, add it along with +// the action to perform if it can be evaluated later. 
+void Asm::AddLateEval(int pc, int scope_pc, unsigned char *target, strref expression, strref source_file, LateEvalType type) +{ + LateEval le; + le.address = pc; + le.scope = scope_pc; + le.target = target; + le.label.clear(); + le.expression = expression; + le.source_file = source_file; + le.type = type; + + lateEval.push_back(le); +} + +void Asm::AddLateEval(strref label, int pc, int scope_pc, strref expression, LateEvalType type) +{ + LateEval le; + le.address = pc; + le.scope = scope_pc; + le.target = 0; + le.label = label; + le.expression = expression; + le.source_file.clear(); + le.type = type; + + lateEval.push_back(le); +} + +// When a label is defined or a scope ends check if there are +// any related late label evaluators that can now be evaluated. +StatusCode Asm::CheckLateEval(strref added_label, int scope_end) +{ + std::vector::iterator i = lateEval.begin(); + bool evaluated_label = true; + strref new_labels[MAX_LABELS_EVAL_ALL]; + int num_new_labels = 0; + if (added_label) + new_labels[num_new_labels++] = added_label; + + while (evaluated_label) { + evaluated_label = false; + while (i != lateEval.end()) { + int value = 0; + // check if this expression is related to the late change (new label or end of scope) + bool check = num_new_labels==MAX_LABELS_EVAL_ALL; + for (int l=0; lexpression.find(new_labels[l]) >= 0; + if (!check && scope_end>0) { + int gt_pos = 0; + while (gt_pos>=0 && !check) { + gt_pos = i->expression.find_at('%', gt_pos); + if (gt_pos>=0) { + if (i->expression[gt_pos+1]=='%') + gt_pos++; + else + check = true; + gt_pos++; + } + } + } + if (check) { + int ret = EvalExpression(i->expression, i->address, i->scope, scope_end, value); + if (ret == STATUS_OK) { + switch (i->type) { + case LET_BRANCH: + value -= i->address; + if (value<-128 || value>127) + return ERROR_BRANCH_OUT_OF_RANGE; + *i->target = (unsigned char)value; + break; + case LET_BYTE: + i->target[0] = value&0xff; + break; + case LET_ABS_REF: + i->target[0] = value&0xff; + 
i->target[1] = (value>>8)&0xff; + break; + case LET_LABEL: { + Label *label = GetLabel(i->label); + if (!label) + return ERROR_LABEL_MISPLACED_INTERNAL; + label->value = value; + label->evaluated = true; + if (num_new_labelslabel_name; + evaluated_label = true; + LabelAdded(label); + break; + } + default: + break; + } + i = lateEval.erase(i); + } else + ++i; + } else + ++i; + } + added_label.clear(); + } + return STATUS_OK; +} + +// Get a labelc record if it exists +Label *Asm::GetLabel(strref label) +{ + unsigned int label_hash = label.fnv1a(); + unsigned int index = FindLabelIndex(label_hash, labels.getKeys(), labels.count()); + while (index < labels.count() && label_hash == labels.getKey(index)) { + if (label.same_str(labels.getValue(index).label_name)) + return labels.getValues() + index; + index++; + } + return nullptr; +} + +static const strref str_label("label"); +static const strref str_const("const"); + +// If exporting labels, append this label to the list +void Asm::LabelAdded(Label *pLabel) +{ + if (pLabel && pLabel->evaluated && symbol_export) { + int space = 1 + str_label.get_len() + 1 + pLabel->label_name.get_len() + 1 + 9 + 2; + if ((symbols.get_len()+space) > symbols.cap()) { + strl_t new_size = ((symbols.get_len()+space)+8*1024); + char *new_charstr = (char*)malloc(new_size); + if (symbols.charstr()) { + memcpy(new_charstr, symbols.charstr(), symbols.get_len()); + free(symbols.charstr()); + } + symbols.set_overlay(new_charstr, new_size, symbols.get_len()); + } + symbols.append('.'); + symbols.append(pLabel->constant ? 
str_const : str_label); + symbols.append(' '); + symbols.append(pLabel->label_name); + symbols.sprintf_append("=$%04x\n", pLabel->value); + } +} + +// These are expression tokens in order of precedence (last is highest precedence) +enum EvalOperator { + EVOP_NONE, + EVOP_VAL, // value => read from value queue + EVOP_LPR, // left parenthesis + EVOP_RPR, // right parenthesis + EVOP_ADD, // + + EVOP_SUB, // - + EVOP_MUL, // * (note: if not preceded by value or right paren this is current PC) + EVOP_DIV, // / + EVOP_AND, // & + EVOP_OR, // | + EVOP_EOR, // ^ + EVOP_SHL, // << + EVOP_SHR // >> +}; + +// +// EvalExpression +// Uses the Shunting Yard algorithm to convert to RPN first +// which makes the actual calculation trivial and avoids recursion. +// https://en.wikipedia.org/wiki/Shunting-yard_algorithm +// +// Return: +// STATUS_OK means value is completely evaluated +// STATUS_NOT_READY means value could not be evaluated right now +// ERROR_* means there is an error in the expression +// + +StatusCode Asm::EvalExpression(strref expression, int pc, int scope_pc, int scope_end_pc, int &result) +{ + int sp = 0; + int numValues = 0; + int numOps = 0; + char op_stack[MAX_EVAL_OPER]; + + char ops[MAX_EVAL_OPER]; // RPN expression + int values[MAX_EVAL_VALUES]; // RPN values (in order of RPN EVOP_VAL operations) + + bool hiByte = false; + bool loByte = false; + values[0] = 0; + + if (expression[0]=='>') { hiByte = true; ++expression; } + else if (expression[0]=='<') { loByte = true; ++expression; } + + EvalOperator prev_op = EVOP_NONE; + while (expression) { + int value = 0; + expression.skip_whitespace(); + // Read a token from the expression (op) + EvalOperator op = EVOP_NONE; + char c = expression.get_first(); + switch (c) { + case '$': ++expression; value = expression.ahextoui_skip(); op = EVOP_VAL; break; + case '-': ++expression; op = EVOP_SUB; break; + case '+': ++expression; op = EVOP_ADD; break; + case '*': // asterisk means both multiply and current PC, 
disambiguate! + if (prev_op==EVOP_VAL || prev_op==EVOP_RPR) op = EVOP_MUL; + else { op = EVOP_VAL; value = pc; } + ++expression; + break; + case '/': ++expression; op = EVOP_DIV; break; + case '>': if (expression.get_len()>=2 && expression[1]=='>') { + expression += 2; op = EVOP_SHR; } break; + case '<': if (expression.get_len()>=2 && expression[1]=='<') { + expression += 2; op = EVOP_SHL; } break; + case '%': // % means both binary and scope closure, disambiguate! + if (expression[1]=='0' || expression[1]=='1') { + ++expression; value = expression.abinarytoui_skip(); op = EVOP_VAL; break; } + if (scope_end_pc<0) return STATUS_NOT_READY; + ++expression; op = EVOP_VAL; value = scope_end_pc; break; + case '|': ++expression; op = EVOP_OR; break; + case '&': ++expression; op = EVOP_AND; break; + case '(': ++expression; op = EVOP_LPR; break; + case ')': ++expression; op = EVOP_RPR; break; + default: { + if (c=='!' && !(expression+1).len_label()) { + if (scope_pc<0) // ! by itself is current scope, !+label char is a local label + return STATUS_NOT_READY; + ++expression; + op = EVOP_VAL; value = scope_pc; + break; + } else if (strref::is_number(c)) { + value = expression.atoi_skip(); op = EVOP_VAL; + } else if (c=='!' 
|| strref::is_valid_label(c)) { + strref label = expression.split_range_trim(label_char_range);//.split_label(); + Label *pValue = GetLabel(label); + if (!pValue || !pValue->evaluated) // this label could not be found (yet) + return STATUS_NOT_READY; + value = pValue->value; op = EVOP_VAL; + } else + return ERROR_UNEXPECTED_CHARACTER_IN_EXPRESSION; + break; + } + } + + // this is the body of the shunting yard algorithm + if (op == EVOP_VAL) { + values[numValues++] = value; + ops[numOps++] = op; + } else if (op == EVOP_LPR) { + op_stack[sp++] = op; + } else if (op == EVOP_RPR) { + while (sp && op_stack[sp-1]!=EVOP_LPR) { + sp--; + ops[numOps++] = op_stack[sp]; + } + // check that there actually was a left parenthesis + if (!sp || op_stack[sp-1]!=EVOP_LPR) + return ERROR_UNBALANCED_RIGHT_PARENTHESIS; + sp--; // skip open paren + } else { + while (sp) { + EvalOperator p = (EvalOperator)op_stack[sp-1]; + if (p==EVOP_LPR || op>p) + break; + ops[numOps++] = p; + sp--; + } + op_stack[sp++] = op; + } + // check for out of bounds or unexpected input + if (numValues==MAX_EVAL_VALUES) + return ERROR_TOO_MANY_VALUES_IN_EXPRESSION; + else if (numOps==MAX_EVAL_OPER || sp==MAX_EVAL_OPER) + return ERROR_TOO_MANY_OPERATORS_IN_EXPRESSION; + + prev_op = op; + } + while (sp) { + sp--; + ops[numOps++] = op_stack[sp]; + } + + // processing the result RPN will put the completed expression into values[0]. + // values is used as both the queue and the stack of values since reads/writes won't + // exceed itself. 
+ int valIdx = 0; + for (int o = 0; o> + sp--; values[sp-1] >>= values[sp]; break; + default: + return ERROR_EXPRESSION_OPERATION; + break; + } + } + // check hi/lo byte filter + int val = values[0]; + if (hiByte) + val = (val>>8)&0xff; + else if (loByte) + val &= 0xff; + result = val; + + return STATUS_OK; +} + +// Add a label entry +Label* Asm::AddLabel(unsigned int hash) { + unsigned int index = FindLabelIndex(hash, labels.getKeys(), labels.count()); + labels.insert(index, hash); + return labels.getValues() + index; +} + +// unique key binary search +int LookupOpCodeIndex(unsigned int hash, OP_ID *lookup, int count) +{ + int first = 0; + while (count!=first) { + int index = (first+count)/2; + unsigned int read = lookup[index].op_hash; + if (hash==read) { + return index; + } else if (hash>read) + first = index+1; + else + count = index; + } + return -1; // index not found +} + +typedef struct { + const char *name; + AssemblerDirective directive; +} DirectiveName; + +DirectiveName aDirectiveNames[] { + { "PC", AD_ORG }, + { "ORG", AD_ORG }, + { "LOAD", AD_LOAD }, + { "ALIGN", AD_ALIGN }, + { "MACRO", AD_MACRO }, + { "EVAL", AD_EVAL }, + { "BYTE", AD_BYTES }, + { "BYTES", AD_BYTES }, + { "WORD", AD_WORDS }, + { "WORDS", AD_WORDS }, + { "TEXT", AD_TEXT }, + { "INCLUDE", AD_INCLUDE }, + { "INCBIN", AD_INCBIN }, + { "CONST", AD_CONST }, + { "LABEL", AD_LABEL }, + { "INCSYM", AD_INCSYM }, +}; + +static const int nDirectiveNames = sizeof(aDirectiveNames) / sizeof(aDirectiveNames[0]); + +int sortHashLookup(const void *A, const void *B) { + const OP_ID *_A = (const OP_ID*)A; + const OP_ID *_B = (const OP_ID*)B; + return _A->op_hash > _B->op_hash ? 
1 : -1; +} + +int BuildInstructionTable(OP_ID *pInstr, strref instr_text, int maxInstructions) +{ + // create an instruction table (mnemonic hash lookup) + int numInstructions = 0; + char group_num = 0; + while (strref line = instr_text.next_line()) { + int index_num = 0; + while (line) { + strref mnemonic = line.split_token_trim(','); + if (mnemonic) { + OP_ID &op_hash = pInstr[numInstructions++]; + op_hash.op_hash = mnemonic.fnv1a_lower(); + op_hash.group = group_num; + op_hash.index = index_num; + op_hash.type = OT_MNEMONIC; + } + index_num++; + } + group_num++; + } + + // add assembler directives + for (int d=0; d force zp (needs more info) + ++line; + char c = line.get_first(); + if (c=='z' || c=='Z') { + force_zp = true; + ++line; + need_more = true; + } else + error = ERROR_UNEXPECTED_CHARACTER_IN_ADDRESSING_MODE; + break; + } + default: { // accumulator or absolute + if (line) { + if (line.get_label().same_str("A")) { + addrMode = AM_ACCUMULATOR; + } else { // absolute (zp, offs x, offs y) + addrMode = force_zp ? AM_ZP : AM_ABSOLUTE; + expression = line.split_token_trim(','); + bool relX = line && (line[0]=='x' || line[0]=='X'); + bool relY = line && (line[0]=='y' || line[0]=='Y'); + if ((flipXY && relY) || (!flipXY && relX)) + addrMode = addrMode==AM_ZP ? AM_ZP_X : AM_ABSOLUTE_X; + else if ((flipXY && relX) || (!flipXY && relY)) { + if (force_zp) { + error = ERROR_INSTRUCTION_NOT_ZP; + break; + } + addrMode = AM_ABSOLUTE_Y; + } + } + } + break; + } + } + } + return addrMode; +} + +// Action based on assembler directive +StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref source_file) +{ + StatusCode error = STATUS_OK; + switch (dir) { + case AD_ORG: { // org / pc: current address of code + int addr; + if (line[0]=='=' || keyword_equ.is_prefix_word(line)) // optional '=' or equ + line.next_word_ws(); + if ((error = EvalExpression(line, address, scope_address[scope_depth], -1, addr))) { + error = error == STATUS_NOT_READY ? 
ERROR_TARGET_ADDRESS_MUST_EVALUATE_IMMEDIATELY : error; + break; + } + address = addr; + scope_address[scope_depth] = address; + if (!set_load_address) { + load_address = address; + set_load_address = true; + } + break; + } + case AD_LOAD: { // load: address for target to load code at + int addr; + if (line[0]=='=' || keyword_equ.is_prefix_word(line)) + line.next_word_ws(); + if ((error = EvalExpression(line, address, scope_address[scope_depth], -1, addr))) { + error = error == STATUS_NOT_READY ? ERROR_TARGET_ADDRESS_MUST_EVALUATE_IMMEDIATELY : error; + break; + } + address = addr; + scope_address[scope_depth] = address; + if (!set_load_address) { + load_address = address; + set_load_address = true; + } + break; + } + case AD_ALIGN: // align: align address to multiple of value, fill space with 0 + if (line) { + int value; + int status = EvalExpression(line, address, scope_address[scope_depth], -1, value); + if (status == STATUS_NOT_READY) + error = ERROR_ALIGN_MUST_EVALUATE_IMMEDIATELY; + else if (status == STATUS_OK && value>0) { + int add = (address + value-1) % value; + address += add; + CheckOutputCapacity(add); + for (int a = 0; aSTATUS_NOT_READY) + break; + else if (error==STATUS_NOT_READY) + AddLateEval(address, scope_address[scope_depth], curr, exp, source_file, LET_BYTE); + CheckOutputCapacity(1); + *curr++ = value; + address++; + } + break; + case AD_WORDS: // words: add words (16 bit values) by comma separated values + while (strref exp = line.split_token_trim(',')) { + int value; + error = EvalExpression(exp, address, scope_address[scope_depth], -1, value); + if (error>STATUS_NOT_READY) + break; + else if (error==STATUS_NOT_READY) + AddLateEval(address, scope_address[scope_depth], curr, exp, source_file, LET_ABS_REF); + CheckOutputCapacity(2); + *curr++ = (char)value; + *curr++ = (char)(value>>8); + address+=2; + } + break; + case AD_TEXT: { // text: add text within quotes + // for now just copy the windows ascii. TODO: Convert to petscii. 
+ // https://en.wikipedia.org/wiki/PETSCII + // ascii: no change + // shifted: a-z => $41.. A-Z => $61.. + // unshifted: a-z, A-Z => $41 + strref text_prefix = line.before('"').get_trimmed_ws(); + line = line.between('"', '"'); + CheckOutputCapacity(line.get_len()); + { + if (!text_prefix || text_prefix.same_str("ascii")) { + memcpy(curr, line.get(), line.get_len()); + curr += line.get_len(); + address += line.get_len(); + } else if (text_prefix.same_str("petscii")) { + while (line) { + char c = line[0]; + *curr++ = (c>='a' && c<='z') ? (c-'a'+'A') : (c>0x60 ? ' ' : line[0]); + address++; + ++line; + } + } else if (text_prefix.same_str("petscii_shifted")) { + while (line) { + char c = line[0]; + *curr++ = (c>='a' && c<='z') ? (c-'a'+0x61) : + ((c>='A' && c<='Z') ? (c-'A'+0x61) : (c>0x60 ? ' ' : line[0])); + address++; + ++line; + } + } + } + break; + } + case AD_MACRO: { // macro: create an assembler macro + strref from_here = contextStack.curr().code_segment + + strl_t(line.get()-contextStack.curr().code_segment.get()); + int block_start = from_here.find('{'); + if (block_start > 0) { + strref block = (from_here + block_start).scoped_block_skip(); + error = AddMacro(strref(line.get(), strl_t(block.get()+block.get_len()+1-line.get())), + contextStack.curr().source_name, contextStack.curr().source_file); + contextStack.curr().read_source += + strl_t(block.get()+block.get_len()+1-contextStack.curr().read_source.get()); + } + break; + } + case AD_INCLUDE: { // include: assemble another file in place + line = line.between('"', '"'); + size_t size = 0; + if (char *buffer = LoadText(line, size)) { + loadedData.push_back(buffer); + strref src(buffer, strl_t(size)); + contextStack.push(line, src, src); + } + break; + } + case AD_INCBIN: { // incbin: import binary data in place + line = line.between('"', '"'); + strown<512> filename(line); + size_t size = 0; + if (char *buffer = LoadBinary(line, size)) { + CheckOutputCapacity((unsigned int)size); + memcpy(curr, buffer, 
size); + free(buffer); + curr += size; + address += (unsigned int)size; + } + break; + } + case AD_LABEL: + case AD_CONST: { + line.trim_whitespace(); + strref label = line.split_range_trim(word_char_range, line[0]=='.' ? 1 : 0); + if (line[0]=='=' || keyword_equ.is_prefix_word(line)) { + line.next_word_ws(); + AssignLabel(label, line, dir==AD_CONST); + } else + error = ERROR_UNEXPECTED_LABEL_ASSIGMENT_FORMAT; + break; + } + case AD_INCSYM: { + IncSym(line); + break; + } + + } + return error; +} + +// Push an opcode to the output buffer +StatusCode Asm::AddOpcode(strref line, int group, int index, strref source_file) +{ + StatusCode error = STATUS_OK; + int base_opcode = aMulAddGroup[group][1] + index * aMulAddGroup[group][0]; + strref expression; + + // Get the addressing mode and the expression it refers to + AddressingMode addrMode = GetAddressMode(line, + group==OPG_CC10&&index>=OPI_STX&&index<=OPI_LDX, error, expression); + + int value = 0; + bool evalLater = false; + if (expression) { + error = EvalExpression(expression, address, scope_address[scope_depth], -1, value); + if (error == STATUS_NOT_READY) { + evalLater = true; + error = STATUS_OK; + } + if (error != STATUS_OK) + return error; + } + + // check if address is in zero page range and should use a ZP mode instead of absolute + if (!evalLater && value>=0 && value<0x100) { + switch (addrMode) { + case AM_ABSOLUTE: + addrMode = AM_ZP; + break; + case AM_ABSOLUTE_X: + addrMode = AM_ZP_X; + break; + default: + break; + } + } + + CODE_ARG codeArg = CA_NONE; + unsigned char opcode = base_opcode; + + // analyze addressing mode per mnemonic group + switch (group) { + case OPG_BRANCH: + if (addrMode != AM_ABSOLUTE) { + error = ERROR_INVALID_ADDRESSING_MODE_FOR_BRANCH; + break; + } + codeArg = CA_BRANCH; + break; + + case OPG_SUBROUT: + if (index==1) { // jsr + if (addrMode != AM_ABSOLUTE) + error = ERROR_INVALID_ADDRESSING_MODE_FOR_BRANCH; + else + codeArg = CA_TWO_BYTES; + } + break; + case OPG_STACK: + case 
OPG_FLAG: + case OPG_TRANS: + codeArg = CA_NONE; + break; + case OPG_CC00: + // jump relative exception + if (addrMode==AM_RELATIVE && index==OPI_JMP) { + base_opcode += 0x20; + addrMode = AM_ABSOLUTE; + } + if (addrMode>7 || (CC00Mask[index]&(1<7 || (addrMode==AM_IMMEDIATE && index==OPI_STA)) + error = ERROR_BAD_ADDRESSING_MODE; + else { + opcode = base_opcode + addrMode*4; + switch (addrMode) { + case AM_ABSOLUTE: + case AM_ABSOLUTE_Y: + case AM_ABSOLUTE_X: + codeArg = CA_TWO_BYTES; + break; + default: + codeArg = CA_ONE_BYTE; + break; + } + } + break; + case OPG_CC10: { + if (addrMode == AM_NONE || addrMode == AM_ACCUMULATOR) { + if (index>=4) + error = ERROR_BAD_ADDRESSING_MODE; + else { + opcode = base_opcode + 8; + codeArg = CA_NONE; + } + } else { + if (addrMode>7 || (CC10Mask[index]&(1<127) { + error = ERROR_BRANCH_OUT_OF_RANGE; + break; + } + *curr++ = opcode; + *curr++ = evalLater ? 0 : (unsigned char)((int)value-(int)address); + break; + case CA_ONE_BYTE: + *curr++ = opcode; + if (evalLater) + AddLateEval(address, scope_address[scope_depth], curr, expression, source_file, LET_BYTE); + *curr++ = (char)value; + address += 2; + break; + case CA_TWO_BYTES: + *curr++ = opcode; + if (evalLater) + AddLateEval(address, scope_address[scope_depth], curr, expression, source_file, LET_ABS_REF); + *curr++ = (char)value; + *curr++ = (char)(value>>8); + address += 3; + break; + case CA_NONE: + *curr++ = opcode; + address++; + break; + } + } + return error; +} + +// Compile in a macro +StatusCode Asm::BuildMacro(Macro &m, strref arg_list) +{ + strref macro_src = m.macro; + strref params = macro_src[0]=='(' ? 
macro_src.scoped_block_skip() : strref(); + params.trim_whitespace(); + arg_list.trim_whitespace(); + macro_src.skip_whitespace(); + if (params) { + arg_list = arg_list.scoped_block_skip(); + strref pchk = params; + strref arg = arg_list; + int dSize = 0; + while (strref param = pchk.split_token_trim(',')) { + strref a = arg.split_token_trim(','); + if (param.get_len() < a.get_len()) { + int count = macro_src.substr_case_count(param); + dSize += count * ((int)a.get_len() - (int)param.get_len()); + } + } + int mac_size = macro_src.get_len() + dSize + 32; + if (char *buffer = (char*)malloc(mac_size)) { + loadedData.push_back(buffer); + strovl macexp(buffer, mac_size); + macexp.copy(macro_src); + while (strref param = params.split_token_trim(',')) { + strref a = arg_list.split_token_trim(','); + macexp.replace(param, a); + } + contextStack.push(m.source_name, macexp.get_strref(), macexp.get_strref()); + FlushLocalLabels(); + return STATUS_OK; + } else + return ERROR_OUT_OF_MEMORY_FOR_MACRO_EXPANSION; + } + contextStack.push(m.source_name, m.source_file, macro_src); + FlushLocalLabels(); + return STATUS_OK; +} + +void Asm::IncSym(strref line) +{ + // include symbols listed or all if no listing + strref symlist = line.before('"').get_trimmed_ws(); + line = line.between('"', '"'); + size_t size; + if (char *buffer = LoadText(line, size)) { + strref symfile(buffer, strl_t(size)); + while (strref symdef = symfile.line()) { + strref symtype = symdef.split_token(' '); + strref label = symdef.split_token_trim('='); + // first word is either .label or .const + bool constant = symtype.same_str(".const"); + if (symlist) { + strref symchk = symlist; + while (strref symwant = symchk.split_token_trim(',')) { + if (symwant.same_str_case(label)) { + AssignLabel(label, symdef, constant); + break; + } + } + } else + AssignLabel(label, symdef, constant); + } + loadedData.push_back(buffer); + } +} + +// assignment of label (