diff --git a/docs/command_line_options.md b/docs/command_line_options.md new file mode 100644 index 0000000..64a074b --- /dev/null +++ b/docs/command_line_options.md @@ -0,0 +1,116 @@ +# Command Line Options for x65 + +These are the current options for controlling x65 from the command line. + +## lst + -lst / -lst=(file.lst) + +Generate disassembly text from result(file or stdout) + +## tsl + -tsl=(file) + +generate listing file in TASS style + +## tl + + -tl=(file) + +Generate labels in TASS style + +## opcodes + -opcodes / -opcodes=(file.s) + +Use with -cpu=... to dump all available opcodes for that CPU (file or stdout) + +## endm + -endm + +macros end with endm or endmacro instead of scoped('{' - '}') and rept/repeat ends with endr instead of being scoped. + +## cpu + -cpu=[6502/6502ill/65c02/65c02wdc/65816] + +declare CPU type, use with argument + +## acc [65816] + -acc=[8/16] + +set the accumulator mode for 65816 at start, default is 8 bits + +## xy [65816] + -xy=8/16 + +set the index register mode for 65816 at start, default is 8 bits + + +## org + -org=$2000 or -org=4096 + +force assembly for first encountered non-specific address section at given address + +## kickasm + -kickasm + +use Kick Assembler syntax (in progress) + +## merlin + -merlin + +use Merlin syntax + +## c64 + -c64 + +(default) Include 2 byte load address in binary output + +## a2b + -a2b + +Produce an Apple II Dos 3.3 Binary + +## bin + -bin + +Produce raw binary + +## a2p + -a2p + +Produce an Apple II ProDos Binary + +## a2o + -a2o + +Produce an Apple II GS OS executable (relocatable) + +## mrg + -mrg + +Force merge all sections (use with -a2o) + +## sect + -sect + +display sections loaded and built + +## sym + -sym (file.sym) + +generate symbol file + +## obj + -obj (file.x65) + +Produce an object file instead of a binary for later linking + +## vice + -vice (file.vs) + +export a vice monitor command file (including vice symbols) + +## xrefimp + -xrefimp + +import directive means xref, 
not include/incbin and export directive means xdef, not export section. + diff --git a/docs/directives.md b/docs/directives.md new file mode 100644 index 0000000..34a0b15 --- /dev/null +++ b/docs/directives.md @@ -0,0 +1,481 @@ +# X65 Directives + +Directives are commands that control the assembler and include controls for conditional assembly, exporting multiple binary files, creating linkable object files etc. + +The directives are case insensitive and can be preceded by a dot + + .rept 8 { dc.b 1<<rept } + +is the same as + + REPT 8 { dc.b 1<<rept } + +Some directives change behavior based on [command line options](command_line_options.md), such as -endm, -xrefimp, -kickasm and -merlin. + + +### CPU, PROCESSOR + +Assemble for this target, valid options are: + * 6502 + * 6502ill (illegal opcodes) + * 65c02 + * 65c02wdc (adds 18 extra instructions: stp, wai, bbr0-7 & bbs0-7) + * 65816 + +example: + + cpu 6502ill + +### PC, ORG + +Assemble as if loaded at this address + +### LOAD + +If applicable, instruct to load at this address + +### EXPORT + +Export this section or disable export. Note that with the -xrefimp command line option this means XDEF instead and the EXPORT directive is not available. + +### SECTION, SEG, SEGMENT + +Enable code that will be assigned a start address during a link step, or alternatively its own load address. BSS and ZP sections will not be included in the binary output, and sections can be separately exported using the EXPORT directive. + +### MERGE + +Merge named sections in order listed + +### LINK + +Put sections with this name at this address (must be ORG / fixed address section) + +### XDEF + +Externally declare a symbol. When using the command line option -xrefimp EXPORT means the same thing. + +### XREF + +Reference an external symbol. When using the command line option -xrefimp IMPORT means the same thing. + +### INCOBJ + +Read in an object file saved from a previous build (that was assembled using the -obj command line option). 
+ +### ALIGN + +Add to address to make it evenly divisible by this. This only works at the start of a SECTION or in the middle of a section that is assembled to a fixed address. + +### MACRO, MAC + +Create a macro. When used with the command line option -endm the macro ends with a ENDMACRO or ENDM directive, and if not using -endm the macro is defined within braces ( { and } ). + + ; standard macro usage + MACRO ldaneg(x) { + lda #-x + } + + ; -endm macro usage + MACRO ldaneg(x) + lda #-x + ENDM + + +### FUNCTION + +A user function is a pre-defined one-line expression that can be used in a similar way as a macro, but instead of generating binary data it returns a single integer value. + + ; user defined function + FUNCTION alignto(address, alignment) (address + alignment-1) & (~alignment) + +Note that functions must evaluate at the time of reference, if any symbol is not evaluated it will fail. This differs from in-place expressions that can have references that will be evaluated at a later time in assembly or at link time. + +### EVAL, PRINT, ECHO + +Print expression to stdout during assemble. The syntax is: + + EVAL <message>: <expression> + +for example + + EVAL Current Address: * + + test_stack = 0 + eval Checking referenced function, Should be 0: .referenced(test_stack) + eval Checking defined function, Should be 1: .defined(test_stack) + +### DC, DV + +Declare constant / Declare Value. The directive can be specific by appending .b for byte size, .w for word size, .t for triple size or .l for long size. The default size is 1 byte. + + Test: + dc.b $20, *-Test + +### BYTE, BYTES + +Same as dc.b + +### WORD, WORDS + +Same as dc.w + +### LONG + +Same as dc.l + +### TEXT + +Add text to output, the order of characters can be changed with a string symbol, for instance: + + STRING FontOrder = " ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!#$%&*" + TEXT [FontOrder] "HELLO #1!" + +### INCLUDE + +Load and assemble another file at this address. 
+ +### INCBIN + +Load another file and include as binary data at this address. + +### INCSYM + +Load symbols from a .sym file + + INCSYM "Main.Sym" + +Symbols can also be selected by a list on the same line: + + INCSYM InitMain, UpdateMain, ShutdownMain, "Main.Sym" + +### INCDIR + +Add a folder to search for include files. + +### IMPORT + +Generic version of INCLUDE, INCBIN with custom arguments + + ; include a raw binary file + IMPORT "data.bin" + IMPORT binary "data.bin" + + ; include a source file + IMPORT source "defines.i" + + ; include a binary C64 file omitting the load address (first 2 bytes) + IMPORT c64 "main.prg" + + ; include a text file + IMPORT text "text.txt" + IMPORT text petscii "petscii.txt" + IMPORT text petscii_shifted "petscii.txt" + IMPORT text <string symbol> "custom.txt" ; see TEXT directive + +Note that if the command line argument -xrefimp is used then IMPORT is equivalent to XREF instead. + +### CONST + +Declare a symbol as const, assigning it again will cause an error. + + CONST VICBank = $4000 + +The constness of a symbol can be tested with the IFCONST directive or the CONST() eval function. + +### LABEL + +Optional directive to create a mutable label, a way to specify non-CONST. It has no actual function. + +### STRING + +Declare a string symbol. Strings are a little bit limited but can be used for ordering characters in a TEXT declaration, or it can be used as assembler source. + + ; Some custom ordered text + TEXT [FontOrder] "MAKE IT SO!" 
+ + ; Macro for (x=start; x<end; x++) + macro for.x Start, End { + ldx #Start + if Start < End + string _ForEnd = "inx\ncpx #End\nbne _ForLoop" + elif Start > End + { + if (-1 == End) & (Start<129) + string _ForEnd = "dex\nbpl _ForLoop" + else + string _ForEnd = "dex\ncpx #End\nbne _ForLoop" + endif + } + else + string _ForEnd = "" + endif + _ForLoop + } + + macro forend { + _ForEnd ; _ForEnd defined by a variation of the for macro + undef _ForEnd + undef _ForLoop + } + + for.x(5, 1) + lda buf1,x + sta buf2,x + forend + +### UNDEF + +Remove a symbol + + like_bananas = 1 + UNDEF like_bananas + +### LABPOOL, POOL + +Create a pool of addresses to assign as labels dynamically. This acts as a linear stack allocator for temporary storage and is deallocated when the scope ends if declared as a local symbol. + +Pools can be defined as part of a larger pool. + + pool zpGlobal $40-$f8 ; all zero page usage + zpGlobal pool zpLocal 16 ; temporary storage for regular functions + zpGlobal pool zpUtility 16 ; temporary storage for utility functions + zpGlobal pool zpInterrupt 8 ; temporary storage for interrupts + zpGlobal pool zpBuffer 64 ; per module storage + +Allocate from a pool by using the pool name + + zpBuffer zpIntroTimer.w ; frame counter, 2 bytes + zpBuffer zpScrollChar.8 ; 8 bytes of rol char for scroll + + { + zpLocal .zpSrc.w ; 2 bytes source address + zpLocal .zpDst.w ; 2 bytes dest address + .. + } ; at this point .zpSrc and .zpDst are deallocated and can be reused by other code. + { + zpLocal .zpCount ; 1 byte, same address as .zpSrc used above + } + +### IF + +Begin conditional code. Whatever lines follow will be assembled only if the expression following the IF evaluates to a non-zero value, the conditional block ends with ELSE, ELSEIF or ENDIF. + + conditional_code = 1 + IF conditional_code + ... 
; this will be assembled because conditional_code is not zero + ENDIF + +### IFDEF, IFNDEF + +Similar to IF but only takes one symbol and the following lines will be assembled only if the symbol was defined previously (IFDEF) or not defined previously (IFNDEF) + + defined_symbol = 0 + IFDEF defined_symbol + ... ; this will be assembled because defined_symbol exists + ENDIF + +### IFCONST + +Similar to IF but like IFDEF only takes one symbol and the following lines will be assembled if the symbol is CONST. The symbol should be defined prior to testing it. + +CONST() is also an Eval Function that can be used to form more complex expressions using IF. IFCONST is equivalent to IF CONST(<symbol>) + +### IFBLANK, IFNBLANK + +Checks if the argument exists, mostly for use in macros to test if an argument exists. + +BLANK() is also an Eval Function, IFBLANK is equivalent to IF BLANK(...) + +### ELSE + +Requires a prior IF, the following line will be assembled only if the prior conditional block was not assembled. ELSE must be terminated by an ENDIF + + IF 1 + lda #0 + ELSE + lda #2 + ENDIF + +### ELIF + +Requires a prior IF and allows another expression check before ending the conditional blocks + + IFDEF monkey + lda #monkey_value + ELIF DEFINED(zebra) + lda #zebra_value + ELSE + lda #human_value + ENDIF + +### ENDIF + +Terminated a conditional segment of blocks. + +### STRUCT + +Declare a set of labels offset from a base address. + +Example: + + STRUCT ArtSet { + word ArtTiles + word ArtColors + word ArtMasks + byte bgColor + } + +Members of the structure can be referenced by the struct name dot member name: + + lda ArtSetData + ArtSet.bgColor + + ArtSetData: + ds SIZEOF(ArtSet) + +### ENUM + +Declare a set of incremental labels. Values can either be assigned or one more than the previous. The default first value is 0. 
+ + enum PlayerIndex { + None = -1, + One, + Two, + Three, + Four, + Count ; there are this many players + } + +Enum values can be referenced by enum name dot value name: + + ldx #PlayerIndex.One + { + inx + cpx #PlayerIndex.Count + bcc ! + } + + lda #PlayerIndex.Four + +### REPT, REPEAT + +Repeats the code within { and } following the REPT directive and counter. Within the REPT code the symbol REPT has the current iteration count, starting at 0. + + const words = 10 + + .rept words * 2 { dc.b rept / 2 } + +If the command line option -endm is used then REPT uses ENDR instead of the braced scope so the equivalent to the above would be + + .rept words * 2 + dc.b rept / 2 + .endr + +### A16, A8, XY16, XY8, I16, I8 + +Specific to 65816 assembly, controls the current accumulator and index register width (8 or 16 bits). Different assemblers use different names so various alternatives are allowed. + +### DUMMY, DUMMY_END + +Creates a dummy section between DUMMY and DUMMY_END directives. + +### DS, RES + +Define "section", Reserve. Reserves a number of bytes at the current address. The first argument is the number of bytes and the second argument is optional and is the byte to fill with. The main purpose is to reserve space in a BSS or ZP section. + +### SCOPE, ENDSCOPE + +A specialized version of a scope, does the same thing as a brace scope (code between { and }) but additionally marks all labels defined within as local. An unimplemented feature is that the scope can be named and then labels defined can be accessed outside the scope as + +<scope name>::<label> or <scope name>.label (TODO!) + +### PUSH, PULL + +Creates a stack for a mutable symbol so that it can temporarily be redefined and then restored. + + do_thing = 1 + + IF do_thing + .. ; do thing + ENDIF + + PUSH do_thing + do_thing = 0 + + IF do_thing + .. ; do not do thing + ENDIF + + PULL do_thing + + IF do_thing + .. ; restored symbol, let's do thing again! 
+ ENDIF + +### ABORT, ERR + +Stops assembly with an error if encountered and prints the rest of the line to the output. + +--- + + +# Merlin Specific Directives + +### MX + +### STR + +### DA + +### DN + +### ASC + +### PUT + +### DDB + +### DB + +### DFB + +### HEX + +### DO + +### FIN + +### EJECT + +### OBJ + +### TR + +### END + +### REL + +### USR + +### DUM + +### DEND + +### LST, LSTDO + +### LUP + +### SAV, DSK + +### LNK + +### XC + +### ENT + +### EXT + +### ADR + +### ADRL + +### CYC diff --git a/docs/errors.md b/docs/errors.md new file mode 100644 index 0000000..5dcad39 --- /dev/null +++ b/docs/errors.md @@ -0,0 +1,175 @@ +# List of x65 error messages + +## Undefined code + +Could not recognize code at this point in the file + +## Unexpected character in expression + +A character in an expression has confused the assembler evaluator + +## Too many values in expression + +There is a limit to the number of values encountered in a single expression, feel free to change that number if you must. + +## Too many operators in expression + +There is a limit to the number of operators encountered in a single expression, this can also be modified as needed. + +## Unbalanced right parenthesis in expression + +A right parenthesis without a corresponding left parenthesis was encountered in the expression + +## Expression operation + +The expression evaluator has confused itself with an unrecognized operator. + +## Expression missing values + +Not enough values to complete an expression operator + +## Instruction can not be zero page + +An attempt to force a zero page command that does not support it was foiled by the assembler. + +## Invalid addressing mode for instruction + +Indeed! + +## Internal label organization mishap + +Internal error + +## Bad addressing mode + +Don't be bad + +## Unexpected character in addressing mode + +What gives? + +## Unexpected label assignment format + +Equal sign or EQU is desired for assigning const or mutable (LABEL) labels. 
+ +## Changing value of label that is constant + +You declared that you would not change your mind but you did. I can't deal with it. + +## Out of labels in pool + +A label pool was declared at a certain size that has now been exceeded. + +## Internal label pool release confusion + +Internal error + +## Label pool range evaluation failed + +Could not determine the range for the label pool at the current line of assembly + +## Label pool was redeclared within its scope + +No recursive pool dipping please. + +## Pool label already defined + +Once is enough + +## Struct already defined + +Don't repeat yourself + +## Referenced struct not found + +But specify it at least once. + +## Declare constant type not recognized (dc.?) + +Specify word size using something that can be understood + +## rept count expression could not be evaluated + +The count needs to be evaluated at the current line of assembly + +## hex must be followed by an even number of hex numbers + +## DS directive failed to evaluate immediately + +The count needs to be evaluated at the current line of assembly + +## File is not a valid x65 object file + +A file that was referenced with an INCOBJ directive was not recognized as a valid x65 object file + +## Failed to read include file +## Using symbol PULL without first using a PUSH + +Withdrawing beyond your deposits amounts to robbery. + +## User invoked error + +An ABORT or ERR directive was assembled + +## Errors after this point will stop execution + +This is a placeholder error message + +## Branch is out of range + +Max branch distance was exceeded at this point in assembly + +## Function declaration is missing name or expression + +A FUNCTION directive requires a name, open/close parenthesis and an expression. Parameters within the parenthesis is optional. 
+ +## Function could not resolve the expression + +The expression could not be evaluated at this point in assembly + +## Expression evaluateion recursion too deep +## Target address must evaluate immediately for this operation +## Scoping is too deep +## Unbalanced scope closure +## Unexpected macro formatting +## Align must evaluate immediately +## Out of memory for macro expansion + +Your memory is not enough + +## Problem with macro argument +## Conditional could not be resolved +## #endif encountered outside conditional block + +ENDIF directive without an IF or equivalent + +## #else or #elif outside conditional block + +ELSE or ELIF directive without an IF or equivalent + +## Struct can not be assembled as is +## Enum can not be assembled as is +## Conditional assembly (#if/#ifdef) was not terminated in file or macro + +an IF or equivalent does not have a matching ENDIF directive + +## rept is missing a scope ('{ ... }') + +You want me to repeat what exactly? + +## Link can only be used in a fixed address section +## Link can not be used in dummy sections +## Can not process this line +## Unexpected target offset for reloc or late evaluation +## CPU is not supported +## Can't append sections +## Zero page / Direct page section out of range +## Attempting to assign an address to a non-existent section +## Attempting to assign an address to a fixed address section +## Can not link a zero page section with a non-zp section +## Out of memory while building +## Can not write to file +## Assembly aborted +## Condition too deeply nested + +There is a limit to the number of IFs within IFs. \ No newline at end of file diff --git a/docs/eval_functions.md b/docs/eval_functions.md new file mode 100644 index 0000000..3c1abf0 --- /dev/null +++ b/docs/eval_functions.md @@ -0,0 +1,48 @@ +# Eval Functions + +Eval Functions are used like symbols in expressions but are always followed by parenthesis with optional arguments. 
+ +### DEFINED, DEF + + .if .def(symbol) + .endif + +Evaluates to 1 if the symbol has been defined or 0 if it has not been encountered to this point in the current assembly. + +### REFERENCED + + .if .referenced(symbol) + .endif + +Evaluates to 1 if the symbol has been referenced in the current assembly, the symbol should be defined at this point. + +### BLANK + + .if .blank() + .endif + +Evaluates to 1 if the contents within the parenthesis is empty, primarily for use within macros. + +### CONST + + if .const(symbol) + .endif + +Evaluates to 1 if the symbol has been declared CONST, the symbol should be defined at this point. + +### SIZEOF + + STRUCT Module { + word Init + word Update + word Shutdown + } + + ds SIZEOF( Module ) + +Returns the byte size of a given struct. + +### TRIGSIN + +Not implemented, experimental math feature, currently returns 0. + diff --git a/docs/macro_samples.md b/docs/macro_samples.md new file mode 100644 index 0000000..937e8d5 --- /dev/null +++ b/docs/macro_samples.md @@ -0,0 +1,164 @@ +# x65macro.i + +This is a file under macros and is intended as an example to look at for understanding macro features, it is not super tested for correctness. This information is included in the header file itself but to ease reading copied here. The macros folder also has [more detailed documentation](../macros/README.MD). 
+ +## Suffix definition + +The letters after the period has the following meanings: +- b: byte +- w: word (2 bytes) +- t: triple (3 bytes) +- l: long (4 bytes) +- n: number of bytes in value +- c: copy result to target +- i: immediate, for example add a value to the contents of an address +- x: use the x register for operation as a counter or an offset +- y: use the y register for operation +- r: relative; ry=(zp),y +- a: use the contents of an address for operation (16 bits) +- s: custom step size (instead of +1 or -1) for loops +- p: positive +- m: negative +- o: use label pool for counter + +## operations + +The base operations provided by these macros are: + +- set: Assign a value to the contents of an address +- move: Move the contents of an address to another address +- add: addition +- sub: subtraction +- asrm: arithmetic shift right +- aslm: arithmetic shift left +- neg: negate a number +- abs: make a number positive +- copy: copy memory from one location to another +- for: iterate between two numbers with optional step size +- mnop: insert multiple nop at this point + +set.b / .w / .t / .l Value, Target + - set the contents of an 1-4 byte location to a value + - uses accumulator + +move.b / .w / .t / .l / .n Src,Trg + - copy 1-4 (or n) bytes from Src location to Trg location + - uses accumulator + +asrm.n Target, Size + - shift a signed multi byte number right + - uses accumulator + +asrm.nx Target, Size + - shift a signed multi byte number right offset by the x register + - no registers touched + +aslm.n Target, Size + - shift a multi byte number left + - no registers touched + +aslm.nx Target, Size + - shift a multi byte number left offset by the x register + - no registers changed + +neg.cn Source, Target, Size + - negate and copy a multi byte number + - uses accumulator + +neg.n Target, Size + - negate a number in place + - uses accumulator + +abs.n Trg, Size + - make a number absolute + - uses accumulator + +neg.nx Trg, Size + - negate a number in 
place offset by the x register + - uses accumulator + +add.n Address1, Address2, Target, Bytes + - add contents of two memory locations into a target location + - uses accumulator + +sub.n Address1, Address2, Target, Bytes + - Target = Address1 - Address2 + - uses accumulator + +add.ni Address, Value, Target, Bytes + - add a fixed value to a memory location into a target + - uses accumulator + +sub.ni Address, Value, Target, Bytes + - Target = Address - Value + - uses accumulator + +add.wi Address, Value, Target + - Add 16 bit Value to contents of Address and store at Target + - uses accumulator + +sub.wi Address, Value, Target + - Subtract 16 bit Value from contents of Address and store at Target + - uses accumulator + +mnop Count + - add Count nops + +copy.x Source, Target, Size + - copy up to 256 bytes using the x register as a counter + - uses accumulator and x register + +copy.y Source, Target, Size + - copy up to 256 bytes using the y register as a counter + - uses accumulator and y register + +copy.ry zpSrcPtr,zpTrgPtr,Size + - copy a fixed length buffer using relative zp y indexing + - size is up to a page, changing Y and A + +copy.ry128 zpSrcPtr,zpTrgPtr,Size + - copy up to 128 bytes using the y register + +copy.o Src,Trg,Size,PoolZP + - copy more than 256 bytes using zero page label pool addresses + - uses accumulator, x and y register + +copy.a Src,Trg,Size + - copy more than 256 bytes using absolute indexed in a loop + - uses accumulator, x and y register + +copy.zp Src,Trg,Size,zpTmp1,zpTmp2 + - copy more than 256 bytes using two pairs of zero page values + - uses accumulator, x and y register + +for.x Start, End + - iterate using the x register from Start to End, End is not inclusive + so to iterate from 31 to 0 use for.x 31, -1 + - uses x register + - end for loop with forend macro + +for.y Start, End + - same as for.x but with the y register + - uses y register + - end for loop with forend macro + +for.w Start, End, Counter + - for loop 
for 16 bit counter + - uses accumulator + - end for loop with forend macro + +for.ws Start, End, Counter, Step + - for loop for 16 bit counter with a step value + - uses accumulator + - end for loop with forend macro + +for.wsp Start, End, Counter, Step { + - for (word Counter=start; Counter<end; Counter += Step), Step>0 + - uses accumulator + +for.wsm Start, End, Counter, Step { + - for (word Counter=start; Counter<end; Counter += Step), Step<0 + - uses accumulator + +forend + - terminates for loops diff --git a/docs/readme.md b/docs/readme.md new file mode 100644 index 0000000..53cf690 --- /dev/null +++ b/docs/readme.md @@ -0,0 +1,64 @@ +# x65 Assembler + +x65 is an open source 6502 series assembler that supports object files, +linking, fixed address assembling and a relocatable executable. + +Assemblers have existed for a long time and what they do is well documented, +x65 tries to accommodate most expectations of syntax from Kick Assembler (a +Java 6502 assembler) to Merlin (an Apple II assembler). + +For debugging, dump_x65 is a tool that will show all content of x65 object +files, and x65dsasm is a disassembler intended to review the assembled +result. + +## Noteworthy features: + +* Code with sections, object files and linking or single file fixed + address, or mix it up with fixed address sections in object files. +* Assembler listing with cycle counting for code review. +* Export multiple binaries with a single link operation. +* C style scoping within '{' and '}' with local and pool labels + respecting scopes. +* Conditional assembly with if/ifdef/else etc. +* Assembler directives representing a variety of features. +* Local labels can be defined in a number of ways, such as leading + period (.label) or leading at-sign (@label) or terminating + dollar sign (label$). +* String Symbols system allows building user expressions and macros + during assembly. +* Reassignment of symbols and labels by default. 
+* No indentation required for instructions, meaning that labels can't + be mnemonics, macros or directives. +* Supporting the syntax of other 6502 assemblers (Merlin syntax + requires command line argument, -endm adds support for sources + using macro/endmacro and repeat/endrepeat combos rather + than scopes). +* Apple II GS executable output. + +## Command Line Options + +Controls the assembler for the entire file + +See [Command Line Options](command_line_options.md) + +## Directives + +Controls the assembler on a line basis + +See [Directives](directives.md) + +## Eval Functions + +Functions that return values for use in expressions. + +See [Eval Functions](eval_functions.md) + +## Macro examples + +Some info about the included example x65macro.i file. + +See [Macro Samples](macro_samples.md) + +## List of Errors + +See [Error List](errors.md) diff --git a/macros/x65macro.i b/macros/x65macro.i index 49f3d53..f4ab7b6 100644 --- a/macros/x65macro.i +++ b/macros/x65macro.i @@ -635,7 +635,7 @@ _ForLoop macro for.y Start, End { ldx #Start if Start < End - string _ForEnd = "iny\ncpx #End\nbne _ForLoop" + string _ForEnd = "iny\ncpy #End\nbne _ForLoop" elif Start > End { if (-1 == End) & (Start<129) diff --git a/sln/dump_x65/dump_x65.vcxproj b/sln/dump_x65/dump_x65.vcxproj index af24e18..996b984 100644 --- a/sln/dump_x65/dump_x65.vcxproj +++ b/sln/dump_x65/dump_x65.vcxproj @@ -23,32 +23,32 @@ <ProjectGuid>{57EFF4A4-7BF2-43F0-AD62-A79092DA67D1}</ProjectGuid> <Keyword>Win32Proj</Keyword> <RootNamespace>dump_x65</RootNamespace> - <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion> + <WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion> </PropertyGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <ConfigurationType>Application</ConfigurationType> <UseDebugLibraries>true</UseDebugLibraries> - 
<PlatformToolset>v140</PlatformToolset> + <PlatformToolset>v141</PlatformToolset> <CharacterSet>NotSet</CharacterSet> </PropertyGroup> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <ConfigurationType>Application</ConfigurationType> <UseDebugLibraries>false</UseDebugLibraries> - <PlatformToolset>v140</PlatformToolset> + <PlatformToolset>v141</PlatformToolset> <WholeProgramOptimization>true</WholeProgramOptimization> <CharacterSet>NotSet</CharacterSet> </PropertyGroup> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> <ConfigurationType>Application</ConfigurationType> <UseDebugLibraries>true</UseDebugLibraries> - <PlatformToolset>v140</PlatformToolset> + <PlatformToolset>v141</PlatformToolset> <CharacterSet>NotSet</CharacterSet> </PropertyGroup> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> <ConfigurationType>Application</ConfigurationType> <UseDebugLibraries>false</UseDebugLibraries> - <PlatformToolset>v140</PlatformToolset> + <PlatformToolset>v141</PlatformToolset> <WholeProgramOptimization>true</WholeProgramOptimization> <CharacterSet>NotSet</CharacterSet> </PropertyGroup> diff --git a/sln/x65.vcxproj b/sln/x65.vcxproj index 7cced22..5568169 100644 --- a/sln/x65.vcxproj +++ b/sln/x65.vcxproj @@ -29,26 +29,26 @@ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <ConfigurationType>Application</ConfigurationType> <UseDebugLibraries>true</UseDebugLibraries> - <PlatformToolset>v140</PlatformToolset> + <PlatformToolset>v141</PlatformToolset> <CharacterSet>NotSet</CharacterSet> </PropertyGroup> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> <ConfigurationType>Application</ConfigurationType> <UseDebugLibraries>false</UseDebugLibraries> - <PlatformToolset>v140</PlatformToolset> + <PlatformToolset>v141</PlatformToolset> 
<WholeProgramOptimization>true</WholeProgramOptimization> <CharacterSet>NotSet</CharacterSet> </PropertyGroup> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> <ConfigurationType>Application</ConfigurationType> <UseDebugLibraries>true</UseDebugLibraries> - <PlatformToolset>v140</PlatformToolset> + <PlatformToolset>v141</PlatformToolset> <CharacterSet>NotSet</CharacterSet> </PropertyGroup> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> <ConfigurationType>Application</ConfigurationType> <UseDebugLibraries>false</UseDebugLibraries> - <PlatformToolset>v140</PlatformToolset> + <PlatformToolset>v141</PlatformToolset> <WholeProgramOptimization>true</WholeProgramOptimization> <CharacterSet>NotSet</CharacterSet> </PropertyGroup> diff --git a/struse.h b/struse.h index 97be4b0..991cff7 100644 --- a/struse.h +++ b/struse.h @@ -35,7 +35,7 @@ Add this #define to *one* C++ file before #include "struse.h" to create the impl #ifndef __STRUSE_H__ #define __STRUSE_H__ -#include <inttypes.h> // uint8_t etc. +#include <inttypes.h> // uint32_t etc. #include <string.h> // memcpy, memmove #include <stdio.h> // printf, vsnprintf #include <stdarg.h> // va_list @@ -128,12 +128,15 @@ public: // convert hexadecimal string to unsigned integer size_t ahextoui() const; uint64_t ahextou64() const; - uint64_t ahextoui_skip(); + size_t ahextoui_skip(); size_t abinarytoui_skip(); // output string with newline (printf) void writeln(); + // single digit number + static char num_to_char(uint8_t num) { return num<10 ? (num+'0'):(num+'a'-10); } + // is character empty such as space, tab, linefeed etc.? 
static bool is_ws(uint8_t c) { return c <= ' '; } static bool is_ws(char c) { return (uint8_t)c <= ' '; } @@ -238,6 +241,9 @@ public: // check if string is a valid floating point number bool is_float_number() const { return valid() && len_float_number() == length; } + // check if matching first char and skip if match + bool grab_char( char c ) { if(length && string[0]==c) { length--; string++; return true; } return false; } + // wildcard search strref find_wildcard(const strref wild, strl_t pos = 0, bool case_sensitive = true) const; strref next_wildcard(const strref wild, strref prev, bool case_sensitive = true) const { @@ -282,6 +288,7 @@ public: (str.get_len()==length || !is_alphanumeric((uint8_t)str[length])); } bool is_prefix_case_of(const strref str) const { return prefix_len_case(str)==get_len(); } bool is_prefix_float_number() const { return len_float_number() > 0; } + bool grab_prefix( const char* str ) { strl_t p = prefix_len( str ); if( !str[ p ] ) { skip( p ); return true; } return false; } // suffix compare strl_t suffix_len(const strref str) const; @@ -364,6 +371,9 @@ public: // find any char from str or char range or char - with backslash prefix int find_range_char_within_range(const strref range_find, const strref range_within, strl_t pos = 0) const; + // find but not within parenthesis + int find_skip_parens(char token) const; + // counts int substr_count(const strref str) const; // count the occurrences of the argument in this string int substr_count_bookend(const strref str, const strref bookend) const; @@ -521,6 +531,10 @@ public: strref before_or_full(char c) const { int o = find(c); if (o>=0) return strref(string, o); return *this; } + strref before_or_full_track_parens(char c) const { + int o = find_skip_parens(c); if (o >= 0) return strref(string, o); return *this; + } + strref before_last(char c) const { int o = find_last(c); if (o>=0) return strref(string, o); return strref(); } @@ -575,20 +589,29 @@ public: strref split_token_any(const 
strref chars); strref split_token_trim(char c); strref split_token_any_trim(const strref chars); + strref split_token_track_parens(char c); + strref split_token_trim_track_parens(char c); strref split_range(const strref range, strl_t pos=0); strref split_range_trim(const strref range, strl_t pos=0); strref split_label(); strref split_lang(); + strref split_num(); // get a snippet, previous and full current line around a position strref get_snippet( strl_t pos ); // grab a block of text starting with (, [ or { and end with the corresponding number of ), ] or } - strref scoped_block_skip(); + strref scoped_block_skip( bool quotes = false ); // scoped_block_skip with C style comments strl_t scoped_block_comment_len(); + strl_t scoped_block_utf8_comment_len(); strref scoped_block_comment_skip(bool include = false) { strref ret = split(scoped_block_comment_len()); if (!include) { ++ret; ret.clip(1); } return ret; } + strref scoped_block_utf8_comment_skip( bool include = false ) { + strref ret = split( scoped_block_utf8_comment_len() ); + if( !include ) { ++ret; ret.clip( 1 ); } + return ret; + } // check matching characters that are terminated by any character in term or ends strl_t match_chars_str(const strref match, const strref term = strref()); @@ -610,6 +633,7 @@ public: int l = strref(string+f, length-f).find(b); if (l<0) l = 0; return strref(string+f, l); } strref get_quote_xml() const; + strref skip_quote_xml(); int find_quoted_xml(char d) const; // returns length up to the delimiter d with xml quotation rules, or -1 if delimiter not found int find_quoted(char d) const; // returns length up to the delimiter d with c/c++ quotation rules, or -1 if delimiter not found @@ -628,10 +652,12 @@ strl_t _strmod_append(char *string, strl_t length, strl_t cap, const char *str); strl_t _strmod_append(char *string, strl_t length, strl_t cap, strref str); strl_t _strmod_insert(char *string, strl_t length, strl_t cap, const strref sub, strl_t pos); strl_t 
_strmod_utf8_tolower(char *string, strl_t length, strl_t cap); +strl_t _strmod_write_utf8( char *string, strl_t cap, size_t code, strl_t pos ); void _strmod_substrcopy(char *string, strl_t length, strl_t cap, strl_t src, strl_t dst, strl_t chars); void _strmod_tolower(char *string, strl_t length); void _strmod_toupper(char *string, strl_t length); strl_t _strmod_format_insert(char *string, strl_t length, strl_t cap, strl_t pos, strref format, const strref *args); +strl_t _strmod_append_num(char* str, strl_t left, uint32_t num, strl_t size, uint32_t radix); strl_t _strmod_remove(char *string, strl_t length, char a); strl_t _strmod_remove(char *string, strl_t length, strl_t start, strl_t len); strl_t _strmod_exchange(char *string, strl_t length, strl_t cap, strl_t start, strl_t size, const strref insert); @@ -721,6 +747,7 @@ public: bool is_prefix_word(const strref str) const { return get_strref().is_prefix_word(str); } bool is_prefix_case_of(const strref str) const { return get_strref().is_prefix_case_of(str); } bool is_prefix_float_number() const { return get_strref().is_prefix_float_number(); } + bool grab_prefix( const char* str ) { return get_strref().grab_prefix( str ); } // whole word compare (prefix match + next char is whitespace or end of string) bool is_word(const strref str) const { return get_strref().is_word(str); } @@ -908,6 +935,11 @@ public: void format_insert(const strref format, const strref *args, strl_t pos) { set_len_int(_strmod_format_insert(charstr(), len(), cap(), pos, format, args)); } + strmod& append_num(uint32_t num, strl_t size, strl_t radix) { + add_len_int( _strmod_append_num( charstr() + len(), cap() - len(), num, size, radix ) ); + return *this; + } + // c style sprintf (work around windows _s preference) #ifdef _WIN32 int sprintf(const char *format, ...) 
{ va_list args; va_start(args, format); @@ -1567,7 +1599,7 @@ size_t strref::ahextoui() const { const char *scan = string; strl_t left = length; - while (*scan<=0x20 && left) { + while (left && *scan<=0x20) { scan++; left--; } @@ -1624,7 +1656,7 @@ uint64_t strref::ahextou64() const return hex; } // convert a hexadecimal string to an unsigned integer -uint64_t strref::ahextoui_skip() +size_t strref::ahextoui_skip() { const char *scan = string; strl_t left = length; @@ -1638,8 +1670,8 @@ uint64_t strref::ahextoui_skip() scan += 2; left -= 2; } - if( left > 16 ) { left = 16; } - uint64_t hex = 0; + if (left > 16) { left = 16; } + size_t hex = 0; while (left) { char c = *scan; if (c>='0' && c<='9') @@ -1733,9 +1765,8 @@ int strref::count_char(char c) const strref strref::skip_bom() { const uint8_t* buf = get_u(); - if( length >= 3 && buf && buf[ 0 ] == 0xef && buf[ 1 ] == 0xbb && buf[ 2 ] == 0xbf ) - { - return strref( string + 3, length - 3 ); + if (length >= 3 && buf && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf) { + return strref(string + 3, length - 3); } return *this; } @@ -1863,6 +1894,20 @@ int strref::find_last(char c, char d) const return -1; } +int strref::find_skip_parens(char token) const +{ + int parens = 0; + const char* scan = string; + strl_t left = length; + while (left && (parens || *scan != token)) { + if (*scan == '(') { ++parens; } else if (*scan == ')' && parens) { --parens; } + --left; + ++scan; + } + if (left) { return length - left; } + return -1; +} + // compare a string with a substring case sensitive static bool int_compare_substr_case(const char *scan, strl_t length, const char *check, strl_t chk_len) { @@ -4046,6 +4091,28 @@ strref strref::get_quote_xml() const return strref(); } +// if this string begins as an xml quote return that. 
+strref strref::skip_quote_xml() +{ + char quote_char = get_first(); + if( quote_char != '"' && quote_char != '\'' ) + return strref(); + + const char *scan = string + 1; + strl_t left = length - 1; + while( left ) { + char c = *scan++; + if( c == quote_char ) { + strref ret( string + 1, length - left - 1 ); + string = scan + ( left > 1 ? 1 : 0 ); // step over the char after the closing quote only when one exists + length = left > 1 ? left - 2 : 0; // left still counts the closing quote; never wrap below zero + return ret; + } + --left; + } + return strref(); +} + // find the character d outside of a quote int strref::find_quoted(char d) const { @@ -4086,6 +4153,15 @@ strref strref::split_token( char c ) { return r; } +strref strref::split_token_track_parens(char c) +{ + int t = find_skip_parens(c); + if (t < 0) t = (int)length; + strref r = strref(string, strl_t(t)); + *this += t + 1; + return r; +} + strref strref::split_token_any( const strref chars ) { strref r; int t = find_any_char_of( chars ); @@ -4096,6 +4172,13 @@ strref strref::split_token_any( const strref chars ) return r; } +strref strref::split_token_trim_track_parens(char c) +{ + strref r = split_token_track_parens(c); + skip_whitespace(); + r.trim_whitespace(); + return r; +} strref strref::split_token_trim( char c ) { strref r = split_token( c ); skip_whitespace(); @@ -4140,6 +4223,19 @@ strref strref::split_label() { return r; } +strref strref::split_num() { + skip_whitespace(); + strref r( string, 0 ); + while( length && *string >= '0' && *string <= '9' ) { + r.length++; + string++; + length--; + } + skip_whitespace(); + return r; +} + + // split string based on common programming tokens (words, quotes, scopes, numbers) strref strref::split_lang() { @@ -4177,18 +4273,22 @@ strref strref::get_snippet( strl_t pos ) } // grab a block of text starting with (, [ or { and end with the corresponding number of ), ] or } -strref strref::scoped_block_skip() +strref strref::scoped_block_skip(bool quotes) { char scope = get_first(); if (length && (scope == '(' || scope == '[' || scope == '{')) { char close = scope=='(' ? ')' : (scope=='[' ? 
']' : '}'); const char *scan = string; + bool inQuote = false; strl_t depth = 0; strl_t left = length; do { char c = *scan++; left--; - if (c==scope) + if( inQuote ) { + if( c == '"' ) { inQuote = false; } + } else if( quotes && c=='"' ) { inQuote = true; } + else if( c == scope ) depth++; else if (c==close) depth--; @@ -4236,6 +4336,31 @@ strl_t strref::scoped_block_comment_len() return 0; } +strl_t strref::scoped_block_utf8_comment_len() +{ + strref str = *this; + size_t scope = str.pop_utf8(); + if( length && ( scope == '(' || scope == '[' || scope == '{' || scope == '<' ) ) + { + char close = scope == '<' ? '>' : ( scope == '(' ? ')' : ( scope == '[' ? ']' : '}' ) ); + strl_t depth = 1; + do { + size_t c = str.pop_utf8(); + if( c == '/' && str.get_len() && ( str[0] == '/' || str[0] == '*' ) ) { // the char right after '/' decides whether a comment opens + c = str.pop_utf8(); + strl_t skip = c == '/' ? str.len_next_line() : str.find_or_full( "*/" ); + str += skip; + } + else if( c == scope ) + depth++; + else if( c == close ) + depth--; + } while( depth && str.valid() ); + if( !depth ) + return strl_t( str.string - string ); + } + return 0; +} // return the current line of text and move this string ahead to the next. @@ -4513,6 +4638,25 @@ strl_t _strmod_format_insert(char *string, strl_t length, strl_t cap, strl_t pos return length; } +strl_t _strmod_append_num( char* str, strl_t left, uint32_t num, strl_t size, uint32_t radix ) +{ + if( radix < 2 ) { return 0; } // no positional digits for radix 0/1; also avoids % 0 and an endless digit count + strl_t div = 1, added = 0; + if( !size ) { + uint64_t mul = 1; // 64 bits so mul cannot wrap to 0 before it exceeds a 32 bit num + do { ++size; mul *= radix; } while( mul <= num ); + } + for( strl_t n = 1; n<size; ++n ) { div *= radix; } + for( strl_t a = 0; a<size && left; ++a ) { + char v = (num / div) % radix + '0'; + div /= radix; + *str++ = v <= '9' ? 
v : (v + 'a' - '0' - 10); + --left; + ++added; + } + return added; +} + // remove all instances of a character from a string strl_t _strmod_remove(char *string, strl_t length, char a) { diff --git a/test/Test65816_OpCodes.s b/test/Test65816_OpCodes.s index f924b11..2cd242e 100644 --- a/test/Test65816_OpCodes.s +++ b/test/Test65816_OpCodes.s @@ -278,7 +278,7 @@ TestOpcodes: mvp $21,$20 mvn $21,$20 pea $2120 - pei ($21) + pei $21 per $2120 rep $21 rep #$21 diff --git a/test/ca65directive.s b/test/ca65directive.s new file mode 100644 index 0000000..d749dc8 --- /dev/null +++ b/test/ca65directive.s @@ -0,0 +1,107 @@ +; TEST CODE FROM EXOMIZER +cpu 65816 +.org $52000 + +.REPT 7 + dc.b rept +.ENDR + +eval Checking defined function, Should be 0: .defined(test_stack) +test_stack = 0 +eval Checking referenced function, Should be 0: .referenced(test_stack) +eval Checking defined function, Should be 1: .defined(test_stack) +PUSH test_stack +eval Checking referenced function, Should be 1: .referenced(test_stack) +test_stack = 10 +eval Push Before Pull: test_stack +PULL test_stack +eval Pull original: test_stack + +eval Checking symbol is not const (0): .const(test_stack) +const ConstAddress = $1000 +eval Checking symbol is const (1): .const(ConstAddress) + +eval This should be blank (1): .blank() +eval This should be blank (1): .blank({}) +eval This should be not be blank (0): .blank({monkeys}) + +.ifconst test_stack +eval Checking ifconst with non-const symbol, should not print: +.endif + +.ifconst ConstAddress +eval Checking ifconst with const symbol, this should print: +.endif + +struct MyStruct { + word addr + byte value +} + +eval Size of MyStruct (3): .sizeof(MyStruct) + +DISP_BRIGHTNESS_MASK = $f0 +DISP_BLANKING_SHIFT = 7 + +.function inidisp(blanking, brightness) (<(((~blanking & 1) << DISP_BLANKING_SHIFT) | (brightness & ~DISP_BRIGHTNESS_MASK))) + +eval Function test, should be (<(((~7&1)<<7)|(12&~$f0) = <((0<<7)|(c&f)) = $c: inidisp(7, 12) + +zp_len_lo = $a7 
+zp_len_hi = $a8 + +zp_src_lo = $ae +zp_src_hi = zp_src_lo + 1 + +zp_bits_hi = $fc + +zp_bitbuf = $fd +zp_dest_lo = zp_bitbuf + 1 ; dest addr lo +zp_dest_hi = zp_bitbuf + 2 ; dest addr hi + +.MACRO mac_refill_bits + pha + jsr get_crunched_byte + rol + sta zp_bitbuf + pla +.ENDMACRO +.MACRO mac_get_bits +.SCOPE + adc #$80 ; needs c=0, affects v + asl + bpl gb_skip +gb_next: + asl zp_bitbuf + bne gb_ok + mac_refill_bits +gb_ok: + rol + bmi gb_next +gb_skip: + bvc skip +gb_get_hi: + sec + sta zp_bits_hi + jsr get_crunched_byte +skip: +.ENDSCOPE +.ENDMACRO + + +.ifdef UNDEFINED_SYMBOL + dc.b -1 ; should not be assembled + error 1 +.else + dc.b 1 ; should be assembled +.endif + +const CONSTANT = 32 + +.eval CONSTANT + + mac_get_bits + mac_get_bits + +get_crunched_byte: + rts \ No newline at end of file diff --git a/test/unittest.bat b/test/unittest.bat index 5477e85..eedf75f 100644 --- a/test/unittest.bat +++ b/test/unittest.bat @@ -85,6 +85,16 @@ echo Merlin LUP test failed goto exit :merlup_pass +echo CA65 directives Test >>results\unittest.txt +..\bin\x64\x65.exe ca65directive.s -lst -endm >>results\unittest.txt +if %errorlevel% GTR 0 goto ca65_fail +rem check data here when relevant +if %errorlevel% EQU 0 goto ca65_pass +:ca65_fail +echo CA65 directives failed +goto exit +:ca65_pass + rem REVIEW MACROS! 
rem echo x65macro.i Test >>results\unittest.txt rem echo --------------- >>results\unittest.txt diff --git a/x65.cpp b/x65.cpp index e02a9f8..1d73a49 100644 --- a/x65.cpp +++ b/x65.cpp @@ -39,6 +39,31 @@ #include <stdio.h> #include <stdlib.h> #include <inttypes.h> +#include <assert.h> + +// Command line arguments +static const strref cmdarg_listing("lst"); // -lst / -lst=(file.lst) : generate disassembly text from result(file or stdout) +static const strref cmdarg_tass_listing("tsl"); // -tsl=(file) : generate listing file in TASS style +static const strref cmdarg_tass_labels("tl"); // -tl=(file) : generate labels in TASS style +static const strref cmdarg_allinstr("opcodes"); // -opcodes / -opcodes=(file.s) : dump all available opcodes(file or stdout) +static const strref cmdarg_endmacro("endm"); // -endm : macros end with endm or endmacro instead of scoped('{' - '}') +static const strref cmdarg_cpu("cpu"); // declare CPU type, use with argument: -cpu=6502/65c02/65c02wdc/65816 +static const strref cmdarg_acc("acc"); // [65816] -acc=8/16: set the accumulator mode for 65816 at start, default is 8 bits +static const strref cmdarg_xy("xy"); // [65816] -xy=8/16: set the index register mode for 65816 at start, default is 8 bits +static const strref cmdarg_org("org"); // -org = $2000 or - org = 4096: force fixed address code at address +static const strref cmdarg_kickasm("kickasm"); // -kickasm: use Kick Assembler syntax +static const strref cmdarg_merlin("merlin"); // -merlin: use Merlin syntax +static const strref cmdarg_c64("c64"); // -c64 : Include load address(default) +static const strref cmdarg_a2b("a2b"); // -a2b : Apple II Dos 3.3 Binary +static const strref cmdarg_bin("bin"); // -bin : Produce raw binary\n" +static const strref cmdarg_a2p("a2p"); // -a2p : Apple II ProDos Binary +static const strref cmdarg_a2o("a2o"); // -a2o : Apple II GS OS executable (relocatable) +static const strref cmdarg_mrg("mrg"); // -mrg : Force merge all sections (use with -a2o) 
+static const strref cmdarg_sect("sect"); // -sect: display sections loaded and built +static const strref cmdarg_sym("sym"); // -sym (file.sym) : generate symbol file +static const strref cmdarg_obj("obj"); // -obj (file.x65) : generate object file for later linking +static const strref cmdarg_vice("vice"); // -vice (file.vs) : export a vice symbol file +static const strref cmdarg_xrefimp("xrefimp"); // -xrefimp : import directive means xref, not include/incbin // if the number of resolved labels exceed this in one late eval then skip // checking for relevance and just eval all unresolved expressions. @@ -114,11 +139,15 @@ enum StatusCode { ERROR_DS_MUST_EVALUATE_IMMEDIATELY, ERROR_NOT_AN_X65_OBJECT_FILE, ERROR_COULD_NOT_INCLUDE_FILE, + ERROR_PULL_WITHOUT_PUSH, ERROR_USER, ERROR_STOP_PROCESSING_ON_HIGHER, // errors greater than this will stop execution ERROR_BRANCH_OUT_OF_RANGE, + ERROR_INCOMPLETE_FUNCTION, + ERROR_FUNCTION_DID_NOT_RESOLVE, + ERROR_EXPRESSION_RECURSION, ERROR_TARGET_ADDRESS_MUST_EVALUATE_IMMEDIATELY, ERROR_TOO_DEEP_SCOPE, ERROR_UNBALANCED_SCOPE_CLOSURE, @@ -153,72 +182,76 @@ enum StatusCode { // The following strings are in the same order as StatusCode const char *aStatusStrings[STATUSCODE_COUNT] = { - "ok", - "relative section", - "not ready", - "XREF dependent result", - "name is not a struct", - "Exporting binary without code or data section", - "Undefined code", - "Unexpected character in expression", - "Too many values in expression", - "Too many operators in expression", - "Unbalanced right parenthesis in expression", - "Expression operation", - "Expression missing values", - "Instruction can not be zero page", - "Invalid addressing mode for instruction", - "Branch out of range", - "Internal label organization mishap", - "Bad addressing mode", - "Unexpected character in addressing mode", - "Unexpected label assignment format", - "Changing value of label that is constant", - "Out of labels in pool", - "Internal label pool release confusion", 
- "Label pool range evaluation failed", - "Label pool was redeclared within its scope", - "Pool label already defined", - "Struct already defined", - "Referenced struct not found", - "Declare constant type not recognized (dc.?)", - "rept count expression could not be evaluated", - "hex must be followed by an even number of hex numbers", - "DS directive failed to evaluate immediately", - "File is not a valid x65 object file", - "Failed to read include file", - "User invoked error", + "ok", // STATUS_OK, // everything is fine + "relative section", // STATUS_RELATIVE_SECTION, // value is relative to a single section + "not ready", // STATUS_NOT_READY, // label could not be evaluated at this time + "XREF dependent result", // STATUS_XREF_DEPENDENT, // evaluated but relied on an XREF label to do so + "name is not a struct", // STATUS_NOT_STRUCT, // return is not a struct. + "Exporting binary without code or data section", // STATUS_EXPORT_NO_CODE_OR_DATA_SECTION, + "Undefined code", // ERROR_UNDEFINED_CODE = FIRST_ERROR, + "Unexpected character in expression", // ERROR_UNEXPECTED_CHARACTER_IN_EXPRESSION, + "Too many values in expression", // ERROR_TOO_MANY_VALUES_IN_EXPRESSION, + "Too many operators in expression", // ERROR_TOO_MANY_OPERATORS_IN_EXPRESSION, + "Unbalanced right parenthesis in expression", // ERROR_UNBALANCED_RIGHT_PARENTHESIS, + "Expression operation", // ERROR_EXPRESSION_OPERATION, + "Expression missing values", // ERROR_EXPRESSION_MISSING_VALUES, + "Instruction can not be zero page", // ERROR_INSTRUCTION_NOT_ZP, + "Invalid addressing mode for instruction", // ERROR_INVALID_ADDRESSING_MODE, + "Internal label organization mishap", // ERROR_LABEL_MISPLACED_INTERNAL, + "Bad addressing mode", // ERROR_BAD_ADDRESSING_MODE, + "Unexpected character in addressing mode", // ERROR_UNEXPECTED_CHARACTER_IN_ADDRESSING_MODE, + "Unexpected label assignment format", // ERROR_UNEXPECTED_LABEL_ASSIGMENT_FORMAT, + "Changing value of label that is constant", // 
ERROR_MODIFYING_CONST_LABEL, + "Out of labels in pool", // ERROR_OUT_OF_LABELS_IN_POOL, + "Internal label pool release confusion", // ERROR_INTERNAL_LABEL_POOL_ERROR, + "Label pool range evaluation failed", // ERROR_POOL_RANGE_EXPRESSION_EVAL, + "Label pool was redeclared within its scope", // ERROR_LABEL_POOL_REDECLARATION, + "Pool label already defined", // ERROR_POOL_LABEL_ALREADY_DEFINED, + "Struct already defined", // ERROR_STRUCT_ALREADY_DEFINED, + "Referenced struct not found", // ERROR_REFERENCED_STRUCT_NOT_FOUND, + "Declare constant type not recognized (dc.?)", // ERROR_BAD_TYPE_FOR_DECLARE_CONSTANT, + "rept count expression could not be evaluated", // ERROR_REPT_COUNT_EXPRESSION, + "hex must be followed by an even number of hex numbers", // ERROR_HEX_WITH_ODD_NIBBLE_COUNT, + "DS directive failed to evaluate immediately", // ERROR_DS_MUST_EVALUATE_IMMEDIATELY, + "File is not a valid x65 object file", // ERROR_NOT_AN_X65_OBJECT_FILE, + "Failed to read include file", // ERROR_COULD_NOT_INCLUDE_FILE, + "Using symbol PULL without first using a PUSH", // ERROR_PULL_WITHOUT_PUSH + "User invoked error", // ERROR_USER, - "Errors after this point will stop execution", + "Errors after this point will stop execution", // ERROR_STOP_PROCESSING_ON_HIGHER, // errors greater than this will stop execution - "Target address must evaluate immediately for this operation", - "Scoping is too deep", - "Unbalanced scope closure", - "Unexpected macro formatting", - "Align must evaluate immediately", - "Out of memory for macro expansion", - "Problem with macro argument", - "Conditional could not be resolved", - "#endif encountered outside conditional block", - "#else or #elif outside conditional block", - "Struct can not be assembled as is", - "Enum can not be assembled as is", - "Conditional assembly (#if/#ifdef) was not terminated in file or macro", - "rept is missing a scope ('{ ... 
}')", - "Link can only be used in a fixed address section", - "Link can not be used in dummy sections", - "Can not process this line", - "Unexpected target offset for reloc or late evaluation", - "CPU is not supported", - "Can't append sections", - "Zero page / Direct page section out of range", - "Attempting to assign an address to a non-existent section", - "Attempting to assign an address to a fixed address section", - "Can not link a zero page section with a non-zp section", - "Out of memory while building", - "Can not write to file", - "Assembly aborted", - "Condition too deeply nested", + "Branch is out of range", // ERROR_BRANCH_OUT_OF_RANGE, + "Function declaration is missing name or expression", // ERROR_INCOMPLETE_FUNCTION, + "Function could not resolve the expression", // ERROR_FUNCTION_DID_NOT_RESOLVE + "Expression evaluateion recursion too deep", // ERROR_EXPRESSION_RECURSION + "Target address must evaluate immediately for this operation", // ERROR_TARGET_ADDRESS_MUST_EVALUATE_IMMEDIATELY, + "Scoping is too deep", // ERROR_TOO_DEEP_SCOPE, + "Unbalanced scope closure", // ERROR_UNBALANCED_SCOPE_CLOSURE, + "Unexpected macro formatting", // ERROR_BAD_MACRO_FORMAT, + "Align must evaluate immediately", // ERROR_ALIGN_MUST_EVALUATE_IMMEDIATELY, + "Out of memory for macro expansion", // ERROR_OUT_OF_MEMORY_FOR_MACRO_EXPANSION, + "Problem with macro argument", // ERROR_MACRO_ARGUMENT, + "Conditional could not be resolved", // ERROR_CONDITION_COULD_NOT_BE_RESOLVED, + "#endif encountered outside conditional block", // ERROR_ENDIF_WITHOUT_CONDITION, + "#else or #elif outside conditional block", // ERROR_ELSE_WITHOUT_IF, + "Struct can not be assembled as is", // ERROR_STRUCT_CANT_BE_ASSEMBLED, + "Enum can not be assembled as is", // ERROR_ENUM_CANT_BE_ASSEMBLED, + "Conditional assembly (#if/#ifdef) was not terminated in file or macro", // ERROR_UNTERMINATED_CONDITION, + "rept is missing a scope ('{ ... 
}')", // ERROR_REPT_MISSING_SCOPE, + "Link can only be used in a fixed address section", // ERROR_LINKER_MUST_BE_IN_FIXED_ADDRESS_SECTION, + "Link can not be used in dummy sections", // ERROR_LINKER_CANT_LINK_TO_DUMMY_SECTION, + "Can not process this line", // ERROR_UNABLE_TO_PROCESS, + "Unexpected target offset for reloc or late evaluation", // ERROR_SECTION_TARGET_OFFSET_OUT_OF_RANGE, + "CPU is not supported", // ERROR_CPU_NOT_SUPPORTED, + "Can't append sections", // ERROR_CANT_APPEND_SECTION_TO_TARGET, + "Zero page / Direct page section out of range", // ERROR_ZEROPAGE_SECTION_OUT_OF_RANGE, + "Attempting to assign an address to a non-existent section", // ERROR_NOT_A_SECTION, + "Attempting to assign an address to a fixed address section", // ERROR_CANT_REASSIGN_FIXED_SECTION, + "Can not link a zero page section with a non-zp section", // ERROR_CANT_LINK_ZP_AND_NON_ZP, + "Out of memory while building", // ERROR_OUT_OF_MEMORY, + "Can not write to file", // ERROR_CANT_WRITE_TO_FILE, + "Assembly aborted", // ERROR_ABORTED, + "Condition too deeply nested", // ERROR_CONDITION_TOO_NESTED, }; // Assembler directives @@ -246,11 +279,16 @@ enum AssemblerDirective { AD_CONST, // CONST: Prevent a label from mutating during assemble AD_LABEL, // LABEL: Create a mutable label (optional) AD_STRING, // STRING: Declare a string symbol + AD_FUNCTION, // FUNCTION: Declare a user defined function AD_UNDEF, // UNDEF: remove a string or a label AD_INCSYM, // INCSYM: Reference labels from another assemble AD_LABPOOL, // POOL: Create a pool of addresses to assign as labels dynamically AD_IF, // #IF: Conditional assembly follows based on expression AD_IFDEF, // #IFDEF: Conditional assembly follows based on label defined or not + AD_IFNDEF, // #IFNDEF: Conditional assembly inverted from IFDEF + AD_IFCONST, // #IFCONST: Conditional assembly follows based on label being const + AD_IFBLANK, // #IFBLANK: Conditional assembly follows based on rest of line empty + AD_IFNBLANK, // #IFNBLANK: 
Conditional assembly follows based on rest of line not empty AD_ELSE, // #ELSE: Otherwise assembly AD_ELIF, // #ELIF: Otherwise conditional assembly follows AD_ENDIF, // #ENDIF: End a block of #IF/#IFDEF @@ -268,6 +306,10 @@ enum AssemblerDirective { AD_LST, // LST: Controls symbol listing AD_DUMMY, // DUM: Start a dummy section (increment address but don't write anything???) AD_DUMMY_END, // DEND: End a dummy section + AD_SCOPE, // SCOPE: Begin ca65 style scope + AD_ENDSCOPE, // ENDSCOPE: End ca65 style scope + AD_PUSH, // PUSH: Push the value of a variable symbol on a stack + AD_PULL, // PULL: Pull the value of a variable symbol from its stack, must be pushed first AD_DS, // DS: Define section, zero out # bytes or rewind the address if negative AD_USR, // USR: MERLIN user defined pseudo op, runs some code at a hard coded address on apple II, on PC does nothing. AD_SAV, // SAV: MERLIN version of export but contains full filename, not an appendable name @@ -282,6 +324,16 @@ enum AssemblerDirective { AD_ERROR, }; +// evaluation functions +enum EvalFuncs { + EF_DEFINED, // DEFINED(label) 1 if label is defined + EF_REFERENCED, // REFERENCED(label) 1 if label has been referenced in this file + EF_BLANK, // BLANK() 1 if the contents within the parenthesis is empty + EF_CONST, // CONST(label) 1 if label is a const label + EF_SIZEOF, // SIZEOF(struct) returns size of structs + EF_SIN, // TRIGSIN(index, period, amplitude) +}; + // Operators are either instructions or directives enum OperationType { OT_NONE, @@ -312,12 +364,13 @@ enum EvalOperator { EVOP_EOR, // r, ^ EVOP_SHL, // s, << EVOP_SHR, // t, >> - EVOP_NEG, // u, negate value - EVOP_STP, // v, Unexpected input, should stop and evaluate what we have - EVOP_NRY, // w, Not ready yet - EVOP_XRF, // x, value from XREF label - EVOP_EXP, // y, sub expression - EVOP_ERR, // z, Error + EVOP_NOT, // u, ~ + EVOP_NEG, // v, negate value + EVOP_STP, // w, Unexpected input, should stop and evaluate what we have + EVOP_NRY, // x, Not 
ready yet + EVOP_XRF, // y, value from XREF label + EVOP_EXP, // z, sub expression + EVOP_ERR, // z+1, Error }; // Opcode encoding @@ -958,12 +1011,17 @@ DirectiveName aDirectiveNames[] { { "CONST", AD_CONST }, { "LABEL", AD_LABEL }, { "STRING", AD_STRING }, + { "FUNCTION", AD_FUNCTION }, { "UNDEF", AD_UNDEF }, { "INCSYM", AD_INCSYM }, { "LABPOOL", AD_LABPOOL }, { "POOL", AD_LABPOOL }, { "IF", AD_IF }, { "IFDEF", AD_IFDEF }, + { "IFNDEF", AD_IFNDEF }, + { "IFCONST", AD_IFCONST }, + { "IFBLANK", AD_IFBLANK }, // #IFBLANK: Conditional assembly follows based on rest of line empty + { "IFNBLANK", AD_IFNBLANK }, // #IFNBLANK: Conditional assembly follows based on rest of line not empty { "ELSE", AD_ELSE }, { "ELIF", AD_ELIF }, { "ENDIF", AD_ENDIF }, @@ -981,6 +1039,11 @@ DirectiveName aDirectiveNames[] { { "DUMMY", AD_DUMMY }, { "DUMMY_END", AD_DUMMY_END }, { "DS", AD_DS }, // Define space + { "RES", AD_DS }, // Reserve space + { "SCOPE", AD_SCOPE }, // SCOPE: Begin ca65 style scope + { "ENDSCOPE", AD_ENDSCOPE },// ENDSCOPE: End ca65 style scope + { "PUSH", AD_PUSH }, + { "PULL", AD_PULL }, { "ABORT", AD_ABORT }, { "ERR", AD_ABORT }, // DASM version of ABORT }; @@ -1021,8 +1084,24 @@ DirectiveName aDirectiveNamesMerlin[] { { "CYC", AD_CYC }, // MERLIN: Start and stop cycle counter }; +struct EvalFuncNames { + const char* name; + EvalFuncs function; +}; + +EvalFuncNames aEvalFunctions[] = { + { "DEFINED", EF_DEFINED }, // DEFINED(label) 1 if label is defined + { "DEF", EF_DEFINED }, // DEF(label) short alias of DEFINED(label) + { "REFERENCED", EF_REFERENCED }, // REFERENCED(label) 1 if label has been referenced in this file + { "BLANK", EF_BLANK }, // BLANK() 1 if the contents within the parenthesis is empty + { "CONST", EF_CONST }, // CONST(label) 1 if label is a const label + { "SIZEOF", EF_SIZEOF}, // SIZEOF(struct) returns size of structs + { "TRIGSIN", EF_SIN }, // TRIGSIN(index, period, amplitude) +}; + static const int nDirectiveNames = sizeof(aDirectiveNames) / 
sizeof(aDirectiveNames[0]); static const int nDirectiveNamesMerlin = sizeof(aDirectiveNamesMerlin) / sizeof(aDirectiveNamesMerlin[0]); +static const int nEvalFuncs = sizeof(aEvalFunctions) / sizeof(aEvalFunctions[0]); // Binary search over an array of unsigned integers, may contain multiple instances of same key uint32_t FindLabelIndex(uint32_t hash, uint32_t *table, uint32_t count) @@ -1129,6 +1208,176 @@ public: } }; + + +template< class KeyType, class ValueType, class CountType = size_t > struct HashTable { + CountType size, maxSteps, used; + KeyType* keys; + ValueType* values; + + static CountType HashFunction(KeyType v) { return CountType(((v + (v >> 27) + (v << 29)) + 14695981039346656037ULL) * 1099511628211ULL); } // 64 bit FNV offset basis and prime; the basis exceeds LLONG_MAX so it needs an unsigned suffix + static CountType HashIndex(KeyType hash, CountType tableSize) { return hash & (tableSize - 1); } + static CountType GetNextIndex(KeyType hash, CountType tableSize) { return (hash + 1) & (tableSize - 1); } + static CountType KeyToIndex(KeyType key, CountType tableSize) { return HashIndex(HashFunction(key), tableSize); } + static CountType FindKeyIndex(KeyType hash, CountType hashTableSize, KeyType* hashKeys, CountType maxKeySteps) { + CountType index = KeyToIndex(hash, hashTableSize); + while (hashKeys) { + KeyType key = hashKeys[index]; + if (!key || key == hash) { return index; } + index = GetNextIndex(index, hashTableSize); + if (!maxKeySteps--) { break; } + } + return index; + } + + CountType KeyToIndex(KeyType key) { return KeyToIndex(key, size); } + + CountType InsertKey(KeyType key, CountType index) { + const KeyType* hashKeys = keys; + CountType currSize = size; + CountType insertSteps = 0; + while (KeyType k = hashKeys[index]) { + if (k == key) { return index; } // key already exists + CountType kfirst = KeyToIndex(k, currSize); + CountType ksteps = kfirst > index ? 
(currSize + index - kfirst) : (index - kfirst); + if (insertSteps > ksteps) { return index; } + index = GetNextIndex(index, size); + ++insertSteps; + } + return index; + } + + CountType FindKeyIndex(KeyType hash) const { return FindKeyIndex(hash, size, keys, maxSteps); } + + CountType Steps(KeyType hash) { + CountType slot = KeyToIndex(hash, size); + CountType numSteps = 0; + while (keys[slot] && keys[slot] != hash) { + ++numSteps; + slot = GetNextIndex(slot, size); + } + return numSteps; + } + + void UpdateSteps(CountType first, CountType slot) { + CountType steps = slot > first ? (slot - first) : (size + slot - first); + if (steps > maxSteps) { maxSteps = steps; } + } + + ValueType* InsertFitted(KeyType key) { + assert(key); // key may not be 0 + CountType first = KeyToIndex(key); + CountType slot = InsertKey(key, first); + UpdateSteps(first, slot); + if (keys[slot]) { + if (keys[slot] == key) { return &values[slot]; } else { + KeyType prvKey = keys[slot]; + ValueType prev_value = values[slot]; + keys[slot] = key; + for (;; ) { + CountType prev_first = KeyToIndex(prvKey); + CountType slotRH = InsertKey(prvKey, prev_first); + UpdateSteps(prev_first, slotRH); + if (keys[slotRH] && keys[slotRH] != prvKey) { + KeyType tmpKey = keys[slotRH]; + keys[slotRH] = prvKey; + prvKey = tmpKey; + ValueType temp_value = values[slotRH]; + values[slotRH] = prev_value; + prev_value = temp_value; + } else { + keys[slotRH] = prvKey; + values[slotRH] = prev_value; + ++used; + return &values[slot]; + } + } + } + } + keys[slot] = key; + ++used; + return &values[slot]; + } + + HashTable() { Reset(); } + + void Reset() { + used = 0; + size = 0; + maxSteps = 0; + keys = nullptr; + values = nullptr; + } + + ~HashTable() { Clear(); } + + void Clear() { + if (values) { + for (CountType i = 0, n = size; i < n; ++i) { + values[i].~ValueType(); + } + free(values); + } + if (keys) { free(keys); } + Reset(); + } + + CountType GetUsed() const { return used; } + bool TableMax() const { return used 
&& (used << 4) >= (size * 13); } + + void Grow() { + KeyType *prevKeys = keys; + ValueType *prevValues = values; + CountType prevSize = size, newSize = prevSize ? (prevSize << 1) : 64; + size = newSize; + keys = (KeyType*)calloc(1, newSize * sizeof(KeyType)); + values = (ValueType*)calloc(1, newSize * sizeof(ValueType)); + maxSteps = 0; + for (CountType i = 0; i < newSize; ++i) { new (values + i) ValueType; } + if (used) { + used = 0; + for (CountType i = 0; i < prevSize; i++) { + if (KeyType key = prevKeys[i]) { *InsertFitted(key) = prevValues[i]; } + } + } + if (prevKeys) { free(prevKeys); } + if (prevValues) { + for (CountType i = 0; i != prevSize; ++i) { prevValues[i].~ValueType(); } + free(prevValues); + } + } + + ValueType* InsertKey(KeyType key) + { + if (!size || TableMax()) { Grow(); } + return InsertFitted(key); + } + + ValueType* InsertKeyValue(KeyType key, ValueType& value) + { + ValueType* value_ptr = InsertKey(key); + *value_ptr = value; + return value_ptr; + } + + bool KeyExists(KeyType key) + { + return size && key && keys[FindKeyIndex(key)] == key; + } + + ValueType* GetValue(KeyType key) + { + if (size && key) { + CountType slot = FindKeyIndex(key); + if (keys[slot] == key) { + return &values[slot]; + } + } + return nullptr; + } +}; + + // relocs are cheaper than full expressions and work with // local labels for relative sections which would otherwise // be out of scope at link time. @@ -1195,7 +1444,6 @@ public: strovl string_value; // string contents if modified, initialized to null string StatusCode Append(strref append); - StatusCode ParseLine(strref line); strref get() { return string_value.valid() ? 
string_value.get_strref() : string_const; } void clear() { @@ -1327,6 +1575,7 @@ public: bool constant; // the value of this label can not change bool external; // this label is globally accessible bool reference; // this label is accessed from external and can't be used for evaluation locally + bool referenced; // this label has been found via GetLabel and can be assumed to be referenced for some purpose } Label; @@ -1380,10 +1629,10 @@ typedef struct sLabelPool { strref pool_name; int16_t numRanges; // normally 1 range, support multiple for ease of use int16_t depth; // Required for scope closure cleanup - uint16_t start; - uint16_t end; - uint16_t scopeUsed[MAX_SCOPE_DEPTH][2]; // last address assigned + scope depth - StatusCode Reserve(uint16_t numBytes, uint16_t &ret_addr, uint16_t scope); + uint32_t start; + uint32_t end; + uint32_t scopeUsed[MAX_SCOPE_DEPTH][2]; // last address assigned + scope depth + StatusCode Reserve(uint32_t numBytes, uint32_t &ret_addr, uint16_t scope); void ExitScope(uint16_t scope); } LabelPool; @@ -1417,10 +1666,14 @@ struct EvalContext { int relative_section; // return can be relative to this section int file_ref; // can access private label from this file or -1 int rept_cnt; // current repeat counter - EvalContext() {} + int recursion; // track recursion depth + StatusCode internalErr; // if an error occured during an internal stage of evaluation + EvalContext() : pc(0), scope_pc(0), scope_end_pc(0), scope_depth(0), relative_section(-1), + file_ref(-1), rept_cnt(0), recursion(0), internalErr(STATUS_OK) {} EvalContext(int _pc, int _scope, int _close, int _sect, int _rept_cnt) : pc(_pc), scope_pc(_scope), scope_end_pc(_close), scope_depth(-1), - relative_section(_sect), file_ref(-1), rept_cnt(_rept_cnt) {} + relative_section(_sect), file_ref(-1), rept_cnt(_rept_cnt), + recursion(0), internalErr(STATUS_OK) {} }; // Source context is current file (include file, etc.) or current macro. 
@@ -1465,6 +1718,31 @@ public: bool empty() const { return stack.size() == 0; } }; +// Support for the PULL and PUSH directives +typedef union { int value; char* string; } ValueOrString; +typedef std::vector < ValueOrString > SymbolStack; +class SymbolStackTable : public HashTable< uint64_t, SymbolStack* > { +public: + void PushSymbol(Label* symbol); + StatusCode PullSymbol(Label* symbol); + void PushSymbol(StringSymbol* string); + StatusCode PullSymbol(StringSymbol* string); + ~SymbolStackTable(); +}; + +// user declared functions +struct UserFunction { + const char* name; + const char* params; + const char* expression; +}; +class UserFunctionMap : public HashTable<uint64_t, UserFunction*> { +public: + UserFunction *Get(strref name); + StatusCode Add(strref name, strref params, strref expresion); + ~UserFunctionMap(); +}; + // The state of the assembler class Asm { public: @@ -1485,6 +1763,9 @@ public: std::vector<ExtLabels> externals; // external labels organized by object file MapSymbolArray map; + SymbolStackTable symbolStacks; // enable push/pull of symbols + UserFunctionMap userFunctions; // user defined expression functions + // CPU target struct mnem *opcode_table; int opcode_count; @@ -1513,13 +1794,17 @@ public: int scope_depth; int brace_depth; // scope depth defined only by braces, not files + strref export_base_name; // binary output name if available + strref last_label; // most recently defined label for Merlin macro + + // ca65 style scope (for now treat global symbols as local symbols, no outside name lookup) + int directive_scope_depth; + // Eval relative result (only valid if EvalExpression returns STATUS_RELATIVE_SECTION) int lastEvalSection; int lastEvalValue; int8_t lastEvalShift; - strref export_base_name; // binary output name if available - strref last_label; // most recently defined label for Merlin macro int8_t list_flags; // listing flags accumulating for each line bool accumulator_16bit; // 65816 specific software dependent immediate 
mode bool index_reg_16bit; // -"- @@ -1527,6 +1812,7 @@ public: bool error_encountered; // if any error encountered, don't export binary bool list_assembly; // generate assembler listing bool end_macro_directive; // whether to use { } or macro / endmacro for macro scope + bool import_means_xref; // Convert source to binary void Assemble(strref source, strref filename, bool obj_target); @@ -1595,18 +1881,24 @@ public: StatusCode EvalStruct(strref name, int &value); StatusCode BuildEnum(strref name, strref declaration); + // determine a value from a user function with given parameters + int EvalUserFunction(UserFunction* user, strref params, EvalContext& etx); + + // Check if function is a valid function and if so evaluate the expression + bool EvalFunction(strref function, strref &expression, EvalContext& etx, int &value); + // Calculate a value based on an expression. EvalOperator RPNToken_Merlin(strref &expression, const struct EvalContext &etx, EvalOperator prev_op, int16_t §ion, int &value); - EvalOperator RPNToken(strref &expression, const struct EvalContext &etx, + EvalOperator RPNToken(strref &expression, EvalContext &etx, EvalOperator prev_op, int16_t §ion, int &value, strref &subexp); - StatusCode EvalExpression(strref expression, const struct EvalContext &etx, int &result); + StatusCode EvalExpression(strref expression, struct EvalContext &etx, int &result); char* PartialEval( strref expression ); void SetEvalCtxDefaults( struct EvalContext &etx ); int ReptCnt() const; // Access labels - Label* GetLabel(strref label); + Label * GetLabel(strref label, bool reference_check = false); Label* GetLabel(strref label, int file_ref); Label* AddLabel(uint32_t hash); bool MatchXDEF(strref label); @@ -1642,6 +1934,7 @@ public: StatusCode Directive_Rept(strref line); StatusCode Directive_Macro(strref line); StatusCode Directive_String(strref line); + StatusCode Directive_Function(strref line); StatusCode Directive_Undef(strref line); StatusCode Directive_Include(strref 
line); StatusCode Directive_Incbin(strref line, int skip=0, int len=0); @@ -1702,6 +1995,122 @@ public: Cleanup(); localLabels.reserve(256); loadedData.reserve(16); lateEval.reserve(64); } }; + +void SymbolStackTable::PushSymbol(Label* symbol) +{ + uint64_t key = symbol->label_name.fnv1a_64(symbol->pool_name.fnv1a_64()); + SymbolStack** ppStack = InsertKey(key); // ppStack will exist but contains a pointer that may not exist + if (!*ppStack) { *ppStack = new SymbolStack; } + ValueOrString val; + val.value = symbol->value; + (*ppStack)->push_back(val); +} + +StatusCode SymbolStackTable::PullSymbol(Label* symbol) +{ + uint64_t key = symbol->label_name.fnv1a_64(symbol->pool_name.fnv1a_64()); + SymbolStack** ppStack = GetValue(key); + if (!ppStack || !(*ppStack)->size()) { return ERROR_PULL_WITHOUT_PUSH; } + symbol->value = (**ppStack)[(*ppStack)->size() - 1].value; + (*ppStack)->pop_back(); + return STATUS_OK; +} + +void SymbolStackTable::PushSymbol(StringSymbol* string) +{ + uint64_t key = string->string_name.fnv1a_64(); + SymbolStack** ppStack = InsertKey(key); // ppStack will exist but contains a pointer that may not exist + if (!*ppStack) { *ppStack = new SymbolStack; } + ValueOrString val; + val.string = nullptr; + if (string->string_value) { + val.string = (char*)malloc(string->string_value.get_len() + 1); + memcpy(val.string, string->string_value.get(), string->string_value.get_len()); + val.string[string->string_value.get_len()] = 0; + } + (*ppStack)->push_back(val); +} + +StatusCode SymbolStackTable::PullSymbol(StringSymbol* string) +{ + uint64_t key = string->string_name.fnv1a_64(); + SymbolStack** ppStack = GetValue(key); + if (!ppStack || !(*ppStack)->size()) { return ERROR_PULL_WITHOUT_PUSH; } + char* str = (**ppStack)[(*ppStack)->size() - 1].string; + if (!str && string->string_value) { + free(string->string_value.charstr()); + string->string_value.invalidate(); + } else { + if (string->string_value.empty() || string->string_value.cap() < (strlen(str) + 
1)) { + if (string->string_value.charstr()) { free(string->string_value.charstr()); } + string->string_value.set_overlay((char*)malloc(strlen(str) + 1), (strl_t)strlen(str) + 1); + } + string->string_value.copy(str); + free(str); + } + (*ppStack)->pop_back(); + return STATUS_OK; +} + +SymbolStackTable::~SymbolStackTable() +{ + for (size_t i = 0; i < size; ++i) { + if (keys[i] && values[i]) { + delete values[i]; + values[i] = nullptr; + keys[i] = 0; + } + } +} + + +UserFunction* UserFunctionMap::Get(strref name) +{ + UserFunction** ret = GetValue(name.fnv1a_64()); + if (ret) { return *ret; } + return nullptr; +} + +StatusCode UserFunctionMap::Add(strref name, strref params, strref expresion) +{ + if (!name || !expresion) { return ERROR_INCOMPLETE_FUNCTION; } + strl_t stringlen = name.get_len() + 1 + (params ? (params.get_len() + 1) : 0) + expresion.get_len() + 1; + UserFunction *func = (UserFunction*)calloc(1, sizeof(UserFunction) + stringlen); + char* strings = (char*)(func + 1); + func->name = strings; + memcpy(strings, name.get(), name.get_len()); + strings[name.get_len()] = 0; + strings += name.get_len() + 1; + if (params) { + func->params = strings; + memcpy(strings, params.get(), params.get_len()); + strings[params.get_len()] = 0; + strings += params.get_len() + 1; + } + func->expression = strings; + memcpy(strings, expresion.get(), expresion.get_len()); + + if (UserFunction** existing = GetValue(name.fnv1a_64())) { + free(*existing); + *existing = func; + } else { + InsertKeyValue(name.fnv1a_64(), func); + } + return STATUS_OK; +} + +UserFunctionMap::~UserFunctionMap() +{ + for (size_t i = 0; i < size; ++i) { + if (keys[i] && values[i]) { + free(values[i]); + values[i] = nullptr; + keys[i] = 0; + } + } +} + + // Clean up work allocations void Asm::Cleanup() { for (std::vector<char*>::iterator i = loadedData.begin(); i != loadedData.end(); ++i) { @@ -1737,9 +2146,11 @@ void Asm::Cleanup() { conditional_depth = 0; conditional_nesting[0] = 0; 
conditional_consumed[0] = false; + directive_scope_depth = 0; error_encountered = false; list_assembly = false; end_macro_directive = false; + import_means_xref = false; accumulator_16bit = false; // default 65816 8 bit immediate mode index_reg_16bit = false; // other CPUs won't be affected. cycle_counter_level = 0; @@ -1865,7 +2276,7 @@ char* Asm::LoadBinary(strref filename, size_t &size) { if (file.get_last()!='/' && file.get_last()!='\\') file.append('/'); file.append(filename); -#ifdef WIN32 +#ifdef _WIN32 file.replace('/', '\\'); #endif ++i; @@ -2792,8 +3203,9 @@ StatusCode Asm::AddMacro(strref macro, strref source_name, strref source_file, s } else { return ERROR_BAD_MACRO_FORMAT; } } else { name = macro.split_range(label_end_char_range); - macro.skip_whitespace(); + while (macro.get_first() == ' ' || macro.get_first() == '\t') { ++macro; } strref left_line = macro.get_line(); + if (left_line.get_first() == ';' || left_line.has_prefix("//")) { left_line.clear(); } left_line.skip_whitespace(); left_line = left_line.before_or_full(';').before_or_full(c_comment); if (left_line && left_line[0]!='(' && left_line[0]!='{') { @@ -2837,7 +3249,10 @@ StatusCode Asm::AddMacro(strref macro, strref source_name, strref source_file, s for (;;) { f = macro.find(endm, f+1); if (f<0) { return ERROR_BAD_MACRO_FORMAT; } - if (f==0||strref::is_ws(macro[f-1])) { break; } + if (f == 0 || strref::is_ws(macro[f - 1]) || macro[f - 1] == '.') { + if (f && macro[f - 1] == '.') { --f; } + break; + } } pMacro->macro = macro.get_substr(0, f); macro += f; @@ -3126,6 +3541,128 @@ StatusCode Asm::EvalStruct(strref name, int &value) { return STATUS_OK; } + +// +// +// USER FUNCTION EVAL +// +// + +int Asm::EvalUserFunction(UserFunction* user, strref params, EvalContext& etx) +{ + strref expression(user->expression); + strref orig_param(user->params); + strref paraiter = orig_param; + strref in_params = params; + int newSize = expression.get_len(); + while (strref param = 
paraiter.split_token(',')) { + strref replace = in_params.split_token(','); + param.trim_whitespace(); + replace.trim_whitespace(); + + if (param.get_len() < replace.get_len()) { + int count = expression.substr_count(param); + newSize += count * (replace.get_len() - param.get_len()); + } + } + + char* subst = (char*)malloc(newSize); + strovl subststr(subst, newSize); + subststr.copy(expression); + while (strref param = orig_param.split_token(',')) { + strref replace = params.split_token(','); + param.trim_whitespace(); + replace.trim_whitespace(); + + subststr.replace_bookend(param, replace, macro_arg_bookend); + } + + int value = 0; + etx.internalErr = EvalExpression(subststr.get_strref(), etx, value); + if (etx.internalErr != STATUS_OK && etx.internalErr < FIRST_ERROR) { + etx.internalErr = ERROR_FUNCTION_DID_NOT_RESOLVE; + } + + free(subst); + + return value; +} + +// +// +// EVAL FUNCTIONS +// +// + +bool Asm::EvalFunction(strref function, strref& expression, EvalContext& etx, int &value) +{ + // all eval functions take a parenthesis with arguments + if (expression.get_first() != '(') { return false; } + + strref expRet = expression; + strref params = expRet.scoped_block_comment_skip(); + params.trim_whitespace(); + if (function.get_first() == '.') { ++function; } + + // look up user defined function + if (UserFunction* user = userFunctions.Get(function)) { + expression = expRet; + value = EvalUserFunction(user, params, etx); + return true; + } + + // built-in function + for (int i = 0; i < nEvalFuncs; ++i) { + if (function.same_str(aEvalFunctions[i].name)) { + switch (aEvalFunctions[i].function) { + case EF_DEFINED: + expression = expRet; + value = GetLabel(params, true) != nullptr ? 
1 : 0; + return true; + case EF_REFERENCED: + expression = expRet; + if (Label* label = GetLabel(params, true)) { value = label->referenced; return true; } + return true; + case EF_BLANK: + expression = expRet; + if (params.get_first() == '{') { params = params.scoped_block_comment_skip(); } + params.trim_whitespace(); + value = params.is_empty(); + return true; + case EF_CONST: + expression = expRet; + if (Label* label = GetLabel(params, true)) { + value = label->constant ? 1 : 0; + } + return true; + case EF_SIZEOF: + { + expression = expRet; + uint32_t hash = params.fnv1a(); + uint32_t index = FindLabelIndex(hash, labelStructs.getKeys(), labelStructs.count()); + value = 0; + while (index < labelStructs.count() && labelStructs.getKey(index) == hash) { + if (params.same_str_case(labelStructs.getValue(index).name)) { + value = (labelStructs.getValues() + index)->size; + break; + } + ++index; + } + return true; + } + + case EF_SIN: + expression = expRet; + value = 0; // TODO: implement trigsin + return true; + } + return false; + } + } + return false; +} + + // // // EXPRESSIONS AND LATE EVALUATION @@ -3210,18 +3747,18 @@ EvalOperator Asm::RPNToken_Merlin(strref &expression, const struct EvalContext & } // Get a single token from most non-apple II assemblers -EvalOperator Asm::RPNToken(strref &exp, const struct EvalContext &etx, EvalOperator prev_op, int16_t §ion, int &value, strref &subexp) +EvalOperator Asm::RPNToken(strref &exp, EvalContext &etx, EvalOperator prev_op, int16_t §ion, int &value, strref &subexp) { char c = exp.get_first(); switch (c) { case '$': ++exp; value = (int)exp.ahextoui_skip(); return EVOP_VAL; case '-': ++exp; return EVOP_SUB; - case '+': ++exp; return EVOP_ADD; - case '*': // asterisk means both multiply and current PC, disambiguate! - ++exp; + case '+': ++exp; return EVOP_ADD; + case '*': ++exp; // asterisk means both multiply and current PC, disambiguate! 
if (exp[0] == '*') return EVOP_STP; // double asterisks indicates comment else if (prev_op == EVOP_VAL || prev_op == EVOP_RPR) return EVOP_MUL; - value = etx.pc; section = int16_t(CurrSection().IsRelativeSection() ? SectionId() : -1); return EVOP_VAL; + value = etx.pc; section = int16_t(CurrSection().IsRelativeSection() ? SectionId() : -1); + return EVOP_VAL; case '/': ++exp; return EVOP_DIV; case '=': if (exp[1] == '=') { exp += 2; return EVOP_EQU; } return EVOP_STP; case '>': if (exp.get_len() >= 2 && exp[1] == '>') { exp += 2; return EVOP_SHR; } @@ -3240,6 +3777,7 @@ EvalOperator Asm::RPNToken(strref &exp, const struct EvalContext &etx, EvalOpera case '^': if (prev_op == EVOP_VAL || prev_op == EVOP_RPR) { ++exp; return EVOP_EOR; } ++exp; return EVOP_BAB; case '&': ++exp; return EVOP_AND; + case '~': ++exp; return EVOP_NOT; case '(': if (prev_op != EVOP_VAL) { ++exp; return EVOP_LPR; } return EVOP_STP; case ')': ++exp; return EVOP_RPR; case ',': @@ -3267,6 +3805,7 @@ EvalOperator Asm::RPNToken(strref &exp, const struct EvalContext &etx, EvalOpera } if (!pLabel && label.same_str("rept")) { value = etx.rept_cnt; return EVOP_VAL; } if (!pLabel) { if (StringSymbol *pStr = GetString(label)) { subexp = pStr->get(); return EVOP_EXP; } } + if (!pLabel) { if (EvalFunction(label, exp, etx, value)) { return EVOP_VAL; } } if (!pLabel || !pLabel->evaluated) return EVOP_NRY; // this label could not be found (yet) value = pLabel->value; section = int16_t(pLabel->section); return pLabel->reference ? 
EVOP_XRF : EVOP_VAL; } @@ -3300,7 +3839,7 @@ static int mul_as_shift(int scalar) { #define MAX_EXPR_STACK 2 -StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, int &result) +StatusCode Asm::EvalExpression(strref expression, EvalContext &etx, int &result) { int numValues = 0; int numOps = 0; @@ -3309,10 +3848,14 @@ StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, char ops[MAX_EVAL_OPER]; // RPN expression int values[MAX_EVAL_VALUES]; // RPN values (in order of RPN EVOP_VAL operations) - int16_t section_ids[MAX_EVAL_SECTIONS]; // local index of each referenced section - int16_t section_val[MAX_EVAL_VALUES] = { 0 }; // each value can be assigned to one section, or -1 if fixed - int16_t num_sections = 0; // number of sections in section_ids (normally 0 or 1, can be up to MAX_EVAL_SECTIONS) + int16_t section_ids[MAX_EVAL_SECTIONS]; // local index of each referenced section + int16_t section_val[MAX_EVAL_VALUES] = { 0 }; // each value can be assigned to one section, or -1 if fixed + int16_t num_sections = 0; // number of sections in section_ids (normally 0 or 1, can be up to MAX_EVAL_SECTIONS) bool xrefd = false; + + // don't allow too deep recursion of this function, this should be rare as it takes a user function or variadic macro to recurse. 
+ if (etx.recursion > 3) { return ERROR_EXPRESSION_RECURSION; } + etx.recursion++; // increment recursion of EvalExpression body values[0] = 0; // Initialize RPN if no expression { int sp = 0; @@ -3332,10 +3875,10 @@ StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, } else { op = RPNToken(expression, etx, prev_op, section, value, subexp); } - if (op==EVOP_ERR) { return ERROR_UNEXPECTED_CHARACTER_IN_EXPRESSION; } - else if (op==EVOP_NRY) { return STATUS_NOT_READY; } + if (op == EVOP_ERR) { etx.recursion--; return ERROR_UNEXPECTED_CHARACTER_IN_EXPRESSION; } + else if (op==EVOP_NRY) { etx.recursion--; return STATUS_NOT_READY; } else if (op == EVOP_EXP) { - if (exp_sp>=MAX_EXPR_STACK) { return ERROR_TOO_MANY_VALUES_IN_EXPRESSION; } + if (exp_sp>=MAX_EXPR_STACK) { etx.recursion--; return ERROR_TOO_MANY_VALUES_IN_EXPRESSION; } expression_stack[exp_sp++] = expression; expression = subexp; op = EVOP_LPR; @@ -3350,7 +3893,7 @@ StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, if (index_section<0) { if (num_sections<=MAX_EVAL_SECTIONS) { section_ids[index_section = num_sections++] = section; - } else { return STATUS_NOT_READY; } + } else { etx.recursion--; return STATUS_NOT_READY; } } } @@ -3367,7 +3910,7 @@ StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, ops[numOps++] = op_stack[sp]; } // check that there actually was a left parenthesis - if (!sp||op_stack[sp-1]!=EVOP_LPR) { return ERROR_UNBALANCED_RIGHT_PARENTHESIS; } + if (!sp||op_stack[sp-1]!=EVOP_LPR) { etx.recursion--; return ERROR_UNBALANCED_RIGHT_PARENTHESIS; } sp--; // skip open paren } else if (op == EVOP_STP) { break; @@ -3390,8 +3933,9 @@ StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, } } // check for out of bounds or unexpected input - if (numValues==MAX_EVAL_VALUES) { return ERROR_TOO_MANY_VALUES_IN_EXPRESSION; } + if (numValues==MAX_EVAL_VALUES) { etx.recursion--; return 
ERROR_TOO_MANY_VALUES_IN_EXPRESSION; } else if (numOps==MAX_EVAL_OPER||sp==MAX_EVAL_OPER) { + etx.recursion--; return ERROR_TOO_MANY_OPERATORS_IN_EXPRESSION; } prev_op = op; @@ -3402,9 +3946,12 @@ StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, ops[numOps++] = op_stack[sp]; } } + etx.recursion--; // recursion only occurs in loop above // Check if dependent on XREF'd symbol - if (xrefd) { return STATUS_XREF_DEPENDENT; } + if (xrefd) { + return STATUS_XREF_DEPENDENT; + } // processing the result RPN will put the completed expression into values[0]. // values is used as both the queue and the stack of values since reads/writes won't @@ -3419,7 +3966,7 @@ StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, EvalOperator op = (EvalOperator)ops[o]; shift_bits = 0; prev_val = ri ? values[ri-1] : prev_val; - if (op!=EVOP_VAL && op!=EVOP_LOB && op!=EVOP_HIB && op!=EVOP_BAB && op!=EVOP_SUB && ri<2) { + if (op!=EVOP_VAL && op!=EVOP_LOB && op!=EVOP_HIB && op!=EVOP_BAB && op!=EVOP_SUB && op!=EVOP_NOT && ri<2) { break; // ignore suffix operations that are lacking values } switch (op) { @@ -3511,6 +4058,9 @@ StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, shift_bits = -values[ri]; prev_val = values[ri - 1]; values[ri - 1] >>= values[ri]; break; + case EVOP_NOT: + if (ri) { values[ri - 1] = ~values[ri - 1]; } + break; case EVOP_LOB: // low byte if (ri) { values[ri-1] &= 0xff; } break; @@ -3808,12 +4358,14 @@ StatusCode Asm::CheckLateEval(strref added_label, int scope_end, bool print_miss // // Get a label record if it exists -Label *Asm::GetLabel(strref label) { +Label *Asm::GetLabel(strref label, bool reference_check) { uint32_t label_hash = label.fnv1a(); uint32_t index = FindLabelIndex(label_hash, labels.getKeys(), labels.count()); while (index < labels.count() && label_hash == labels.getKey(index)) { if (label.same_str(labels.getValue(index).label_name)) { - return 
labels.getValues()+index; + Label *label = labels.getValues() + index; + if (!reference_check) { label->referenced = true; } + return label; } index++; } @@ -3828,7 +4380,9 @@ Label *Asm::GetLabel(strref label, int file_ref) { uint32_t index = FindLabelIndex(label_hash, labs.labels.getKeys(), labs.labels.count()); while (index < labs.labels.count() && label_hash == labs.labels.getKey(index)) { if (label.same_str(labs.labels.getValue(index).label_name)) { - return labs.labels.getValues()+index; + Label *label = labs.labels.getValues()+index; + label->referenced = true; + return label; } index++; } @@ -3921,7 +4475,7 @@ StatusCode Asm::AddLabelPool(strref name, strref args) { // check that there is at least one valid address int ranges = 0; int num32 = 0; - uint16_t aRng[256]; + uint32_t aRng[256]; struct EvalContext etx; SetEvalCtxDefaults(etx); while (strref arg = args.split_token_trim(',')) { @@ -3936,8 +4490,8 @@ StatusCode Asm::AddLabelPool(strref name, strref args) { if (addr1<=addr0||addr0<0) { return ERROR_POOL_RANGE_EXPRESSION_EVAL; } - aRng[ranges++] = (uint16_t)addr0; - aRng[ranges++] = (uint16_t)addr1; + aRng[ranges++] = (uint32_t)addr0; + aRng[ranges++] = (uint32_t)addr1; num32 += (addr1-addr0+15)>>4; if (ranges>2||num32>((MAX_POOL_BYTES+15)>>4)) { return ERROR_POOL_RANGE_EXPRESSION_EVAL; @@ -3967,10 +4521,10 @@ StatusCode Asm::AssignPoolLabel(LabelPool &pool, strref label) { strref size = label; label = size.split_label(); if (strref::is_number(size.get_first())) { - uint16_t bytes = (uint16_t)size.atoi(); + uint32_t bytes = (uint32_t)size.atoi(); if (!bytes) { return ERROR_POOL_RANGE_EXPRESSION_EVAL; } if (!GetLabelPool(label)) { - uint16_t addr; + uint32_t addr; StatusCode error = pool.Reserve(bytes, addr, (uint16_t)brace_depth); if( error == STATUS_OK ) { // permanently remove this chunk from the parent pool @@ -3992,13 +4546,13 @@ StatusCode Asm::AssignPoolLabel(LabelPool &pool, strref label) { return ERROR_POOL_RANGE_EXPRESSION_EVAL; } strref type 
= label; - uint16_t bytes = 1; + uint32_t bytes = 1; int sz = label.find_at( '.', 1 ); if (sz > 0) { label = type.split( sz ); ++type; if (strref::is_number(type.get_first())) { - bytes = (uint16_t)type.atoi(); + bytes = (uint32_t)type.atoi(); } else { switch (strref::tolower(type.get_first())) { case 'l': bytes = 4; break; @@ -4009,7 +4563,7 @@ StatusCode Asm::AssignPoolLabel(LabelPool &pool, strref label) { } } if (GetLabel(label)) { return ERROR_POOL_LABEL_ALREADY_DEFINED; } - uint16_t addr; + uint32_t addr; StatusCode error = pool.Reserve(bytes, addr, (uint16_t)brace_depth); if (error!=STATUS_OK) { return error; } Label *pLabel = AddLabel(label.fnv1a()); @@ -4022,6 +4576,7 @@ StatusCode Asm::AssignPoolLabel(LabelPool &pool, strref label) { pLabel->constant = true; pLabel->external = false; pLabel->reference = false; + pLabel->referenced = false; bool local = false; if (label[ 0 ] == '.' || label[ 0 ] == '@' || label[ 0 ] == '!' || label[ 0 ] == ':' || label.get_last() == '$') { @@ -4033,7 +4588,7 @@ StatusCode Asm::AssignPoolLabel(LabelPool &pool, strref label) { } // Request a label from a pool -StatusCode sLabelPool::Reserve(uint16_t numBytes, uint16_t &ret_addr, uint16_t scope) { +StatusCode sLabelPool::Reserve(uint32_t numBytes, uint32_t &ret_addr, uint16_t scope) { if (numBytes>(end-start)||depth==MAX_SCOPE_DEPTH) { return ERROR_OUT_OF_LABELS_IN_POOL; } if (!depth||scope!=scopeUsed[depth-1][1]) { scopeUsed[depth][0] = end; @@ -4078,7 +4633,7 @@ StatusCode Asm::AssignLabel(strref label, strref expression, bool make_constant) if (pLabel->constant && pLabel->evaluated && val!=pLabel->value) { return (status==STATUS_NOT_READY) ? 
STATUS_OK : ERROR_MODIFYING_CONST_LABEL; } - } else { pLabel = AddLabel(label.fnv1a()); } + } else { pLabel = AddLabel(label.fnv1a()); pLabel->referenced = false; } pLabel->label_name = label; pLabel->pool_name.clear(); @@ -4110,6 +4665,7 @@ StatusCode Asm::AddressLabel(strref label) bool constLabel = false; if (!pLabel) { pLabel = AddLabel(label.fnv1a()); + pLabel->referenced = false; // if this label already exists but is changed then it may already have been referenced } else if (pLabel->constant && pLabel->value!=CurrSection().GetPC()) { return ERROR_MODIFYING_CONST_LABEL; } else { constLabel = pLabel->constant; } @@ -4125,7 +4681,8 @@ StatusCode Asm::AddressLabel(strref label) pLabel->constant = constLabel; last_label = label; bool local = label[0]=='.' || label[0]=='@' || label[0]=='!' || label[0]==':' || label.get_last()=='$'; - LabelAdded(pLabel, local); + if (directive_scope_depth > 0) { local = true; } + LabelAdded(pLabel, local); // TODO: in named scopes the label can still be referenced outside the scope directive if (local) { MarkLabelLocal(label); } status = CheckLateEval(label); if (!local && label[0]!=']') { // MERLIN: Variable label does not invalidate local labels @@ -4214,7 +4771,7 @@ StringSymbol *Asm::AddString(strref string_name, strref string_value) } // append a string to another string -StatusCode StringSymbol::Append(strref append) +StatusCode sStringSymbols::Append(strref append) { if (!append) return STATUS_OK; @@ -4513,6 +5070,25 @@ StatusCode Asm::Directive_String(strref line) return STATUS_OK; } +StatusCode Asm::Directive_Function(strref line) +{ + line.skip_whitespace(); + strref function_name = line.split_label(), params; + + line.skip_whitespace(); + if (line.get_first() == '(') { + params = line.scoped_block_comment_skip(); + params.trim_whitespace(); + } + + line.skip_whitespace(); + userFunctions.Add(function_name, params, line); + + return STATUS_OK; +} + + + StatusCode Asm::Directive_Undef(strref line) { strref name = 
line.split_range_trim(Merlin() ? label_end_char_range_merlin : label_end_char_range); @@ -4609,7 +5185,7 @@ StatusCode Asm::Directive_Incbin(strref line, int skip, int len) StatusCode Asm::Directive_Import(strref line) { line.skip_whitespace(); - + int skip = 0; // binary import skip this amount int len = 0; // binary import load up to this amount strref param; // read out skip & max len parameters @@ -4624,7 +5200,7 @@ StatusCode Asm::Directive_Import(strref line) if (param) { struct EvalContext etx; SetEvalCtxDefaults(etx); - EvalExpression(param.split_token_trim(','), etx, skip); + EvalExpression(param.split_token_trim_track_parens(','), etx, skip); if (param) { EvalExpression(param, etx, len); } } } @@ -4803,6 +5379,7 @@ StatusCode Asm::Directive_XREF(strref label) pLabelXREF->external = true; pLabelXREF->constant = false; pLabelXREF->reference = true; + pLabelXREF->referenced = false; // referenced is only within the current object file } return STATUS_OK; } @@ -4813,7 +5390,7 @@ StatusCode Asm::Directive_DC(strref line, int width, strref source_file) struct EvalContext etx; SetEvalCtxDefaults(etx); line.trim_whitespace(); - while (strref exp_dc = line.split_token_trim(',')) { + while (strref exp_dc = line.split_token_trim_track_parens(',')) { int value = 0; if (!CurrSection().IsDummySection()) { if (Merlin() && exp_dc.get_first() == '#') // MERLIN allows for an immediate declaration on data @@ -4854,7 +5431,7 @@ StatusCode Asm::Directive_DS(strref line) } struct EvalContext etx; SetEvalCtxDefaults(etx); - strref size = line.split_token_trim(','); + strref size = line.split_token_trim_track_parens(','); if (STATUS_OK != EvalExpression(size, etx, value)) return ERROR_DS_MUST_EVALUATE_IMMEDIATELY; int fill = 0; @@ -5014,6 +5591,7 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc return ERROR_CPU_NOT_SUPPORTED; case AD_EXPORT: + if (import_means_xref) { return Directive_XDEF(line); } line.trim_whitespace(); 
CurrSection().export_append = line.split_label(); break; @@ -5171,6 +5749,7 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc return Directive_Incbin(line); case AD_IMPORT: + if (import_means_xref) { return Directive_XREF(line); } return Directive_Import(line); case AD_LABEL: @@ -5187,6 +5766,9 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc case AD_STRING: return Directive_String(line); + + case AD_FUNCTION: + return Directive_Function(line); case AD_UNDEF: return Directive_Undef(line); @@ -5215,16 +5797,62 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc case AD_IFDEF: if (NewConditional()) { // Start new conditional block CheckConditionalDepth(); // Check if nesting - bool conditional_result; - error = EvalStatement(line, conditional_result); - strref name = line.get_trimmed_ws(); - if (GetLabel(name) != nullptr || GetString(name) != nullptr) + // ifdef doesn't need to evaluate the value, just determine if it exists or not + strref label = line.split_range_trim(label_end_char_range); + if( GetLabel(label, etx.file_ref) ) ConsumeConditional(); else SetConditional(); } break; + case AD_IFNDEF: + if (NewConditional()) { // Start new conditional block + CheckConditionalDepth(); // Check if nesting + // ifdef doesn't need to evaluate the value, just determine if it exists or not + strref label = line.split_range_trim(label_end_char_range); + if (!GetLabel(label, etx.file_ref)) + ConsumeConditional(); + else + SetConditional(); + } + break; + + case AD_IFCONST: + if (NewConditional()) { // Start new conditional block + CheckConditionalDepth(); // Check if nesting + // ifdef doesn't need to evaluate the value, just determine if it exists or not + strref label_name = line.split_range_trim(label_end_char_range); + if (Label* label = GetLabel(label_name, etx.file_ref)) { + if (label->constant) { ConsumeConditional(); } + else { SetConditional(); } + } + else { 
SetConditional(); } + } + break; + + case AD_IFBLANK: + if (NewConditional()) { // Start new conditional block + CheckConditionalDepth(); // Check if nesting + line.trim_whitespace(); + if (line.is_empty()) + ConsumeConditional(); + else + SetConditional(); + } + break; + + case AD_IFNBLANK: + if (NewConditional()) { // Start new conditional block + CheckConditionalDepth(); // Check if nesting + line.trim_whitespace(); + if (!line.is_empty()) + ConsumeConditional(); + else + SetConditional(); + } + break; + case AD_ELSE: if (ConditionalAsm()) { if (ConditionalConsumed()) @@ -5327,9 +5955,38 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc break; } break; - + case AD_DS: return Directive_DS(line); + + case AD_SCOPE: + directive_scope_depth++; + return EnterScope(); + + case AD_ENDSCOPE: + directive_scope_depth--; + return ExitScope(); + + case AD_PUSH: + line.trim_whitespace(); + if (Label *label = GetLabel(line)) { + symbolStacks.PushSymbol(label); + return STATUS_OK; + } else if( StringSymbol* string = GetString(line)) { + symbolStacks.PushSymbol(string); + return STATUS_OK; + } + return ERROR_UNABLE_TO_PROCESS; + + case AD_PULL: + line.trim_whitespace(); + if (Label *label = GetLabel(line)) { + return symbolStacks.PullSymbol(label); + } else if (StringSymbol* string = GetString(line)) { + return symbolStacks.PullSymbol(string); + } + return ERROR_UNABLE_TO_PROCESS; + } return error; } @@ -5358,7 +6015,7 @@ StatusCode Asm::GetAddressMode(strref line, bool flipXY, uint32_t &validModes, A else { suffix.clear(); } ++block_suffix; block_suffix.clip(1); block_suffix.trim_whitespace(); ++line; line.skip_whitespace(); - strref block = block_suffix.split_token_trim( ',' ); + strref block = block_suffix.split_token_trim_track_parens( ',' ); validModes &= AMM_ZP_REL_X | AMM_ZP_Y_REL | AMM_REL | AMM_ZP_REL | AMM_REL_X | AMM_ZP_REL_L | AMM_ZP_REL_Y_L | AMM_STK_REL_Y | AMM_REL_L; if( line.get_first() == '>' ) { // [>$aaaa] if( c == '[' ) { 
addrMode = AMB_REL_L; validModes &= AMM_REL_L; expression = block+1; } @@ -5497,7 +6154,7 @@ StatusCode Asm::GetAddressMode(strref line, bool flipXY, uint32_t &validModes, A addrMode = AMB_ACC; } else { // absolute (zp, offs x, offs y) addrMode = force_24 ? AMB_ABS_L : (force_zp ? AMB_ZP : AMB_ABS); - expression = line.split_token_trim(','); + expression = line.split_token_trim_track_parens(','); if( force_abs ) { validModes &= AMM_ABS | AMM_ABS_X | AMM_ABS_Y | AMM_REL | AMM_REL_X; } if( force_zp ) { validModes &= AMM_ZP | AMM_ZP_X | AMM_ZP_REL_X | AMM_ZP_Y_REL | AMM_ZP_REL | AMM_ZP_ABS | AMM_ZP_REL_L | AMM_ZP_REL_Y_L | AMM_FLIPXY; } @@ -5536,7 +6193,7 @@ StatusCode Asm::AddOpcode(strref line, int index, strref source_file) { switch (validModes) { case AMC_BBR: addrMode = AMB_ZP_ABS; - expression = line.split_token_trim(','); + expression = line.split_token_trim_track_parens(','); if (!expression || !line) return ERROR_INVALID_ADDRESSING_MODE; break; @@ -5551,7 +6208,7 @@ StatusCode Asm::AddOpcode(strref line, int index, strref source_file) { break; case AMM_BLK_MOV: addrMode = AMB_BLK_MOV; - expression = line.before_or_full(','); + expression = line.before_or_full_track_parens(','); break; default: error = GetAddressMode(line, !!(validModes & AMM_FLIPXY), validModes, addrMode, op_param, expression); @@ -5736,7 +6393,7 @@ StatusCode Asm::AddOpcode(strref line, int index, strref source_file) { struct EvalContext etx; SetEvalCtxDefaults(etx); etx.pc = CurrSection().GetPC()-2; - line.split_token_trim(','); + line.split_token_trim_track_parens(','); error = EvalExpression(line, etx, value); if (error==STATUS_NOT_READY || error == STATUS_XREF_DEPENDENT) AddLateEval(CurrSection().DataOffset(), CurrSection().GetPC(), scope_address[scope_depth], line, source_file, LateEval::LET_BYTE); @@ -5893,15 +6550,13 @@ StatusCode Asm::BuildLine(strref line) { error = AssignLabel(label, line); line.clear(); list_flags |= ListLine::KEYWORD; - } - else if 
(keyword_equ.is_prefix_word(line)) { + } else if (keyword_equ.is_prefix_word(line)) { line += keyword_equ.get_len(); line.skip_whitespace(); error = AssignLabel(label, line); line.clear(); list_flags |= ListLine::KEYWORD; - } - else { + } else { uint32_t nameHash = label.fnv1a(); uint32_t macro = FindLabelIndex(nameHash, macros.getKeys(), macros.count()); bool gotConstruct = false; @@ -6864,7 +7519,7 @@ StatusCode Asm::ReadObjectFile(strref filename, int link_to_section) int16_t f = (int16_t)l.flags; int external = f & ObjFileLabel::OFL_XDEF; if (external == ObjFileLabel::OFL_XDEF) { - if (!lbl) { lbl = AddLabel(name.fnv1a()); } // insert shared label + if (!lbl) { lbl = AddLabel(name.fnv1a()); lbl->referenced = false; } // insert shared label else if (!lbl->reference) { continue; } } else { // insert protected label while ((file_index + external) >= (int)externals.size()) { @@ -7187,15 +7842,7 @@ StatusCode Asm::WriteA2GS_OMF(strref filename, bool full_collapse) { } int main(int argc, char **argv) { - const strref listing("lst"); - const strref tass_listing( "tsl" ); - const strref tass_labels( "tl" ); - const strref allinstr("opcodes"); - const strref endmacro("endm"); - const strref cpu("cpu"); - const strref acc("acc"); - const strref xy("xy"); - const strref org("org"); + int return_value = 0; bool load_header = true; bool size_header = false; @@ -7210,15 +7857,15 @@ int main(int argc, char **argv) { const char *source_filename = nullptr, *obj_out_file = nullptr; const char *binary_out_name = nullptr; - const char *sym_file = nullptr, *vs_file = nullptr, *tass_labels_file = nullptr; + const char *sym_file = nullptr, *vs_file = nullptr, *cmdarg_tass_labels_file = nullptr; strref list_file, allinstr_file; strref tass_list_file; for (int a = 1; a<argc; a++) { if (argv[a][0]=='-') { strref arg(argv[a]+1); if (arg.get_first()=='i') { assembler.AddIncludeFolder(arg+1); } - else if (arg.same_str("kickasm") ) { assembler.syntax = SYNTAX_KICKASM; } - else if 
(arg.same_str("merlin")) { assembler.syntax = SYNTAX_MERLIN; } + else if (arg.same_str(cmdarg_kickasm) ) { assembler.syntax = SYNTAX_KICKASM; } + else if (arg.same_str(cmdarg_merlin)) { assembler.syntax = SYNTAX_MERLIN; } else if (arg.get_first()=='D'||arg.get_first()=='d') { ++arg; if (arg.find('=')>0) { @@ -7226,51 +7873,53 @@ int main(int argc, char **argv) { } else { assembler.AssignLabel(arg, "1"); } - } else if (arg.same_str("c64")) { + } else if (arg.same_str(cmdarg_c64)) { load_header = true; size_header = false; - } else if (arg.same_str("a2b")) { + } else if (arg.same_str(cmdarg_a2b)) { assembler.default_org = 0x0803; load_header = true; size_header = true; - } else if (arg.same_str("bin")) { + } else if (arg.same_str(cmdarg_bin)) { load_header = false; size_header = false; - } else if (arg.same_str("a2p")) { + } else if (arg.same_str(cmdarg_a2p)) { assembler.default_org = 0x2000; load_header = false; size_header = false; - } else if (arg.same_str("a2o")) { + } else if (arg.same_str(cmdarg_a2o)) { gs_os_reloc = true; - } else if (arg.same_str("mrg")) { + } else if (arg.same_str(cmdarg_mrg)) { force_merge_sections = true; - } else if (arg.same_str("sect")) { + } else if (arg.same_str(cmdarg_sect)) { info = true; - } else if (arg.same_str(endmacro)) { + } else if (arg.same_str(cmdarg_endmacro)) { assembler.end_macro_directive = true; - } else if (arg.has_prefix(listing)&&(arg.get_len()==listing.get_len()||arg[listing.get_len()]=='=')) { + } else if (arg.same_str(cmdarg_xrefimp)) { + assembler.import_means_xref = true; + } else if (arg.has_prefix(cmdarg_listing)&&(arg.get_len()==cmdarg_listing.get_len()||arg[cmdarg_listing.get_len()]=='=')) { assembler.list_assembly = true; list_output = true; list_file = arg.after( '=' ); - } else if (arg.has_prefix(tass_listing)&&(arg.get_len()==listing.get_len()||arg[listing.get_len()]=='=')) { + } else if (arg.has_prefix(cmdarg_tass_listing)&&(arg.get_len()==cmdarg_listing.get_len()||arg[cmdarg_listing.get_len()]=='=')) 
{ assembler.list_assembly = true; tass_list_output = true; tass_list_file = arg.after( '=' ); - } else if (arg.has_prefix(allinstr)&&(arg.get_len()==allinstr.get_len()||arg[allinstr.get_len()]=='=')) { + } else if (arg.has_prefix(cmdarg_allinstr)&&(arg.get_len()==cmdarg_allinstr.get_len()||arg[cmdarg_allinstr.get_len()]=='=')) { gen_allinstr = true; allinstr_file = arg.after('='); - } else if (arg.has_prefix(org)) { + } else if (arg.has_prefix(cmdarg_org)) { arg = arg.after('='); if (arg && arg.get_first()=='$' && arg.get_len()>1) { assembler.default_org = (int)(arg+1).ahextoui(); } else if (arg.is_number()) { assembler.default_org = (int)arg.atoi(); } // force the current section to be org'd assembler.AssignAddressToSection(assembler.SectionId(), assembler.default_org); - } else if (arg.has_prefix(acc)&&arg[acc.get_len()]=='=') { + } else if (arg.has_prefix(cmdarg_acc)&&arg[cmdarg_acc.get_len()]=='=') { assembler.accumulator_16bit = arg.after('=').atoi()==16; - } else if (arg.has_prefix(xy)&&arg[xy.get_len()]=='=') { + } else if (arg.has_prefix(cmdarg_xy)&&arg[cmdarg_xy.get_len()]=='=') { assembler.index_reg_16bit = arg.after('=').atoi()==16; - } else if (arg.has_prefix(cpu)&&(arg.get_len()==cpu.get_len()||arg[cpu.get_len()]=='=')) { + } else if (arg.has_prefix(cmdarg_cpu)&&(arg.get_len()==cmdarg_cpu.get_len()||arg[cmdarg_cpu.get_len()]=='=')) { arg.split_token_trim('='); bool found = false; for (int c = 0; c<nCPUs; c++) { @@ -7287,14 +7936,14 @@ int main(int argc, char **argv) { return 1; } if (!arg) { return 0; } - } else if (arg.same_str("sym")&&(a+1)<argc) { + } else if (arg.same_str(cmdarg_sym)&&(a+1)<argc) { sym_file = argv[++a]; - } else if (arg.same_str("obj")&&(a+1)<argc) { + } else if (arg.same_str(cmdarg_obj)&&(a+1)<argc) { obj_out_file = argv[++a]; - } else if (arg.same_str("vice")&&(a+1)<argc) { + } else if (arg.same_str(cmdarg_vice)&&(a+1)<argc) { vs_file = argv[++a]; - } else if (arg.same_str(tass_labels)&&(a+1)<argc) { - tass_labels_file = 
argv[++a]; + } else if (arg.same_str(cmdarg_tass_labels)&&(a+1)<argc) { + cmdarg_tass_labels_file = argv[++a]; } else { printf("Unexpected option " STRREF_FMT "\n", STRREF_ARG(arg)); } } else if (!source_filename) { source_filename = argv[a]; } else if (!binary_out_name) { binary_out_name = argv[a]; } @@ -7446,8 +8095,8 @@ int main(int argc, char **argv) { } } // export tass labels - if( tass_labels_file && !srcname.same_str( tass_labels_file ) && !assembler.map.empty() ) { - if( FILE *f = fopen( tass_labels_file, "w" ) ) { + if( cmdarg_tass_labels_file && !srcname.same_str( cmdarg_tass_labels_file ) && !assembler.map.empty() ) { + if( FILE *f = fopen( cmdarg_tass_labels_file, "w" ) ) { for( MapSymbolArray::iterator i = assembler.map.begin(); i != assembler.map.end(); ++i ) { uint32_t value = ( uint32_t )i->value; if( size_t( i->section ) < assembler.allSections.size() ) { value += assembler.allSections[ i->section ].start_address; } @@ -7472,3 +8121,4 @@ int main(int argc, char **argv) { } return return_value; } +