1
0
mirror of https://github.com/ksherlock/x65.git synced 2024-06-08 11:32:33 +00:00

Adding string symbols

- String Symbols can be evaluated as expressions or assembled as code
- String Symbols can be generated by macros
- Cleaning up first page
- Adding more to the x65.txt documentation
This commit is contained in:
Carl-Henrik Skårstedt 2015-11-26 13:10:58 -08:00
parent 5459c6c0e0
commit 9f8ad61fe2
3 changed files with 845 additions and 1118 deletions

961
README.md

File diff suppressed because it is too large Load Diff

355
x65.cpp
View File

@ -234,9 +234,11 @@ enum AssemblerDirective {
AD_TEXT, // TEXT: Add text to output
AD_INCLUDE, // INCLUDE: Load and assemble another file at this address
AD_INCBIN, // INCBIN: Load and directly insert another file at this address
AD_CONST, // CONST: Prevent a label from mutating during assemble
AD_IMPORT, // IMPORT: Include or Incbin or Incobj or Incsym
AD_CONST, // CONST: Prevent a label from mutating during assemble
AD_LABEL, // LABEL: Create a mutable label (optional)
AD_STRING, // STRING: Declare a string symbol
AD_UNDEF, // UNDEF: remove a string or a label
AD_INCSYM, // INCSYM: Reference labels from another assemble
AD_LABPOOL, // POOL: Create a pool of addresses to assign as labels dynamically
AD_IF, // #IF: Conditional assembly follows based on expression
@ -304,7 +306,8 @@ enum EvalOperator {
EVOP_STP, // u, Unexpected input, should stop and evaluate what we have
EVOP_NRY, // v, Not ready yet
EVOP_XRF, // w, value from XREF label
EVOP_ERR, // x, Error
EVOP_EXP, // x, sub expression
EVOP_ERR, // y, Error
};
// Opcode encoding
@ -940,6 +943,8 @@ DirectiveName aDirectiveNames[] {
{ "IMPORT", AD_IMPORT },
{ "CONST", AD_CONST },
{ "LABEL", AD_LABEL },
{ "STRING", AD_STRING },
{ "UNDEF", AD_UNDEF },
{ "INCSYM", AD_INCSYM },
{ "LABPOOL", AD_LABPOOL },
{ "POOL", AD_LABPOOL },
@ -1274,6 +1279,24 @@ public:
bool reference; // this label is accessed from external and can't be used for evaluation locally
} Label;
// String symbol record: a named piece of text owned by the assembler
typedef struct {
public:
	strref string_name;		// symbol name
	strref string_const;	// contents while they are still a plain source reference
	strovl string_value;	// contents once modified (malloc'd buffer), starts as a null string

	StatusCode Append(strref append);
	StatusCode ParseLine(strref line);

	// Current contents: the modified buffer if it is valid, otherwise
	// the source reference.
	strref get()
	{
		if (string_value.valid())
			return string_value.get_strref();
		return string_const;
	}

	// Release the modified buffer (if one was allocated) and drop the
	// source reference.
	void clear()
	{
		if (string_value.cap()) {
			free(string_value.charstr());
			string_value.invalidate();
			string_value.clear();
		}
		string_const.clear();
	}
} StringSymbol;
// If an expression can't be evaluated immediately, this is required
// to reconstruct the result when it can be.
typedef struct {
@ -1408,6 +1431,7 @@ public:
class Asm {
public:
pairArray<unsigned int, Label> labels;
pairArray<unsigned int, StringSymbol> strings;
pairArray<unsigned int, Macro> macros;
pairArray<unsigned int, LabelPool> labelPools;
pairArray<unsigned int, LabelStruct> labelStructs;
@ -1528,7 +1552,7 @@ public:
EvalOperator RPNToken_Merlin(strref &expression, const struct EvalContext &etx,
EvalOperator prev_op, short &section, int &value);
EvalOperator RPNToken(strref &expression, const struct EvalContext &etx,
EvalOperator prev_op, short &section, int &value);
EvalOperator prev_op, short &section, int &value, strref &subexp);
StatusCode EvalExpression(strref expression, const struct EvalContext &etx, int &result);
void SetEvalCtxDefaults(struct EvalContext &etx);
int ReptCnt() const;
@ -1541,7 +1565,13 @@ public:
StatusCode AssignLabel(strref label, strref line, bool make_constant = false);
StatusCode AddressLabel(strref label);
void LabelAdded(Label *pLabel, bool local = false);
void IncludeSymbols(strref line);
StatusCode IncludeSymbols(strref line);
// Strings
StringSymbol *GetString(strref string_name);
StringSymbol *AddString(strref string_name, strref string_value);
StatusCode StringAction(StringSymbol *pStr, strref line);
StatusCode ParseStringOp(StringSymbol *pStr, strref line);
// Manage locals
void MarkLabelLocal(strref label, bool scope_label = false);
@ -1564,6 +1594,8 @@ public:
StatusCode ApplyDirective(AssemblerDirective dir, strref line, strref source_file);
StatusCode Directive_Rept(strref line, strref source_file);
StatusCode Directive_Macro(strref line, strref source_file);
StatusCode Directive_String(strref line);
StatusCode Directive_Undef(strref line);
StatusCode Directive_Include(strref line);
StatusCode Directive_Incbin(strref line, int skip=0, int len=0);
StatusCode Directive_Import(strref line);
@ -1630,6 +1662,12 @@ void Asm::Cleanup() {
labels.clear();
macros.clear();
allSections.clear();
for (unsigned int i = 0; i < strings.count(); ++i) {
StringSymbol &str = strings.getValue(i);
if (str.string_value.cap())
free(str.string_value.charstr());
}
strings.clear();
for (std::vector<ExtLabels>::iterator exti = externals.begin(); exti !=externals.end(); ++exti)
exti->labels.clear();
externals.clear();
@ -3111,7 +3149,7 @@ EvalOperator Asm::RPNToken_Merlin(strref &expression, const struct EvalContext &
}
// Get a single token from most non-apple II assemblers
EvalOperator Asm::RPNToken(strref &exp, const struct EvalContext &etx, EvalOperator prev_op, short &section, int &value)
EvalOperator Asm::RPNToken(strref &exp, const struct EvalContext &etx, EvalOperator prev_op, short &section, int &value, strref &subexp)
{
char c = exp.get_first();
switch (c) {
@ -3165,6 +3203,7 @@ EvalOperator Asm::RPNToken(strref &exp, const struct EvalContext &etx, EvalOpera
if (ret != STATUS_NOT_STRUCT) return EVOP_ERR; // partial struct
}
if (!pLabel && label.same_str("rept")) { value = etx.rept_cnt; return EVOP_VAL; }
if (!pLabel) { if (StringSymbol *pStr = GetString(label)) subexp = pStr->get(); return EVOP_EXP; }
if (!pLabel || !pLabel->evaluated) return EVOP_NRY; // this label could not be found (yet)
value = pLabel->value; section = pLabel->section; return pLabel->reference ? EVOP_XRF : EVOP_VAL;
}
@ -3200,11 +3239,16 @@ static int mul_as_shift(int scalar)
return scalar == 1 ? shift : 0;
}
#define MAX_EXPR_STACK 2
StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx, int &result)
{
int numValues = 0;
int numOps = 0;
strref expression_stack[MAX_EXPR_STACK];
int exp_sp = 0;
char ops[MAX_EVAL_OPER]; // RPN expression
int values[MAX_EVAL_VALUES]; // RPN values (in order of RPN EVOP_VAL operations)
short section_ids[MAX_EVAL_SECTIONS]; // local index of each referenced section
@ -3217,19 +3261,29 @@ StatusCode Asm::EvalExpression(strref expression, const struct EvalContext &etx,
char op_stack[MAX_EVAL_OPER];
EvalOperator prev_op = EVOP_NONE;
expression.trim_whitespace();
while (expression) {
while (expression || exp_sp) {
int value = 0;
short section = -1, index_section = -1;
EvalOperator op = EVOP_NONE;
if (syntax == SYNTAX_MERLIN)
strref subexp;
if (!expression && exp_sp) {
expression = expression_stack[--exp_sp];
op = EVOP_RPR;
} else if (syntax == SYNTAX_MERLIN)
op = RPNToken_Merlin(expression, etx, prev_op, section, value);
else
op = RPNToken(expression, etx, prev_op, section, value);
op = RPNToken(expression, etx, prev_op, section, value, subexp);
if (op == EVOP_ERR)
return ERROR_UNEXPECTED_CHARACTER_IN_EXPRESSION;
else if (op == EVOP_NRY)
return STATUS_NOT_READY;
else if (op == EVOP_XRF) {
else if (op == EVOP_EXP) {
if (exp_sp >= MAX_EXPR_STACK)
return ERROR_TOO_MANY_VALUES_IN_EXPRESSION;
expression_stack[exp_sp++] = expression;
expression = subexp;
op = EVOP_LPR;
} else if (op == EVOP_XRF) {
xrefd = true;
op = EVOP_VAL;
}
@ -4016,7 +4070,7 @@ StatusCode Asm::AddressLabel(strref label)
}
// include symbols listed from a .sym file or all if no listing
void Asm::IncludeSymbols(strref line)
StatusCode Asm::IncludeSymbols(strref line)
{
strref symlist = line.before('"').get_trimmed_ws();
line = line.between('"', '"');
@ -4044,10 +4098,143 @@ void Asm::IncludeSymbols(strref line)
}
}
loadedData.push_back(buffer);
}
} else
return ERROR_COULD_NOT_INCLUDE_FILE;
return STATUS_OK;
}
// Look up a string symbol by name; returns nullptr when no symbol matches
StringSymbol *Asm::GetString(strref string_name)
{
	const unsigned int hash = string_name.fnv1a();
	// FindLabelIndex supplies the starting slot; several names may share
	// one hash so every consecutive entry with this key is compared by name.
	for (unsigned int slot = FindLabelIndex(hash, strings.getKeys(), strings.count());
		 slot < strings.count() && hash == strings.getKey(slot); slot++) {
		if (string_name.same_str(strings.getValue(slot).string_name))
			return strings.getValues() + slot;
	}
	return nullptr;
}
// Create a string symbol, or reset an existing one, so that its contents
// reference string_value. Returns the record for the symbol.
StringSymbol *Asm::AddString(strref string_name, strref string_value)
{
	StringSymbol *entry = GetString(string_name);
	if (!entry) {
		// not known yet: insert a fresh record at the slot for this hash
		const unsigned int hash = string_name.fnv1a();
		const unsigned int slot = FindLabelIndex(hash, strings.getKeys(), strings.count());
		strings.insert(slot, hash);
		entry = strings.getValues() + slot;
		entry->string_name = string_name;
		entry->string_value.invalidate();
		entry->string_value.clear();
	}

	// discard any previously modified contents before assigning the new text
	strovl &modified = entry->string_value;
	if (modified.cap()) {
		free(modified.charstr());
		modified.invalidate();
		modified.clear();
	}
	entry->string_const = string_value;
	return entry;
}
// Append text to this string symbol.
//
// The first append moves the contents from the source reference
// (string_const) into a malloc'd buffer (string_value); later appends
// grow that buffer when it is too small. Buffer sizes are rounded up to
// a multiple of 256 bytes.
//
// append: text to add, an empty string is a no-op.
// Returns STATUS_OK, or ERROR_OUT_OF_MEMORY if a buffer can't be allocated.
StatusCode StringSymbol::Append(strref append)
{
	if (!append)
		return STATUS_OK;

	strl_t add_len = append.get_len();
	char *stale = nullptr;	// previous buffer, released after the append

	if (!string_value.cap()) {
		// The new buffer must hold the existing constant contents as well
		// as the appended text, not just the appended text.
		strl_t new_len = (string_const.get_len() + add_len + 0xff)&(~(strl_t)0xff);
		char *buf = (char*)malloc(new_len);
		if (!buf)
			return ERROR_OUT_OF_MEMORY;
		string_value.set_overlay(buf, new_len);
		string_value.copy(string_const);
	} else if (string_value.cap() < (string_value.get_len() + add_len)) {
		strl_t new_len = (string_value.get_len() + add_len + 0xff)&(~(strl_t)0xff);
		char *buf = (char*)malloc(new_len);
		if (!buf)
			return ERROR_OUT_OF_MEMORY;
		strovl ovl(buf, new_len);
		ovl.copy(string_value.get_strref());
		// 'append' may reference the old buffer (appending a string to
		// itself), so keep it alive until the append below is done.
		stale = string_value.charstr();
		string_value.set_overlay(buf, new_len);
	}
	string_const.clear();
	string_value.append(append);
	free(stale);	// free(nullptr) is a no-op
	return STATUS_OK;
}
// Parse a '+' separated list of terms and append each one to pStr.
//
// A term is a quoted string, the name of another string symbol, or a
// label whose value is appended as '$' followed by its hexadecimal value.
// Parsing stops at the first term that is none of these, or when a term
// is not followed by '+'.
//
// Returns STATUS_OK, ERROR_TARGET_ADDRESS_MUST_EVALUATE_IMMEDIATELY if a
// label is not evaluated yet, or the error reported by Append.
StatusCode Asm::ParseStringOp(StringSymbol *pStr, strref line)
{
	line.skip_whitespace();
	if (line[0] == '+')
		++line;
	for (;;) {
		line.skip_whitespace();
		StatusCode status = STATUS_OK;
		if (line[0] == '"') {
			strref substr = line.between('"', '"');
			line += substr.get_len() + 2;	// skip the text and both quotes
			status = pStr->Append(substr);
		} else {
			strref label = line.split_range(syntax == SYNTAX_MERLIN ?
				label_end_char_range_merlin : label_end_char_range);
			if (StringSymbol *pStr2 = GetString(label))
				status = pStr->Append(pStr2->get());
			else if (Label *pLabel = GetLabel(label)) {
				if (!pLabel->evaluated)
					return ERROR_TARGET_ADDRESS_MUST_EVALUATE_IMMEDIATELY;
				strown<32> lblstr;
				lblstr.sprintf("$%x", pLabel->value);
				status = pStr->Append(lblstr.get_strref());
			} else
				break;
		}
		// don't silently drop an allocation failure from Append
		if (status != STATUS_OK)
			return status;
		line.skip_whitespace();
		if (!line || line[0] != '+')
			break;
		++line;
		line.skip_whitespace();
	}
	return STATUS_OK;
}
// Handle a source line that begins with the name of a string symbol.
//
//  name += ...  appends the terms that follow to the string
//  name = ...   clears the string, then assigns the terms that follow
//  name         pushes a copy of the string contents as a new scoped
//               source context, with each two character "\n" sequence
//               replaced by a newline
//
// pStr: the string symbol named at the start of the line
// line: the remainder of the line after the symbol name
// Returns STATUS_OK, the ParseStringOp result for the assignment forms,
// ERROR_OUT_OF_MEMORY if the copy can't be allocated, or
// ERROR_TOO_DEEP_SCOPE if no more scopes can be opened.
StatusCode Asm::StringAction(StringSymbol *pStr, strref line)
{
	line.skip_whitespace();
	if (line[0] == '+' && line[1] == '=') { // append strings
		line += 2;
		line.skip_whitespace();
		return ParseStringOp(pStr, line);
	}
	if (line[0] == '=') {
		++line;
		line.skip_whitespace();
		pStr->clear();
		return ParseStringOp(pStr, line);
	}

	strref str = pStr->get();
	if (!str)
		return STATUS_OK;

	// Work on a private copy so the escape replacement doesn't alter
	// the symbol itself.
	char *macro = (char*)malloc(str.get_len());
	if (!macro)
		return ERROR_OUT_OF_MEMORY;
	strovl mac(macro, str.get_len());
	mac.copy(str);
	mac.replace("\\n", "\n");
	// the buffer is handed to loadedData, presumably released together
	// with the other loaded data
	loadedData.push_back(macro);
	contextStack.push(contextStack.curr().source_name, mac.get_strref(), mac.get_strref());
	if (scope_depth >= (MAX_SCOPE_DEPTH - 1))
		return ERROR_TOO_DEEP_SCOPE;
	scope_address[++scope_depth] = CurrSection().GetPC();
	contextStack.curr().scoped_context = true;
	return STATUS_OK;
}
//
//
@ -4251,6 +4438,62 @@ StatusCode Asm::Directive_Macro(strref line, strref source_file)
return STATUS_OK;
}
// string: create a symbolic string
//
// Accepted forms:
//  STRING name                    declare an empty string symbol
//  STRING name = "text" [+ ...]   declare with a quoted string, optionally
//                                 followed by '+' separated terms
//  STRING name = term [+ ...]     declare from terms handled by ParseStringOp
//
// Returns STATUS_OK, ERROR_OUT_OF_MEMORY if the symbol can't be created,
// or the result of ParseStringOp.
StatusCode Asm::Directive_String(strref line)
{
	line.skip_whitespace();
	strref string_name = line.split_range_trim(word_char_range, line[0]=='.' ? 1 : 0);
	if (line[0]=='=' || keyword_equ.is_prefix_word(line)) {
		line.next_word_ws();
		strref substr = line;
		if (line[0] == '"') {
			substr = line.between('"', '"');
			line += substr.get_len() + 2;	// skip the text and both quotes
			StringSymbol *pStr = AddString(string_name, substr);
			if (pStr == nullptr)
				return ERROR_OUT_OF_MEMORY;
			line.skip_whitespace();
			if (line[0] == '+')
				return ParseStringOp(pStr, line);
		} else {
			StringSymbol *pStr = AddString(string_name, strref());
			// same guard as the other branches, ParseStringOp dereferences pStr
			if (pStr == nullptr)
				return ERROR_OUT_OF_MEMORY;
			return ParseStringOp(pStr, line);
		}
	} else {
		if (!AddString(string_name, strref()))
			return ERROR_OUT_OF_MEMORY;
	}
	return STATUS_OK;
}
// undef: remove a label or a string symbol by name
//
// Labels are searched first; if a label matches it is removed and the
// string symbols are not searched. An unknown name is not an error.
// Always returns STATUS_OK.
StatusCode Asm::Directive_Undef(strref line)
{
	strref name = line.split_range_trim(syntax == SYNTAX_MERLIN ? label_end_char_range_merlin : label_end_char_range);
	unsigned int name_hash = name.fnv1a();
	unsigned int index = FindLabelIndex(name_hash, labels.getKeys(), labels.count());
	while (index < labels.count() && name_hash == labels.getKey(index)) {
		if (name.same_str(labels.getValue(index).label_name)) {
			labels.remove(index);
			return STATUS_OK;
		}
		index++;
	}

	index = FindLabelIndex(name_hash, strings.getKeys(), strings.count());
	while (index < strings.count() && name_hash == strings.getKey(index)) {
		// Bind by reference so the buffer is released and invalidated on
		// the stored record rather than on a temporary copy.
		StringSymbol &str = strings.getValue(index);
		if (name.same_str(str.string_name)) {
			if (str.string_value.cap()) {
				free(str.string_value.charstr());
				str.string_value.invalidate();
			}
			strings.remove(index);
			return STATUS_OK;
		}
		index++;
	}
	return STATUS_OK;
}
// include: read in a source file and assemble at this point
StatusCode Asm::Directive_Include(strref line)
{
@ -4355,10 +4598,16 @@ StatusCode Asm::Directive_Import(strref line)
line += import_text.get_len();
line.skip_whitespace();
strref text_type = "petscii";
if (line[0]!='"') {
text_type = line.get_word_ws();
line += text_type.get_len();
line.skip_whitespace();
while (line[0]!='"') {
strref word = line.get_word_ws();
if (word.same_str("petscii") || word.same_str("petscii_shifted")) {
text_type = line.get_word_ws();
line += text_type.get_len();
line.skip_whitespace();
} else if (StringSymbol *pStr = GetString(line.get_word_ws())) {
line = pStr->get();
break;
}
}
CurrSection().AddText(line, text_type);
return STATUS_OK;
@ -4369,8 +4618,7 @@ StatusCode Asm::Directive_Import(strref line)
} else if (import_symbols.is_prefix_word(line)) {
line += import_symbols.get_len();
line.skip_whitespace();
IncludeSymbols(line);
return STATUS_OK;
return IncludeSymbols(line);
}
return STATUS_OK;
@ -4570,20 +4818,44 @@ StatusCode Asm::Directive_EVAL(strref line)
line.trim_whitespace();
struct EvalContext etx;
SetEvalCtxDefaults(etx);
strref lab1 = line;
lab1 = lab1.split_token_any_trim(syntax == SYNTAX_MERLIN ? label_end_char_range_merlin : label_end_char_range);
StringSymbol *pStr = line.same_str_case(lab1) ? GetString(lab1) : nullptr;
if (line && EvalExpression(line, etx, value) == STATUS_OK) {
if (description) {
printf("EVAL(%d): " STRREF_FMT ": \"" STRREF_FMT "\" = $%x\n",
contextStack.curr().source_file.count_lines(description) + 1, STRREF_ARG(description), STRREF_ARG(line), value);
if (pStr != nullptr) {
printf("EVAL(%d): " STRREF_FMT ": \"" STRREF_FMT "\" = \"" STRREF_FMT "\" = $%x\n",
contextStack.curr().source_file.count_lines(description) + 1, STRREF_ARG(description), STRREF_ARG(line), STRREF_ARG(pStr->get()), value);
} else {
printf("EVAL(%d): " STRREF_FMT ": \"" STRREF_FMT "\" = $%x\n",
contextStack.curr().source_file.count_lines(description) + 1, STRREF_ARG(description), STRREF_ARG(line), value);
}
} else {
printf("EVAL(%d): \"" STRREF_FMT "\" = $%x\n",
contextStack.curr().source_file.count_lines(line) + 1, STRREF_ARG(line), value);
if (pStr != nullptr) {
printf("EVAL(%d): \"" STRREF_FMT "\" = \"" STRREF_FMT "\" = $%x\n",
contextStack.curr().source_file.count_lines(line) + 1, STRREF_ARG(line), STRREF_ARG(pStr->get()), value);
} else {
printf("EVAL(%d): \"" STRREF_FMT "\" = $%x\n",
contextStack.curr().source_file.count_lines(line) + 1, STRREF_ARG(line), value);
}
}
} else if (description) {
printf("EVAL(%d): \"" STRREF_FMT ": " STRREF_FMT"\"\n",
contextStack.curr().source_file.count_lines(description) + 1, STRREF_ARG(description), STRREF_ARG(line));
if (pStr != nullptr) {
printf("EVAL(%d): " STRREF_FMT ": \"" STRREF_FMT "\" = \"" STRREF_FMT "\"\n",
contextStack.curr().source_file.count_lines(description) + 1, STRREF_ARG(description), STRREF_ARG(line), STRREF_ARG(pStr->get()));
} else {
printf("EVAL(%d): \"" STRREF_FMT ": " STRREF_FMT"\"\n",
contextStack.curr().source_file.count_lines(description) + 1, STRREF_ARG(description), STRREF_ARG(line));
}
} else {
printf("EVAL(%d): \"" STRREF_FMT "\"\n",
contextStack.curr().source_file.count_lines(line) + 1, STRREF_ARG(line));
if (pStr != nullptr) {
printf("EVAL(%d): \"" STRREF_FMT "\" = \"" STRREF_FMT "\"\n",
contextStack.curr().source_file.count_lines(line) + 1, STRREF_ARG(line), STRREF_ARG(pStr->get()));
} else {
printf("EVAL(%d): \"" STRREF_FMT "\"\n",
contextStack.curr().source_file.count_lines(line) + 1, STRREF_ARG(line));
}
}
return STATUS_OK;
}
@ -4774,8 +5046,20 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc
break;
case AD_TEXT: { // text: add text within quotes
strref text_prefix = line.before('"').get_trimmed_ws();
line = line.between('"', '"');
strref text_prefix;
while (line[0] != '"') {
strref word = line.get_word_ws();
if (word.same_str("petscii") || word.same_str("petscii_shifted")) {
text_prefix = line.get_word_ws();
line += text_prefix.get_len();
line.skip_whitespace();
} else if (StringSymbol *pStr = GetString(line.get_word_ws())) {
line = pStr->get();
break;
}
}
if (line[0] == '"')
line = line.between('"', '"');
CurrSection().AddText(line, text_prefix);
break;
}
@ -4803,10 +5087,15 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc
error = ERROR_UNEXPECTED_LABEL_ASSIGMENT_FORMAT;
break;
}
case AD_STRING:
return Directive_String(line);
case AD_UNDEF:
return Directive_Undef(line);
case AD_INCSYM:
IncludeSymbols(line);
break;
return IncludeSymbols(line);
case AD_LABPOOL: {
strref name = line.split_range_trim(word_char_range, line[0]=='.' ? 1 : 0);
@ -4831,7 +5120,8 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc
CheckConditionalDepth(); // Check if nesting
bool conditional_result;
error = EvalStatement(line, conditional_result);
if (GetLabel(line.get_trimmed_ws()) != nullptr)
strref name = line.get_trimmed_ws();
if (GetLabel(name) != nullptr || GetString(name) != nullptr)
ConsumeConditional();
else
SetConditional();
@ -5432,7 +5722,10 @@ StatusCode Asm::BuildLine(strref line)
labPool++;
}
if (!gotConstruct) {
if (syntax==SYNTAX_MERLIN && strref::is_ws(line_start[0])) {
if (StringSymbol *pStr = GetString(label)) {
StringAction(pStr, line);
line.clear();
} else if (syntax==SYNTAX_MERLIN && strref::is_ws(line_start[0])) {
error = ERROR_UNDEFINED_CODE;
} else if (label[0]=='$' || strref::is_number(label[0]))
line.clear();

647
x65.txt
View File

@ -20,22 +20,25 @@ result.
Noteworthy features:
* Full expression evaluation everywhere values are used.
* Basic relative sections and linking in addition to fixed address.
* C style scoping within '{' and '}'
* Code with sections, object files and linking or single file fixed
address, or mix it up with fixed address sections in object files.
* Assembler listing with cycle counting for code review.
* Export multiple binaries with a single link operation.
* C style scoping within '{' and '}' with local and pool labels
respecting scopes.
* Conditional assembly with if/ifdef/else etc.
* Directives support both with and without leading period.
* Assembler directives representing a variety of features.
* Local labels can be defined in a number of ways, such as leading
period (.label) or leading at-sign (@label) or terminating
dollar sign (label$).
* Reassignment of symbols. This means there is no error if you declare
the same label twice, but on the other hand you can do things like
label = label + 2.
* String Symbols system allows building user expressions and macros
during assembly.
* Reassignment of symbols and labels by default.
* No indentation required for instructions, meaning that labels can't
be mnemonics, macros or directives.
* As far as achievable, support the syntax of other 6502 assemblers
(Merlin syntax now requires command line argument, -endm adds support
for sources using macro/endmacro and repeat/endrepeat combos rather
* Supporting the syntax of other 6502 assemblers (Merlin syntax
requires command line argument, -endm adds support for sources
using macro/endmacro and repeat/endrepeat combos rather
than scopes).
* Apple II GS executable output.
@ -43,6 +46,37 @@ Noteworthy features:
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
Contents
--------
License
Command line arguments
CPU options
Syntax
Targets
Listing Output
Expressions
Math expression symbols supported
PC expression symbols supported
Conditional operators
Conditional assembly
65816
Data
Macros
Strings
Structs and Enums
Symbols
Label Pool
Sections
Relocatable code and linking
Merlin
All Directives
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
License
-------
@ -53,20 +87,23 @@ The MIT License (MIT)
Copyright (c) 2015 Carl-Henrik Skårstedt
Permission is hereby granted, free of charge, to any person obtaining a copy of this software
and associated documentation files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or
substantial portions of the Software.
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
Details, source and documentation at https://github.com/Sakrac/x65.
@ -82,6 +119,7 @@ Document Updates
Nov 23 2015 - Initial pass of x65 documentation
Nov 24 2015 - More text
Nov 26 2015 - String directive and more text
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
@ -90,34 +128,37 @@ Command line arguments
----------------------
Input, output and target options are set on the command line, many of
these options can be controlled with assembler directives in code as
well as the command line.
x65 source target [options]
Where "options" include
* -i(path) : Add include path
* -D(label)[=value] : Define a label with an optional value
(otherwise defined as 1)
* -cpu=6502/65c02/65c02wdc/65816: assemble with opcodes for a different cpu
* -acc=8/16: set the accumulator mode for 65816 at start, default is 8 bits
* -xy=8/16: set the index register mode for 65816 at start, default is 8 bits
* -org = $2000 or - org = 4096: set the default start address of
fixed address code
* -obj (file.x65) : generate object file for later linking
* -bin : Raw binary
* -c64 : Include load address (default)
* -a2b : Apple II Dos 3.3 Binary
* -a2p : Apple II ProDos Binary
* -a2o : Apple II GS OS executable (relocatable)
* -mrg : Force merge all sections (use with -a2o)
* -sym (file.sym) : symbol file
* -lst / -lst = (file.lst) : generate disassembly text from
result (file or stdout)
* -opcodes / -opcodes = (file.s) : dump all available opcodes(file or stdout)
* -sect: display sections loaded and built
* -vice (file.vs) : export a vice symbol file
* -merlin: use Merlin syntax
* -endm : macros end with endm or endmacro instead of scoped('{' - '}')
Options include:
* -i(path) : Add include path
* -D(label)[=value] : Define a label with an optional value
(otherwise defined as 1)
* -cpu=6502/65c02/65c02wdc/65816: assemble with opcodes for a different cpu
* -acc=8/16: set the accumulator mode for 65816 at start, default is 8 bits
* -xy=8/16: set the index register mode for 65816 at start, default is 8 bits
* -org = $2000 or - org = 4096: set the default start address of
fixed address code
* -obj (file.x65) : generate object file for later linking
* -bin : Raw binary
* -c64 : Include load address (default)
* -a2b : Apple II Dos 3.3 Binary
* -a2p : Apple II ProDos Binary
* -a2o : Apple II GS OS executable (relocatable)
* -mrg : Force merge all sections (use with -a2o)
* -sym (file.sym) : symbol file
* -lst / -lst = (file.lst) : generate disassembly text from
result (file or stdout)
* -opcodes / -opcodes = (file.s) : dump all available opcodes(file or stdout)
* -sect: display sections loaded and built
* -vice (file.vs) : export a vice symbol file
* -merlin: use Merlin syntax
* -endm : macros end with endm or endmacro instead of scoped('{' - '}')
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
@ -152,11 +193,12 @@ Syntax
------
The syntax of x65 source is the result of trying to build code from a
variety of assemblers, including a number of open source games and old
personal code. The primary syntax inspiration is from Kick Assembler,
but also DASM, TASM and XASM. Most of the downloaded sample code was
written for Apple II where Merlin, Orca and Lisa were referenced.
The syntax of x65 source is the result of trying to build code originally
created for a variety of assemblers, including a number of open source
games and old personal code. The primary syntax inspiration is from
Kick Assembler, but also DASM, TASM and XASM. Most of the downloaded
sample code was written for Apple II where Merlin, Orca and Lisa were
referenced.
Note that Merlin syntax requires the -merlin command line option.
@ -218,26 +260,25 @@ generate a .x65 object file. More information about object files in Sections.
Command line options for target output:
* -org = $2000: set the default start address of fixed address code,
default is $1000
* -obj (file.x65): generate object file for later linking
* -bin : Raw binary
* -c64 : Include load address (default)
* -a2b : Apple II Dos 3.3 Binary (load address + file size)
* -a2p : Apple II ProDos Binary (set org to $2000 otherwise binary)
* -a2o : Apple II GS OS executable (relocatable)
* -mrg : Force merge all sections (use with -a2o)
* -org = $2000: set the default start address of fixed address code,
default is $1000
* -obj (file.x65): generate object file for later linking
* -bin : Raw binary
* -c64 : Include load address (default)
* -a2b : Apple II Dos 3.3 Binary (load address + file size)
* -a2p : Apple II ProDos Binary (set org to $2000 otherwise binary)
* -a2o : Apple II GS OS executable (relocatable)
* -mrg : Force merge all sections (use with -a2o)
The -mrg option will combine all segments into one to allow for 16 bit
addressing to reach data in other segments, but will limit the size to fit
into a 64 k bank.
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
List Output
Listing Output
-----------
@ -309,6 +350,8 @@ the order of operations is based on C like precedence. Internally the
expression is converted to reverse polish notation to make it easier to
keep track of complex expressions.
Values in expressions can be labels, symbols, strings (added as an
expression within parenthesis) or raw decimal, binary or hexadecimal numbers.
Math expression symbols supported:
@ -420,6 +463,20 @@ non-assembling block of source.
* IFDEF - conditionals, start a block of conditional assembly if a symbol or
label exists at this point
Example:
if 0
this part of the source will not assemble,
however a line can not start with a conditional
assembler directive such as if, ifdef, else, elseif
or endif within a block that does not assemble
unless followed by a valid expression
else
; this part of the source will assemble
lda #0
rts
endif
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
@ -428,9 +485,11 @@ non-assembling block of source.
-----
65816 is large expansion of 6502 and requires the assembler to be aware of
65816 is a major expansion of the 6502 and requires the assembler to be aware of
what processor flags the user has set to select instructions.
use -cpu=65816 on command line or CPU 65816 in source to set.
* A16 - 65816, set accumulator immediate operators to 16 bit mode
* A8 - 65816, set accumulator immediate operators to 8 bit mode
* I16 - 65816, set index register immediate operators to 16 bit mode,
@ -442,6 +501,8 @@ what processor flags the user has set to select instructions.
* XY8 - 65816, set index register immediate operators to 8 bit mode,
same as I8
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
@ -470,6 +531,17 @@ declares a repeating value.
* WORD - data, insert comma separated 16 bit values, same as WORDS
* WORDS - data, insert comma separated 16 bit values, same as WORD
Example:
ONE_824 = 1<<24 ; 1 as a 8.24 number
CosInvPermute: ; 1 +
long -(ONE_824 + 1)/(2) ; x^2 * this
long (ONE_824 + 3*4)/(2*3*4) ; x^4 * this
long -(ONE_824 + 3*4*5*6)/(2*3*4*5*6) ; x^6 * this
long -(ONE_824 + 3*4*5*6*7*8)/(2*3*4*5*6*7*8) ; x^8 * this
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
@ -493,129 +565,101 @@ The parenthesis are optional both for the macro declaration and for the
macro instantiation so macros can be used as if they were instructions
MACRO neg address {
sec
lda #0
sbc source
sta source
sec
lda #0
sbc source
sta source
}
MACRO nega {
eor #$ff
sec
adc #0
}
MACRO nega {
eor #$ff
sec
adc #0
}
Now 'neg' and 'nega' can be used as if it was an instruction:
neg $7f80 ; negate byte at this hard coded address for some reason
lda #$6c
nega ; negate accumulator
neg $7f80 ; negate byte at this hard coded address for some reason
lda #$6c
nega ; negate accumulator
In order to support code written for other assemblers the -endm command line
option changes the syntax for macro declarations to start on the line after
MACRO and end before the line starting with ENDM or ENDMACRO:
MACRO inca
sec
adc #0
ENDMACRO
MACRO inca
sec
adc #0
ENDMACRO
Directives for macros:
* MACRO - macros, start a macro declaration
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
Strings
-------
Strings are special symbols that contain text and were included in an
effort to support ORCA macros. The difference with ORCA and other
assemblers is that the macros can build up string symbols (along with
value symbols) and combine results into a more powerful macro system.
x65 now supports the same mechanism but not the same exact keywords.
Strings can be created and passed in as a value symbol in expressions
or used directly as a macro (without parameters).
Strings are defined using the STRING directive followed by the string
name and an equal sign followed by a string expression.
Strings can include value symbols which will be evaluated and represented
by $ + the hexadecimal representation of the value.
Example:
STRING exp = "1 + 2 + 3"
EVAL exp
result (output):
EVAL(2): "exp" = "1 + 2 + 3" = $6
Example:
STRING code_str = "lda #0\nsta $fe"
code_str
result (code):
lda #0
sta $fe
Example:
STRING concat_example = "ldx #0"
concat_example +=
Directives for String Symbols
* STRING - declare a string symbol
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
Structs and Enums
-----------------
* ENUM - structs and enums, declare enumerations like C
* STRUCT - structs and enums, declare a C-like structure of symbols
separated by dots
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
#Sections
x65 supports linking of fully assembled object files into a single
larger project. This is a fairly standard feature of compilers but
supporting both common 68000 linking style and Apple II Merlin style
means that x65 is not quite as straightforward.
The purpose of a linked project is to work in multiple source files
without worrying about where in memory each file gets compiled to.
In addition sections of code and data in a single file can be linked
to different target locations. Each source file gets assembled to an
object file (.x65) and all the internal and external references are
stored separately from the binary code to be fixed up later.
The last step of a linked project is to load all object files and
generate one or more exported programs. A special source file uses
the INCOBJ directive to bring in object files one by one and piles them
up at a fixed address by using the LINK [segment name] directive.
The SECTION directive starts a block of code or data to be linked
later. By default x65 creates a section named "default" which can
be used for linking as is but is intended to be replaced.
In order to export a label from a source file it should be declared
with XDEF prior to being defined:
XDEF Function
SECTION Code
Function:
lda #1
rts
To reference an exported label from a different file use XREF
XREF Function
SECTION Code
Code:
jsr Function
rts
To link object files (.x65) into an executable the assembled
objects need to be combined into a single source using INCOBJ
INCOBJ "Code.x65"
INCOBJ "Routines.x65"
The result will put the first included code section OR the first code
section declared in the link file.
The link file can export multiple binary executable files by using
the EXPORT directive
SECTION CodeOther, Code
EXPORT other
Code in the CodeOther section will be built as (binary)_other.(ext)
By linking multiple targets at once files can reference labels
between each other.
* DUMMY - sections, start a dummy section (defines addresses but does not
generate data, same as Merlin DUM)
* DUMMY_END - sections, end a dummy section (same as Merlin DEND)
* EXPORT - sections, this section will link or save to a separate binary file
with the argument appended to the link or binary filename.
* IMPORT - data and sections, load a file and include it in the assembly based
on the argument
* INCOBJ - sections, load an object file (.x65) of previously assembled source
* LINK - sections, links a section to the current section
* SECTION - section, declare a section; Comma separated arguments are name,
type, align where type is Code, Data, BSS or Zeropage
* SEG - section, same as SECTION
* SEGMENT - section, same as SECTION
* XDEF - sections, declare a label as external which can be referenced in
other source files by using XREF
* XREF - sections, reference a label that has been declared as global in
another file by using XDEF
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
@ -716,11 +760,137 @@ The following extensions are recognized:
* [pool name] var.l (4 bytes)
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
Sections
--------
x65 supports linking of fully assembled object files into a single
larger project. This is a fairly standard feature of compilers but
supporting both common 68000 linking style and Apple II Merlin style
means that x65 is not quite as straightforward.
The purpose of a linked project is to work in multiple source files
without worrying about where in memory each file gets compiled to.
In addition sections of code and data in a single file can be linked
to different target locations. Each source file gets assembled to an
object file (.x65) and all the internal and external references are
stored separately from the binary code to be fixed up later.
The last step of a linked project is to load all object files and
generate one or more exported programs. A special source file uses
the INCOBJ directive to bring in object files one by one and piles them
up at a fixed address by using the LINK [segment name] directive.
The SECTION directive starts a block of code or data to be linked
later. By default x65 creates a section named "default" which can
be used for linking as is but is intended to be replaced.
In order to export a label from a source file it should be declared
with XDEF prior to being defined:
XDEF Function
SECTION Code
Function:
lda #1
rts
To reference an exported label from a different file use XREF
XREF Function
SECTION Code
Code:
jsr Function
rts
To link object files (.x65) into an executable the assembled
objects need to be combined into a single source using INCOBJ
INCOBJ "Code.x65"
INCOBJ "Routines.x65"
The result will put the first included code section OR the first code
section declared in the link file.
The link file can export multiple binary executable files by using
the EXPORT directive
SECTION CodeOther, Code
EXPORT other
Code in the CodeOther section will be built as (binary)_other.(ext)
By linking multiple targets at once files can reference labels
between each other.
Sections can be named anything and still be assigned a section type:
section Gameplay, Code ; code section named Gameplay, unaligned
...
section GameBinary, Data, $100 ; data section named GameBinary, aligned
...
section Work, Zeropage ; Zeropage or Direct page section
...
section FixedZP, Zeropage
org $a0 ; Make zero page section as a fixed address
Section types include:
* Code: binary code
* Data: binary data
* BSS: uninitialized memory (for certain targets filled with zeroes)
* Zeropage: uninitialized memory restricted to the range $00 - $ff
Additional section directive styles include:
SEG segname
SEG.U segname
SEGMENT "segname": segtype
.SEGMENT "segname"
For creating relocatable files (OMF) certain sections can not be fixed address.
Special sections for Apple II GS executables:
Sections named DirectPage_Stack and of a BSS type (default) determine the size of the direct page + stack for the executable. If multiple sections match this rule the size will be the sum of all the sections with this name.
Zeropage sections will be linked to a fixed address (default at the highest direct page addresses) prior to exporting the relocatable code. Zeropage sections in x65 are intended to allocate ranges of the zero page / direct page, which is a bit confusing with OMF that has the concept of the direct page + stack segment.
Directives related to sections:
* DUMMY - sections, start a dummy section (defines addresses but does not
generate data, same as Merlin DUM)
* DUMMY_END - sections, end a dummy section (same as Merlin DEND)
* EXPORT - sections, this section will link or save to a separate binary file
with the argument appended to the link or binary filename.
* IMPORT - data and sections, load a file and include it in the assembly based
on the argument
* INCOBJ - sections, load an object file (.x65) of previously assembled source
* LINK - sections, links a section to the current section
* SECTION - section, declare a section; Comma separated arguments are name,
type, align where type is Code, Data, BSS or Zeropage
* SEG - section, same as SECTION
* SEGMENT - section, same as SECTION
* XDEF - sections, declare a label as external which can be referenced in
other source files by using XREF
* XREF - sections, reference a label that has been declared as global in
another file by using XDEF
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
Relocatable code and linking
----------------------------
A lot of 6502 code has been built with fixed address assemblers. While
supporting fixed address assembling, x65 is built around generating relocatable
@ -734,6 +904,155 @@ Apple II GS uses a relocatable binary format that can be exported, other
targets link to a fixed address during the linking stage.
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-
Merlin
------
x65 can compile most Merlin syntax code with the -merlin command line
option.
x65 implements a variety of directives and label rules to support Merlin
assembler sources. Merlin syntax is supported in x65 since there is historic
relevance and readily available publicly released source.
Merlin Label Syntax
]label means mutable address label, also does not seem to invalidate
local labels.
:label is perfectly valid, currently treating as a local variable
labels can include '?'
Merlin labels are not allowed to include '.' as period means logical
or in merlin, which also means that enums and structs are not
supported when assembling with merlin syntax.
Merlin expressions
Merlin may not process expressions (probably left to right, parentheses
not allowed) the same as x65 but given that it wouldn't be intuitive
to read the code that way, there are probably very few cases where this
would be an issue.
Merlin additional directives
XC
Change processor. The first instance of XC will switch from 6502 to
65C02, the second switches from 65C02 to 65816. To return to 6502 use
XC OFF. To go directly to 65816 XC XC is supported.
MX
MX sets the immediate mode accumulator instruction size, it takes a
number and uses the lowest two bits. Bit 0 applies to index registers
(x, y) where 0 means 16 bits and 1 means 8 bits, bit 1 applies to the
accumulator. Normally it is specified in binary using the '%' prefix.
MX %11
LUP
LUP is Merlingo for loop. The lines following the LUP directive to
the keyword --^ are repeated the number of times that follows LUP.
MAC
MAC is short for Macro. Merlin macros are defined on the lines in between
MAC and <<< or EOM. Macro arguments are listed on the same line as
MAC and the macro identifier is the label preceding the MAC directive
on the same line.
EJECT
An old assembler directive that does not affect the assembler but if
printed would insert a page break at that point.
DS
Define section, followed by a number of bytes. If number is positive
insert this amount of 0 bytes, if negative, reduce the current PC.
DUM, DEND
Dummy section, this will not write any opcodes or data to the binary
output but all code and data will increment the PC address up to the
point of DEND.
PUT
A variation of INCLUDE that applies an oddball set of filename
rules. These rules apply to INCLUDE as well just in case they
make sense.
USR
In Merlin USR calls a function at a fixed address in memory, x65
safely avoids this. If there is a requirement for a user defined
macro you've got the source code to do it in.
SAV
SAV causes Merlin to save the result it has generated so far,
which is somewhat similar to the [EXPORT](#export) directive.
If the SAV name is different than the source name the section
will have a different EXPORT name appended and exported to a
separate binary file.
DSK
DSK is similar to SAV
ENT
ENT defines the label that precedes it as external, same as XDEF.
EXT
EXT imports an external label, same as XREF.
LNK, STR
LNK links the contents of an object file, to fit with the named section
method of linking in x65 this keyword has been reworked to have a
similar result, the actual linking doesn't begin until the current
section is complete.
CYC
CYC starts and stops a cycle counter, x65 scoping allows for hierarchical
cycle listings but the first merlin directive CYC starts the counter and
the next CYC stops the counter and shows the result. This is 6502 only
until data is entered for other CPUs.
ADR
Define byte triplets (like DA but three bytes instead of 2)
ADRL
Define values of four bytes.
-0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0--0-