1
0
mirror of https://github.com/ksherlock/x65.git synced 2024-06-11 01:29:26 +00:00

Merlin syntax support, symbol file fix, list file fix

- don't allow ! in merlin labels
- don't skip initial : in merlin labels
- symbol file resolves labels from incobj relative sections
- assembler listing skips disassembly for lines that were not built
from source code
This commit is contained in:
Carl-Henrik Skårstedt 2015-10-19 22:44:02 -07:00
parent d6bc17414a
commit 521e3b98a8
2 changed files with 107 additions and 60 deletions

View File

@ -598,12 +598,15 @@ FindFirstSpace
Currently the assembler is in an early revision, and while features are tested individually, it is fairly certain that untested combinations of features will expose flaws, and certain features are not in a complete state.
**TODO**
* 65c02
* 65816
* Macro parameters should replace only whole words instead of any substring
* Add 'import' directive as a catch-all include/incbin/etc. alternative
* irp (indefinite repeat)
* boolean operators (==, <, >, etc.) for better conditional expressions
**FIXED**
* Merlin syntax fixes (no '!' in labels, don't skip ':' if first character of label), symbol file fix for included object files with resolved labels for relative sections. List output won't disassemble lines that weren't built from source code.
* Export full memory of fixed sections instead of a single section
* Option to source disasm output and option to dump all opcodes as a source file for tests
* Object file format so sections can be saved for later linking

164
x65.cpp
View File

@ -262,21 +262,24 @@ typedef struct {
} OP_ID;
enum AddrMode {
AMB_ZP_REL_X, // address mode bit index
AMB_ZP,
AMB_IMM,
AMB_ABS,
AMB_ZP_Y_REL,
AMB_ZP_X,
AMB_ABS_Y,
AMB_ABS_X,
AMB_REL,
AMB_ACC,
AMB_NON,
AMB_ZP_REL_X, // 0 ($12,x) address mode bit index
AMB_ZP, // 1 $12
AMB_IMM, // 2 #$12
AMB_ABS, // 3 $1234
AMB_ZP_Y_REL, // 4 ($12),y
AMB_ZP_X, // 5 $12,x
AMB_ABS_Y, // 6 $1234,y
AMB_ABS_X, // 7 $1234,x
AMB_REL, // 8 ($1234)
AMB_ACC, // 9 A
AMB_NON, // a
AMB_ZP_REL, // b ($12)
AMB_REL_X, // c ($1234,x)
AMB_ZP_ABS, // d $12, *+$12
AMB_COUNT,
AMB_FLIPXY = AMB_COUNT,
AMB_BRANCH,
AMB_FLIPXY = AMB_COUNT, // e
AMB_BRANCH, // f
// address mode masks
AMM_NON = 1<<AMB_NON,
AMM_IMM = 1<<AMB_IMM,
@ -309,7 +312,7 @@ enum AddrMode {
// One entry of an instruction table such as opcodes_6502[].
struct mnem {
const char *instr; // instruction name as a C string
unsigned short modes; // earlier 16-bit declaration; this change widens it to the unsigned int below
unsigned int modes; // presumably a combination of the AMM_* address mode masks - TODO confirm
unsigned char aCodes[AMB_COUNT]; // one byte per address mode bit index (AMB_*); presumably the opcode for that mode - TODO confirm
};
@ -376,6 +379,14 @@ struct mnem opcodes_6502[] = {
static const int num_opcodes_6502 = sizeof(opcodes_6502) / sizeof(opcodes_6502[0]);
// 65C02
// http://6502.org/tutorials/65c02opcodes.html
// http://www.oxyron.de/html/opcodesc02.html
// 65816
// http://softpixel.com/~cwright/sianse/docs/65816NFO.HTM
// How instruction argument is encoded
enum CODE_ARG {
CA_NONE, // single byte instruction
@ -388,7 +399,7 @@ enum CODE_ARG {
static const strref c_comment("//");
static const strref word_char_range("!0-9a-zA-Z_@$!#");
static const strref label_end_char_range("!0-9a-zA-Z_@$!.");
static const strref label_end_char_range_merlin("!0-9a-zA-Z_@$!]:?");
static const strref label_end_char_range_merlin("!0-9a-zA-Z_@$]:?");
static const strref filename_end_char_range("!0-9a-zA-Z_!@#$%&()/\\-.");
static const strref keyword_equ("equ");
static const strref str_label("label");
@ -538,11 +549,12 @@ typedef std::vector<struct Reloc> relocList;
// For assembly listing this remembers the location of each line.
// BuildLine appends one entry per source line that advanced the section
// address; List reads the entries back to print bytes next to the source.
// (The source_name/code pair appears twice below because the change moves
// those two fields after line_offs.)
struct ListLine {
strref source_name; // source file index name
strref code; // line of code this represents
int address; // start address of this line (stored relative to the section start address)
int size; // number of bytes generated for this line
int line_offs; // offset into code
strref source_name; // source file index name
strref code; // line of code this represents
bool was_mnemonic; // only output code if generated by code: set when the line built an opcode, List disassembles only when this is true
};
typedef std::vector<struct ListLine> Listing;
@ -577,7 +589,7 @@ typedef struct Section {
bool address_assigned; // address is absolute if assigned
bool dummySection; // true if section does not generate data, only labels
void reset() {
void reset() { // explicitly cleaning up sections, not called from Section destructor
name.clear(); export_append.clear();
start_address = address = load_address = 0x0;
address_assigned = false; output = nullptr; curr = nullptr;
@ -612,7 +624,7 @@ typedef struct Section {
address_assigned = true; }
Section(strref _name) : pRelocs(nullptr), pListing(nullptr) { reset(); name = _name;
start_address = load_address = address = 0; address_assigned = false; }
~Section() { reset(); }
~Section() { }
// Appending data to a section
void CheckOutputCapacity(unsigned int addSize);
@ -627,8 +639,8 @@ typedef struct Section {
// A label recorded in the assembler's symbol map; the map is iterated when
// writing the symbol files and is also saved to / restored from object files.
struct MapSymbol {
strref name; // string name
short value; // label value. NOTE(review): stored as a signed 16-bit value while the symbol writers cast it to unsigned int - a value with bit 15 set would sign-extend; confirm addresses >= $8000 print correctly
short section; // index of the section the value is relative to; negative (-1 after LinkLabelsToAddress) means value is already final
bool local; // local variables
bool resolved; // earlier flag; this change replaces it with the section index above
};
typedef std::vector<struct MapSymbol> MapSymbolArray;
@ -793,6 +805,7 @@ public:
// Conditional assembly vars
int conditional_depth;
strref conditional_source[MAX_CONDITIONAL_DEPTH]; // start of conditional for error fixing
char conditional_nesting[MAX_CONDITIONAL_DEPTH];
bool conditional_consumed[MAX_CONDITIONAL_DEPTH];
@ -1097,7 +1110,6 @@ unsigned char* Asm::BuildExport(strref append, int &file_size, int &addr)
bool has_relative_section = false;
bool has_fixed_section = false;
int first_relative_section = -1;
int last_fixed_section = -1;
// find address range
@ -1195,7 +1207,7 @@ void Asm::LinkLabelsToAddress(int section_id, int section_address)
if (pLabels->mapIndex>=0 && pLabels->mapIndex<(int)map.size()) {
struct MapSymbol &msym = map[pLabels->mapIndex];
msym.value = pLabels->value;
msym.resolved = true;
msym.section = -1;
}
CheckLateEval(pLabels->label_name);
}
@ -2178,13 +2190,10 @@ void Asm::LabelAdded(Label *pLabel, bool local)
map.reserve(map.size() + 256);
MapSymbol sym;
sym.name = pLabel->label_name;
sym.resolved = pLabel->section < 0;
sym.section = pLabel->section;
sym.value = pLabel->value;
sym.local = local;
if (!sym.resolved)
pLabel->mapIndex = (int)map.size();
else
pLabel->mapIndex = -1;
pLabel->mapIndex = -1;
map.push_back(sym);
}
}
@ -2557,6 +2566,7 @@ void Asm::CloseConditional() {
void Asm::CheckConditionalDepth() {
if (conditional_consumed[conditional_depth]) {
conditional_depth++;
conditional_source[conditional_depth] = contextStack.curr().read_source.get_line();
conditional_consumed[conditional_depth] = false;
conditional_nesting[conditional_depth] = 0;
}
@ -2565,12 +2575,14 @@ void Asm::CheckConditionalDepth() {
// The block guarded by the conditional at the current depth will be assembled:
// record the source line it came from (reported if the condition is never
// terminated) and flag the level as consumed.
void Asm::ConsumeConditional()
{
	const int level = conditional_depth;
	conditional_source[level] = contextStack.curr().read_source.get_line();
	conditional_consumed[level] = true;
}
// The block guarded by the conditional at the current depth will NOT be
// assembled: record the source line it came from (reported if the condition is
// never terminated) and flag the level as nesting.
void Asm::SetConditional()
{
	const int level = conditional_depth;
	conditional_source[level] = contextStack.curr().read_source.get_line();
	conditional_nesting[level] = 1;
}
@ -2715,6 +2727,7 @@ DirectiveName aDirectiveNames[] {
{ "ENUM", AD_ENUM },
{ "REPT", AD_REPT },
{ "INCDIR", AD_INCDIR },
{ "DO", AD_IF }, // MERLIN
{ "DA", AD_WORDS }, // MERLIN
{ "DW", AD_WORDS }, // MERLIN
{ "ASC", AD_TEXT }, // MERLIN
@ -2872,8 +2885,12 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc
break;
}
case AD_BYTES: // bytes: add bytes by comma separated values/expressions
if (syntax==SYNTAX_MERLIN && line.get_first()=='#') // MERLIN allows for an immediate declaration on data
++line;
while (strref exp = line.split_token_trim(',')) {
int value;
if (syntax==SYNTAX_MERLIN && exp.get_first()=='#') // MERLIN allows for an immediate declaration on data
++exp;
error = EvalExpression(exp, etx, value);
if (error>STATUS_NOT_READY)
break;
@ -2889,6 +2906,8 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc
while (strref exp_w = line.split_token_trim(',')) {
int value = 0;
if (!CurrSection().IsDummySection()) {
if (syntax==SYNTAX_MERLIN && exp_w.get_first()=='#') // MERLIN allows for an immediate declaration on data
++exp_w;
error = EvalExpression(exp_w, etx, value);
if (error>STATUS_NOT_READY)
break;
@ -2917,6 +2936,8 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc
while (strref exp_dc = line.split_token_trim(',')) {
int value = 0;
if (!CurrSection().IsDummySection()) {
if (syntax==SYNTAX_MERLIN && exp_dc.get_first()=='#') // MERLIN allows for an immediate declaration on data
++exp_dc;
error = EvalExpression(exp_dc, etx, value);
if (error > STATUS_NOT_READY)
break;
@ -3190,8 +3211,11 @@ StatusCode Asm::ApplyDirective(AssemblerDirective dir, strref line, strref sourc
DummySection();
break;
case AD_DUMMY_END:
while (CurrSection().IsDummySection())
while (CurrSection().IsDummySection()) {
EndSection();
if (SectionId()==0)
break;
}
break;
case AD_DS: {
int value;
@ -3440,6 +3464,7 @@ StatusCode Asm::BuildLine(OP_ID *pInstr, int numInstructions, strref line)
int start_section = SectionId();
int start_address = CurrSection().address;
strref code_line = line;
bool built_opcode = false;
while (line && error == STATUS_OK) {
strref line_start = line;
char char0 = line[0]; // first char including white space
@ -3496,7 +3521,7 @@ StatusCode Asm::BuildLine(OP_ID *pInstr, int numInstructions, strref line)
} else {
// ignore leading period for instructions and directives - not for labels
strref label = operation;
if (operation[0]==':' || operation[0]=='.')
if ((syntax != SYNTAX_MERLIN && operation[0]==':') || operation[0]=='.')
++operation;
operation = operation.before_or_full('.');
@ -3508,8 +3533,10 @@ StatusCode Asm::BuildLine(OP_ID *pInstr, int numInstructions, strref line)
line.skip_whitespace();
}
error = ApplyDirective((AssemblerDirective)pInstr[op_idx].index, line, contextStack.curr().source_file);
} else if (ConditionalAsm() && pInstr[op_idx].type==OT_MNEMONIC)
} else if (ConditionalAsm() && pInstr[op_idx].type == OT_MNEMONIC) {
error = AddOpcode(line, pInstr[op_idx].index, contextStack.curr().source_file);
built_opcode = true;
}
line.clear();
} else if (!ConditionalAsm()) {
line.clear(); // do nothing if conditional nesting so clear the current line
@ -3567,7 +3594,14 @@ StatusCode Asm::BuildLine(OP_ID *pInstr, int numInstructions, strref line)
// Check for unterminated condition in source
if (!contextStack.curr().next_source &&
(!ConditionalAsm() || ConditionalConsumed() || conditional_depth)) {
error = ERROR_UNTERMINATED_CONDITION;
if (syntax == SYNTAX_MERLIN) { // this isn't a listed feature,
conditional_nesting[0] = 0; // some files just seem to get away without closing
conditional_consumed[0] = 0;
conditional_depth = 0;
} else {
PrintError(conditional_source[conditional_depth], error);
return ERROR_UNTERMINATED_CONDITION;
}
}
if (line.same_str_case(line_start))
@ -3588,23 +3622,16 @@ StatusCode Asm::BuildLine(OP_ID *pInstr, int numInstructions, strref line)
if (curr.pListing && curr.pListing->size() == curr.pListing->capacity())
curr.pListing->reserve(curr.pListing->size() + 256);
if (SectionId() == start_section) {
struct ListLine lst;
lst.address = start_address - curr.start_address;
lst.size = curr.address - start_address;
lst.code = contextStack.curr().source_file;
lst.source_name = contextStack.curr().source_name;
lst.line_offs = int(code_line.get() - lst.code.get());
if (lst.size && curr.size())
curr.pListing->push_back(lst);
} else {
struct ListLine lst;
lst.address = 0;
lst.size = curr.address - curr.start_address;
lst.code = contextStack.curr().source_file;
lst.source_name = contextStack.curr().source_name;
lst.line_offs = int(code_line.get() - lst.code.get());
if (lst.size && curr.size())
if (curr.address != start_address && curr.size() && !curr.IsDummySection()) {
struct ListLine lst;
lst.address = start_address - curr.start_address;
lst.size = curr.address - start_address;
lst.code = contextStack.curr().source_file;
lst.source_name = contextStack.curr().source_name;
lst.line_offs = int(code_line.get() - lst.code.get());
lst.was_mnemonic = built_opcode;
curr.pListing->push_back(lst);
}
}
}
return error;
@ -3695,7 +3722,7 @@ bool Asm::List(strref filename)
out.sprintf_append("%02x ", si->output[lst.address + b]);
}
out.append_to(' ', 18);
if (lst.size) {
if (lst.size && lst.was_mnemonic) {
unsigned char *buf = si->output + lst.address;
unsigned char op = mnemonic[*buf];
unsigned char am = addrmode[*buf];
@ -3887,8 +3914,8 @@ struct ObjFileLateEval {
// Object file record for one map symbol. WriteObjectFile fills it from a
// MapSymbol and ReadObjectFile converts it back.
struct ObjFileMapSymbol {
struct ObjFileStr name; // symbol name (stored as an offset into the object file string pool)
int value; // symbol value copied from the MapSymbol
short section; // section index within the object file; on read, non-negative indices are remapped through aSctRmp and negative values are kept as-is
bool local; // local labels are probably needed
bool resolved; // set if in a relative section, when resolved label eval should clear this.. (earlier field; this change replaces it with section)
};
// Simple string pool, converts strref strings to zero terminated strings and returns the offset to the string in the pool.
@ -4042,7 +4069,7 @@ StatusCode Asm::WriteObjectFile(strref filename)
ms.name.offs = _AddStrPool(mi->name, &stringArray, &stringPool, hdr.stringdata, stringPoolCap);
ms.value = mi->value;
ms.local = mi->local;
ms.resolved = mi->resolved;
ms.section = mi->section;
}
}
@ -4153,7 +4180,7 @@ StatusCode Asm::ReadObjectFile(strref filename)
map.reserve(map.size() + 256);
MapSymbol sym;
sym.name = m.name.offs>=0 ? strref(str_pool + m.name.offs) : strref();
sym.resolved = m.resolved;
sym.section = m.section >=0 ? aSctRmp[m.section] : m.section;
sym.value = m.value;
sym.local = m.local;
map.push_back(sym);
@ -4310,7 +4337,7 @@ int main(int argc, char **argv)
// if source_filename contains a path add that as a search path for include files
assembler.AddIncludeFolder(srcname.before_last('/', '\\'));
assembler.Assemble(strref(buffer, strl_t(size)), strref(argv[1]), obj_out_file != nullptr);
assembler.Assemble(strref(buffer, strl_t(size)), srcname, obj_out_file != nullptr);
if (assembler.errorEncountered)
return_value = 1;
@ -4379,12 +4406,20 @@ int main(int argc, char **argv)
bool wasLocal = false;
for (MapSymbolArray::iterator i = assembler.map.begin(); i!=assembler.map.end(); ++i) {
unsigned int value = (unsigned int)i->value;
if (i->resolved) {
fprintf(f, "%s.label " STRREF_FMT " = $%04x",
wasLocal==i->local ? "\n" : (i->local ? " {\n" : "\n}\n"),
STRREF_ARG(i->name), value);
wasLocal = i->local;
int section = i->section;
while (section >= 0 && section < (int)assembler.allSections.size()) {
if (assembler.allSections[section].IsMergedSection()) {
value += assembler.allSections[section].merged_offset;
section = assembler.allSections[section].merged_section;
} else {
value += assembler.allSections[section].start_address;
break;
}
}
fprintf(f, "%s.label " STRREF_FMT " = $%04x",
wasLocal==i->local ? "\n" : (i->local ? " {\n" : "\n}\n"),
STRREF_ARG(i->name), value);
wasLocal = i->local;
}
fputs(wasLocal ? "\n}\n" : "\n", f);
fclose(f);
@ -4396,11 +4431,20 @@ int main(int argc, char **argv)
if (FILE *f = fopen(vs_file, "w")) {
for (MapSymbolArray::iterator i = assembler.map.begin(); i!=assembler.map.end(); ++i) {
unsigned int value = (unsigned int)i->value;
if (i->resolved) {
fprintf(f, "al $%04x %s" STRREF_FMT "\n",
int section = i->section;
while (section >= 0 && section < (int)assembler.allSections.size()) {
if (assembler.allSections[section].IsMergedSection()) {
value += assembler.allSections[section].merged_offset;
section = assembler.allSections[section].merged_section;
}
else {
value += assembler.allSections[section].start_address;
break;
}
}
fprintf(f, "al $%04x %s" STRREF_FMT "\n",
value, i->name[0]=='.' ? "" : ".",
STRREF_ARG(i->name));
}
}
fclose(f);
}