From 72b81efc87303cb3191e39f6ebfc2ea0a30951b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl-Henrik=20Sk=C3=A5rstedt?= <cskarstedt@gmail.com> Date: Mon, 30 Nov 2015 23:03:51 -0800 Subject: [PATCH] Local labels attempted in disassembler --- disassembler/README.MD | 7 +++ disassembler/x65dsasm.cpp | 128 +++++++++++++++++++++++++++++--------- 2 files changed, 105 insertions(+), 30 deletions(-) diff --git a/disassembler/README.MD b/disassembler/README.MD index 56d378d..b832adf 100644 --- a/disassembler/README.MD +++ b/disassembler/README.MD @@ -24,6 +24,13 @@ x65dsasm binary disasm.txt [$skip[-$end]] [addr=$xxxx] [cpu=6502/65C02/65816] * mx: set the mx flags which control accumulator and index register size * labels: import labels from a file (each line: label=$xxxx [code]/[data] comment) +### Updates + +* **Local labels** to improve code readability +* Improvements to code vs data determination +* Instrument labels through labels text file +* **src** option attempts to generate valid assembler source + ### Labels file format Labels is a text file with one label declaration per line followed by the address to assign and the diff --git a/disassembler/x65dsasm.cpp b/disassembler/x65dsasm.cpp index 0ac87e0..f6cb94b 100644 --- a/disassembler/x65dsasm.cpp +++ b/disassembler/x65dsasm.cpp @@ -961,15 +961,18 @@ struct RefLink { }; struct RefAddr { - int address; // address - int data:3; // 1 if data, 0 if code, 2 if pointers - int size:29; // user specified size + int address:29; // address + int data:3; // 0 if code, 1 if data, 2 if pointers + int size:16; // user specified size + int local:1; // nonzero if local label + int separator:1; // nonzero if following a separator + int number:15; // label count strref label; // user defined label strref comment; std::vector<RefLink> *pRefs; // what is referencing this address - RefAddr() : address(-1), data(0), size(0), pRefs(nullptr) {} - RefAddr(int addr) : address(addr), data(0), size(0), pRefs(nullptr) {} + RefAddr() : address(-1), data(0), size(0), local(0), separator(0), number(-1), pRefs(nullptr) {} + RefAddr(int addr) : address(addr), data(0), size(0), local(0), separator(0), number(-1), pRefs(nullptr) {} }; std::vector<RefAddr> refs; @@ -1021,13 +1024,13 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i r.pRefs = new std::vector<RefLink>(); r.size = size; if (kw_data.is_prefix_word(lab_line)) { - r.data = 1; + r.data = DT_DATA; lab_line += kw_data.get_len(); } else if (kw_code.is_prefix_word(lab_line)) { - r.data = 0; + r.data = DT_CODE; lab_line += kw_code.get_len(); } else if (kw_pointers.is_prefix_word(lab_line)) { - r.data = 2; + r.data = DT_PTRS; lab_line += kw_pointers.get_len(); } lab_line.trim_whitespace(); @@ -1038,7 +1041,7 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i if (GetLabelIndex(start_addr)<0) { refs.push_back(RefAddr(start_addr)); refs[refs.size()-1].pRefs = new std::vector<RefLink>(); - refs[refs.size()-1].data = 1; + refs[refs.size()-1].data = DT_DATA; } if (init_data && GetLabelIndex(start_addr + init_data)<0) { refs.push_back(RefAddr(start_addr+init_data)); refs[refs.size()-1].pRefs = new std::vector<RefLink>(); @@ -1049,7 +1052,7 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i int last_user = (int)refs.size(); for (int i = 0; i<last_user; ++i) { - if (refs[i].data==2) { + if (refs[i].data==DT_PTRS) { int num = refs[i].size ? (refs[i].size/2) : ((refs[i+1].address - refs[i].address)/2); unsigned char *p = mem + refs[i].address - addr; for (int l = 0; l<num; l++) { @@ -1060,7 +1063,7 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i if (n<0) { refs.push_back(RefAddr(a)); refs[nr].pRefs = new std::vector<RefLink>(); - refs[nr].data = 0; + refs[nr].data = DT_CODE; refs[nr].pRefs->push_back(ref); } else { refs[n].pRefs->push_back(ref); @@ -1163,7 +1166,7 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i int curr_label = init_data ? 1 : 0; int prev_addr = -1; bool was_data = init_data>0; - refs[curr_label].data = 0; + refs[curr_label].data = DT_CODE; bool separator = false; int prev_op = 0xff; addr += init_data; @@ -1202,15 +1205,13 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i bytes -= arg_size; mem += arg_size; } - if (separator && curr_label>0 && refs[curr_label-1].data!=2 && curr!=refs[curr_label].address && !cutoff) { + if (separator && curr_label>0 && refs[curr_label-1].data!=DT_PTRS && curr!=refs[curr_label].address && !cutoff) { int end_addr = curr_label<(int)refs.size() ? refs[curr_label].address : (int)(addr_orig + bytes_orig); for (std::vector<RefAddr>::iterator k = refs.begin(); k!= refs.end(); ++k) { std::vector<RefLink> &l = *k->pRefs; std::vector<RefLink>::iterator r = l.begin(); while (r!=l.end()) { if (r->instr_addr>=curr && r->instr_addr<end_addr) { - if (k->address>0x43c5 && k->address<0x4600) - printf("erasing for address $%04x\n", k->address); r = l.erase(r); } else ++r; @@ -1226,14 +1227,14 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i std::vector<RefLink> &pRefs = *refs[curr_label].pRefs; if (curr_label < (int)refs.size()) { if (refs[curr_label].label) { - was_data = refs[curr_label].data==1; + refs[curr_label].separator = 1; // user labels are always global + was_data = refs[curr_label].data==DT_DATA; } else { bool prev_data = was_data; was_data = separator && !(!was_data && ((op==0x4c || op==0x6c) && (prev_op==0x4c || prev_op==0x6c))); for (size_t j = 0; j<pRefs.size(); ++j) { RefType type = pRefs[j].type; if (!(pRefs[j].instr_addr>curr && type == RT_BRANCH) && type != RT_DATA) { - separator = false; was_data = false; prev_data = false; break; @@ -1253,7 +1254,8 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i } was_data = only_data_ref; } - refs[curr_label].data = was_data; + refs[curr_label].separator = separator; + refs[curr_label].data = was_data ? DT_DATA : DT_CODE; } if (was_data) { int start = curr; @@ -1272,6 +1274,7 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i } curr_label++; } + separator = false; // after a separator if there is no jmp, jsr, brl begin data block if (!was_data) { @@ -1301,7 +1304,7 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i int last = (int)refs.size()-1; - while (last>1 && refs[last-1].data) { + while (last>1 && refs[last-1].data!=DT_CODE) { int start_addr = refs[last].address; for (int k = 0; k<(int)refs.size(); ++k) { std::vector<RefLink> &pRefs = *refs[k].pRefs; @@ -1315,7 +1318,7 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i } } last--; - if (last<=1 || !refs[last].data) + if (last<=1 || refs[last].data==DT_CODE) break; } @@ -1326,7 +1329,7 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i ++k; // don't delete the initial label } while (k!=refs.end()) { - if (k->pRefs && k->pRefs->size()==0 && !k->label) { + if (k->pRefs && k->pRefs->size()==0 && !k->label && !k->separator) { delete k->pRefs; k = refs.erase(k); } else @@ -1346,11 +1349,61 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i pRefs.push_back(*r); r = pRefs2.erase(r); } + if (n->separator) + k->separator = 1; delete &pRefs2; refs.erase(n); } ++k; } + + // figure out which labels can be local + + k = refs.begin(); + int label_count_code_global = 1; + int label_count_data_global = 1; + while (k!=refs.end()) { + if (k->separator && k->data == DT_CODE) { + int address = k->address; + k->number = label_count_code_global++; + + std::vector<RefAddr>::iterator f = k; // f = first local + ++f; + std::vector<RefAddr>::iterator e = f; // e = end of locals + while (e!=refs.end()) { + if (e->label || e->data != DT_CODE) + break; + std::vector<RefLink> &r = *(e->pRefs); + bool ext_ref = false; + for (std::vector<RefLink>::iterator i = r.begin(); i!=r.end(); ++i) { + if ((i->type != RT_BRANCH && i->type != RT_DATA) || i->instr_addr<k->address) { + ext_ref = true; + break; + } + } + if (ext_ref) + break; + ++e; + } + int label_count_code_local = 1; + while (f!=e) { + f->number = label_count_code_local++; + f->local = 1; + ++f; + } + if (k!=e) + k = e; + else + ++k; + } else { + k->local = 0; + if (k->data == DT_CODE) + k->number = label_count_code_global++; + else + k->number = label_count_data_global++; + ++k; + } + } } static const char spacing[] = " "; @@ -1376,8 +1429,8 @@ void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, int curr_label_index = 0; bool separator = false; - bool is_data = refs.size() ? !!refs[0].data : false; - bool is_ptrs = is_data && refs[0].data==2; + bool is_data = refs.size() ? (refs[0].data==DT_DATA || refs[0].data==DT_PTRS) : false; + bool is_ptrs = is_data && refs[0].data==DT_PTRS; strown<256> out; while (bytes) { @@ -1401,9 +1454,20 @@ void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, out.sprintf("%s; Referenced from " STRREF_FMT " + $%x (%s)\n", spc, STRREF_ARG(refs[lbl].label), ref_addr - prv_addr, aRefNames[(*ref.pRefs)[j].type]); - else - out.sprintf("%s; Referenced from Label_%d + $%x (%s)\n", spc, lbl, - ref_addr - prv_addr, aRefNames[(*ref.pRefs)[j].type]); + else { + out.sprintf("%s; Referenced from", spc); + if (refs[lbl].local) { + int lbl_glb = lbl; + while (lbl_glb>0 && refs[lbl_glb].local) + lbl_glb--; + if (refs[lbl].label) + out.append(refs[lbl].label); + else + out.sprintf_append(" %s_%d /", refs[lbl_glb].local ? ".l" : (refs[lbl_glb].data==DT_CODE ? "Code" : "Data"), refs[lbl_glb].number); + } + out.sprintf_append(" %s_%d + $%x (%s)\n", refs[lbl].local ? ".l" : (refs[lbl].data==DT_CODE ? "Code" : "Data"), + refs[lbl].number, ref_addr - prv_addr, aRefNames[(*ref.pRefs)[j].type]); + } } else out.sprintf("%s; Referenced from $%04x (%s)\n", spc, (*ref.pRefs)[j].instr_addr, aRefNames[(*ref.pRefs)[j].type]); @@ -1417,10 +1481,12 @@ void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, out.sprintf_append("%s" STRREF_FMT ": ; $%04x\n", spc, STRREF_ARG(refs[curr_label_index].label), addr); else - out.sprintf_append("%sLabel_%d: ; $%04x\n", spc, curr_label_index, addr); + out.sprintf_append("%s%s_%d: ; $%04x\n", spc, + refs[curr_label_index].local ? ".l" : (refs[curr_label_index].data==DT_CODE ? "Code" : "Data"), + refs[curr_label_index].number, addr); fputs(out.c_str(), f); is_data = !!refs[curr_label_index].data; - is_ptrs = is_data && refs[curr_label_index].data==2; + is_ptrs = is_data && refs[curr_label_index].data==DT_PTRS; separator = false; curr_label_index++; } @@ -1442,7 +1508,9 @@ void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, out.append(refs[lbl].label); out.sprintf_append(" ; $%04x " STRREF_FMT "\n", a, STRREF_ARG(refs[lbl].comment)); } else - out.sprintf_append("Label_%d ; $%04x " STRREF_FMT "\n", lbl, a, STRREF_ARG(refs[lbl].comment)); + out.sprintf_append("%s%s_%d: ; $%04x%s\n", spc, + refs[lbl].local ? ".l" : (refs[lbl].data==DT_CODE ? "Code" : "Data"), + refs[lbl].number, a, STRREF_ARG(refs[lbl].comment)); } else out.sprintf_append("$%04x\n", a); fputs(out.c_str(), f); @@ -1553,7 +1621,7 @@ void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, if (refs[i].label) lblname.copy(refs[i].label); else - lblname.sprintf("Label_%d", i); + lblname.sprintf("%s_%d", refs[i].local ? ".l" : (refs[i].data==DT_CODE ? "Code" : "Data"), refs[i].number); lblcmt = refs[i].comment; if (reference > refs[i].address) lblname.sprintf_append(" + $%x", reference - refs[i].address);