From d73b7c8581e97a79670107de94084d6cb4dad4d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl-Henrik=20Sk=C3=A5rstedt?= Date: Thu, 19 Nov 2015 23:50:59 -0800 Subject: [PATCH] Iterating on the disassembler - exploring rules to better distinguish between code and data --- disassembler/x65dsasm.cpp | 186 ++++++++++++++++++++++++-------------- 1 file changed, 118 insertions(+), 68 deletions(-) diff --git a/disassembler/x65dsasm.cpp b/disassembler/x65dsasm.cpp index 94b7b85..e8ec81d 100644 --- a/disassembler/x65dsasm.cpp +++ b/disassembler/x65dsasm.cpp @@ -944,7 +944,6 @@ enum RefType { RT_JUMP, // jmp RT_JSR, // jsr RT_DATA, // lda $... - RT_COUNT }; @@ -1063,6 +1062,10 @@ void GetReferences(unsigned char *mem, size_t bytes, bool acc_16, bool ind_16, i addr = addr_orig; int curr_label = 0; int prev_addr = -1; + bool was_data = false; + refs[curr_label].data = 0; + bool separator = false; + int prev_op = 0xff; while (bytes && curr_label bytes) + break; // ended on partial instruction addr += arg_size; bytes -= arg_size; mem += arg_size; - if (curr == refs[curr_label].address) + + if (curr == refs[curr_label].address) { + std::vector &pRefs = *refs[curr_label].pRefs; + if (curr_label < (int)refs.size()) { + bool prev_data = was_data; + was_data = separator && !(!was_data && op==0x4c && prev_op==0x4c); + for (size_t j = 0; jcurr && type == RT_BRANCH) && type != RT_DATA) { + separator = false; + was_data = false; + prev_data = false; + break; + } + } + + if (!was_data && prev_data) { + bool only_data_ref = pRefs.size() ? true : false; + for (size_t j = 0; j &pRefs = *refs[k].pRefs; + std::vector::iterator r = pRefs.begin(); + while (r != pRefs.end()) { + if (r->instr_addr>=start && r->instr_addr refs[curr_label].address && prev_addr>=0) { refs[curr_label].address = prev_addr; - curr_label++; - } - prev_addr = curr; - } - - // mark segments as code or data - mem = mem_orig; - bytes = bytes_orig; - addr = addr_orig; - bool separator = false; - bool was_data = false; - curr_label = 0; - while (bytes && curr_labelsize() && (*ref.pRefs)[0].type == RT_DATA) { - was_data = true; - for (int j = 1; was_data && jsize(); j++) - was_data = (*ref.pRefs)[0].type == RT_DATA; - } else - was_data = false; refs[curr_label].data = was_data; curr_label++; } - separator = false; - if (op == 0x60 || op == 0x40 || op == 0x68 || op == 0x4c || op == 0x6c || op == 0x7c || op == 0x5c || op == 0xdc) { // rts, rti, rtl or jmp - separator = true; - for (size_t i = 0; i &pRefs = *refs[i].pRefs; - if (refs[i].address<=curr) { - for (size_t j = 0; jcurr) { - separator = false; - break; - } - } - } else { - for (size_t j = 0; j &pRefs = *refs[i].pRefs; + for (size_t j = 0; jcurr) { + separator = false; + break; + } } } } } } + + prev_op = op; + prev_addr = curr; + } + + + int last = (int)refs.size()-1; + while (last>1 && refs[last-1].data) { + int start_addr = refs[last].address; + for (int k = 0; k<(int)refs.size(); ++k) { + std::vector &pRefs = *refs[k].pRefs; + std::vector::iterator r = pRefs.begin(); + while (r != pRefs.end()) { + if (r->instr_addr>=start_addr && r->instr_addr::iterator k = refs.begin(); + if (k != refs.end()) + ++k; // don't delete the initial label + while (k!=refs.end()) { + if (k->pRefs && k->pRefs->size()==0) { + delete k->pRefs; + k = refs.erase(k); + } else + ++k; } } @@ -1189,10 +1229,11 @@ void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, bool separator = false; bool is_data = false; strown<256> out; + int prev_op = 255; while (bytes) { - + bool data_to_code = false; // Determine if current address is referenced from somewhere - while (curr_label_index= refs[curr_label_index].address) { struct RefAddr &ref = refs[curr_label_index]; if (ref.pRefs) { for (size_t j = 0; jsize(); ++j) { @@ -1216,13 +1257,18 @@ void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, out.sprintf("%sLabel_%d: ; $%04x\n", spc, curr_label_index, addr); fputs(out.c_str(), f); is_data = !!refs[curr_label_index].data; + data_to_code = !is_data; + separator = false; curr_label_index++; + if (curr_label_index < (int)refs.size() && refs[curr_label_index].data) + data_to_code = false; } - if (src && is_data) { + if (src && (is_data || separator)) { out.clear(); int left = end_addr - addr; - if (curr_label_index < (int)refs.size()) + if (curr_label_index<(int)refs.size()) left = refs[curr_label_index].address - addr; + is_data = true; for (int i = 0; i &pRefs = *refs[i].pRefs; @@ -1397,11 +1446,12 @@ void Disassemble(strref filename, unsigned char *mem, size_t bytes, bool acc_16, mem += arg_size; out.append('\n'); fputs(out.c_str(), f); - if (separator) { - fputs("\n", f); - fputs(spc, f); - fprintf(f, "; ------------- $%04x ------------- ;\n\n", addr); - } + prev_op = op; + } + if (separator || ((curr_label_index)