This commit is contained in:
Sean 2020-02-20 13:51:00 -07:00
parent c7837da33e
commit 86d4c4fe3a
No known key found for this signature in database
GPG Key ID: B111C910D99B42B8
9 changed files with 438 additions and 171 deletions

View File

@ -1,4 +1,4 @@
OBJS = main.o omf.o handle.o map.o disasm.o api.o
OBJS = main.o omf.o handle.o map.o disasm.o api.o scanner.o
CXX = clang++
CXXFLAGS = -Wall -std=c++11

View File

@ -63,7 +63,7 @@ API::API(unsigned char *dat, unsigned int len) {
break;
}
s->name = name;
symbols.insert(std::pair<std::string, symbol::Symbol>(name, s));
symbols[name] = s;
}
for (uint32_t i = 0; i < numSymbols; i++) {

View File

@ -59,7 +59,7 @@ struct Struct : public Symbol {
};
struct Intrinsic : public Symbol {
enum uint8_t { U8, U16, U23, S8, S16, S32 } type;
enum uint8_t { U8, U16, U32, S8, S16, S32 } type;
};
struct Enum : public Symbol {

View File

@ -2,10 +2,13 @@
#include "disasm.h"
#include "65816.h"
#include "scanner.h"
#include <fstream>
#include <iostream>
#include <stack>
namespace ph = std::placeholders;
static std::map<Addressing, int> sizes;
Disassembler::Disassembler(std::shared_ptr<Fingerprints> prints,
@ -18,16 +21,17 @@ Disassembler::Disassembler(std::shared_ptr<Fingerprints> prints,
bool Disassembler::disassemble(std::vector<Segment> segments,
std::vector<Entry> entries) {
this->segments = segments;
Scanner scanner(segments, symbols, fingerprints);
// trace all entry points
for (auto &entry : entries) {
if (!trace(entry)) {
if (!scanner.trace(entry, std::bind(&Disassembler::decodeInst, this,
ph::_1, ph::_2))) {
std::cerr << "Failed to trace execution flow" << std::endl;
return false;
}
}
// build the basic blocks
if (!basicBlocks()) {
if (!scanner.basicBlocks()) {
std::cerr << "Failed to calculate basic blocks" << std::endl;
return false;
}
@ -41,7 +45,10 @@ bool Disassembler::disassemble(std::vector<Segment> segments,
}
f << "Section $" << hex(segment.segnum, Value) << " "
<< segment.name << std::endl;
if (!decode(segment.mapped, segment.mapped + segment.length)) {
if (!scanner.disassemble(f, segment.mapped,
segment.mapped + segment.length,
std::bind(&Disassembler::printInst, this,
ph::_1))) {
std::cerr << "Disassembly failed" << std::endl;
return false;
}
@ -50,163 +57,6 @@ bool Disassembler::disassemble(std::vector<Segment> segments,
return true;
}
/**
 * Follows execution flow from one entry point, decoding every instruction
 * reached and recording labels (branch targets) and branches (block ends).
 *
 * @param start entry point (address in `org`, processor flags in `flags`)
 * @return false when an address to decode is not inside any segment,
 *         true once the work list has been drained
 */
bool Disassembler::trace(const Entry &start) {
// pending start points; std::stack means last-pushed is processed first
std::stack<Entry> workList;
workList.push(start);
// the entry point itself is a label
labels.insert(std::pair<uint32_t, uint32_t>(start.org, start.org));
while (!workList.empty()) {
auto state = workList.top();
workList.pop();
std::shared_ptr<Inst> inst = nullptr;
do {
// position the owning segment's handle at the current address
auto ptr = getAddress(state.org);
if (ptr == nullptr) {
return false;
}
// remember where this instruction starts; state.org moves past it below
auto addr = state.org;
inst = nullptr;
int16_t numDB = 0;
if (fingerprints) { // scan for fingerprints
// walk the fingerprint trie one byte at a time, keeping the most recent
// (i.e. longest) named node seen so far
auto node = fingerprints->root;
// NOTE(review): int8_t limits a fingerprint to 127 bytes - confirm
int8_t len = 0;
auto fstart = ptr->tell();
do {
// NOTE(review): if node->map is a std::map, operator[] inserts an empty
// child for every byte that does not match - confirm the container type
node = node->map[ptr->r8()];
len++;
if (node != nullptr && !node->name.empty()) {
if (inst == nullptr) {
inst = std::make_shared<Inst>();
inst->type = Special;
}
inst->name = node->name;
inst->length = len;
numDB = node->numDB;
}
} while (node != nullptr && !ptr->eof());
if (inst) {
fstart += inst->length;
state.org += inst->length;
}
// rewind to just after the match, or to the start when nothing matched
ptr->seek(fstart);
}
if (numDB > 0 && inst) {
// the fingerprint is followed by numDB inline data bytes; fold them into
// the printed name and skip over them
inst->name += " {";
for (int i = 0; i < numDB; i++) {
if (i) {
inst->name += ", ";
}
inst->name += hex(ptr->r8(), Value);
}
inst->name += "}";
inst->length += numDB;
state.org += numDB;
}
if (!inst) {
// no fingerprint: decode a regular instruction
// (presumably decodeInst advances state.org - confirm)
inst = decodeInst(ptr, &state);
}
map[addr] = inst;
if (inst->type == Jump || inst->type == Branch || inst->type == Call) {
if (inst->operType == Opr::Imm || inst->operType == Opr::Abs) {
// a target inside a segment that has not been labelled yet is queued
// for tracing with the current flags
if (valid(inst->oper) && labels.find(inst->oper) == labels.end()) {
workList.push({state.flags, inst->oper});
labels.insert(std::pair<uint32_t, uint32_t>(inst->oper,
inst->oper));
}
}
}
if (inst->type == Jump || inst->type == Branch ||
inst->type == Return) {
// keyed by state.org at this point, valued by the instruction's address
branches.insert(std::pair<uint32_t, uint32_t>(state.org, addr));
}
if (inst->type == Invalid) {
branches.insert(std::pair<uint32_t, uint32_t>(addr, addr));
}
// keep decoding linearly until flow cannot fall through
} while (inst->type != Return && inst->type != Jump &&
inst->type != Invalid);
}
return true;
}
/**
 * Splits the traced code into basic blocks by walking the `labels` and
 * `branches` maps in address order and linking blocks through their
 * preds/succs lists.
 *
 * @return always true in the visible code
 */
bool Disassembler::basicBlocks() {
// always starts at a label
// NOTE(review): dereferences the iterator without checking that labels is
// non-empty - confirm trace() always ran first
auto address = labels.lower_bound(0)->first;
auto block = getBlock(address);
auto done = false;
while (!done) {
// next label and next branch strictly after the current address
auto label = labels.upper_bound(address);
auto branch = branches.upper_bound(address);
if (label != labels.end() && (branch == branches.end() ||
label->second < branch->second)) {
// label was earliest
address = label->second;
block->length = address - block->address;
auto next = getBlock(address);
// NOTE(review): append() is not a std::vector member; depends on the
// container type Block uses for preds/succs - confirm
next->preds.append(block);
block->succs.append(next);
block = next;
} else if (branch != branches.end() && (label == labels.end() ||
branch->second <= label->second)) {
// branch was earliest (or equal)
// branch->second is the address of the branching instruction
auto b = map[branch->second];
block->branchLen = b->length;
block->length = branch->first - block->address;
if (b->type != Return && b->type != Invalid) {
// branch has a destination
if (b->operType == Opr::Imm || b->operType == Opr::Abs) {
if (valid(b->oper)) {
auto next = getBlock(b->oper);
next->preds.append(block);
block->succs.append(next);
}
}
}
if (b->type == Jump || b->type == Return || b->type == Invalid) {
// branch doesn't continue
auto next = labels.upper_bound(branch->second);
if (next == labels.end()) {
done = true;
} else {
address = next->second;
block = getBlock(address);
}
} else {
// branch continues
auto next = getBlock(branch->first);
next->preds.append(block);
block->succs.append(next);
block = next;
address = block->address;
}
} else {
// out of labels and branches, we screwed up
// NOTE(review): lastKey()/last() are not std::map members, yet the class
// declares `map` as a std::map - confirm this compiles
block->length = map.lastKey() + map.last()->length - block->address;
done = true;
}
}
// blocks were created in discovery order; put them in address order
std::sort(blocks.begin(), blocks.end(), compareBlocks);
return true;
}
/**
 * Finds the segment whose mapped range contains `address`, seeks that
 * segment's data handle to the matching offset and returns the handle.
 *
 * @return the positioned handle, or nullptr when no segment contains the address
 */
Handle Disassembler::getAddress(uint32_t address) {
  for (auto seg = segments.begin(); seg != segments.end(); ++seg) {
    const bool inside =
        address >= seg->mapped && address < seg->mapped + seg->length;
    if (!inside) {
      continue;
    }
    seg->data->seek(address - seg->mapped);
    return seg->data;
  }
  return nullptr;
}
/**
 * Reports whether `address` lies inside the mapped range
 * [mapped, mapped + length) of any known segment.
 */
bool Disassembler::valid(uint32_t address) {
  for (auto seg = segments.begin(); seg != segments.end(); ++seg) {
    if (address < seg->mapped) {
      continue;  // below this segment
    }
    if (address < seg->mapped + seg->length) {
      return true;
    }
  }
  return false;
}
std::shared_ptr<Inst> Disassembler::decodeInst(Handle f, Entry *entry) {
auto inst = std::make_shared<Inst>();
auto opcode = f->r8();
@ -400,6 +250,111 @@ std::shared_ptr<Inst> Disassembler::decodeInst(Handle f, Entry *entry) {
return inst;
}
std::string Disassembler::printInst(std::shared_ptr<Inst> inst) {
std::string args;
std::string comment;
if (inst->type == Special) {
return inst->name;
}
switch (inst->operType) {
case Opr::None:
break;
case Opr::Imm:
args = "#" + hex(inst->oper, Value);
break;
case Opr::Abs:
args = hex(inst->oper, Address);
break;
case Opr::AbsB:
args = "B:" + hex(inst->oper, Value);
break;
case Opr::AbsD:
args = "D:" + hex(inst->oper, Value);
break;
case Opr::AbsX:
args = hex(inst->oper, Address) + ", x";
break;
case Opr::AbsXB:
args = "B:" + hex(inst->oper, Value) + ", x";
break;
case Opr::AbsXD:
args = "D:" + hex(inst->oper, Value) + ", x";
break;
case Opr::AbsY:
args = hex(inst->oper, Address) + ", y";
break;
case Opr::AbsYB:
args = "B:" + hex(inst->oper, Value) + ", y";
break;
case Opr::AbsYD:
args = "D:" + hex(inst->oper, Value) + ", y";
break;
case Opr::AbsS:
args = hex(inst->oper, Value) + ", s";
break;
case Opr::Ind:
args = "(" + hex(inst->oper, Address) + ")";
break;
case Opr::IndB:
args = "(B:" + hex(inst->oper, Value) + ")";
break;
case Opr::IndD:
args = "(D:" + hex(inst->oper, Value) + ")";
break;
case Opr::IndX:
args = "(" + hex(inst->oper, Address) + ", x)";
break;
case Opr::IndXB:
args = "(B:" + hex(inst->oper, Value) + ", x)";
break;
case Opr::IndY:
args = "(" + hex(inst->oper, Address) + "), y";
break;
case Opr::IndL:
args = "[" + hex(inst->oper, Address) + "]";
break;
case Opr::IndLY:
args = "[" + hex(inst->oper, Address) + "], y";
break;
case Opr::IndS:
args = "(" + hex(inst->oper, Value) + ", s), y";
break;
case Opr::Bank:
args = hex(inst->oper >> 8, Value) + ", " + hex(inst->oper & 0xff, Value);
break;
}
if (inst->flags & IsEmuChanged) {
if (inst->flags & IsEmu) {
comment = " 8-bit mode";
} else {
comment = " 16-bit mode";
}
}
if (inst->flags & IsM8Changed) {
if (inst->flags & IsM8) {
comment += " a.b";
} else {
comment += " a.w";
}
}
if (inst->flags & IsX8Changed) {
if (inst->flags & IsX8) {
comment += " x.b";
} else {
comment += " x.w";
}
}
std::string r = args;
if (!comment.empty()) {
while (r.length() < 20) {
r += " ";
}
r += "; " + comment;
}
return r;
}
std::string Disassembler::hex(uint32_t val, HexType type) {
std::string ret;
int width = 0;

View File

@ -53,17 +53,11 @@ class Disassembler {
std::vector<struct Entry> entries);
private:
bool trace(const struct Entry &start);
bool basicBlocks();
std::string printInst(std::shared_ptr<Inst> inst);
std::shared_ptr<Inst> decodeInst(Handle f, Entry *entry);
Handle getAddress(uint32_t address);
bool valid(uint32_t address);
std::string hex(uint32_t value, HexType type);
std::map<uint32_t, std::string> symbols;
std::map<uint32_t, uint32_t> labels;
std::map<uint32_t, uint32_t> branches;
std::vector<struct Segment> segments;
std::shared_ptr<Fingerprints> fingerprints;
std::map<uint32_t, std::shared_ptr<Inst>> map;
};

View File

@ -58,6 +58,13 @@ uint32_t TheHandle::r32() {
return r;
}
/**
 * Reads three bytes at the cursor as a 24-bit value, lowest byte first, and
 * leaves the cursor just past them.
 *
 * @return the value in the low 24 bits of the result
 */
uint32_t TheHandle::r24() {
  uint32_t r = *pos++;
  // every byte must advance the cursor; without the increments the middle
  // byte is read twice and the cursor ends up two bytes short
  r |= *pos++ << 8;
  r |= *pos++ << 16;
  return r;
}
uint16_t TheHandle::r16() {
uint16_t r = *pos++;
r |= *pos++ << 8;

View File

@ -57,6 +57,10 @@ std::vector<Entry> Map::getEntries() {
return entryPoints;
}
/** Returns a copy of the symbol table (32-bit key to name). */
std::map<uint32_t, std::string> Map::getSymbols() {
  return std::map<uint32_t, std::string>(symbols);
}
void Map::addEntry(uint32_t entry, uint32_t flags) {
Entry e;
e.org = entry;

267
src/scanner.cc Normal file
View File

@ -0,0 +1,267 @@
/** @copyright 2020 Sean Kasun */
#include "scanner.h"
#include <algorithm>
#include <iostream>
#include <stack>
#include <utility>
/**
 * Creates a scanner over a set of segments.
 *
 * The arguments are taken by value and moved into the members, so the
 * containers are not copied a second time.
 *
 * @param segments     segments that can be traced and disassembled
 * @param symbols      names printed before the instruction at their address
 * @param fingerprints optional fingerprint trie; may be null
 */
Scanner::Scanner(std::vector<Segment> segments,
                 std::map<uint32_t, std::string> symbols,
                 std::shared_ptr<Fingerprints> fingerprints)
    : symbols(std::move(symbols)),
      segments(std::move(segments)),
      fingerprints(std::move(fingerprints)) {}
/**
 * Follows execution flow from one entry point, decoding every instruction
 * reached and recording labels (branch targets) and branches (block ends).
 *
 * @param start  entry point (address in `org`, processor flags in `flags`)
 * @param decode callback used to decode an instruction when no fingerprint
 *               matches; it receives the positioned handle and the current
 *               trace state
 * @return false when an address to decode is not inside any segment,
 *         true once the work list has been drained
 */
bool Scanner::trace(const Entry &start,
std::function<std::shared_ptr<Inst>(Handle, Entry*)>
decode) {
// pending start points; std::stack means last-pushed is processed first
std::stack<Entry> workList;
workList.push(start);
// the entry point itself is a label
labels[start.org] = start.org;
while (!workList.empty()) {
auto state = workList.top();
workList.pop();
std::shared_ptr<Inst> inst = nullptr;
do {
// position the owning segment's handle at the current address
auto ptr = getAddress(state.org);
if (ptr == nullptr) {
return false;
}
// remember where this instruction starts; state.org moves past it below
auto addr = state.org;
inst = nullptr;
int16_t numDB = 0;
if (fingerprints) {
// walk the fingerprint trie one byte at a time, keeping the most recent
// (i.e. longest) named node seen so far
auto node = fingerprints->root;
// NOTE(review): int8_t limits a fingerprint to 127 bytes - confirm
int8_t len = 0;
auto fstart = ptr->tell();
do {
// NOTE(review): if node->map is a std::map, operator[] inserts an empty
// child for every byte that does not match - confirm the container type
node = node->map[ptr->r8()];
len++;
if (node != nullptr && !node->name.empty()) {
if (inst == nullptr) {
inst = std::make_shared<Inst>();
inst->type = Special;
}
inst->name = node->name;
inst->length = len;
numDB = node->numDB;
}
} while (node != nullptr && !ptr->eof());
if (inst) {
fstart += inst->length;
state.org += inst->length;
}
// rewind to just after the match, or to the start when nothing matched
ptr->seek(fstart);
}
if (numDB > 0 && inst) {
// the fingerprint is followed by numDB inline data bytes; fold them into
// the printed name as "$xx" and skip over them
inst->name += " {";
for (int i = 0; i < numDB; i++) {
if (i) {
inst->name += ", ";
}
inst->name += "$" + hex(ptr->r8(), 2);
}
inst->name += "}";
inst->length += numDB;
state.org += numDB;
}
if (!inst) {
// no fingerprint: decode a regular instruction
// (presumably the callback advances state.org - confirm)
inst = decode(ptr, &state);
}
map[addr] = inst;
if (inst->type == Jump || inst->type == Branch || inst->type == Call) {
if (inst->operType == Opr::Imm || inst->operType == Opr::Abs) {
// a target inside a segment that has not been labelled yet is queued
// for tracing with the current flags
if (valid(inst->oper) && labels.find(inst->oper) == labels.end()) {
workList.push({state.flags, inst->oper});
labels[inst->oper] = inst->oper;
}
}
}
if (inst->type == Jump || inst->type == Branch || inst->type == Return) {
// we want branches included in the block
// (key: state.org at this point, value: the instruction's own address)
branches[state.org] = addr;
}
if (inst->type == Invalid) {
branches[addr] = addr;
}
// keep decoding linearly until flow cannot fall through
} while (inst->type != Return && inst->type != Jump &&
inst->type != Invalid);
}
return true;
}
/**
 * Strict weak ordering of blocks by start address, used to sort the block
 * list. Takes references so sorting does not copy the shared pointers.
 */
static bool compareBlocks(const std::shared_ptr<Block> &a,
                          const std::shared_ptr<Block> &b) {
  return a->address < b->address;
}
/**
 * Splits the traced code into basic blocks by walking the `labels` and
 * `branches` maps in address order, linking blocks through preds/succs, and
 * finally sorting `blocks` by address.
 *
 * @return true; with no labels recorded there is nothing to partition and the
 *         block list is left empty
 */
bool Scanner::basicBlocks() {
  if (labels.empty()) {
    // nothing was traced; dereferencing the first label would be undefined
    return true;
  }
  // always start at a label (the lowest one)
  auto address = labels.begin()->first;
  auto block = getBlock(address);
  auto done = false;
  while (!done) {
    // next label and next branch strictly after the current address
    auto label = labels.upper_bound(address);
    auto branch = branches.upper_bound(address);
    if (label != labels.end() &&
        (branch == branches.end() || label->second < branch->second)) {
      // label was earliest: close the current block and fall into the next
      address = label->second;
      block->length = address - block->address;
      auto next = getBlock(address);
      next->preds.push_back(block);
      block->succs.push_back(next);
      block = next;
    } else if (branch != branches.end() &&
               (label == labels.end() || branch->second <= label->second)) {
      // branch was earliest (or equal); branch->second is the address of the
      // branching instruction, branch->first the key recorded by trace()
      auto b = map[branch->second];
      block->branchLen = b->length;
      block->length = branch->first - block->address;
      if (b->type != Return && b->type != Invalid) {
        // branch has a destination
        if (b->operType == Opr::Imm || b->operType == Opr::Abs) {
          if (valid(b->oper)) {
            auto next = getBlock(b->oper);
            next->preds.push_back(block);
            block->succs.push_back(next);
          }
        }
      }
      if (b->type == Jump || b->type == Return || b->type == Invalid) {
        // branch doesn't continue: resume at the next label, if any
        auto next = labels.upper_bound(branch->second);
        if (next == labels.end()) {
          done = true;
        } else {
          address = next->second;
          block = getBlock(address);
        }
      } else {
        // branch continues into the code that follows it
        auto next = getBlock(branch->first);
        next->preds.push_back(block);
        block->succs.push_back(next);
        block = next;
        address = block->address;
      }
    } else {
      // out of labels and branches: the block runs to the end of the last
      // decoded instruction
      if (!map.empty()) {
        block->length = map.rbegin()->first + map.rbegin()->second->length -
                        block->address;
      }
      done = true;
    }
  }
  // blocks were created in discovery order; put them in address order
  std::sort(blocks.begin(), blocks.end(), compareBlocks);
  return true;
}
/**
 * Writes a listing of the address range [from, to) to `f`.
 *
 * Bytes not covered by a basic block are hex-dumped. Each block is preceded
 * by a line listing up to four non-adjacent predecessors, and each
 * instruction is preceded by its symbol name when one exists.
 *
 * @param f         output stream
 * @param from      first address to list
 * @param to        address one past the last byte to list
 * @param printInst callback that renders one instruction
 * @return false when a block covers an address with no decoded instruction
 *         (or a zero-length one), true otherwise
 */
bool Scanner::disassemble(std::ostream &f, uint32_t from, uint32_t to,
                          std::function<std::string(std::shared_ptr<Inst>)>
                          printInst) {
  auto address = from;
  for (const auto &b : blocks) {
    const auto blockEnd = b->address + b->length;
    if (blockEnd <= address) {
      continue;  // block lies entirely before the current position
    }
    if (address < b->address) {
      // gap before the block: dump it as raw bytes
      auto last = std::min(to, b->address);
      dumpHex(f, address, last);
      address = last;
    }
    if (address == b->address) {
      // list where control arrives from, skipping plain fall-through
      auto count = 0;
      std::string preds;
      for (const auto &pred : b->preds) {
        if (pred->address + pred->length != b->address && count++ < 4) {
          preds += (pred->address < b->address) ? u8"\u2b9d" : u8"\u2b9f";
          preds += hex(pred->address + pred->length - pred->branchLen, 6);
        }
      }
      if (count > 0) {
        f << hex(address, 6) << ": " << preds << std::endl;
      }
    }
    while (address < to && address < blockEnd) {
      // look up without operator[] so listing never grows the maps
      const auto sym = symbols.find(address);
      if (sym != symbols.end() && !sym->second.empty()) {
        f << sym->second << std::endl;
      }
      const auto found = map.find(address);
      if (found == map.end() || !found->second ||
          found->second->length == 0) {
        // no usable instruction here; bail out rather than dereference
        // null or spin forever on a zero-length instruction
        return false;
      }
      f << hex(address, 6) << ": " << printInst(found->second) << std::endl;
      address += found->second->length;
    }
    if (address < blockEnd) {
      break;  // reached `to` in the middle of this block
    }
  }
  if (address < to) {
    dumpHex(f, address, to);
  }
  return true;
}
/**
 * Writes the bytes in [from, to) to `f` as rows of up to 16 hex values
 * followed by their printable-ASCII rendering.
 *
 * Nothing is written when `from` is not inside a segment or the range is
 * empty. The dump never reads past `to` or past the end of the handle.
 */
void Scanner::dumpHex(std::ostream &f, uint32_t from, uint32_t to) {
  auto ptr = getAddress(from);
  if (ptr == nullptr || to <= from) {
    return;
  }
  const auto len = std::min<uint32_t>(to - from, ptr->length - ptr->tell());
  for (uint32_t i = 0; i < len; i += 16) {
    // each row is labelled with the address of its first byte
    f << hex(from + i, 6) << ": ";
    std::string ascii;
    uint32_t j = 0;
    for (; j < 16 && i + j < len && !ptr->eof(); j++) {
      uint8_t ch = ptr->r8();
      f << hex(ch, 2) << " ";
      ascii += isprint(ch) ? static_cast<char>(ch) : '.';
      if (j == 7) {
        // visual gap between the two halves of a row
        f << " ";
        ascii += " ";
      }
    }
    // pad a short final row
    for (; j < 16; j++) {
      f << " ";
    }
    f << "|" << ascii << std::endl;
  }
}
/**
 * Finds the first segment whose mapped range contains `address`, seeks that
 * segment's data handle to the matching offset and returns the handle.
 *
 * @return the positioned handle, or nullptr when no segment contains the address
 */
Handle Scanner::getAddress(uint32_t address) {
  const auto hit = std::find_if(
      segments.begin(), segments.end(), [address](const Segment &seg) {
        return address >= seg.mapped && address < seg.mapped + seg.length;
      });
  if (hit == segments.end()) {
    return nullptr;
  }
  hit->data->seek(address - hit->mapped);
  return hit->data;
}
bool Scanner::valid(uint32_t address) {
for (auto &s : segments) {
if (address >= s.mapped && address < s.mapped + s.length) {
return true;
}
}
return false;
}
/**
 * Returns the block that starts at `address`, creating an empty one (zero
 * length, zero branch length) and appending it to `blocks` when none exists.
 */
std::shared_ptr<Block> Scanner::getBlock(uint32_t address) {
  for (const auto &existing : blocks) {
    if (existing->address == address) {
      return existing;
    }
  }
  auto created = std::make_shared<Block>(address);
  created->branchLen = 0;
  created->length = 0;
  blocks.push_back(created);
  return created;
}
/**
 * Formats `value` as exactly `len` lowercase hexadecimal digits.
 *
 * Digits that do not fit are dropped from the high end; widths beyond eight
 * digits are padded with leading zeros.
 *
 * @param value number to format
 * @param len   number of digits to produce (0 yields an empty string)
 */
std::string Scanner::hex(uint32_t value, size_t len) {
  static const char digits[] = "0123456789abcdef";
  std::string ret(len, '0');
  // fill from the least significant digit; shifting by four each step keeps
  // the shift count in range even when len exceeds eight
  for (size_t i = len; i-- > 0; value >>= 4) {
    ret[i] = digits[value & 0xf];
  }
  return ret;
}

40
src/scanner.h Normal file
View File

@ -0,0 +1,40 @@
/** @copyright 2020 Sean Kasun */
#pragma once
#include "map.h"
/**
 * One basic block: a run of code starting at `address`, linked to the blocks
 * that can transfer control to it and the blocks it can transfer control to.
 *
 * NOTE(review): preds and succs both hold owning pointers, so two linked
 * blocks keep each other alive - confirm that is acceptable here.
 */
struct Block {
  /** Creates an empty block (no length, no links) starting at `address`. */
  explicit Block(uint32_t address)
      : address(address), length(0), branchLen(0) {}
  uint32_t address;    ///< address of the first byte of the block
  uint32_t length;     ///< size of the block in bytes
  uint32_t branchLen;  ///< length of the branch instruction ending the block
  std::vector<std::shared_ptr<Block>> preds;  ///< blocks leading here
  std::vector<std::shared_ptr<Block>> succs;  ///< blocks reachable from here
};
/**
 * Traces execution flow through a set of segments, groups the decoded
 * instructions into basic blocks and writes the resulting listing.
 *
 * Expected call order (as used by Disassembler::disassemble): trace() for
 * each entry point, then basicBlocks(), then disassemble() per segment.
 */
class Scanner {
public:
/** Stores the segments, symbol names and optional fingerprint trie. */
Scanner(std::vector<Segment> segments,
std::map<uint32_t, std::string> symbols,
std::shared_ptr<Fingerprints> fingerprints);
/**
 * Follows code from `start`, using `decode` for instructions that match no
 * fingerprint. Returns false when an address falls outside every segment.
 */
bool trace(const Entry &start,
std::function<std::shared_ptr<Inst>(Handle, Entry*)> decode);
/** Builds the address-sorted list of basic blocks from the trace results. */
bool basicBlocks();
/**
 * Writes the listing for addresses [from, to) to `f`, rendering each
 * instruction with `printInst` and hex-dumping bytes outside any block.
 */
bool disassemble(std::ostream &f, uint32_t from, uint32_t to,
std::function<std::string(std::shared_ptr<Inst>)> printInst);
private:
/** Formats `value` as `len` lowercase hex digits. */
std::string hex(uint32_t value, size_t len);
/** Returns the block starting at `address`, creating it if needed. */
std::shared_ptr<Block> getBlock(uint32_t address);
/** Returns the segment handle positioned at `address`, or nullptr. */
Handle getAddress(uint32_t address);
/** True when `address` is inside one of the segments. */
bool valid(uint32_t address);
/** Hex-dumps the bytes in [from, to) to `f`. */
void dumpHex(std::ostream &f, uint32_t from, uint32_t to);
// names printed before the instruction at the keyed address
std::map<uint32_t, std::string> symbols;
std::vector<Segment> segments;
// optional; trace() skips fingerprint matching when this is null
std::shared_ptr<Fingerprints> fingerprints;
// branch/call targets and entry points; key and value are both the address
std::map<uint32_t, uint32_t> labels;
// block-ending instructions; the value is the instruction's address
std::map<uint32_t, uint32_t> branches;
std::vector<std::shared_ptr<Block>> blocks;
// decoded instructions keyed by their start address
std::map<uint32_t, std::shared_ptr<Inst>> map;
};