From 54b1b4b8ce6f5286997d3a2c671c7542eef66a11 Mon Sep 17 00:00:00 2001 From: Sean Date: Tue, 18 Feb 2020 17:03:29 -0700 Subject: [PATCH] fingerprints --- src/Makefile | 12 ++- src/api.cc | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/api.h | 86 +++++++++++++++++++++ src/disasm.cc | 127 +++++++++++++++++++++++++++++++ src/disasm.h | 34 +++++++++ src/handle.cc | 9 +++ src/handle.h | 1 + src/main.cc | 52 ++++++++++++- src/map.cc | 19 +++-- src/map.h | 6 +- src/omf.cc | 14 +--- src/omf.h | 5 +- 12 files changed, 535 insertions(+), 31 deletions(-) create mode 100644 src/api.cc create mode 100644 src/api.h create mode 100644 src/disasm.cc diff --git a/src/Makefile b/src/Makefile index 3cd14c7..156cfd6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,12 +1,16 @@ -OBJS = main.o omf.o handle.o map.o +OBJS = main.o omf.o handle.o map.o disasm.o api.o CXX = clang++ -CXXFLAGS = -g -Wall -std=c++11 -LDFLAGS = -largp +CXXFLAGS = -Wall -std=c++11 + +UNAME := $(shell uname) +ifeq ($(UNAME), Darwin) + LDFLAGS = -largp +endif all: regs regs: $(OBJS) - $(CXX) $(CXXFLAGS) $(LIBS) -o $@ $^ + $(CXX) $(CXXFLAGS) $(LIBS) -o $@ $(LDFLAGS) $^ %.o: %.cc $(CXX) -c $(CXXFLAGS) -o $@ $< diff --git a/src/api.cc b/src/api.cc new file mode 100644 index 0000000..2ce8179 --- /dev/null +++ b/src/api.cc @@ -0,0 +1,201 @@ +/** @copyright 2020 Sean Kasun */ + +#include +#include "api.h" + +Fingerprints::Fingerprints() { + root = std::make_shared(); +} + +void Fingerprints::add(const std::vector &keys, std::string name, + uint8_t numDB) { + auto node = root; + for (auto key : keys) { + if (node->map.find(key) == node->map.end()) { + node->map.insert(std::pair> + (key, std::make_shared())); + } + node = node->map[key]; + } + node->name = name; + node->numDB = numDB; +} + +API::API(unsigned char *dat, unsigned int len) { + auto h = TheHandle::createFromArray(std::vector(dat, dat + len)); + auto numSymbols = h->r32(); + for (uint32_t i = 0; i < numSymbols; i++) { + auto id = h->r32(); + auto kind = h->r8(); + auto name = h->rs(); + ids.insert(std::pair(id, name)); + std::shared_ptr s = nullptr; + switch (kind) { + case symbol::isIntrinsic: + s = std::make_shared(); + s->kind = symbol::isIntrinsic; + break; + case symbol::isEnum: + s = std::make_shared(); + s->kind = symbol::isEnum; + break; + case symbol::isAlias: + s = std::make_shared(); + s->kind = symbol::isAlias; + break; + case symbol::isStruct: + s = std::make_shared(); + s->kind = symbol::isStruct; + break; + case symbol::isUnion: + s = std::make_shared(); + s->kind = symbol::isUnion; + break; + case symbol::isRef: + std::cerr << "isRef on root!" << std::endl; + break; + case symbol::isFunction: + s = std::make_shared(); + s->kind = symbol::isFunction; + break; + default: + std::cerr << "Unknown type" << std::endl; + break; + } + s->name = name; + symbols.insert(std::pair(name, s)); + } + + for (uint32_t i = 0; i < numSymbols; i++) { + auto id = h->r32(); + auto s = symbols[ids[id]]; + s->size = h->r32(); + switch (s->kind) { + case symbol::isIntrinsic: + setIntrinsic(h, s); + break; + case symbol::isEnum: + setEnum(h, s); + break; + case symbol::isAlias: + setRef(h, s); + break; + case symbol::isStruct: + setStruct(h, s); + break; + case symbol::isUnion: + setStruct(h, s); + break; + case symbol::isRef: + std::cerr << "base level ref" << std::endl; + break; + case symbol::isFunction: + setFunction(h, s); + break; + } + } +} + +std::shared_ptr API::lookup(uint32_t id) { + return symbols[ids[id]]; +} + +void API::setIntrinsic(Handle h, std::shared_ptr s) { + auto i = std::static_pointer_cast(s); + auto type = h->r8(); + switch (type) { + case symbol::Intrinsic::U8: + i->type = symbol::Intrinsic::U8; + break; + case symbol::Intrinsic::U16: + i->type = symbol::Intrinsic::U16; + break; + case symbol::Intrinsic::U32: + i->type = symbol::Intrinsic::U32; + break; + case symbol::Intrinsic::S8: + i->type = symbol::Intrinsic::S8; + break; + case symbol::Intrinsic::S16: + i->type = symbol::Intrinsic::S16; + break; + case symbol::Intrinsic::S32: + i->type = symbol::Intrinsic::S32; + break; + } +} + +void API::setEnum(Handle h, std::shared_ptr s) { + auto e = std::static_pointer_cast(s); + e->type = lookup(h->r32()); + auto num = h->r32(); + for (uint32_t i = 0; i < num; i++) { + auto value = h->r32(); + e->entries[h->rs()] = value; + } +} + +void API::setRef(Handle h, std::shared_ptr s) { + auto r = std::static_pointer_cast(s); + auto id = h->r32(); + if (id == 0) { + r->symbol = nullptr; + } else { + r->symbol = lookup(id); + r->pointer = h->r32(); + r->array = static_cast(h->r32()); + r->reg = h->rs(); + } +} + +void API::setStruct(Handle h, std::shared_ptr s) { + auto str = std::static_pointer_cast(s); + auto numFields = h->r32(); + for (uint32_t i = 0; i < numFields; i++) { + auto key = h->rs(); + auto kind = h->r8(); + auto size = h->r32(); + std::shared_ptr sym = nullptr; + switch (kind) { + case symbol::isRef: + sym = std::make_shared(); + sym->kind = symbol::isRef; + sym->size = size; + setRef(h, sym); + break; + case symbol::isStruct: + sym = std::make_shared(); + sym->kind = symbol::isStruct; + sym->size = size; + setStruct(h, sym); + break; + case symbol::isUnion: + sym = std::make_shared(); + sym->kind = symbol::isUnion; + sym->size = size; + setStruct(h, sym); + break; + default: + std::cerr << "Unknown field type" << std::endl; + break; + } + str->fields.push_back({key, sym}); + } +} + +void API::setFunction(Handle h, std::shared_ptr s) { + auto f = std::static_pointer_cast(s); + auto numArgs = h->r32(); + for (uint32_t i = 0; i < numArgs; i++) { + auto key = h->rs(); + auto ref = std::make_shared(); + setRef(h, ref); + f->arguments.push_back({key, ref}); + } + f->returnType = std::make_shared(); + setRef(h, f->returnType); + auto numSig = h->r32(); + for (uint32_t i = 0; i < numSig; i++) { + f->signature.push_back(h->r32()); + } +} diff --git a/src/api.h b/src/api.h new file mode 100644 index 0000000..38e4011 --- /dev/null +++ b/src/api.h @@ -0,0 +1,86 @@ +/** @copyright 2020 Sean Kasun */ +#pragma once + +#include +#include +#include "handle.h" + +struct Fingerprint { + std::map> map; + uint8_t numDB; + std::string name; +}; + +class Fingerprints { + public: + Fingerprints(); + void add(const std::vector &keys, std::string name, + uint8_t numDB = 0); + std::shared_ptr root; +}; + +namespace symbol { + +enum Kind { + isIntrinsic = 0, isEnum, isAlias, isStruct, isUnion, isRef, isFunction, +}; + +struct Symbol { + std::string name; + uint32_t size; + Kind kind; +}; + +struct Ref : public Symbol { + std::shared_ptr symbol; + uint32_t pointer; + int32_t array; + std::string reg; +}; + +struct Argument { + std::string key; + std::shared_ptr ref; +}; + +struct Function : public Symbol { + std::vector arguments; + std::shared_ptr returnType; + std::vector signature; +}; + +struct Field { + std::string key; + std::shared_ptr value; +}; + +struct Struct : public Symbol { + std::vector fields; +}; + +struct Intrinsic : public Symbol { + enum uint8_t { U8, U16, U23, S8, S16, S32 } type; +}; + +struct Enum : public Symbol { + std::shared_ptr type; + std::map entries; +}; + +} + +class API { + public: + API(unsigned char *dat, unsigned int len); + std::map> symbols; + + private: + std::shared_ptr lookup(uint32_t id); + void setIntrinsic(Handle h, std::shared_ptr s); + void setEnum(Handle h, std::shared_ptr s); + void setRef(Handle h, std::shared_ptr s); + void setStruct(Handle h, std::shared_ptr s); + void setFunction(Handle h, std::shared_ptr s); + + std::map ids; +}; diff --git a/src/disasm.cc b/src/disasm.cc new file mode 100644 index 0000000..68f497b --- /dev/null +++ b/src/disasm.cc @@ -0,0 +1,127 @@ +/** @copyright 2020 Sean Kasun */ + +#include "disasm.h" +#include +#include + +Disassembler::Disassembler(std::shared_ptr prints) : fingerprints(prints) { } + +bool Disassembler::disassemble(std::vector segments, + std::vector entries) { + this->segments = segments; + // trace all entry points + for (auto &entry : entries) { + if (!trace(entry)) { + std::cerr << "Failed to trace execution flow" << std::endl; + return false; + } + } + // build the basic blocks + if (!basicBlocks()) { + std::cerr << "Failed to calculate basic blocks" << std::endl; + return false; + } + // disassemble each segment + for (auto &segment : segments) { + std::string fname = "seg" + std::to_string(segment.segnum); + std::ofstream f(fname, std::ios::out | std::ios::binary | std::ios::trunc); + if (!f.is_open()) { + std::cerr << "Failed to open '" << fname << "' for writing" << std::endl; + return false; + } + f << "Section $" << std::ios::hex << segment.segnum << " " + << segment.name << std::endl; + if (!decode(segment.mapped, segment.mapped + segment.length)) { + std::cerr << "Disassembly failed" << std::endl; + return false; + } + f.close(); + } + return true; +} + +bool Disassembler::trace(const Entry &start) { + std::stack workList; + + workList.push(start); + labels.insert(std::pair(start.org, start.org)); + while (!workList.empty()) { + auto state = workList.top(); + workList.pop(); + std::shared_ptr inst = nullptr; + do { + auto ptr = getAddress(state.org); + if (ptr == nullptr) { + return false; + } + auto addr = state.org; + inst = nullptr; + int16_t numDB = 0; + if (fingerprints) { // scan for fingerprints + auto node = fingerprints->root; + int8_t len = 0; + auto fstart = ptr->tell(); + do { + node = node->map.value(ptr->r8(), nullptr); + len++; + if (node != nullptr && !node->name.isEmpty()) { + if (inst == nullptr) { + inst = std::make_shared(); + inst->type = Special; + } + inst->name = node->name; + inst->length = len; + numDB = node->numDB; + } + } while (node != nullptr && !ptr->eof()); + if (inst) { + fstart += inst->length; + state.org += inst->length; + } + ptr->seek(fstart); + } + if (numDB > 0 && inst) { + inst->name += " {"; + for (int i = 0; i < numDB; i++) { + if (i) { + inst->name += ", "; + } + inst->name += hex2(ptr->r8()); + } + inst->name += "}"; + inst->length += numDB; + state.org += numDB; + } + if (!inst) { + inst = decodeInst(ptr, &state); + } + map[addr] = inst; + if (inst->type == Jump || inst->type == Branch || inst->type == Call) { + auto target = target(inst, resolver); + if (target > 0 && !labels.contains(target)) { + workList.push({state.flags, target}); + labels.insert(target, target); + } + } + if (inst->type == Jump || inst->type == Branch || + inst->type == Return) { + branches.insert(state.org, addr); + } + if (inst->type == Invalid) { + branches.insert(addr, addr); + } + } while (inst->type != Return && inst->type != Jump && + inst->type != Invalid); + } + return true; +} + +Handle Disassembler::getAddress(uint32_t address) { + for (auto &s : segments) { + if (address >= s.mapped && address < s.mapped + s.length) { + s.data->seek(address - s.mapped); + return s.data; + } + } + return nullptr; +} diff --git a/src/disasm.h b/src/disasm.h index 1c10ff8..13aa243 100644 --- a/src/disasm.h +++ b/src/disasm.h @@ -2,6 +2,9 @@ #pragma once #include +#include +#include "omf.h" +#include "api.h" enum { IsX8 = 0x10, @@ -12,3 +15,34 @@ enum { IsM8Changed = 0x400, IsEmuChanged = 0x2000, }; + +enum InsType : uint16_t { + Normal = 0x00, + Call = 0x01, + Jump = 0x02, + Return = 0x03, + Branch = 0x04, + Special = 0x05, // fingerprint + Invalid = 0xff, +}; + +struct Inst { + std::string name; + InsType type; + uint16_t length; +}; + +class Disassembler { + public: + Disassembler(std::shared_ptr prints); + bool disassemble(std::vector segments, std::vector entries); + + private: + bool trace(const Entry &start); + Handle getAddress(uint32_t address); + + std::map labels; + std::map branches; + std::vector segments; + std::shared_ptr fingerprints; +}; diff --git a/src/handle.cc b/src/handle.cc index a188fd5..9760b4d 100644 --- a/src/handle.cc +++ b/src/handle.cc @@ -68,6 +68,15 @@ uint8_t TheHandle::r8() { return *pos++; } +std::string TheHandle::rs() { + std::string r; + uint8_t len = *pos++; + for (auto i = 0; i < len; i++) { + r += static_cast(*pos++); + } + return r; +} + std::string TheHandle::read(int32_t len) { std::string r; for (auto i = 0; i < len; i++) { diff --git a/src/handle.h b/src/handle.h index 525a0c6..2607112 100644 --- a/src/handle.h +++ b/src/handle.h @@ -19,6 +19,7 @@ class TheHandle { uint32_t r24(); uint16_t r16(); uint8_t r8(); + std::string rs(); void seek(int64_t pos); void skip(int64_t length); std::string read(int32_t length); diff --git a/src/main.cc b/src/main.cc index 96c0bb9..31b5bd5 100644 --- a/src/main.cc +++ b/src/main.cc @@ -4,7 +4,8 @@ #include #include "disasm.h" #include "omf.h" - +#include "api.h" +#include "../iigs.c" const char *argp_program_version = "regs 0.2"; const char *argp_program_bug_address = "sean@seancode.com"; @@ -109,11 +110,54 @@ int main(int argc, char **argv) { argp_parse(&argp, argc, argv, 0, 0, &arguments); // load map if it exists - Map map(arguments.filename, arguments.org, arguments.flags); - OMF omf(map); - if (!omf.load(arguments.filename)) { + Map map(arguments.filename); + OMF omf; + if (!omf.load(arguments.filename, arguments.org)) { std::cerr << "Failed to load " << arguments.filename << std::endl; return -1; } auto segments = omf.get(); + if (map.needsEntry()) { + for (auto &s : segments) { + if ((s.kind & 0x1f) == 0) { // code + map.addEntry(s.mapped + s.entry, arguments.flags); + break; + } + } + } + + API api(iigs_dat, iigs_dat_len); + + auto prints = std::make_shared(); + for (auto s : api.symbols) { + if (s->kind == symbol::isFunction) { + auto f = std::static_pointer_cast(s); + if (f->signature.size() >= 2) { + if (f->signature[0] >= 0) { // tool + // ldx tool, jsl e1/0000 + std::vector sig = { 0xa2, f->signature[0], f->signature[1], + 0x22, 0x00, 0x00, 0xe1 }; + prints->add(sig, f->name); + } else if (f->signature[0] == -1) { // p16/gsos + // jsl e1/00a8 + std::vector sig = { 0x22, 0xa8, 0x00, 0xe1, + f->signature[2] & 0xff, f->signature[2] >> 8 }; + prints->add(sig, f->name, f->signature[1] & 0xff); + } else if (f->signature[0] == -2) { // p8 + // jsr bf00 + std::vector sig = { 0x20, 0x00, 0xbf, f->signature[2] }; + prints->add(sig, f->name, f->signature[1] & 0xff); + } else if (f->signature[0] == -3) { // smartport + // jsr c50d + std::vector sig5 = { 0x20, 0x0d, 0xc5, f->signature[2] }; + std::vector sig7 = { 0x20, 0x0d, 0xc7, f->signature[2] }; + prints->add(sig5, f->name, f->signature[1]); + prints->add(sig7, f->name, f->signature[1]); + } + } + } + } + + Disassembler d(prints); + d.disassemble(segments, map.getEntries()); } diff --git a/src/map.cc b/src/map.cc index 70e56aa..853b044 100644 --- a/src/map.cc +++ b/src/map.cc @@ -3,16 +3,11 @@ #include #include -Map::Map(const char *filename, uint32_t org, uint32_t flags) { +Map::Map(const char *filename) { std::string mapname = filename; mapname += ".regs"; File file(mapname); if (!file.is_open()) { - Entry entry; - entry.org = org; - this->org = org; - entry.flags = flags; - entryPoints.push_back(entry); return; } @@ -54,10 +49,18 @@ Map::Map(const char *filename, uint32_t org, uint32_t flags) { } } -void Map::addEntry(uint32_t entry) { +bool Map::needsEntry() { + return entryPoints.size() == 0; +} + +std::vector Map::getEntries() { + return entryPoints; +} + +void Map::addEntry(uint32_t entry, uint32_t flags) { Entry e; e.org = entry; - e.flags = 0; + e.flags = flags; entryPoints.push_back(e); } diff --git a/src/map.h b/src/map.h index 48a9a36..5aa2c23 100644 --- a/src/map.h +++ b/src/map.h @@ -31,8 +31,10 @@ struct Entry { class Map { public: - Map(const char *filename, uint32_t org, uint32_t flags); - void addEntry(uint32_t entry); + Map(const char *filename); + bool needsEntry(); + std::vector getEntries(); + void addEntry(uint32_t entry, uint32_t flags); uint32_t org; private: diff --git a/src/omf.cc b/src/omf.cc index 3dd2cf8..713c2c7 100644 --- a/src/omf.cc +++ b/src/omf.cc @@ -13,21 +13,22 @@ enum SegOp { SUPER = 0xf7, }; -OMF::OMF(const Map &map) : map(map) { +OMF::OMF() { } static bool compareSegments(const Segment &a, const Segment &b) { return a.mapped < b.mapped; } -bool OMF::load(const char *filename) { +bool OMF::load(const char *filename, uint32_t org) { handle = TheHandle::createFromFile(filename); if (!isOMF()) { Segment seg; seg.bytecnt = handle->length; seg.kind = 0; // code - seg.mapped = map.org; + seg.entry = 0; + seg.mapped = org; seg.data = handle; segments.push_back(seg); } else { @@ -40,13 +41,6 @@ bool OMF::load(const char *filename) { if (!relocSegments()) { return false; } - // add the first entry point - for (auto &s : segments) { - if ((s.kind & 0x1f) == 0) { // code - map.addEntry(s.mapped + s.entry); - break; - } - } } std::sort(segments.begin(), segments.end(), compareSegments); return true; diff --git a/src/omf.h b/src/omf.h index f8b01f2..37afe27 100644 --- a/src/omf.h +++ b/src/omf.h @@ -26,8 +26,8 @@ struct Segment { class OMF { public: - explicit OMF(const Map &map); - bool load(const char *filename); + explicit OMF(); + bool load(const char *filename, uint32_t org); std::vector get() const; private: @@ -39,6 +39,5 @@ class OMF { uint32_t value); Handle handle; - const Map ↦ std::vector segments; };