From 3a060e2c393db9604ac81a4817bb8fbe0110c260 Mon Sep 17 00:00:00 2001 From: Kelvin Sherlock Date: Sun, 25 Dec 2016 00:04:17 -0500 Subject: [PATCH] new disassembler code. --- Makefile | 5 +- disassembler.cpp | 497 +++++++++++++++++++++++++++++++++++++++++++++++ disassembler.h | 62 ++++++ dumpobj.cpp | 58 +++--- 4 files changed, 595 insertions(+), 27 deletions(-) create mode 100644 disassembler.cpp create mode 100644 disassembler.h diff --git a/Makefile b/Makefile index 6c794af..0c8acdf 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,10 @@ CC=c++ -std=c++11 -g CXX=c++ -std=c++11 -g OBJS = dumpobj.o disasm.o -dumpobj : dumpobj.o disasm.o +dumpobj : dumpobj.o disasm.o disassembler.o + +disassembler.o : disassembler.cpp disassembler.h +dumpobj.o : dumpobj.cpp disassembler.h .PHONY: clean diff --git a/disassembler.cpp b/disassembler.cpp new file mode 100644 index 0000000..27b76ef --- /dev/null +++ b/disassembler.cpp @@ -0,0 +1,497 @@ +#include "disassembler.h" +#include + + +static constexpr const char opcodes[] = + "brkoracoporatsboraaslora" + "phporaaslphdtsboraaslora" + "bploraoraoratrboraaslora" + "clcorainctcstrboraaslora" + "jsrandjslandbitandroland" + "plpandrolpldbitandroland" + "bmiandandandbitandroland" + "secanddectscbitandroland" + "rtieorwdmeormvpeorlsreor" + "phaeorlsrphkjmpeorlsreor" + "bvceoreoreormvneorlsreor" + "clieorphytcdjmleorlsreor" + "rtsadcperadcstzadcroradc" + "plaadcrorrtljmpadcroradc" + "bvsadcadcadcstzadcroradc" + "seiadcplytdcjmpadcroradc" + "brastabrlstastystastxsta" + "deybittxaphbstystastxsta" + "bccstastastastystastxsta" + "tyastatxstxystzstastzsta" + "ldyldaldxldaldyldaldxlda" + "tayldataxplbldyldaldxlda" + "bcsldaldaldaldyldaldxlda" + "clvldatsxtyxldyldaldxlda" + "cpycmprepcmpcpycmpdeccmp" + "inycmpdexwaicpycmpdeccmp" + "bnecmpcmpcmppeicmpdeccmp" + "cldcmpphxstpjmlcmpdeccmp" + "cpxsbcsepsbccpxsbcincsbc" + "inxsbcnopxbacpxsbcincsbc" + "beqsbcsbcsbcpeasbcincsbc" + "sedsbcplxxcejsrsbcincsbc" + ; + +static constexpr const int mImplied = 
0x0000; +static constexpr const int mImmediate = 0x1000; +static constexpr const int mAbsolute = 0x2000; +static constexpr const int mAbsoluteI = 0x3000; +static constexpr const int mAbsoluteIL = 0x4000; +static constexpr const int mAbsoluteLong = 0x5000; +static constexpr const int mDP = 0x6000; +static constexpr const int mDPI = 0x7000; +static constexpr const int mDPIL = 0x8000; +static constexpr const int mRelative = 0x9000; +static constexpr const int mBlockMove = 0xa000; + +static constexpr const int m_S = 0x0100; +static constexpr const int m_X = 0x0200; +static constexpr const int m_Y = 0x0400; + +static constexpr const int m_M = 0x0020; +static constexpr const int m_I = 0x0010; + +static constexpr const int modes[] = +{ + 1 | mAbsolute, // 00 brk #imm + 1 | mDPI | m_X, // 01 ora (dp,x) + 1 | mAbsolute, // 02 cop #imm + 1 | mDP | m_S, // 03 ora ,s + 1 | mDP, // 04 tsb abs + + 1 | mRelative, // 10 bpl + 1 | mDPI | m_Y, // 11 ora (dp),y + 1 | mDPI, // 12 ora (dp) + 1 | mDPI | m_S | m_Y, // 13 ora ,s,y + 1 | mDP, // 14 trb abs,x + + 2 | mAbsolute, // 20 jsr |abs + 1 | mDPI | m_X, // 21 and (dp,x) + 3 | mAbsoluteLong, // 22 jsl >abs + 1 | mDP | m_S, // 23 and ,s + 1 | mDP, // 24 bit abs + + 1 | mRelative, // 30 bmi + 1 | mDPI | m_Y, // 31 and (dp),y + 1 | mDPI, // 32 and (dp) + 1 | mDPI | m_S | m_Y, // 33 and ,s,y + 1 | mDP | m_X, // 34 bit dp,x + 1 | mDP | m_X, // 35 and dp,x + 1 | mDP | m_X, // 36 rol abs,x + + 0 | mImplied, // 40 rti + 1 | mDPI | m_X, // 41 eor (dp,x) + 1 | mAbsolute, // 42 wdm #imm + 1 | mDP | m_S, // 43 eor ,s + 2 | mBlockMove, // 44 mvp x,x + 1 | mDP, // 45 eor dp + 1 | mDP, // 46 lsr dp + 1 | mDPIL | m_Y, // 47 eor [dp],y + 0 | mImplied, // 48 pha + 1 | mImmediate | m_M, // 49 eor #imm + 0 | mImplied, // 4a lsr a + 0 | mImplied, // 4b phk + 2 | mAbsolute, // 4c jmp |abs + 2 | mAbsolute, // 4d eor |abs + 2 | mAbsolute, // 4e lsr |abs + 3 | mAbsoluteLong, // 4f eor >abs + + 1 | mRelative, // 50 bvc + 1 | mDPI | m_Y, // 51 eor (dp),y + 1 | 
mDPI, // 52 eor (dp) + 1 | mDPI | m_S | m_Y, // 53 eor ,s,y + 2 | mBlockMove, // 54 mvn x,x + 1 | mDP | m_X, // 55 eor dp,x + 1 | mDP | m_X, // 56 lsr dp,x + 1 | mDPIL | m_Y, // 57 eor [dp],y + 0 | mImplied, // 58 cli + 2 | mAbsolute | m_Y, // 59 eor |abs,y + 0 | mImplied, // 5a phy + 0 | mImplied, // 5b tcd + 3 | mAbsoluteLong, // 5c jml >abs + 2 | mAbsolute | m_X, // 5d eor |abs,x + 2 | mAbsolute | m_X, // 5e lsr |abs,x + 3 | mAbsoluteLong | m_X, // 5f eor >abs,x + + 0 | mImplied, // 60 rts + 1 | mDPI | m_X, // 61 adc (dp,x) + 2 | mRelative, // 62 per |abs + 1 | mDP | m_S, // 63 adc ,s + 1 | mDP, // 64 stz abs + + 1 | mRelative, // 70 bvs + 1 | mDPI | m_Y, // 71 adc (dp),y + 1 | mDPI, // 72 adc (dp) + 1 | mDPI | m_S | m_Y, // 73 adc ,s,y + 1 | mDP | m_X, // 74 stz dp,x + 1 | mDP | m_X, // 75 adc dp,x + 1 | mDP | m_X, // 76 ror dp,x + 1 | mDPIL | m_Y, // 77 adc [dp],y + 0 | mImplied, // 78 sei + 2 | mAbsolute | m_Y, // 79 adc |abs,y + 0 | mImplied, // 7a ply + 0 | mImplied, // 7b tdc + 2 | mAbsoluteI | m_X, // 7c jmp (abs,x) + 2 | mAbsolute | m_X, // 7d adc |abs,x + 2 | mAbsolute | m_X, // 7e ror |abs,x + 3 | mAbsoluteLong | m_X, // 7f adc >abs,x + + 1 | mRelative, // 80 bra + 1 | mDPI | m_X, // 81 sta (dp,x) + 2 | mRelative, // 82 brl |abs + 1 | mDP | m_S, // 83 sta ,s + 1 | mDP, // 84 sty abs + + 1 | mRelative, // 90 bcc + 1 | mDPI | m_Y, // 91 sta (dp),y + 1 | mDPI, // 92 sta (dp) + 1 | mDPI | m_S | m_Y, // 93 sta ,s,y + 1 | mDP | m_X, // 94 sty dp,x + 1 | mDP | m_X, // 95 sta dp,x + 1 | mDP | m_Y, // 96 stx dp,y + 1 | mDPIL | m_Y, // 97 sta [dp],y + 0 | mImplied, // 98 tya + 2 | mAbsolute | m_Y, // 99 sta |abs,y + 0 | mImplied, // 9a txs + 0 | mImplied, // 9b txy + 2 | mAbsolute, // 9c stz |abs + 2 | mAbsolute | m_X, // 9d sta |abs,x + 2 | mAbsolute | m_X, // 9e stz |abs,x + 3 | mAbsoluteLong | m_X, // 9f sta >abs,x + + 1 | mImmediate | m_I, // a0 ldy #imm + 1 | mDPI | m_X, // a1 lda (dp,x) + 1 | mImmediate | m_I, // a2 ldx #imm + 1 | mDP | m_S, // a3 lda ,s + 
1 | mDP,					// a4 ldy abs
+	// NOTE(review): the row labels jump from a4 straight to b0 (and likewise
+	// b4 -> c0, c4 -> d0, e4 -> f0). process(uint8_t) indexes this table with
+	// the raw opcode byte, so it needs one row per opcode value -- confirm the
+	// missing rows against the original commit.
+
+	1 | mRelative,				// b0 bcs
+	1 | mDPI | m_Y,				// b1 lda (dp),y
+	1 | mDPI,				// b2 lda (dp)
+	1 | mDPI | m_S | m_Y,			// b3 lda ,s,y
+	1 | mDP | m_X,				// b4 ldy abs,x
+
+	1 | mImmediate | m_I,			// c0 cpy #imm
+	1 | mDPI | m_X,				// c1 cmp (dp,x)
+	1 | mImmediate,				// c2 rep #
+	1 | mDP | m_S,				// c3 cmp ,s
+	1 | mDP,				// c4 cpy abs
+
+	1 | mRelative,				// d0 bne
+	1 | mDPI | m_Y,				// d1 cmp (dp),y
+	1 | mDPI,				// d2 cmp (dp)
+	1 | mDPI | m_S | m_Y,			// d3 cmp ,s,y
+	1 | mDPI,				// d4 pei (dp)
+	1 | mDP | m_X,				// d5 cmp dp,x
+	1 | mDP | m_X,				// d6 dec dp,x
+	1 | mDPIL | m_Y,			// d7 cmp [dp],y
+	0 | mImplied,				// d8 cld
+	2 | mAbsolute | m_Y,			// d9 cmp |abs,y
+	0 | mImplied,				// da phx
+	0 | mImplied,				// db stp
+	2 | mAbsoluteIL,			// dc jml [abs]
+	2 | mAbsolute | m_X,			// dd cmp |abs,x
+	2 | mAbsolute | m_X,			// de dec |abs,x
+	3 | mAbsoluteLong | m_X,		// df cmp >abs,x
+
+	1 | mImmediate | m_I,			// e0 cpx #imm
+	1 | mDPI | m_X,				// e1 sbc (dp,x)
+	1 | mImmediate,				// e2 sep #imm
+	1 | mDP | m_S,				// e3 sbc ,s
+	1 | mDP,				// e4 cpx abs
+
+	1 | mRelative,				// f0 beq
+	1 | mDPI | m_Y,				// f1 sbc (dp),y
+	1 | mDPI,				// f2 sbc (dp)
+	1 | mDPI | m_S | m_Y,			// f3 sbc ,s,y
+	2 | mAbsolute,				// f4 pea |abs
+	1 | mDP | m_X,				// f5 sbc dp,x
+	1 | mDP | m_X,				// f6 inc dp,x
+	1 | mDPIL | m_Y,			// f7 sbc [dp],y
+	0 | mImplied,				// f8 sed
+	2 | mAbsolute | m_Y,			// f9 sbc |abs,y
+	0 | mImplied,				// fa plx
+	0 | mImplied,				// fb xce
+	// $fc is the x-indexed indirect jsr, the same shape as 7c jmp (abs,x).
+	2 | mAbsoluteI | m_X,			// fc jsr (abs,x)
+	2 | mAbsolute | m_X,			// fd sbc |abs,x
+	2 | mAbsolute | m_X,			// fe inc |abs,x
+	3 | mAbsoluteLong | m_X,		// ff sbc >abs,x
+
+};
+
+
+// Forget the operand value and the bytes collected so far. _op, _mode and
+// _size are left untouched; they are reloaded when the next opcode arrives.
+void disassembler::reset() {
+	_arg = 0;
+	_st = 0;
+}
+
+// Print the bytes collected so far as one "byte" directive, advance _pc by
+// their count, and reset.
+void disassembler::dump() {
+	printf("\tbyte\t");
+
+	for (unsigned i = 0; i < _st; ++i) {
+		if (i > 0) printf(", ");
+		printf("$%02x", _bytes[i]);
+	}
+	printf("\n");
+	_pc += _st;
+	reset();
+}
+
+// Print expr as a data directive whose name is picked from size. Any
+// partially collected instruction is dumped as raw bytes first.
+void disassembler::dump(const std::string &expr, unsigned size) {
+	if (_st) dump();
+
+	switch(size) {
+		case 1: printf("\tbyte\t"); break;
+		case 2: 
printf("\tword\t"); break;
+		case 3: printf("\tda\t"); break;
+		case 4: printf("\tlong\t"); break;
+		default: printf("\t%u bytes\t", size);	// size is unsigned
+	}
+	printf("%s\n", expr.c_str());
+	// Advance by the size of the expression just printed. reset() does not
+	// clear _size, so the member may still describe an earlier instruction.
+	_pc += size;
+	reset();
+}
+
+// Feed an expression that stands in for size bytes. It becomes the operand of
+// the pending instruction only when exactly the opcode byte has been collected
+// and that opcode expects size operand bytes; otherwise it is printed as data.
+void disassembler::process(const std::string &expr, unsigned size) {
+	if (_st != 1 || size != _size) {
+		dump(expr, size);
+		return;
+	}
+
+	print(expr);
+}
+
+// Emit any partially collected instruction as raw bytes.
+void disassembler::flush() {
+	if (_st) dump();
+}
+
+// Feed one byte. The first byte of an instruction is the opcode: it selects
+// the modes[] entry and the operand size, with one extra byte when the entry's
+// m_I / m_M bit is also set in _flags. Later bytes are accumulated into _arg,
+// low byte first; once the operand is complete the instruction is printed.
+void disassembler::process(uint8_t byte) {
+	_bytes[_st++] = byte;
+	if (_st == 1) {
+		_op = byte;
+		_mode = modes[_op];
+		_size = _mode & 0x0f;
+		if (_mode & _flags & m_I) _size++;
+		if (_mode & _flags & m_M) _size++;
+
+		// no operand bytes: the instruction is already complete.
+		if (!_size) {
+			print();
+		}
+		return;
+	}
+	unsigned shift = (_st - 2) * 8;
+	_arg = _arg + (byte << shift);
+	if (_st <= _size) return;
+
+	// keep _flags in step so later immediates are sized correctly.
+	switch(_op) {
+	case 0xc2: // REP
+		_flags |= (_arg & 0x30);
+		break;
+	case 0xe2: // SEP
+		_flags &= ~(_arg & 0x30);
+		break;
+	}
+
+	// all done... now print it.
+	print();
+}
+
+// Print the tab that separates mnemonic and operand, followed by the
+// addressing-mode prefix character(s) for the current instruction.
+void disassembler::print_prefix() {
+
+	switch(_mode & 0xf000) {
+		case mImmediate: printf("\t#"); break;
+		case mDP: printf("\t<"); break;
+		case mDPI: printf("\t(<"); break;
+		case mDPIL: printf("\t[<"); break;
+
+		case mRelative:
+		case mBlockMove:
+			printf("\t"); break;
+
+		// cop, brk are treated as absolute. 
+		case mAbsolute:
+			if (_size == 1) printf("\t");
+			else printf("\t|");
+			break;
+		case mAbsoluteLong: printf("\t>"); break;
+		case mAbsoluteI: printf("\t("); break;
+		// print_suffix() closes this mode with "]", so open it here.
+		case mAbsoluteIL: printf("\t["); break;
+	}
+}
+
+// Print what follows the operand: an index that belongs inside the brackets
+// (",x" / ",s"), the closing ")" or "]" of an indirect mode, and finally ",y".
+//	(xxx,s),y
+//	(xxx),y
+//	[xxx],y
+//	xxx,y
+void disassembler::print_suffix() {
+
+	const unsigned index = _mode & 0x0f00;
+
+	switch(index) {
+		case m_X: printf(",x"); break;
+		case m_S:
+		case m_S | m_Y:
+			printf(",s"); break;
+	}
+
+	switch(_mode & 0xf000) {
+		case mAbsoluteI:
+		case mDPI:
+			printf(")"); break;
+		case mAbsoluteIL:
+		case mDPIL:
+			printf("]"); break;
+	}
+
+	// The addressing modes are enumerated values, not bit flags, so they
+	// cannot be told apart with '&'. Nothing is printed between the operand
+	// and ",y" for the non-indirect modes, so ",y" can always go last.
+	if (index == m_Y || index == (m_S | m_Y)) printf(",y");
+
+}
+
+// Print the current instruction with its numeric operand (hex, zero-padded to
+// the operand size), advance _pc past opcode and operand, and reset.
+void disassembler::print() {
+	// each mnemonic is three characters in the packed opcodes string.
+	printf("\t%.3s", &opcodes[_op * 3]);
+
+	print_prefix();
+
+	// todo -- relative, block mode.
+	switch (_size) {
+		case 0: break;
+		case 1: printf("$%02x", _arg); break;
+		case 2: printf("$%04x", _arg); break;
+		case 3: printf("$%06x", _arg); break;
+	}
+
+	print_suffix();
+	// also print bytes?
+	printf("\n");
+	_pc += _size + 1;
+	reset();
+}
+
+// Print the current instruction with expr in place of a numeric operand.
+void disassembler::print(const std::string &expr) {
+	printf("\t%.3s", &opcodes[_op * 3]);
+
+	print_prefix();
+
+	// todo -- relative, block mode.
+	printf("%s", expr.c_str());
+
+	print_suffix();
+	// also print bytes? 
+	printf("\n");
+	_pc += _size + 1;
+	reset();
+}
\ No newline at end of file
diff --git a/disassembler.h b/disassembler.h
new file mode 100644
index 0000000..e909f37
--- /dev/null
+++ b/disassembler.h
@@ -0,0 +1,77 @@
+#ifndef __disassembler_h__
+#define __disassembler_h__
+
+#include <stdint.h>
+
+#include <iterator>
+#include <string>
+
+// Streaming disassembler: code bytes (or expressions standing in for operand
+// bytes) are fed in one at a time, and each completed instruction or data
+// directive is printed to stdout.
+class disassembler {
+
+	public:
+	disassembler() = default;
+
+	// Feed one byte of code.
+	void process(uint8_t byte);
+	// Feed an expression that stands in for size bytes.
+	void process(const std::string &expr, unsigned size);
+
+	// Feed every byte in [begin, end).
+	template <class Iter>
+	void process(Iter begin, Iter end) { while (begin != end) process(*begin++); }
+
+	// Feed every byte of a container or array.
+	template <class T>
+	void process(const T &t) { process(std::begin(t), std::end(t)); }
+
+	// Long-accumulator (0x20) and long-index (0x10) flags, and the running pc.
+	bool m() const { return _flags & 0x20; }
+	bool x() const { return _flags & 0x10; }
+	uint32_t pc() const { return _pc; }
+
+	// Set or clear the long-accumulator flag. Clearing masks with the
+	// complement so the other flag bits are left alone.
+	void set_m(bool x) {
+		if (x) _flags |= 0x20;
+		else _flags &= ~0x20u;
+	}
+
+	// Set or clear the long-index flag (same masking rule as set_m).
+	void set_x(bool x) {
+		if (x) _flags |= 0x10;
+		else _flags &= ~0x10u;
+	}
+
+	// Make pc the current program counter.
+	void set_pc(uint32_t pc) { _pc = pc; }
+
+	// Emit any partially collected instruction as raw bytes.
+	void flush();
+
+	private:
+
+	void reset();
+
+	void dump();
+	void dump(const std::string &expr, unsigned size);
+
+	void print();
+	void print(const std::string &expr);
+
+	void print_prefix();
+	void print_suffix();
+
+	unsigned _st = 0;		// bytes collected for the current instruction
+	uint8_t _op = 0;		// its opcode byte
+	unsigned _size = 0;		// its operand size in bytes
+	unsigned _mode = 0;		// its modes[] entry
+	uint8_t _bytes[4] = {};		// the raw bytes collected so far
+	unsigned _flags = 0x30;		// 0x20 = long accumulator, 0x10 = long index
+	unsigned _pc = 0;
+	unsigned _arg = 0;		// operand value, assembled low byte first
+};
+
+#endif
\ No newline at end of file
diff --git a/dumpobj.cpp b/dumpobj.cpp index eaf10d3..c59ab44 100644 --- a/dumpobj.cpp +++ b/dumpobj.cpp @@ -11,6 +11,7 @@ #include #include "obj816.h" +#include "disassembler.h" enum class endian { little = __ORDER_LITTLE_ENDIAN__, @@ -20,9 +21,9 @@ enum class endian { -extern unsigned init_flags(bool longM, bool longX); -void dump(const std::vector &data, unsigned &pc); -extern void disasm(const std::vector &data, unsigned &flags, unsigned &pc); +//extern unsigned init_flags(bool longM, bool longX); +//void dump(const std::vector &data, unsigned &pc); +//extern void disasm(const std::vector &data, 
unsigned &flags, unsigned &pc); @@ -181,8 +182,8 @@ void dump_obj(const char *name, int fd) uint8_t op = REC_END; - uint32_t pc = 0; - unsigned flags = init_flags(true, true); + + disassembler d; auto iter = data.begin(); while (iter != data.end()) { @@ -190,11 +191,8 @@ void dump_obj(const char *name, int fd) op = read_8(iter); if (op == 0) break; if (op < 0xf0) { - std::vector tmp(iter, iter + op); - + d.process(iter, iter + op); iter += op; - //printf("DATA: %02x\n", op); - disasm(tmp, flags, pc); continue; } @@ -206,8 +204,9 @@ void dump_obj(const char *name, int fd) uint8_t op = 0; bytes = read_8(iter); - printf("EXPR: %02x : ", bytes); - pc += bytes; + std::string tmp; + char buffer[32]; + for(;;) { op = read_8(iter); @@ -217,34 +216,35 @@ void dump_obj(const char *name, int fd) uint8_t section = read_8(iter); uint32_t offset = read_32(iter); if (section < sizeof(sections) / sizeof(sections[0])) - printf("%s+%04x ", sections[section], offset); + snprintf(buffer, sizeof(buffer), "%s+%04x ", sections[section], offset); else - printf("section %02x+%04x ", section, offset); + snprintf(buffer, sizeof(buffer), "section %02x+%04x ", section, offset); } + tmp.append(buffer); break; case OP_VAL: - printf("%04x ", read_32(iter)); + snprintf(buffer, sizeof(buffer), "$%04x ", read_32(iter)); + tmp.append(buffer); break; case OP_SYM: - printf("symbol %02x ", read_16(iter)); + snprintf(buffer, sizeof(buffer), "symbol %02x ", read_16(iter)); + tmp.append(buffer); break; - case OP_SHR: printf(">> "); break; - case OP_SHL: printf("<< "); break; - case OP_ADD: printf("+ "); break; - case OP_SUB: printf("- "); break; + case OP_SHR: tmp.append(">> "); break; + case OP_SHL: tmp.append("<< "); break; + case OP_ADD: tmp.append("+ "); break; + case OP_SUB: tmp.append("- "); break; default: - printf("\n"); errx(EX_DATAERR, "%s: unknown expression opcode %02x", name, op); } } - printf("\n"); + d.process(tmp, bytes); } break; case REC_DEBUG: { - static const char *debugs[] = { 
"D_C_FILE", "D_C_LINE", @@ -264,6 +264,7 @@ void dump_obj(const char *name, int fd) "D_LONGI_OFF", }; + d.flush(); uint16_t size = read_16(iter); //printf("\t;DEBUG\n"); @@ -271,19 +272,19 @@ void dump_obj(const char *name, int fd) uint8_t op = read_8(iter); switch(op) { case D_LONGA_ON: - flags |= 0x20; + d.set_m(true); printf("\tlonga\ton\n"); break; case D_LONGA_OFF: - flags &= ~0x20; + d.set_m(false); printf("\tlonga\toff\n"); break; case D_LONGI_ON: - flags |= 0x10; + d.set_x(true); printf("\tlongi\ton\n"); break; case D_LONGI_OFF: - flags &= ~0x10; + d.set_x(false); printf("\tlongi\toff\n"); break; case D_C_FILE: { @@ -316,11 +317,15 @@ void dump_obj(const char *name, int fd) break; case REC_SECT: { + + d.flush(); uint8_t section = read_8(iter); printf("\t.sect\t%d\n", section); break; } case REC_ORG: { + + d.flush(); uint32_t org = read_32(iter); printf("\t.org\t$%04x\n", org); break; @@ -330,6 +335,7 @@ void dump_obj(const char *name, int fd) case REC_RELEXP: case REC_LINE: default: + d.flush(); errx(EX_DATAERR, "%s: unknown opcode %02x", name, op); } }