From fff18e5866fef6a74c703c763d573a27f405928b Mon Sep 17 00:00:00 2001
From: Maxim Poliakovski
Date: Mon, 22 Mar 2021 00:21:57 +0100
Subject: [PATCH] Initial commit.

---
 PowerRomDasm.py              | 299 +++++++++++++++++++++++++++++++++++
 database/ROMDB_9FEB69B3.yaml |  47 ++++++
 mactraps.py                  |  49 ++++++
 3 files changed, 395 insertions(+)
 create mode 100644 PowerRomDasm.py
 create mode 100644 database/ROMDB_9FEB69B3.yaml
 create mode 100644 mactraps.py

diff --git a/PowerRomDasm.py b/PowerRomDasm.py
new file mode 100644
index 0000000..1870c63
--- /dev/null
+++ b/PowerRomDasm.py
@@ -0,0 +1,299 @@
+'''
+    Power Macintosh ROM disassembler.
+
+    Author: Max Poliakovski 2020-2021
+
+    Usage:
+        python3 PowerRomDasm.py --rom_path=[path to a Power Macintosh ROM dump]
+'''
+from argparse import ArgumentParser
+from ruamel.yaml import YAML
+
+from capstone import *
+from capstone.m68k import *
+
+import struct
+
+def bit_not(n, numbits=32):
+    ''' Return the bitwise complement of n within a numbits-wide word.'''
+    return (1 << numbits) - 1 - n
+
+def align(n, m):
+    ''' Round n up to the next multiple of m (m must be a power of two).'''
+    return (n + m - 1) & bit_not(m - 1)
+
+class M68KDasm:
+    ''' Capstone-based disassembler for 68k code.'''
+
+    def __init__(self, cb):
+        ''' cb must provide get_symbol(addr) returning (found, name).'''
+        self.rom_cb = cb
+        self.cse = Cs(CS_ARCH_M68K, CS_MODE_M68K_040)
+        self.cse.detail = True
+        self.labels = {}
+
+    def dasm_single(self, address, code):
+        ''' Disassemble a single 68k instruction.
+
+            A-Traps are decoded here and returned as a (mnemonic, operands,
+            size) tuple; anything else is returned as a Capstone CsInsn.
+        '''
+
+        # detect A-Traps and disassemble them ourselves
+        if (code[0] & 0xF0) == 0xA0:
+            from mactraps import TRAP_TABLE
+
+            trap_num = (code[0] << 8) | code[1]
+            if trap_num in TRAP_TABLE:
+                return ((TRAP_TABLE[trap_num], [], 2))
+            else:
+                return (('dc.w', [hex(trap_num)], 2))
+
+        # disassemble non-trap instructions with Capstone
+        instrs = self.cse.disasm(code, address)
+        return next(instrs)
+
+    def _resolve_target(self, ea):
+        ''' Return the operand text for the PC-relative target address ea.
+
+            A symbol known to the ROM database is used as is; any other
+            target gets an 'l_' + hex address label stored in self.labels.
+        '''
+        flag,sym = self.rom_cb.get_symbol(ea)
+        if flag:
+            return sym
+        label = 'l_{:x}'.format(ea)
+        # Always record the label: it is only printed when an instruction at
+        # that address is listed, and a test against the current instruction
+        # address would drop the targets of backward branches.
+        self.labels[ea] = label
+        return label
+
+    def dasm_region(self, addr, size, data):
+        ''' Disassemble and print size bytes of 68k code located at addr.
+
+            data holds the bytes of the region; data[0] is the byte at addr.
+        '''
+        pp_dasm = []
+        last_addr = addr + size
+        offset = 0
+
+        while addr < last_addr:
+            # prefetch binary data (at most 10 bytes) for the next instruction
+            bin_length = min(last_addr - addr, 10)
+            bin_prefetch = bytearray(data[offset:offset+bin_length])
+
+            instr = self.dasm_single(addr, bin_prefetch)
+
+            if not isinstance(instr, CsInsn):
+                pp_dasm.append({'addr': addr, 'mnem': instr[0], 'ops': instr[1]})
+                addr += instr[2]
+                offset += instr[2]
+                continue
+
+            op_list = instr.op_str.split(',')
+            ops = []
+            for op in instr.operands:
+                if op.type == M68K_OP_MEM:
+                    if op.address_mode == M68K_AM_PCI_DISP:
+                        # target = address of this instruction + 2 + displacement
+                        ops.append(self._resolve_target(addr + op.mem.disp + 2))
+                        op_list.pop(0)
+                    elif op.address_mode == M68K_AM_PCI_INDEX_BASE_DISP:
+                        ops.append(instr.op_str)
+                    else:
+                        ops.append(op_list.pop(0))
+                elif op.type == M68K_OP_BR_DISP:
+                    if op.address_mode == M68K_AM_BRANCH_DISPLACEMENT:
+                        ops.append(self._resolve_target(addr + op.br_disp.disp + 2))
+                        op_list.pop(0)
+                    else:
+                        ops.append(op_list.pop(0))
+                else:
+                    ops.append(op_list.pop(0))
+            pp_dasm.append({'addr': addr, 'mnem': instr.mnemonic, 'ops': ops})
+            addr += instr.size
+            offset += instr.size
+
+        for instr in pp_dasm:
+            if instr['addr'] in self.labels:
+                print('\n' + self.labels[instr['addr']] + ':')
+            print(hex(instr['addr']).ljust(15), end='')
+            print(instr['mnem'], '\t', end='')
+            print(','.join(instr['ops']))
+
+    def dasm_regions(self, start_addr, size, data, regions):
+        ''' Print a code entry that is split into 'code' and 'align' regions.
+
+            Each region is a (first, last, kind, extra) tuple; first and last
+            are offsets that get added to start_addr. For 'code' regions extra
+            is the region's offset inside data, for 'align' it is the boundary.
+        '''
+        self.labels = {}
+        for reg in regions:
+            if reg[2] == 'align':
+                print(hex(start_addr + reg[0]).ljust(15), end='')
+                print('align\t' + str(reg[3]))
+            elif reg[2] == 'code':
+                reg_size = reg[1] - reg[0] + 1
+                self.dasm_region(start_addr + reg[0], reg_size,
+                    data[reg[3]:reg[3]+reg_size])
+            else:
+                print("Unknown region type " + reg[2])
+
+
+class ROMDisassembler:
+    ''' Prints an annotated listing of a ROM image using its YAML database.'''
+
+    def __init__(self, rom_data, rom_db):
+        self.rom_data = rom_data
+        self.rom_db = rom_db
+        self.start_addr = rom_db['main_info']['phys_addr']
+        self.m68k_dasm = M68KDasm(self)
+
+    def get_symbol(self, addr):
+        ''' Return (True, label) if the database annotates addr, else (False, '').'''
+        offset = addr - self.start_addr
+        if offset in self.rom_db['annot_items']:
+            return (True, self.rom_db['annot_items'][offset]['label'])
+        else:
+            return (False, '')
+
+    def fmt_single_entry(self, format, size, offset):
+        ''' Print one data item of size bytes located at ROM offset offset.
+
+            format is 'hex' for a plain value or 'offset' for a 32-bit value
+            that is looked up as an offset in the database.
+        '''
+        print(hex(self.start_addr + offset).ljust(15), end='')
+        if format == 'hex':
+            if size == 1:
+                print("dc.b\t0x%X" % struct.unpack('>B', self.rom_data[offset:offset+1]))
+            elif size == 2:
+                print("dc.w\t0x%X" % struct.unpack('>H', self.rom_data[offset:offset+2]))
+            elif size == 4:
+                print("dc.l\t0x%X" % struct.unpack('>I', self.rom_data[offset:offset+4]))
+            else:
+                print("INVALID SIZE!")
+        elif format == 'offset':
+            dest_offset = struct.unpack('>I', self.rom_data[offset:offset+4])[0]
+            if dest_offset in self.rom_db['annot_items']:
+                symbol = self.rom_db['annot_items'][dest_offset]['label']
+                print("dc.l\t" + symbol + '-BaseOfRom')
+            else:
+                print("dc.l\t0x%X" % dest_offset)
+        else:
+            # terminate the line started above instead of leaving it dangling
+            print("INVALID FORMAT!")
+
+    def parse_subregs(self, start, size, subregs):
+        ''' Split a code entry into the region tuples used by dasm_regions.
+
+            start and size are the ROM offset and length of the entry, subregs
+            is its 'subregs' list. Only 'align' subregions are supported;
+            parsing stops at the first unknown or out-of-range subregion.
+        '''
+        regs = []
+        reg_start = start
+        for reg in subregs:
+            if reg['type'] != 'align':
+                print("Unknown subregion type " + reg['type'])
+                return regs
+            offset = reg['offset']
+            if offset < reg_start or offset >= (start + size):
+                print("Invalid subregion offset: 0x%X" % offset)
+                return regs
+            regs.append((reg_start, offset - 1, 'code', reg_start - start))
+            boundary = reg['boundary']
+            reg_end = align(offset, boundary)
+            regs.append((offset, reg_end - 1, 'align', boundary))
+            reg_start = reg_end
+        if reg_start < (start + size):
+            regs.append((reg_start, (start + size) - 1, 'code', reg_start - start))
+        return regs
+
+    def fmt_entry(self, entry, offset):
+        ''' Print the annotated database entry located at ROM offset offset.'''
+        print("")
+        print((entry['label'] + ':').ljust(15))
+
+        if entry['type'] == 'array':
+            count = entry['size'] // entry['elsize']
+            for i in range(count):
+                self.fmt_single_entry(entry['format'], entry['elsize'], offset)
+                offset += entry['elsize']
+        elif entry['type'] == 'int':
+            self.fmt_single_entry(entry['format'], entry['size'], offset)
+        elif entry['type'] == 'code':
+            size = entry['size']
+            if entry['arch'] == '68k':
+                if 'subregs' in entry:
+                    regs = self.parse_subregs(offset, size, entry['subregs'])
+                else:
+                    regs = [(offset, offset + size - 1, 'code', 0)]
+                self.m68k_dasm.dasm_regions(self.start_addr, size,
+                    self.rom_data[offset:offset+size], regs)
+            elif entry['arch'] == 'ppc':
+                print("PPC disassembler not implemented yet")
+            else:
+                print("Unknown code region architecture " + entry['arch'])
+
+    def dasm_region(self, start, end):
+        ''' Print the ROM from offset start until offset end is reached.
+
+            Bytes not covered by a database entry are printed as dc.b items.
+        '''
+        offset = start
+        while offset < end:
+            if offset in self.rom_db['annot_items']:
+                entry = self.rom_db['annot_items'][offset]
+                self.fmt_entry(entry, offset)
+                offset += entry['size']
+            else:
+                print(hex(self.start_addr + offset).ljust(15), end='')
+                print("dc.b\t0x%X" % struct.unpack('>B', self.rom_data[offset:offset+1]))
+                offset += 1
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument('--rom_path', type=str,
+                        dest='rom_path',
+                        help='path to a PowerMacintosh ROM file to process',
+                        metavar='ROM_PATH', required=True)
+    parser.add_argument('--start', type=lambda x: int(x,0),
+                        dest='start_offs', default=0,
+                        help='offset to the start of the region to disassemble',
+                        required=False,
+                        )
+    parser.add_argument('--end', type=lambda x: int(x,0),
+                        dest='end_offs', default=0x500,
+                        help='offset to the end of the region to disassemble',
+                        required=False,
+                        )
+
+    opts = parser.parse_args()
+
+    with open(opts.rom_path, 'rb') as rom_file:
+        rom_file.seek(0, 2)
+        rom_size = rom_file.tell()
+        if rom_size != (4 * 1024 * 1024):
+            print("Invalid ROM file size %d (expected 4 MB)" % rom_size)
+
+        # just load the whole ROM image into memory
+        rom_file.seek(0, 0)
+        rom_data = rom_file.read()
+
+    check_sum = struct.unpack('>I', rom_data[0:4])[0]
+    print("ROM Checksum: %X" % check_sum)
+
+    db_name = 'ROMDB_' + '{:x}'.format(int(check_sum)).upper() + '.yaml'
+
+    with open('database/' + db_name, 'rb') as db_file:
+        yaml = YAML()
+        annot_db = yaml.load(db_file)
+
+    print(annot_db['main_info']['name'])
+
+    rdasm = ROMDisassembler(rom_data, annot_db)
+    rdasm.dasm_region(opts.start_offs, opts.end_offs)
diff --git a/database/ROMDB_9FEB69B3.yaml b/database/ROMDB_9FEB69B3.yaml
new file mode 100644
index 0000000..d8d6ec8
--- /dev/null
+++ b/database/ROMDB_9FEB69B3.yaml
@@ -0,0 +1,47 @@
+main_info:
+    short_cksum: 0x9FEB69B3
+    name: "Power Macintosh 6100, 7100 and 8100 ROM file"
+    phys_addr: 0x40800000 # starting address of this ROM
+
+annot_items:
+    0: {type: int, format: hex, size: 4, label: BaseOfRom, comment: ROM checksum}
+    4: {type: int, format: offset, size: 4, label: StartPC}
+    8: {type: int, format: hex, size: 1, label: MachineType}
+    9: {type: int, format: hex, size: 1, label: ROMVersion}
+    0xA: {type: code, arch: 68k, size: 4, label: JStartBoot}
+    0xE: {type: code, arch: 68k, size: 4, label: BadDisk}
+    0x12: {type: int, format: hex, size: 2, label: ROMRelease}
+    0x14: {type: int, format: hex, size: 2, label: PatchFlags}
+    0x16: {type: int, format: offset, size: 4, label: ForeignOSVecT}
+    0x1A: {type: int, format: offset, size: 4, label: RomRsrc}
+    0x1E: {type: code, arch: 68k, size: 4, label: Eject}
+    0x22: {type: int, format: offset, size: 4, label: DispOff}
+    0x26: {type: code, arch: 68k, size: 4, label: Critical}
+    0x2A: {type: code, arch: 68k, size: 4, label: ResetEntry}
+    0x2E: {type: int, format: hex, size: 1, label: RomLoc}
+    0x30: {type: array, format: hex, size: 16, elsize: 4, label: ROMCksum128}
+    0x40: {type: int, format: hex, size: 4, label: ROMSize68k}
+    0x44: {type: int, format: offset, size: 4, label: EraseMyIconOffs}
+    0x48: {type: int, format: offset, size: 4, label: InitSys7ToolboxOffs}
+    0x4C: {type: int, format: hex, size: 2, label: ROMSubRelease}
+    0x4E: {type: array, format: offset, size: 28, elsize: 4, label: ForeignOS}
+    0x6A: {type: code, arch: 68k, size: 6, label: GoofyDoEject}
+    0x70: {type: int, format: hex, size: 4, label: TCOff}
+    0x74: {type: code, arch: 68k, size: 0x70, label: StartBoot}
+    0xE4:
+        type: code
+        arch: 68k
+        size: 0x202
+        label: StartInit1
+        subregs:
+            - type: align
+              offset: 0x1f2
+              boundary: 16
+            - type: align
+              offset: 0x236
+              boundary: 16
+    0x2E6: {type: code, arch: 68k, size: 0x1C2, label: BootRetry}
+    0x4A8: {type: code, arch: 68k, size: 0x4E, label: WhichCPU}
+    0x4F6: {type: code, arch: 68k, size: 0xC, label: WhichBoard}
+    0x502: {type: code, arch: 68k, size: 0x100, label: SetUpTimeK}
+    0x3A24A: {type: code, arch: 68k, size: 4, label: DoEject}
diff --git a/mactraps.py b/mactraps.py
new file mode 100644
index 0000000..2750bd1
--- /dev/null
+++ b/mactraps.py
@@ -0,0 +1,49 @@
+'''
+    Provides a mapping from 68k trap numbers to Macintosh trap names.
+
+    Author: Max Poliakovski 2020-2021
+'''
+TRAP_TABLE = {
+    # trapN   trap name
+    0xA01B : "_SetZone",
+    0xA01F : "_DisposePtr",
+    0xA025 : "_GetHandleSize",
+    0xA029 : "_HLock",
+    0xA02D : "_SetApplLimit",
+    0xA02E : "_BlockMove",
+    0xA047 : "_SetTrapAddress",
+    0xA04A : "_HNoPurge",
+    0xA055 : "_StripAddress",
+    0xA057 : "_SetAppBase",
+    0xA064 : "_MoveHHi",
+    0xA069 : "_HGetState",
+    0xA06A : "_HSetState",
+    0xA06C : "_InitFS",
+    0xA0AD : "_GestaltDispatch",
+    0xA0BD : "_CacheFlush",
+    0xA11A : "_GetZone",
+    0xA11E : "_NewPtr",
+    0xA122 : "_NewHandle",
+    0xA128 : "_RecoverHandle",
+    0xA146 : "_GetTrapAddress",
+    0xA162 : "_PurgeSpace",
+    0xA1AD : "_Gestalt",
+    0xA31E : "_NewPtrClear",
+    0xA322 : "_NewHandleClear",
+    0xA346 : "_GetOSTrapAddress",
+    0xA51E : "_NewPtrSys",
+    0xA522 : "_NewHandleSys",
+    0xA71E : "_NewPtrSysClear",
+    0xA722 : "_NewHandleSysClear",
+    0xA746 : "_GetToolTrapAddress",
+    0xA81F : "_Get1Resource",
+    0xA820 : "_Get1NamedResource",
+    0xA895 : "_ShutDown",
+    0xA96E : "_Dequeue",
+    0xA994 : "_CurResFile",
+    0xA99B : "_SetResLoad",
+    0xA9A0 : "_GetResource",
+    0xA9A2 : "_LoadResource",
+    0xA9AF : "_ResError",
+    0xA9C9 : "_SysError",
+}