'''
    Power Macintosh ROM disassembler.

    Author: Max Poliakovski 2020-2021

    Usage:
    python3 PowerRomDasm.py --rom_path=[path to a Power Macintosh ROM dump]
'''
from argparse import ArgumentParser
from ruamel.yaml import YAML
from capstone import *
from capstone.m68k import *

import os
import struct


def bit_not(n, numbits=32):
    ''' Return the bitwise complement of n within a numbits-wide word. '''
    return (1 << numbits) - 1 - n


def align(n, m):
    ''' Round n up to the next multiple of m.

        The mask arithmetic only works when m is a power of two; the result
        is confined to 32 bits because bit_not() defaults to that width.
    '''
    return (n + m - 1) & bit_not(m - 1)


def _split_operands(op_str):
    ''' Split an operand string on commas that are not nested in brackets.

        A plain str.split(',') cuts operands such as "(a0, d0.w)" in half.
        For strings without brackets the result equals op_str.split(',').
    '''
    parts = []
    depth = 0
    start = 0
    for pos, ch in enumerate(op_str):
        if ch in '([{':
            depth += 1
        elif ch in ')]}':
            depth -= 1
        elif ch == ',' and depth == 0:
            parts.append(op_str[start:pos])
            start = pos + 1
    parts.append(op_str[start:])
    return parts


class M68KDasm:
    ''' Capstone-based disassembler for 68k code.

        cb is a callback object providing get_symbol(addr), which returns
        a (found, name) tuple. self.labels maps addresses to label names and
        is filled while disassembling.
    '''

    def __init__(self, cb):
        self.rom_cb = cb
        self.cse = Cs(CS_ARCH_M68K, CS_MODE_M68K_040)
        self.cse.detail = True
        self.labels = {}

    def dasm_single(self, address, code):
        ''' Disassemble a single 68k instruction with the Capstone engine.

            Returns either a Capstone instruction object or, for everything
            handled here, a (mnemonic, operands, size_in_bytes) tuple.
        '''
        # a lone trailing byte cannot hold a 68k opcode word
        if len(code) < 2:
            return ('dc.b', [hex(code[0])], 1)

        # detect A-Traps and disassemble them ourselves
        if (code[0] & 0xF0) == 0xA0:
            from mactraps import TRAP_TABLE
            trap_num = (code[0] << 8) | code[1]
            if trap_num in TRAP_TABLE:
                return (TRAP_TABLE[trap_num], [], 2)
            return ('dc.w', [hex(trap_num)], 2)

        # disassemble non-trap instructions with Capstone
        insn = next(self.cse.disasm(code, address), None)
        if insn is None:
            # nothing decoded: emit the opcode word as data instead of
            # letting StopIteration abort the whole listing
            return ('dc.w', [hex((code[0] << 8) | code[1])], 2)
        return insn

    def _target_name(self, ea):
        ''' Return the symbol or label naming address ea.

            A label is registered in self.labels when neither a symbol nor
            a label exists for ea yet; an existing label is reused so that
            references match the label printed at its definition.
        '''
        flag, sym = self.rom_cb.get_symbol(ea)
        if flag:
            return sym
        return self.labels.setdefault(ea, 'l_{:x}'.format(ea))

    def _format_operands(self, addr, instr):
        ''' Build the operand strings for a Capstone instruction at addr.

            PC-relative memory operands and branch displacements are
            replaced with symbols/labels; all other operands keep the text
            produced by Capstone.
        '''
        op_list = _split_operands(instr.op_str)
        ops = []

        for op in instr.operands:
            # every operand consumes exactly one piece of the operand string
            text = op_list.pop(0)
            if op.type == M68K_OP_MEM:
                if op.address_mode == M68K_AM_PCI_DISP:
                    text = self._target_name(addr + op.mem.disp + 2)
            elif op.type == M68K_OP_BR_DISP:
                if op.address_mode == M68K_AM_BRANCH_DISPLACEMENT:
                    text = self._target_name(addr + op.br_disp.disp + 2)
            ops.append(text)

        return ops

    def dasm_region(self, addr, size, data):
        ''' Disassemble and print size bytes of 68k code.

            addr is the address of the first instruction; data holds the
            code bytes, data[0] being the byte at addr. The region is
            decoded completely before anything is printed so that labels
            referenced by later instructions appear at their targets.
        '''
        pp_dasm = []

        last_addr = addr + size
        offset = 0

        while addr < last_addr:
            # prefetch binary data (up to 10 bytes) for the next instruction
            bin_length = min(last_addr - addr, 10)
            bin_prefetch = bytes(data[offset:offset + bin_length])
            if not bin_prefetch:
                # data is shorter than the declared region size
                break

            instr = self.dasm_single(addr, bin_prefetch)
            if isinstance(instr, CsInsn):
                mnem = instr.mnemonic
                ops = self._format_operands(addr, instr)
                length = instr.size
            else:
                mnem, ops, length = instr

            pp_dasm.append({'addr': addr, 'mnem': mnem, 'ops': ops})
            addr += length
            offset += length

        for instr in pp_dasm:
            if instr['addr'] in self.labels:
                print('\n' + self.labels[instr['addr']] + ':')
            print(hex(instr['addr']).ljust(15), end='')
            print(instr['mnem'], '\t', end='')
            print(','.join(instr['ops']))


class ROMDisassembler:
    ''' Annotation-driven ROM disassembler.

        rom_data is the raw ROM image and rom_db the annotation database.
        The database keys read here are 'main_info' ('phys_addr') and
        'annot_items', a mapping from ROM offsets to annotation entries.
    '''

    # size in bytes -> (assembler directive, struct format) for integers
    _INT_DIRECTIVES = {1: ('dc.b', '>B'), 2: ('dc.w', '>H'), 4: ('dc.l', '>I')}

    def __init__(self, rom_data, rom_db):
        self.rom_data = rom_data
        self.rom_db = rom_db
        self.start_addr = rom_db['main_info']['phys_addr']
        self.m68k_dasm = M68KDasm(self)

    def get_symbol(self, addr):
        ''' Look up the label annotated at address addr.

            Returns (True, label) when the database has an entry at the
            corresponding ROM offset, otherwise (False, '').
        '''
        offset = addr - self.start_addr
        if offset in self.rom_db['annot_items']:
            return (True, self.rom_db['annot_items'][offset]['label'])
        return (False, '')

    def fmt_single_entry(self, format, size, offset):
        ''' Print one data item located at ROM offset.

            format is 'hex', 'dec' or 'offset'; size is the item size in
            bytes (1, 2 or 4) for 'hex' and 'dec'. An 'offset' item is
            always read as a 32-bit value and printed symbolically when the
            database has an entry at that offset.
        '''
        print(hex(self.start_addr + offset).ljust(15), end='')

        if format in ('hex', 'dec'):
            if size not in self._INT_DIRECTIVES:
                print("INVALID SIZE!")
                return
            directive, fmt = self._INT_DIRECTIVES[size]
            value = struct.unpack_from(fmt, self.rom_data, offset)[0]
            if format == 'hex':
                print("%s\t0x%X" % (directive, value))
            else:
                print("%s\t%d" % (directive, value))
        elif format == 'offset':
            dest_offset = struct.unpack_from('>I', self.rom_data, offset)[0]
            if dest_offset in self.rom_db['annot_items']:
                symbol = self.rom_db['annot_items'][dest_offset]['label']
                print("dc.l\t" + symbol + '-BaseOfRom')
            else:
                print("dc.l\t0x%X" % dest_offset)
        else:
            # terminate the line started by the address column
            print("INVALID FORMAT!")

    def dasm_regions(self, start_addr, size, data, regions):
        ''' Print the regions of a code entry.

            regions is a list of tuples as built by parse_subregs():
              (first, last, 'code', index of first in data)
              (first, last, 'align', boundary)
              (offset, label, 'int', format, size)
            first/last/offset are ROM offsets; start_addr is added to them
            to form the printed addresses.
        '''
        self.labels = {} # nothing in this file reads it; kept as is

        for reg in regions:
            kind = reg[2]
            if kind == 'align':
                print(hex(start_addr + reg[0]).ljust(15), end='')
                print('align\t' + str(reg[3]))
            elif kind == 'code':
                reg_size = reg[1] - reg[0] + 1
                self.m68k_dasm.dasm_region(start_addr + reg[0], reg_size,
                                           data[reg[3]:reg[3]+reg_size])
            elif kind == 'int':
                print("")
                print((reg[1] + ':').ljust(15))
                self.fmt_single_entry(reg[3], reg[4], reg[0])
            else:
                print("Unknown region type " + kind)

    def parse_subregs(self, start, size, subregs):
        ''' Split a code entry into code, alignment and integer regions.

            start and size describe the entry (ROM offset and length);
            subregs is the list of its non-code subregions. Returns the
            region tuples understood by dasm_regions(). Parsing stops at the
            first invalid subregion and returns what was collected so far.
        '''
        regs = []
        reg_start = start
        reg_limit = start + size

        for reg in subregs:
            kind = reg['type']
            if kind not in ('align', 'int'):
                print("Unknown subregion type " + kind)
                return regs

            offset = reg['offset']
            if offset < reg_start or offset >= reg_limit:
                print("Invalid subregion offset: 0x%X" % offset)
                return regs

            # code preceding this subregion
            regs.append((reg_start, offset - 1, 'code', reg_start - start))

            if kind == 'align':
                boundary = reg['boundary']
                reg_end = align(offset, boundary)
                regs.append((offset, reg_end - 1, 'align', boundary))
                reg_start = reg_end
            else:
                reg_size = reg['size']
                if 'label' in reg:
                    label = reg['label']
                else:
                    label = 'l_{:x}'.format(self.start_addr + offset)
                # make the label known to the code disassembler
                self.m68k_dasm.labels[self.start_addr + offset] = label
                regs.append((offset, label, 'int', reg['format'], reg_size))
                reg_start = offset + reg_size

        # code following the last subregion
        if reg_start < reg_limit:
            regs.append((reg_start, reg_limit - 1, 'code', reg_start - start))

        return regs

    def fmt_array(self, entry, offset):
        ''' Print the elements of an array entry starting at ROM offset. '''
        elsize = entry['elsize']
        for _ in range(entry['size'] // elsize):
            self.fmt_single_entry(entry['format'], elsize, offset)
            offset += elsize

    def parse_struct(self, fields, offset):
        ''' Print the fields of a struct and return their total size. '''
        size_acc = 0

        for field in fields:
            if field['type'] == 'int':
                self.fmt_single_entry(field['format'], field['size'], offset)
            elif field['type'] == 'array':
                self.fmt_array(field, offset)
            else:
                print("Unknown struct field type %s" % field['type'])
            offset += field['size']
            size_acc += field['size']

        return size_acc

    def fmt_entry(self, entry, offset):
        ''' Print the annotated entry at ROM offset.

            Returns the number of bytes the entry occupies. This can be zero
            for an 'align' entry whose offset is already aligned.
        '''
        print("")

        if entry['type'] == 'align':
            end = align(offset, entry['boundary'])
            print(hex(self.start_addr + offset).ljust(15), end='')
            print('align\t' + str(entry['boundary']))
            return end - offset

        print((entry['label'] + ':').ljust(15))

        if entry['type'] == 'array':
            self.fmt_array(entry, offset)
        elif entry['type'] == 'int':
            self.fmt_single_entry(entry['format'], entry['size'], offset)
        elif entry['type'] == 'code':
            size = entry['size']
            if entry['arch'] == '68k':
                if 'subregs' in entry:
                    regs = self.parse_subregs(offset, size, entry['subregs'])
                else:
                    regs = [(offset, offset + size - 1, 'code', 0)]
                self.dasm_regions(self.start_addr, size,
                                  self.rom_data[offset:offset+size], regs)
            elif entry['arch'] == 'ppc':
                print("PPC disassembler not implemented yet")
            else:
                print("Unknown code region architecture " + entry['arch'])
        elif entry['type'] == 'fixlenstr': # fixed-length string
            print(hex(self.start_addr + offset).ljust(15), end='')
            str_len = entry['size']
            raw_str = self.rom_data[offset:offset+str_len]
            print('"%s"' % raw_str.decode('mac_roman'))
        elif entry['type'] == 'struct':
            return self.parse_struct(entry['fields'], offset)

        return entry['size']

    def dasm_region(self, start, end):
        ''' Print the ROM contents between offsets start and end.

            Annotated offsets are printed according to their database
            entry; every other byte is dumped as dc.b.
        '''
        offset = start
        while offset < end:
            size = 0
            if offset in self.rom_db['annot_items']:
                entry = self.rom_db['annot_items'][offset]
                size = self.fmt_entry(entry, offset)

            if size > 0:
                offset += size
                continue

            # Unannotated byte, or an entry that consumed nothing (e.g. an
            # alignment at an aligned offset): dump one raw byte so the loop
            # always makes progress.
            print(hex(self.start_addr + offset).ljust(15), end='')
            print("dc.b\t0x%X" % struct.unpack('>B', self.rom_data[offset:offset+1]))
            offset += 1


def main():
    ''' Parse the command line, load the ROM and its database, disassemble. '''
    parser = ArgumentParser()
    parser.add_argument('--rom_path', type=str,
                        dest='rom_path',
                        help='path to a PowerMacintosh ROM file to process',
                        metavar='ROM_PATH', required=True)
    parser.add_argument('--start', type=lambda x: int(x,0),
                        dest='start_offs', default=0,
                        help='offset to the start of the region to disassemble',
                        required=False,
                        )
    parser.add_argument('--end', type=lambda x: int(x,0),
                        dest='end_offs', default=0x500,
                        help='offset to the end of the region to disassemble',
                        required=False,
                        )
    opts = parser.parse_args()

    # just load the whole ROM image into memory
    with open(opts.rom_path, 'rb') as rom_file:
        rom_data = rom_file.read()

    rom_size = len(rom_data)
    if rom_size != (4 * 1024 * 1024):
        # only a warning: processing continues with what was loaded
        print("Invalid ROM file size %d (expected 4 MB)" % rom_size)

    if rom_size < 4:
        print("ROM file is too short to contain a checksum")
        return

    # the first 32-bit big-endian word selects the annotation database
    check_sum = struct.unpack('>I', rom_data[0:4])[0]
    print("ROM Checksum: %X" % check_sum)

    my_path = os.path.dirname(os.path.realpath(__file__))
    db_name = 'ROMDB_' + '{:X}'.format(check_sum) + '.yaml'

    with open(os.path.join(my_path, 'database', db_name), 'rb') as db_file:
        yaml = YAML()
        annot_db = yaml.load(db_file)
        print(annot_db['main_info']['name'])

    rdasm = ROMDisassembler(rom_data, annot_db)
    rdasm.dasm_region(opts.start_offs, opts.end_offs)


if __name__ == "__main__":
    main()