# Mirror of https://github.com/maximumspatium/PowerRomDasm.git
# Synced 2024-12-22 13:30:31 +00:00 (330 lines, 13 KiB, Python)
'''
Power Macintosh ROM disassembler.

Author: Max Poliakovski 2020-2021

Usage:
    python3 PowerRomDasm.py --rom_path=[path to a Power Macintosh ROM dump]
'''
|
|
from argparse import ArgumentParser
|
|
from ruamel.yaml import YAML
|
|
|
|
from capstone import *
|
|
from capstone.m68k import *
|
|
|
|
import struct
|
|
|
|
def bit_not(n, numbits=32):
    '''Return the bitwise complement of n restricted to numbits bits.

    For 0 <= n < 2**numbits the result equals (2**numbits - 1) - n.
    Values outside that range (negative or too wide) are complemented and
    then masked, so the result always fits into numbits bits instead of
    becoming negative or overflowing the field.
    '''
    mask = (1 << numbits) - 1
    return ~n & mask
|
|
|
|
def align(n, m):
    '''Round n up to the nearest multiple of the boundary m.

    Uses ceiling division rather than a bit mask, so the result is also
    correct for boundaries that are not powers of two and for values
    wider than 32 bits.

    Raises ValueError when m is not a positive number.
    '''
    if m <= 0:
        raise ValueError("alignment boundary must be positive, got %r" % (m,))
    # -(-n // m) is ceil(n / m) computed with integer arithmetic only
    return -(-n // m) * m
|
|
|
|
class M68KDasm:
    '''Capstone-based disassembler for 68k code.

    The object passed as cb must provide get_symbol(addr) returning a
    (found, name) pair; it is used to replace PC-relative and branch
    targets with symbolic names. Targets without a symbol get an
    auto-generated 'l_<hex address>' label recorded in self.labels.
    '''

    def __init__(self, cb):
        self.rom_cb = cb
        self.cse = Cs(CS_ARCH_M68K, CS_MODE_M68K_040)
        self.cse.detail = True
        self.labels = {}  # address -> auto-generated label name

    @staticmethod
    def _split_operands(op_str):
        '''Split an operand string on commas that are outside of brackets.

        A plain str.split(',') would tear apart operands such as
        "(4,a0,d0.w)". Pieces are returned unstripped so that joining
        them with ',' reproduces the original text.
        '''
        parts = []
        depth = 0
        start = 0
        for pos, ch in enumerate(op_str):
            if ch in '([{':
                depth += 1
            elif ch in ')]}':
                if depth > 0:
                    depth -= 1
            elif ch == ',' and depth == 0:
                parts.append(op_str[start:pos])
                start = pos + 1
        parts.append(op_str[start:])
        return parts

    @staticmethod
    def _raw_data(code):
        '''Describe the leading bytes of code as a raw data directive.

        Returns a (mnemonic, operands, size) tuple: a 16-bit 'dc.w' when
        at least two bytes are available, otherwise a single 'dc.b'.
        '''
        if len(code) >= 2:
            return ('dc.w', [hex((code[0] << 8) | code[1])], 2)
        return ('dc.b', [hex(code[0])], 1)

    def dasm_single(self, address, code):
        '''Disassemble a single 68k instruction with the Capstone engine.

        Returns either a Capstone instruction object or, for items decoded
        here (A-Traps and undecodable data), a (mnemonic, operands, size)
        tuple.

        Raises ValueError when code is empty.
        '''
        if not code:
            raise ValueError("no bytes to disassemble")

        # detect A-Traps and disassemble them ourselves
        if len(code) >= 2 and (code[0] & 0xF0) == 0xA0:
            from mactraps import TRAP_TABLE

            trap_num = (code[0] << 8) | code[1]
            if trap_num in TRAP_TABLE:
                return (TRAP_TABLE[trap_num], [], 2)
            return ('dc.w', [hex(trap_num)], 2)

        # disassemble non-trap instructions with Capstone; the generator
        # yields nothing when the bytes do not form a valid instruction,
        # in which case they are emitted as raw data instead of crashing
        instr = next(self.cse.disasm(code, address), None)
        if instr is None:
            return self._raw_data(code)
        return instr

    def _symbol_or_label(self, ea):
        '''Return the symbolic name for address ea.

        Prefers a symbol supplied by the callback object; otherwise
        generates an 'l_<hex address>' label and records it in self.labels.
        '''
        found, sym = self.rom_cb.get_symbol(ea)
        if found:
            return sym
        label = 'l_{:x}'.format(ea)
        self.labels.setdefault(ea, label)
        return label

    def _format_operands(self, addr, instr):
        '''Build the operand list for a Capstone instruction located at addr.

        PC-relative displacement operands and branch displacements are
        replaced with symbols/labels; every other operand keeps the text
        produced by Capstone.
        '''
        op_texts = self._split_operands(instr.op_str)
        ops = []
        for op in instr.operands:
            # every operand consumes its own piece of the operand text,
            # whether the text is kept or replaced by a symbol
            text = op_texts.pop(0) if op_texts else None

            if op.type == M68K_OP_MEM and op.address_mode == M68K_AM_PCI_DISP:
                # target = instruction address + 2 + displacement
                ops.append(self._symbol_or_label(addr + op.mem.disp + 2))
            elif (op.type == M68K_OP_BR_DISP and
                    op.address_mode == M68K_AM_BRANCH_DISPLACEMENT):
                ops.append(self._symbol_or_label(addr + op.br_disp.disp + 2))
            elif text is not None:
                ops.append(text)
            # else: Capstone reported more operands than it printed;
            # nothing to show for this one
        return ops

    def _print_listing(self, pp_dasm):
        '''Print the collected instructions, preceded by labels where known.'''
        for instr in pp_dasm:
            if instr['addr'] in self.labels:
                print('\n' + self.labels[instr['addr']] + ':')
            print(hex(instr['addr']).ljust(15), end='')
            print(instr['mnem'], '\t', end='')
            print(','.join(instr['ops']))

    def dasm_region(self, addr, size, data):
        '''Disassemble size bytes of data located at address addr and print them.

        The region is decoded completely before anything is printed so
        that labels for forward references are known when their target
        line is emitted.
        '''
        pp_dasm = []
        last_addr = addr + size
        offset = 0

        while addr < last_addr:
            # prefetch up to 10 bytes of binary data for the next instruction
            bin_length = min(last_addr - addr, 10)
            bin_prefetch = bytearray(data[offset:offset + bin_length])
            if not bin_prefetch:
                break  # data is shorter than the requested size

            instr = self.dasm_single(addr, bin_prefetch)

            if isinstance(instr, tuple):
                # A-Trap or raw data decoded by dasm_single itself
                mnem, ops, length = instr
            else:
                mnem = instr.mnemonic
                length = instr.size
                ops = self._format_operands(addr, instr)

            pp_dasm.append({'addr': addr, 'mnem': mnem, 'ops': ops})
            addr += length
            offset += length

        self._print_listing(pp_dasm)
|
|
|
|
|
|
class ROMDisassembler:
    '''Prints an annotated listing of a ROM image.

    rom_data is the raw ROM contents and rom_db the annotation database;
    the code reads rom_db['main_info']['phys_addr'] (address of ROM
    offset 0) and rom_db['annot_items'] (entries keyed by ROM offset).
    '''

    # supported integer sizes -> (struct format, data directive)
    _INT_TYPES = {1: ('>B', 'dc.b'), 2: ('>H', 'dc.w'), 4: ('>I', 'dc.l')}
    # supported plain value formats -> printf-style template
    _VALUE_FORMATS = {'hex': '0x%X', 'dec': '%d'}

    def __init__(self, rom_data, rom_db):
        self.rom_data = rom_data
        self.rom_db = rom_db
        self.start_addr = rom_db['main_info']['phys_addr']
        self.m68k_dasm = M68KDasm(self)

    def get_symbol(self, addr):
        '''Look up the label annotated at address addr.

        Returns (True, label) when the database has an entry at the
        corresponding ROM offset, otherwise (False, '').
        '''
        entry = self.rom_db['annot_items'].get(addr - self.start_addr)
        if entry is None:
            return (False, '')
        return (True, entry['label'])

    def fmt_single_entry(self, format, size, offset):
        '''Print one data item of size bytes located at ROM offset.

        format is 'hex' or 'dec' for big-endian integers of 1, 2 or 4
        bytes, or 'offset' for a 32-bit value that is printed relative to
        BaseOfRom when an annotated item exists at that offset.
        '''
        print(hex(self.start_addr + offset).ljust(15), end='')

        if format in self._VALUE_FORMATS:
            if size not in self._INT_TYPES:
                print("INVALID SIZE!")
                return
            code, directive = self._INT_TYPES[size]
            value = struct.unpack(code, self.rom_data[offset:offset+size])[0]
            print(directive + '\t' + self._VALUE_FORMATS[format] % value)
        elif format == 'offset':
            dest_offset = struct.unpack('>I', self.rom_data[offset:offset+4])[0]
            if dest_offset in self.rom_db['annot_items']:
                symbol = self.rom_db['annot_items'][dest_offset]['label']
                print("dc.l\t" + symbol + '-BaseOfRom')
            else:
                print("dc.l\t0x%X" % dest_offset)
        else:
            # terminate the line that already carries the address column
            print("INVALID FORMAT!")

    def dasm_regions(self, start_addr, size, data, regions):
        '''Print the regions produced by parse_subregs (or a single code region).

        Region tuples have one of these layouts:
          (first_offset, last_offset, 'code', index of first byte in data)
          (offset, last_offset, 'align', boundary)
          (offset, label, 'int', format, size)
        '''
        # NOTE(review): not read anywhere in the visible code; kept in case
        # external code relies on the attribute
        self.labels = {}

        for reg in regions:
            kind = reg[2]
            if kind == 'align':
                print(hex(start_addr + reg[0]).ljust(15), end='')
                print('align\t' + str(reg[3]))
            elif kind == 'code':
                reg_size = reg[1] - reg[0] + 1
                self.m68k_dasm.dasm_region(start_addr + reg[0], reg_size,
                                           data[reg[3]:reg[3]+reg_size])
            elif kind == 'int':
                print("")
                print((reg[1] + ':').ljust(15))
                self.fmt_single_entry(reg[3], reg[4], reg[0])
            else:
                print("Unknown region type " + kind)

    def parse_subregs(self, start, size, subregs):
        '''Split the code entry [start, start + size) around its subregions.

        Each 'align' or 'int' subregion is preceded by a 'code' region that
        covers the bytes since the previous subregion; a final 'code'
        region covers what is left. Parsing stops early, returning the
        regions collected so far, on an out-of-range offset or an unknown
        subregion type.
        '''
        regs = []
        limit = start + size
        reg_start = start

        for reg in subregs:
            kind = reg['type']
            if kind not in ('align', 'int'):
                print("Unknown subregion type " + kind)
                return regs

            offset = reg['offset']
            if offset < reg_start or offset >= limit:
                print("Invalid subregion offset: 0x%X" % offset)
                return regs

            # code preceding this subregion
            regs.append((reg_start, offset - 1, 'code', reg_start - start))

            if kind == 'align':
                boundary = reg['boundary']
                reg_end = align(offset, boundary)
                regs.append((offset, reg_end - 1, 'align', boundary))
                reg_start = reg_end
            else:
                reg_size = reg['size']
                if 'label' in reg:
                    label = reg['label']
                else:
                    label = 'l_{:x}'.format(self.start_addr + offset)
                self.m68k_dasm.labels[self.start_addr + offset] = label
                regs.append((offset, label, 'int', reg['format'], reg_size))
                reg_start = offset + reg_size

        if reg_start < limit:
            regs.append((reg_start, limit - 1, 'code', reg_start - start))
        return regs

    def fmt_array(self, entry, offset):
        '''Print entry['size'] // entry['elsize'] consecutive items starting at offset.'''
        elsize = entry['elsize']
        for _ in range(entry['size'] // elsize):
            self.fmt_single_entry(entry['format'], elsize, offset)
            offset += elsize

    def parse_struct(self, fields, offset):
        '''Print the fields of a struct and return the sum of their sizes.'''
        size_acc = 0
        for field in fields:
            if field['type'] == 'int':
                self.fmt_single_entry(field['format'], field['size'], offset)
            elif field['type'] == 'array':
                self.fmt_array(field, offset)
            else:
                print("Unknown struct field type %s" % field['type'])
            offset += field['size']
            size_acc += field['size']
        return size_acc

    def fmt_entry(self, entry, offset):
        '''Print the annotated entry at ROM offset; return the bytes it covers.

        The returned size may be 0 (an 'align' entry at an already aligned
        offset, or an entry whose size is 0).
        '''
        print("")

        if entry['type'] == 'align':
            end = align(offset, entry['boundary'])
            print(hex(self.start_addr + offset).ljust(15), end='')
            print('align\t' + str(entry['boundary']))
            return end - offset

        print((entry['label'] + ':').ljust(15))

        if entry['type'] == 'array':
            self.fmt_array(entry, offset)
        elif entry['type'] == 'int':
            self.fmt_single_entry(entry['format'], entry['size'], offset)
        elif entry['type'] == 'code':
            size = entry['size']
            if entry['arch'] == '68k':
                if 'subregs' in entry:
                    regs = self.parse_subregs(offset, size, entry['subregs'])
                else:
                    regs = [(offset, offset + size - 1, 'code', 0)]
                self.dasm_regions(self.start_addr, size,
                                  self.rom_data[offset:offset+size], regs)
            elif entry['arch'] == 'ppc':
                print("PPC disassembler not implemented yet")
            else:
                print("Unknown code region architecture " + entry['arch'])
        elif entry['type'] == 'fixlenstr': # fixed-length string
            print(hex(self.start_addr + offset).ljust(15), end='')
            str_len = entry['size']
            fmt_str = '%is' % str_len
            raw = struct.unpack(fmt_str, self.rom_data[offset:offset+str_len])[0]
            print('"%s"' % raw.decode('mac_roman'))
        elif entry['type'] == 'struct':
            return self.parse_struct(entry['fields'], offset)

        return entry['size']

    def _dump_byte(self, offset):
        '''Print the single byte at ROM offset as a dc.b directive.'''
        print(hex(self.start_addr + offset).ljust(15), end='')
        print("dc.b\t0x%X" % struct.unpack('>B', self.rom_data[offset:offset+1]))

    def dasm_region(self, start, end):
        '''Print the ROM contents between offsets start (inclusive) and end.

        Annotated items are formatted according to their database entry;
        everything else is dumped byte by byte. end is clamped to the size
        of the ROM image.
        '''
        end = min(end, len(self.rom_data))
        items = self.rom_db['annot_items']
        offset = start

        while offset < end:
            if offset in items:
                consumed = self.fmt_entry(items[offset], offset)
                if consumed > 0:
                    offset += consumed
                    continue
                # The entry covered no bytes; without advancing, the same
                # entry would be formatted forever. Treat the byte at this
                # offset as unannotated data.
            self._dump_byte(offset)
            offset += 1
|
|
|
|
|
|
def main():
    '''Command line entry point.

    Loads the ROM image given by --rom_path, selects the annotation
    database matching the ROM checksum (first four bytes of the image)
    and prints the disassembly of the region between --start and --end.
    Exits with status 1 when the ROM size is not 4 MB or the database
    file is missing.
    '''
    parser = ArgumentParser()
    parser.add_argument('--rom_path', type=str,
                        dest='rom_path',
                        help='path to a PowerMacintosh ROM file to process',
                        metavar='ROM_PATH', required=True)
    parser.add_argument('--start', type=lambda x: int(x,0),
                        dest='start_offs', default=0,
                        help='offset to the start of the region to disassemble',
                        required=False,
                        )
    parser.add_argument('--end', type=lambda x: int(x,0),
                        dest='end_offs', default=0x500,
                        help='offset to the end of the region to disassemble',
                        required=False,
                        )

    opts = parser.parse_args()

    # just load the whole ROM image into memory
    with open(opts.rom_path, 'rb') as rom_file:
        rom_data = rom_file.read()

    rom_size = len(rom_data)
    if rom_size != (4 * 1024 * 1024):
        print("Invalid ROM file size %d (expected 4 MB)" % rom_size)
        # do not go on processing an image that was just reported invalid
        raise SystemExit(1)

    check_sum = struct.unpack('>I', rom_data[0:4])[0]
    print("ROM Checksum: %X" % check_sum)

    db_name = 'ROMDB_' + '{:x}'.format(int(check_sum)).upper() + '.yaml'
    db_path = 'database/' + db_name

    try:
        with open(db_path, 'rb') as db_file:
            yaml = YAML()
            annot_db = yaml.load(db_file)
    except FileNotFoundError:
        print("No annotation database found for this ROM (expected %s)" % db_path)
        raise SystemExit(1) from None

    print(annot_db['main_info']['name'])

    rdasm = ROMDisassembler(rom_data, annot_db)
    rdasm.dasm_region(opts.start_offs, opts.end_offs)


if __name__ == "__main__":
    main()
|