1098 lines
32 KiB
Python
Executable File
1098 lines
32 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
from tablib import Dataset
|
|
import re
|
|
import struct
|
|
import numpy as np
|
|
from collections import defaultdict
|
|
from itertools import chain
|
|
from heapq import merge
|
|
|
|
# for main:
|
|
from sys import argv
|
|
from os import path, makedirs
|
|
|
|
# for nonfunctional writeout
|
|
from mpwobj import MPWObject
|
|
from blist import sortedlist
|
|
|
|
|
|
def trim_rom(rom):
    """Return *rom* cut down to the length recorded inside it.

    The length is the big-endian unsigned 32-bit value stored at byte
    offset 0x1a of *rom*; everything from that position on is dropped.
    """
    (declared_length,) = struct.unpack_from('>L', rom, 0x1a)
    trimmed = rom[:declared_length]
    return trimmed
|
|
|
|
|
|
def read_mac_source_file(path):
    """Read the text file at *path*, decoded as Mac Roman, and return it.

    Any carriage return still present in the decoded text is replaced by a
    line feed before the string is returned.
    """
    # May insert arbitrary blank lines!
    with open(path, encoding='mac_roman') as handle:
        text = handle.read()
    return text.replace('\r', '\n')
|
|
|
|
|
|
def parse_asm(src):
    """
    Lazily parse assembly text, yielding one dict per matched line.

    Every dict has the keys 'label', 'directive', 'args' and 'comment'.
    'args' is a list of whitespace-stripped strings (empty when the line has
    no arguments); the other three are strings, with '' standing in for a
    field that is absent from the line.
    """

    # The MPW Asm (and PPCAsm) syntax is roughly:
    #   [label[:]][ directive[ arg[,arg...]]][;comment]
    pattern_text = ''.join([
        r'^',
        r'(?:(?P<label>\w+):?)?',
        r'(?:\s+',
        r'(?P<directive>[\w\.]+)',
        r'(?P<args>\s+[^\n;,]+(?:\s*,\s*[^\n;,]+)*?)?',
        r')?',
        r'(?:\s*;\s*(?P<comment>.*))?',
        r'\s*$',
    ])

    # Narrow "whitespace" to blanks and tabs so that it cannot match a
    # line break.
    pattern_text = pattern_text.replace(r'\s', r'[ \t]')
    line_re = re.compile(pattern_text, re.MULTILINE)

    for match in line_re.finditer(src):
        # Turn the raw argument text into a list.
        raw_args = match.group('args')
        if raw_args is None:
            arg_list = []
        else:
            arg_list = [piece.strip() for piece in raw_args.split(',')]
            if arg_list == ['']:
                arg_list = []

        # Unmatched groups come back as None; report them as ''.
        fields = {
            name: ('' if value is None else value)
            for name, value in match.groupdict().items()
        }
        fields['args'] = arg_list

        yield fields
|
|
|
|
|
|
def vector_routines_from_source(src):
    """Collect the vIndirect lines of an assembly source into a Dataset.

    Returns a Dataset with the columns name, vtable, voffset, register and
    comment, one row per vIndirect line, in source order.

    Raises ValueError for a vIndirect line that has no label or does not
    have exactly three arguments.
    """
    table = Dataset(headers=['name', 'vtable', 'voffset', 'register', 'comment'])

    for entry in parse_asm(src):
        if entry['directive'] != 'vIndirect':
            continue

        name = entry['label']
        operands = entry['args']
        if name == '' or len(operands) != 3:
            raise ValueError('Malformed Vector Table line: %r %r' % (name, operands))

        # The first character of each operand is dropped before conversion:
        # table and offset are read as hex, the register as decimal.
        vtable_arg, voffset_arg, register_arg = operands
        vtable = int(vtable_arg[1:], 16)
        voffset = int(voffset_arg[1:], 16)
        register = int(register_arg[1:])

        # Just dispose of register conventions???

        table.append((name, vtable, voffset, register, entry['comment']))

    return table
|
|
|
|
|
|
def vector_table_from_rom(rom):
    """Extract (vtable, voffset, offset) rows by walking the ROM's vector init code.

    The 32-bit value at ROM offset 0x66 is taken as the location of a routine
    made of 6-byte BSR.L instructions terminated by an RTS. The first BSR is
    followed to locate a descriptor block (read but not otherwise used); each
    later BSR is expected to land on a 0x41fa000e longword, after which the
    table number, record address and per-vector implementation addresses are
    read at fixed displacements.

    Returns an empty Dataset when the 0x66 value lies beyond the ROM or does
    not point at a BSR.L opcode.

    Raises ValueError when the instruction sequence does not have the
    expected shape, and struct.error when a read falls outside *rom*.
    """
    RTS = 0x4e75
    BSR = 0x61ff

    d = Dataset(headers=['vtable', 'voffset', 'offset'])

    InitRomVectors, = struct.unpack_from('>L', rom, 0x66)

    if InitRomVectors > len(rom) - 4: return d
    if struct.unpack_from('>H', rom, InitRomVectors)[0] != BSR: return d # missing BSR InitDescriptors

    # Following code is kinda borrowed...

    InitDescriptors = InitRomVectors + struct.unpack_from('>l', rom, InitRomVectors + 2)[0] + 2

    # These two values are read but never used below. The reads are kept so
    # that an out-of-range descriptor address still raises struct.error here.
    kDescTableFormat, = struct.unpack_from('>L', rom, InitDescriptors + 10)
    tableCnt, = struct.unpack_from('>L', rom, InitDescriptors + 14)

    # Skip the first 6-byte BSR.L (the one followed above).
    top_bsr_base = InitRomVectors + 6

    while struct.unpack_from('>H', rom, top_bsr_base)[0] != RTS:
        if struct.unpack_from('>H', rom, top_bsr_base)[0] != BSR:
            # Fixed: the address used to be passed as a second exception
            # argument instead of being formatted into the message.
            raise ValueError('Did not find expected BSR at 0x%x' % top_bsr_base)

        bsr_target = top_bsr_base + struct.unpack_from('>l', rom, top_bsr_base + 2)[0] + 2

        # print(' InitVec @ 0x%x' % bsr_target)

        if struct.unpack_from('>L', rom, bsr_target)[0] != 0x41fa000e: raise ValueError('Did not find expected LEA')

        table, = struct.unpack_from('>H', rom, bsr_target + 6)
        rec = struct.unpack_from('>L', rom, bsr_target + 16)[0] & 0xfffff
        kInfoTableFormat, = struct.unpack_from('>L', rom, rec)  # unused
        kDirectVector, = struct.unpack_from('>L', rom, rec+4)  # unused
        entry_count, = struct.unpack_from('>L', rom, rec+8)

        #print('Table %04x @ %x (VecInfo @ %x, %d vectors) %s' % (table, bsr_target, rec, entry_count, TableNames.get(table, '')))

        for i in range(entry_count):
            # Vector offsets within a table start at 4 and advance by 4.
            offset = 4 + 4 * i
            implementation = struct.unpack_from('>L', rom, bsr_target + 16 + offset)[0] & 0xfffff

            d.append((table, offset, implementation))

        top_bsr_base += 6

    return d
|
|
|
|
|
|
def vector_glue_from_rom(rom):
    """Scan the ROM for 10-byte vector glue stubs and list them in a Dataset.

    Only the first N bytes of *rom* are searched, N being the 32-bit value
    stored at offset 0x1a. Two byte patterns are recognised, each in its own
    pass over every even offset:

    * longword 0x2f3081e2, table word, vector word, 0x4e75 -- recorded with
      register 7;
    * three masked opcode words (at +0, +4, +8) that all name the same
      register, with the table word at +2 and the vector word at +6.

    A hit is kept only when its table word is a multiple of 4 in the range
    0x2010..0x208c. Rows are (vtable, voffset, register, offset).
    """
    found = Dataset(headers=['vtable', 'voffset', 'register', 'offset'])

    (search_end,) = struct.unpack_from('>L', rom, 0x1a)
    rom = rom[:search_end]

    def word(at):
        return struct.unpack_from('>H', rom, at)[0]

    def plausible_table(value):
        return 0x2010 <= value <= 0x208c and value % 4 == 0

    # First pass: the fixed-opcode form.
    for pos in range(0, len(rom) - 10, 2):
        if struct.unpack_from('>L', rom, pos)[0] != 0x2f3081e2: continue
        if word(pos + 8) != 0x4e75: continue

        table = word(pos + 4)
        vector = word(pos + 6)

        if plausible_table(table):
            found.append((table, vector, 7, pos))

    # Second pass: the form whose register number is encoded in the opcodes.
    for pos in range(0, len(rom) - 10, 2):
        first_op = word(pos)
        if first_op & 0xf1ff != 0x2078: continue
        second_op = word(pos + 4)
        if second_op & 0xf1f8 != 0x2068: continue
        third_op = word(pos + 8)
        if third_op & 0xfff8 != 0x4ed0: continue

        ra = (first_op >> 9) & 7
        rb = (second_op >> 9) & 7
        rc = second_op & 7
        rd = third_op & 7

        # All four register fields have to agree.
        if not (ra == rb == rc == rd): continue

        table = word(pos + 2)
        vector = word(pos + 6)

        if plausible_table(table):
            found.append((table, vector, ra, pos))

    return found
|
|
|
|
|
|
def trap_routines_from_source(src):
    """Build a Dataset of trap number -> name from a dispatch table source.

    Every trap number in 0xA000..0xAFFF gets a row. The name defaults to
    '_XXXX' (the number in hex) and is replaced by the upper-cased second
    argument of any 'ToolBox' or 'OS' line that names that trap.
    """
    trap_range = range(0xa000, 0xb000)

    names = ['_%04X' % number for number in trap_range]

    for line in parse_asm(src):
        operands = line['args']

        # Skip macro definitions
        if any(operand.startswith('&') for operand in operands):
            continue

        if len(operands) < 2 or len(operands) >= 4:
            # print('cancelling', line)
            continue

        directive = line['directive']
        if directive == 'ToolBox':
            base = 0xa800
        elif directive == 'OS':
            base = 0xa000
        else:
            continue

        # The first character of the operand is dropped; the rest is hex.
        trapnum = int(operands[0][1:], 16) + base

        trapname = operands[1].upper()
        if not trapname: continue

        names[trapnum - 0xa000] = trapname

    ds = Dataset()
    ds.append_col(trap_range, header='trap')
    ds.append_col(names, header='name')
    return ds
|
|
|
|
|
|
def trap_offsets_from_rom(rom):
    """Read the ROM's trap table into a Dataset of (trap, offset) rows.

    The table position is the 32-bit value at ROM offset 0x22 and holds 1280
    longwords: the first 1024 are reported as traps 0xA800 upward (toolbox),
    the last 256 as traps 0xA000 upward (os). Entries that are zero, or
    equal to the 32-bit value at ROM offset 0x56, are left out.
    """
    result = Dataset(headers=['trap', 'offset'])

    (traptab,) = struct.unpack_from('>L', rom, 0x22)
    (badtrap,) = struct.unpack_from('>L', rom, 0x56)

    toolbox_slots = 1024
    os_slots = 256

    for slot in range(toolbox_slots + os_slots):
        (offset,) = struct.unpack_from('>L', rom, traptab + 4 * slot)

        if offset in (badtrap, 0):
            continue

        if slot >= toolbox_slots:  # os
            trapinsn = 0xa000 + (slot - toolbox_slots)
        else:  # toolbox
            trapinsn = 0xa800 + slot

        result.append((trapinsn, offset))

    return result
|
|
|
|
|
|
def certain_module_boundaries_from_glue_offsets(offsets):
    """Yield offsets at which a module certainly starts, given glue offsets.

    0 is always yielded first. After that, whenever a glue offset lies more
    than 10 bytes past the previous one, the end of the previous glue
    (previous offset + 10) rounded up to a multiple of 16 is yielded.
    """
    ROMALIGN = 16
    GLUE_SIZE = 10

    yield 0  # start of the first module

    previous = None
    for current in offsets:
        gap_after_previous = previous is not None and current > previous + GLUE_SIZE
        if gap_after_previous:
            end_of_glue = previous + GLUE_SIZE
            # round up to the next multiple of ROMALIGN
            padded = end_of_glue + ROMALIGN - 1
            yield padded - padded % ROMALIGN
        previous = current
|
|
|
|
|
|
def aggregate_reasons(*iterables):
    """Group reasons by offset.

    Each argument is an iterable of (offset, reason) pairs. The result is an
    items view of (offset, [reasons...]) pairs, with the reasons kept in the
    order in which they were encountered.
    """
    reasons_by_offset = defaultdict(list)

    for pairs in iterables:
        for offset, reason in pairs:
            reasons_by_offset[offset].append(reason)

    return reasons_by_offset.items()
|
|
|
|
|
|
def module_ranges(starts, stops):
    """Pair each module start with the nearest following stop.

    *starts* and *stops* are iterables of (offset, reasons) pairs. For each
    start, in ascending order, the stop is the smaller of the first stop
    offset strictly greater than the start and the next start offset (the
    latter with the reason 'ran into next module'). A start with neither is
    skipped.

    Yields (range(start, stop), start_reasons, stop_reasons).
    """
    ordered_stops = sorted(stops)
    ordered_starts = sorted(starts)
    stop_count = len(ordered_stops)
    start_count = len(ordered_starts)

    cursor = 0  # index into ordered_stops; only ever moves forward

    for index, (start, start_reasons) in enumerate(ordered_starts):
        # Step over stops located at or before this start.
        while cursor < stop_count and ordered_stops[cursor][0] <= start:
            # always leave one stop at the end!
            cursor += 1

        candidates = []
        if cursor < stop_count:
            candidates.append(ordered_stops[cursor])
        if index + 1 < start_count:
            following_start = ordered_starts[index + 1][0]
            candidates.append((following_start, ['ran into next module']))

        try:
            stop, stop_reasons = min(candidates)
        except ValueError:
            continue  # no way to know where to stop this putative module?

        yield range(start, stop), start_reasons, stop_reasons
|
|
|
|
|
|
def trimmed_module_ranges(mrngs, rom, glues):
    """Strip trailing zero padding from module ranges where that is safe.

    A module is trimmed only when all of the following hold: its stop offset
    is not one of *glues*, it is at least 16 bytes long, its last two bytes
    are zero, and the 14 bytes before those contain something non-zero. The
    stop is then moved back two bytes at a time past the zero words, and an
    'N nulls trimmed' note is appended to that module's stop reasons (the
    list is modified in place).

    Yields (range, start_reasons, stop_reasons) for every input triple.
    """
    # Can trim a module that is clearly just padded to align...
    # *if* it is not followed by a glue!
    glue_offsets = set(glues)

    for mrng, start_reasons, stop_reasons in mrngs:
        end = mrng.stop
        looks_padded = (
            end not in glue_offsets
            and end - mrng.start >= 16
            and any(rom[end - 16:end - 2])
            and not any(rom[end - 2:end])
        )

        if looks_padded:
            new_end = end
            while not any(rom[new_end - 2:new_end]):
                new_end -= 2
            stop_reasons.append('%d nulls trimmed' % (end - new_end))
            mrng = range(mrng.start, new_end)

        yield mrng, start_reasons, stop_reasons
|
|
|
|
|
|
|
|
|
|
def parse_manual(manual):
    """Index parsed manual-file lines by directive.

    *manual* is an iterable of dicts with 'label', 'directive' and 'args'
    keys. Lines without a directive are ignored. For the others the label is
    read as a hexadecimal offset, and (offset, *args) is appended to the list
    stored under the upper-cased directive name.

    Raises ValueError when a label is not hexadecimal or when an offset is
    lower than the one on the preceding directive line.
    """
    by_directive = defaultdict(list)
    last_offset = 0

    for line in manual:
        name = line['directive']
        if not name:
            continue

        try:
            offset = int(line['label'], 16)
        except ValueError:
            raise ValueError('manual file bad label: %r' % line)

        if offset < last_offset:
            raise ValueError('manual file out of order: %r' % line)

        by_directive[name.upper()].append((offset, *line['args']))
        last_offset = offset

    return by_directive
|
|
|
|
|
|
def manual_directives(manual, *directives):
    """Merge the entries of several directives into one offset-ordered stream.

    For every requested directive name, each (offset, *args) entry stored in
    *manual* becomes (offset, name, *args). The per-directive lists are then
    combined with heapq.merge, which relies on each list already being
    sorted.
    """
    def tagged(name):
        return [(offset, name, *rest) for (offset, *rest) in manual.get(name, [])]

    return merge(*map(tagged, directives))
|
|
|
|
|
|
def file_ranges_from_manual(manual):
    """Yield (range, *args) for every FILE directive in the manual.

    The range runs from the FILE directive's offset to the offset of the
    FILE or ENDF directive that follows it.

    Raises ValueError when a FILE directive is the last of those directives.
    """
    boundaries = list(manual_directives(manual, 'FILE', 'ENDF'))
    final_index = len(boundaries) - 1

    for index, entry in enumerate(boundaries):
        offset, directive, *args = entry

        if directive != 'FILE':
            continue

        if index == final_index:
            raise ValueError('Unending FILE %x %r' % (offset, args))

        next_offset = boundaries[index + 1][0]
        yield (range(offset, next_offset), *args)
|
|
|
|
|
|
def feature_ranges_from_manual(manual, feature):
    """Yield the offset ranges over which *feature* is switched on.

    The FILE, ENDF and *feature* directives of the manual are scanned in
    merged offset order. A range starts at a FILE directive that lists
    *feature* among its arguments, or at a *feature* directive without an
    'OFF' argument. It ends at the next FILE directive that does not list
    the feature, the next ENDF, or the next *feature* directive with 'OFF'.

    Raises ValueError when a range is started but never ended. The message
    names the feature actually being scanned (it used to say 'MODGUESS' even
    for other features).
    """
    relevant_directives = iter(manual_directives(manual, 'FILE', 'ENDF', feature))

    # potential problem here: manual sort order is not stable!

    # Both inner loops pull from the same iterator, so each one resumes
    # where the other stopped.
    while True:
        for offset, directive, *args in relevant_directives:
            if (directive == 'FILE' and feature in args) or (directive == feature and 'OFF' not in args):
                started_at = offset
                break
        else:
            # No more starts!
            break

        for offset, directive, *args in relevant_directives:
            if (directive == 'FILE' and feature not in args) or (directive == 'ENDF') or (directive == feature and 'OFF' in args):
                ended_at = offset
                break
        else:
            # No more ends!
            raise ValueError('%s still turned on at end of manual file!' % feature)

        yield range(started_at, ended_at)
|
|
|
|
|
|
def ranges_that_must_be_in_a_single_module(gluelocs, implocs):
|
|
# both args are lists of: (offset, vtable, voffset)
|
|
|
|
gluelocs = sorted(gluelocs)
|
|
|
|
implocs = {(vt, vo): offset for (offset, vt, vo) in implocs}
|
|
|
|
# for each group of glues...
|
|
agi = 0
|
|
while agi < len(gluelocs):
|
|
bgi = agi + 1
|
|
while bgi < len(gluelocs) and gluelocs[bgi][0] == gluelocs[bgi-1][0] + 10:
|
|
bgi += 1
|
|
|
|
# agi:bgi now make a group of glues
|
|
|
|
range_stop = gluelocs[bgi-1][0] + 10
|
|
|
|
# Thing that I figured out:
|
|
# If the first element in a sequence of glues is *not* the first implementation,
|
|
# then that implementation label is an "entry point", *not* the module name
|
|
|
|
imp_of_first_glue = implocs[gluelocs[agi][1:]]
|
|
|
|
range_start = gluelocs[agi][0]
|
|
for offset, vtable, voffset in gluelocs[agi:bgi]:
|
|
this_imp = implocs[vtable, voffset]
|
|
if this_imp < range_start:
|
|
range_start = this_imp
|
|
|
|
if imp_of_first_glue > range_start:
|
|
range_start -= 2 # prohibit a cut on our left margin, because it is not a module!
|
|
|
|
yield range(range_start, range_stop)
|
|
|
|
agi = bgi
|
|
|
|
|
|
|
|
def modguess(rom, ranges, noranges=(), labels=None):
    """Guess module start offsets inside the given ranges of the ROM.

    Parameters:
        rom      -- the ROM image (bytes-like).
        ranges   -- iterable of range objects to search. Candidates are the
                    offsets range(max(start, 16), stop, 16) of each one.
        noranges -- iterable of range objects, consumed in order; a candidate
                    strictly inside one of them is rejected.
        labels   -- container of labelled offsets (only `in` is used).

    A candidate is reported when the 16 bytes before it are not all zero and
    it either is labelled or is preceded by a return-like instruction (RTS,
    RTD, BRA.L, JMP (An)) followed by 2 to 14 bytes of zero padding.

    Yields (offset, 'MODGUESS (reason, ...)') tuples.

    Only StopIteration from *noranges* is treated as "no more forbidden
    ranges"; any other exception raised while advancing it now propagates
    instead of being silently swallowed.
    """
    if labels is None:
        labels = {}

    ENDERS = [
        # (length of instruction in bytes, first two bytes of instruction, mnemonic)
        (2, b'\x4e\x75', 'RTS'),
        (4, b'\x4e\x74', 'RTD'),
        (6, b'\x60\xff', 'BRA.L'),
    ]

    for i in range(8):
        ENDERS.append((2, bytes([0x4e, 0xd0+i]), 'JMP (A%d)' % i))

    # Need to figure out which areas are contraindicated by glue!

    noranges = iter(noranges)
    try:
        cur_norng = next(noranges)
    except StopIteration:
        norng_exhausted = True
    else:
        norng_exhausted = False

    for rng in ranges:
        rng = range(max(rng.start, 16), rng.stop, 16)

        for s in rng:
            if not norng_exhausted:
                try:
                    # Advance to the first forbidden range that ends after s.
                    while cur_norng.stop <= s:
                        cur_norng = next(noranges)
                except StopIteration:
                    norng_exhausted = True
                else:
                    if cur_norng.start < s < cur_norng.stop:
                        # print('cannot split at %x because of span %x-%x' % (s, cur_norng.start, cur_norng.stop))
                        continue # cannot guess a module here!

            # too much padding -- looks suspicious
            if not any(rom[s - 16:s]): continue

            reasons = ['align']

            # either has a label, or is preceded by a return instruction!
            labelled = s in labels

            preceded_by_return = False
            for oplen, opcode, opname in ENDERS:
                # return instruction must leave at least 2 but no more than 14 zeroes of padding
                for chk in range(max(0, s - 14 - oplen), s - oplen, 2):
                    if rom[chk:chk+2] == opcode and not any(rom[chk+oplen:s]):
                        preceded_by_return = True
                        break
                # opname keeps the matching mnemonic for the reason text below
                if preceded_by_return: break

            if labelled: reasons.append('label')
            if preceded_by_return: reasons.append('%s then padding' % opname)

            if labelled or preceded_by_return:
                yield s, 'MODGUESS (%s)' % ', '.join(reasons)
|
|
|
|
|
|
def findrefs(bin):
    """Scan *bin* for instructions carrying a PC-relative displacement.

    Every even offset is compared against a fixed table of two-byte opcodes
    (BRA, BSR, their .L forms, JSR, JMP, PEA and LEA to A0-A7). A match is
    accepted when the displacement field fits inside *bin* and is even. The
    target is the offset of the displacement field plus the signed
    big-endian displacement.

    Yields (offset, (target, name, width)) for every match whose target lies
    within 0..len(bin) inclusive; *width* is the displacement size in bytes.

    The bounds check now uses *bin* itself. It used to read a global named
    `rom`, which made the function unusable on any other buffer.
    """
    # (opcode bytes, displacement width in bytes, description with 't' as target placeholder)
    THANGS = [
        (b'\x60\x00', 2, 'BRA t'),
        (b'\x61\x00', 2, 'BSR t'),
        (b'\x60\xff', 4, 'BRA.L t'),
        (b'\x61\xff', 4, 'BSR.L t'),
        (b'\x4e\xba', 2, 'JSR t'),
        (b'\x4e\xfa', 2, 'JMP t'),
        (b'\x48\x7a', 2, 'PEA t'),
        (b'\x41\xfa', 2, 'LEA t, A0'),
        (b'\x43\xfa', 2, 'LEA t, A1'),
        (b'\x45\xfa', 2, 'LEA t, A2'),
        (b'\x47\xfa', 2, 'LEA t, A3'),
        (b'\x49\xfa', 2, 'LEA t, A4'),
        (b'\x4b\xfa', 2, 'LEA t, A5'),
        (b'\x4d\xfa', 2, 'LEA t, A6'),
        (b'\x4f\xfa', 2, 'LEA t, A7'),
    ]

    size = len(bin)

    for i in range(0, size, 2):
        for opcode, width, name in THANGS:
            # The last displacement byte must be even, i.e. an even target.
            if bin[i:i+2] == opcode and size-i-2 >= width and bin[i+2+width-1] & 1 == 0:
                break
        else:
            continue

        targ = i + 2 + int.from_bytes(bin[i+2:i+2+width], byteorder='big', signed=True)

        if not 0 <= targ <= size:
            continue

        yield i, (targ, name, width)
|
|
|
|
|
|
def filter_by_ranges(stream, ranges):
    """Yield the (offset, payload) items of *stream* that fall inside *ranges*.

    Range boundaries and stream items are sorted together into one list of
    events. A boundary event sorts before a stream item at the same offset,
    so an item sitting exactly on a range start is kept and one sitting
    exactly on a range stop is dropped. Output is in sorted order.
    """
    BOUNDARY, ITEM = 0, 1

    # Dodgy sorting stuff -- so that the range start or stop marker always precedes that offset!
    events = []
    for r in ranges:
        events.append((r.start, BOUNDARY, True))
        events.append((r.stop, BOUNDARY, False))
    for offset, payload in stream:
        events.append((offset, ITEM, payload))
    events.sort()

    inside = False
    for offset, kind, payload in events:
        if kind == BOUNDARY:
            inside = payload
        elif inside:
            yield offset, payload
|
|
|
|
|
|
# 0=off, 1=on but islands not hidden, 2=islands hidden
def get_islandguess_state(manual):
    """Work out the ISLANDGUESS mode requested by the manual.

    Every ISLANDGUESS entry is applied in order and the last one wins. No
    argument or 'ON' selects 1, 'HIDE' selects 2, 'OFF' selects 0. Without
    any ISLANDGUESS entry the result is 0.

    Raises ValueError when an entry has a non-zero offset or unrecognised
    arguments. The arguments are now formatted into the message (they used
    to be passed as a second exception argument).
    """
    is_on = 0

    for offset, *args in manual.get('ISLANDGUESS', []):
        if offset:
            raise ValueError('Activate ISLANDGUESS from offset 0')

        args = list(args)

        if not args:
            is_on = 1
        elif args == ['ON']:
            is_on = 1
        elif args == ['HIDE']:
            is_on = 2
        elif args == ['OFF']:
            is_on = 0
        else:
            raise ValueError('Illegal ISLANDGUESS args: %s' % ' '.join(args))

    return is_on
|
|
|
|
|
|
# Just in case islandguess finds a false positive (unlikely)
def get_nonislands_from_manual(manual):
    """Return a generator over the offsets of the manual's NONISLAND entries."""
    entries = manual.get('NONISLAND', [])
    return (entry_offset for (entry_offset, *_ignored) in entries)
|
|
|
|
|
|
def islandguess(rom, exclude=[]): # returns iterator of (referer, referent) pairs
    """Find 16-byte chunks that look like branch islands.

    Chunks are taken at every multiple of 16. A chunk qualifies when it
    starts with the bytes 60 FF, ends with ten zero bytes and its offset is
    not listed in *exclude*. The signed 32-bit value after the opcode, added
    to the chunk offset + 2, is the branch target; it has to be even and lie
    inside *rom*.

    Yields (chunk offset, target offset) pairs.
    """
    skipped = set(exclude)
    padding = bytes(10)
    rom_size = len(rom)

    for base in range(0, rom_size, 16):
        chunk = rom[base:base + 16]

        island_shaped = chunk.startswith(b'\x60\xff') and chunk.endswith(padding)
        if not island_shaped or base in skipped:
            continue

        (displacement,) = struct.unpack_from('>l', chunk, 2)
        target = displacement + base + 2

        if 0 <= target < rom_size and not target & 1:
            # this is definitely a legit target!
            yield base, target
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Command line: either "SRCBASE ROM" or just "ROM".
try:
    srcbase, rombase = argv[1:]
except ValueError:
    srcbase = None
    rombase, = argv[1:]

# Read the ROM through a context manager so the file handle is closed
# straight away (it used to be left open).
with open(rombase, 'rb') as romfile:
    rom = trim_rom(romfile.read())
|
|
|
|
# for offset, targoffset, kind, numbytes in findrefs(rom):
|
|
# print(hex(offset), hex(targoffset), kind, numbytes)
|
|
|
|
# Sneaky: edit the manual file as a convenience to the programmer!
# A line starting with "+HEX" is rewritten so that it starts with the absolute
# offset instead: HEX added to the most recent absolute offset seen at the
# start of a line, zero-padded to that offset's width.
# And be sure to preserve its line endings!

manual_path = rombase + '-info.txt'

try:
    # newline='' turns off newline translation, so CR / LF / CRLF endings
    # come through (and are written back) unchanged. mac_roman matches the
    # encoding used by read_mac_source_file when the file is parsed below.
    with open(manual_path, encoding='mac_roman', newline='') as f:
        manual = f.read()
except FileNotFoundError:
    pass
else:
    myre = re.compile(r'^(\+?)([A-Fa-f0-9]+)')
    accum = 0   # most recent absolute offset
    numlen = 1  # number of hex digits that offset was written with

    linelist = []

    for line in manual.splitlines(keepends=True):
        m = myre.match(line)
        if m:
            if m.group(1):
                # relative line: substitute the absolute offset
                nowat = accum + int(m.group(2), 16)
                line = ('%0' + str(numlen) + 'X%s') % (nowat, line[m.span()[1]:])
            else:
                numlen = len(m.group(2))
                accum = int(m.group(2), 16)

        linelist.append(line)

    bigstr = ''.join(linelist)

    # Only touch the file when something actually changed.
    if bigstr != manual:
        with open(manual_path, 'w', encoding='mac_roman', newline='') as f:
            f.write(bigstr)

try:
    manual = parse_manual(parse_asm(read_mac_source_file(manual_path)))
except FileNotFoundError:
    manual = {}
|
|
|
|
|
|
# Start with empty tables; they stay empty unless the matching source file
# is found under srcbase.
romvecinfo = Dataset(headers=['vtable', 'voffset', 'offset'])
romvecglue = Dataset(headers=['vtable', 'voffset', 'register', 'offset'])
srcvecinfo = Dataset(headers=['name', 'vtable', 'voffset', 'register', 'comment'])
romtrapinfo = Dataset(headers=['trap', 'offset'])
srctrapinfo = Dataset(headers=['trap', 'name'])

if srcbase:
    # Vector table: the first candidate path that exists wins.
    vector_table_candidates = [
        path.join(srcbase, 'Make', 'VectorTable.a'),
        path.join(srcbase, 'VectorTable.a'),
    ]
    for vtloc in vector_table_candidates:
        try:
            vectbl_src = read_mac_source_file(vtloc)
        except FileNotFoundError:
            continue

        romvecinfo = vector_table_from_rom(rom)
        romvecglue = vector_glue_from_rom(rom)
        srcvecinfo = vector_routines_from_source(vectbl_src)
        break

    # Trap dispatch table: same search strategy.
    trap_table_candidates = [
        path.join(srcbase, 'OS', 'DispTable.a'),
        path.join(srcbase, 'DispTable.a'),
    ]
    for ttloc in trap_table_candidates:
        try:
            traptbl_src = read_mac_source_file(ttloc)
        except FileNotFoundError:
            continue

        romtrapinfo = trap_offsets_from_rom(rom)
        srctrapinfo = trap_routines_from_source(traptbl_src)
        break
|
|
|
|
|
|
|
|
|
|
# Offsets of every vector glue found in the ROM.
offset_column = romvecglue.headers.index('offset')
glueoffsets = romvecglue.get_col(offset_column)


# Generic, useful code:

# This dict helps us figure out the correct order to place the "entry point" chunks so that
# Vectorize will insert the vector glues in the original order

gluelocs = {(vt, vo): offset for (vt, vo, reg, offset) in romvecglue}
implocs = {(vt, vo): offset for (vt, vo, offset) in romvecinfo}

# Source label -> offset of its glue. A label whose (table, offset) pair has
# no glue in the ROM raises KeyError.
gluelocs_by_label = {}
for (lbl, vt, vo, reg, cmt) in srcvecinfo:
    gluelocs_by_label[lbl] = gluelocs[vt, vo]

# Glue offset -> offset of the implementation it dispatches to.
imploc_of_glue = {}
for (vt, vo, reg, offset) in romvecglue:
    imploc_of_glue[offset] = implocs[vt, vo]

all_imps = set(implocs.values())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ISLANDGUESS

islandguess_state = get_islandguess_state(manual)

# Islands are only searched for when the manual switches the feature on.
islands = []
if islandguess_state:
    islands = list(islandguess(rom, exclude=get_nonislands_from_manual(manual)))

island_dict = dict(islands)
island_targ_set = {targ for (src, targ) in islands}

# New problem: the glue is all in the wrong order! How me fixy this?
# have in island_loc -> idx mapping? That'd work...
# An island directly following another one (16 bytes later) gets the next
# index; any other island restarts the count at 0.
idx_of_island = {}
last_island = None
for src, targ in islands:
    this_idx = prev_idx + 1 if src - 16 == last_island else 0

    idx_of_island[src] = this_idx

    last_island = src
    prev_idx = this_idx
|
|
|
|
|
|
|
|
|
|
|
|
# CREATE a gigantic defaultdict with offset -> [label, label] mappings!
# (and do some freeform comments while we're at it!)
alllabels = defaultdict(lambda: set())

# Labels for vector implementations: the source name if there is one, else a
# generated MGRxxxx_VECxxxx name.
veclabeldict = {(vt, vo): lbl for (lbl, vt, vo, reg, cmt) in srcvecinfo} # use this for later
for vt, vo, offset in romvecinfo:
    try:
        lbl = veclabeldict[vt, vo]
    except KeyError:
        lbl = 'MGR%04X_VEC%04X' % (vt, vo)

    alllabels[offset].add(lbl)

# Labels for trap implementations.
traplabeldict = dict(srctrapinfo)
for trap, offset in romtrapinfo:
    lbl = traplabeldict[trap]
    alllabels[offset].add(lbl)

# Labels given by hand through MOD / ENTRY directives.
for offset, *args in chain(manual.get('MOD', []), manual.get('ENTRY', [])):
    # A directive without a name contributes no label. Previously the label
    # left over from an earlier iteration was attached to this offset (or a
    # NameError was raised when there was none).
    if not args:
        continue

    lbl = args[0]
    alllabels[offset].add(lbl)

# Override labels that have been set for islands???
# This allows ISLANDGUESS to be set without extensive manual editing
# Not very functional though...

for (src, targ) in islands:
    # Name the island after its target. A target that is a vector glue is
    # first replaced by that glue's implementation. If nothing is known about
    # the target, an UNRESOLVED_ placeholder is used instead of failing.
    try:
        targ = imploc_of_glue[targ]
    except KeyError:
        pass

    candidates = alllabels[targ]
    if candidates:
        # shortest label wins, ties broken alphabetically
        label = min(candidates, key=lambda x: (len(x), x))
    else:
        label = 'UNRESOLVED_%X' % targ
        #raise ValueError('ISLAND target unresolved at $%X: BRA.L $%X' % (src, targ))

    label = 'ISLAND_%X_%s' % (src, label)

    alllabels[src] = [label]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# DONE CREATING ^^ ... PUT THAT IN A FUNCTION.
|
|
|
|
|
|
# Now do something similar for comments: label -> list of comment lines
cmts_by_label = defaultdict(lambda: [])

for (lbl, vt, vo, reg, cmt) in srcvecinfo:
    fmt = ('VectorTable comment: %s' % cmt) if cmt else 'VectorTable entry uncommented'
    cmts_by_label[lbl].append(fmt)

for trap, lbl in srctrapinfo:
    cmts_by_label[lbl].append('Trap %04X' % trap)
|
|
|
|
|
|
|
|
# And have a way to look up vector info based on offset:
|
|
|
|
|
|
|
|
|
|
filerngs = list(file_ranges_from_manual(manual))

numbytes = sum(r.stop - r.start for r, *_ in filerngs)
print('Unlinking', round(numbytes/len(rom)*100, 1), 'percent of rom')

gluelocs_for_modguess = ((offset, vt, vo) for (vt, vo, reg, offset) in romvecglue)
implocs_for_modguess = ((offset, vt, vo) for (vt, vo, offset) in romvecinfo)

# Every offset at which a module may begin, with the reason(s) why.
module_starts = aggregate_reasons(
    ((o, 'vector glue above') for o in certain_module_boundaries_from_glue_offsets(glueoffsets)),
    ((o + 16, 'island above') for (o, targ) in islands),
    ((offset, 'MOD directive') for offset, *_ in manual.get('MOD', [])),
    ((frng.start, 'FILE directive') for frng, *_ in filerngs),
    modguess(
        rom=rom,
        ranges=feature_ranges_from_manual(manual, 'MODGUESS'),
        noranges=ranges_that_must_be_in_a_single_module(gluelocs=gluelocs_for_modguess, implocs=implocs_for_modguess),
        labels=alllabels,
    ),
    ((o, 'BRA.L island') for (o, targ) in islands),
    ((o, 'start of ROM') for o in [0]),
)

# Every offset at which a module has to end, with the reason(s) why.
module_stops = aggregate_reasons(
    ((o, 'vector glue below') for o in glueoffsets),
    ((o, 'BRA.L island below') for (o, targ) in islands),
    ((o + 16, 'end of island') for (o, targ) in islands),
    ((frng.stop, 'end of FILE directive') for frng, *_ in filerngs),
    ((o, 'end of ROM') for o in [len(rom)]),
)

base_mod_rngs_and_their_reasons = module_ranges(starts=module_starts, stops=module_stops)

modrngs = list(trimmed_module_ranges(base_mod_rngs_and_their_reasons, rom=rom, glues=glueoffsets))


# And let's create a list of references...
# (For now, we will not care if the targets are labelled or not)
refguess_ranges = feature_ranges_from_manual(manual, 'REFGUESS')
references = list(filter_by_ranges(stream=findrefs(rom), ranges=refguess_ranges))
|
|
|
|
|
|
|
|
# Now we know the file ranges, the module ranges, and the labels to apply to those modules!
|
|
|
|
|
|
|
|
# Build one MPW object per FILE directive, and write it out when the
# directive carries the WRITEOUT argument.
for frng, *args in filerngs:
    relpath = args[0]
    if srcbase:
        abspath = path.join(srcbase, relpath)
    else:
        abspath = path.join(path.dirname(rombase), relpath)

    # Modules belong to the file in which they start.
    filemods = [(mrng, start_reasons, stop_reasons) for (mrng, start_reasons, stop_reasons) in modrngs if frng.start <= mrng.start < frng.stop]

    real_start = min(mrng.start for mrng, *_ in filemods)
    real_stop = max(mrng.stop for mrng, *_ in filemods)

    obj = MPWObject()

    obj.putfirst()
    obj.putcomment('UNLINKED FROM MAC OS ROM -- ELLIOT WAS HERE\n [%X, %X)' % (real_start, real_stop))
    obj.putdict(['Main'])

    for (mrng, start_reasons, stop_reasons) in filemods:
        if islandguess_state == 2 and mrng.start in island_dict: # ISLANDGUESS HIDE
            continue

        mod_data = bytearray(rom[mrng.start:mrng.stop])
        modname = None

        veclabels = sortedlist() # automatically sorted by glue, then by offset!
        nonveclabels = sortedlist()

        for offset, offset_labels in alllabels.items():
            if not mrng.start <= offset < mrng.stop: continue

            for label in offset_labels:
                try:
                    glue = gluelocs_by_label[label]
                except KeyError:
                    nonveclabels.add((0, offset, label)) # sort by offset if it's a non-glued label
                else:
                    veclabels.add((glue, offset, label)) # or by glue location if necessary

        # The module takes its name from the first label (vector labels
        # preferred) that sits exactly at the module start; that label is
        # then removed from the entry lists. Otherwise a name is generated.
        for label_list in (veclabels, nonveclabels):
            try:
                glue, offset, label = label_list[0]
            except IndexError:
                pass
            else:
                if offset == mrng.start:
                    modname = label
                    label_list.pop(0)
                    break
        else:
            modname = 'AUTOMOD_%X' % mrng.start

        # (offset, label) from (glue, offset, label) format
        entries_in_chunk_order = [(o, l) for ll in (veclabels, nonveclabels) for (g, o, l) in ll]
        entries_in_offset_order = sorted(entries_in_chunk_order)

        # Decide which refs I should actually twiddle:
        bad_refs = []
        good_refs = []
        for (offset, (targ, kind, width)) in references:
            if not mrng.start <= offset < mrng.stop:
                continue

            # Figure out the label of the referent! This can be slightly tricky...
            label = None

            orig_targ = targ

            # With islands hidden, look through an island to its real target.
            if islandguess_state == 2:
                try:
                    targ = island_dict[targ]
                except KeyError:
                    pass

            if targ in all_imps: # rare -- a direct call of a vectorised function
                try:
                    label = alllabels[targ]
                    if not label: raise ValueError
                except (KeyError, ValueError):
                    pass
                else:
                    label = '__v__' + min(label, key=lambda x: (len(x), x))

            if label is None:
                # A reference to a glue is treated as a reference to the
                # implementation behind it.
                try:
                    targ = imploc_of_glue[targ]
                except KeyError:
                    pass

                try:
                    label = alllabels[targ]
                    if not label: raise ValueError
                except (KeyError, ValueError):
                    pass
                else:
                    label = min(label, key=lambda x: (len(x), x))

            # An empty label collection counts as "no label".
            if not label: label = None

            if mrng.start <= targ < mrng.stop:
                continue # currently only want to touch outgoings

            if label:
                good_refs.append((offset, targ, kind, width, label, idx_of_island.get(orig_targ, -1)))

                if kind.startswith('B'):
                    # print('doing a weird one!')
                    # Branches get the negated position of the displacement
                    # field within the module, truncated to the field width.
                    offset_in_mod = offset + 2 - mrng.start
                    mask = (1 << (8 * width)) - 1
                    replacewith = (-offset_in_mod) & mask
                    replacewith = replacewith.to_bytes(width, byteorder='big', signed=False)
                else:
                    replacewith = bytes(width)
                mod_data[offset-mrng.start+2:offset-mrng.start+2+width] = replacewith # zero out -- put this elsewhere?

            else:
                bad_refs.append((offset, targ, kind, width))

        # sort good_refs by idx_of_island so that branch islands are inserted in the correct order!
        good_refs.sort(key=lambda tpl: tpl[-1])
        good_refs = [x[:-1] for x in good_refs] # remove that extra field from good_refs
        good_refs.reverse()

        dict_names = (l for ll in (veclabels, nonveclabels) for (g, o, l) in ll)
        dict_names = chain([modname], dict_names)

        # Figured out how to insert comments into the object, now mad with power

        descname = 'Module: %s [%X, %X)' % (modname, mrng.start, mrng.stop)

        c = ['#' * 85, '']
        c.append(descname)
        c.append('')
        c.append('Module started due to: %s' % ', '.join(start_reasons))

        if good_refs:
            c.append('')
            c.append('Resolved outgoing references:')
            for (offset, targ, kind, width, label) in good_refs:
                s = ' +%X (%X): %s' % (offset-mrng.start, offset, kind.replace('t', label))
                if not real_start <= targ < real_stop:
                    s += ' ; outside file'
                c.append(s)

        if bad_refs:
            c.append('')
            c.append('Unresolved outgoing references:')
            for (offset, targ, kind, width) in bad_refs:
                s = ' +%X (%X): %s' % (offset-mrng.start, offset, kind.replace('t', '$%X' % targ))
                if not real_start <= targ < real_stop:
                    s += ' ; bad outside file'
                c.append(s)

        cmts = cmts_by_label[modname]
        if cmts:
            c.append('')
            c.extend(cmts_by_label[modname])

        for (offset, label) in entries_in_offset_order:
            c.append('')

            leftnote = '+%X (%X): ' % (offset - mrng.start, offset)
            c.append(leftnote + label)

            leftnote = ' ' * len(leftnote)
            for line in cmts_by_label[label]:
                c.append(leftnote + line)

        if c: obj.putcomment('\n '.join(c))

        # </madwithpower>

        # Write actual semantic stuff:

        obj.putdict(dict_names)
        obj.putmod(modname)

        obj.putsize(len(mod_data))
        obj.putcontents(mod_data)

        # Need to put the entries in the *correct order*!
        for (offset, targ, kind, width, label) in good_refs:
            if kind.startswith('B'):
                obj.putweirdref(label, width, offset-mrng.start+2)
            else:
                obj.putsimpleref(label, width, offset-mrng.start+2)

        for (offset, label) in entries_in_chunk_order:
            obj.putentry(offset - mrng.start, label)

        obj.putcomment('%s ended due to: %s' % (modname, ', '.join(stop_reasons)))

    obj.putlast()

    if 'WRITEOUT' not in args: continue

    obj_bin = bytes(obj)

    # Only rewrite the file when its contents would change. An unreadable or
    # missing file counts as "needs writing". This replaces a bare `except:`
    # that was triggered by raising a dummy ValueError.
    try:
        with open(abspath, 'rb') as f:
            old = f.read()
    except OSError:
        old = None

    if old != obj_bin:
        print('>', relpath)
        try:
            makedirs(path.dirname(abspath), exist_ok=True)
        except FileNotFoundError:
            # makedirs('') raises FileNotFoundError, which happens when
            # abspath has no directory part.
            pass
        with open(abspath, 'wb') as f:
            f.write(obj_bin)
|
|
|
|
|
|
|
|
|