diff --git a/FirstPass68kToC.py b/FirstPass68kToC.py new file mode 100755 index 0000000..610e75f --- /dev/null +++ b/FirstPass68kToC.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python3 + + +# Pipe the output of MPW DumpObj into this script (never mind line endings). +# The output will be a good template for decompiling MPW C code by hand. + + +# Copyright (c) 2020 Elliot Nunn + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
def gethex(s):
    """Parse an assembler-style hexadecimal literal into an int.

    The ``$``, ``#`` and ``0x`` markers are stripped from *s* wherever they
    occur and the remainder is parsed as base 16, so ``'$10'``, ``'#$10'``
    and ``'0x10'`` all give 16.  A leading sign survives the stripping, so
    ``'-$0010'`` gives -16.

    Raises:
        ValueError: if *s* is not a string or does not reduce to valid
            hexadecimal.  The message names the offending value and the
            underlying error is chained as the cause.
    """
    try:
        digits = s.replace('$', '').replace('#', '').replace('0x', '')
        return int(digits, 16)
    except (AttributeError, TypeError, ValueError) as err:
        # Only the failures that mean "this input is not hex" are translated;
        # KeyboardInterrupt and friends must propagate untouched.  The 1-tuple
        # keeps the %-formatting correct when s is itself a tuple.
        raise ValueError('%r not hexable' % (s,)) from err
def data_sub(m):
    """Turn a ``label_XXXX`` reference into the trailing data into a C string.

    Substitution callback for ``re.sub``: group 1 of *m* is the label's
    address in hex.  It reads three module-level names set up for the
    procedure currently being processed:

    * ``data``        - the bytes that follow the procedure,
    * ``data_offset`` - the address of ``data[0]``,
    * ``data_splits`` - the addresses of every label that points into
      ``data``.

    A label outside ``data`` is returned unchanged.  Otherwise the result is
    a double-quoted literal built from the bytes starting at the label and
    running up to the next label in ``data_splits`` (or the end of ``data``),
    with trailing NUL bytes removed.  If the first remaining byte equals the
    number of bytes after it, the chunk is treated as a length-prefixed
    string: the length byte is dropped and the literal starts with ``\\p``.
    """
    this_offset = int(m.group(1), 16)
    if not (data_offset <= this_offset < data_offset + len(data)):
        return m.group()

    # Start at the referenced label, not at the beginning of the data;
    # otherwise every label would be rendered as the first string.
    start = this_offset - data_offset
    end = start + 1
    while end < len(data) and data_offset + end not in data_splits:
        end += 1
    chars = data[start:end].rstrip(b'\x00')

    pieces = []
    if chars and chars[0] == len(chars) - 1:
        pieces.append('\\p')
        chars = chars[1:]

    for c in chars:
        if c == 10:
            pieces.append('\\r')  # deliberately reversed CR and LF
        elif c == 13:
            pieces.append('\\n')
        elif c == ord('"'):
            pieces.append('\\"')
        elif c == ord('\\'):
            # A bare backslash would start an escape sequence in the C literal.
            pieces.append('\\\\')
        elif 32 <= c < 127:
            pieces.append(chr(c))
        else:
            pieces.append('\\x%02X' % c)
    return '"%s"' % ''.join(pieces)
# Characters accepted in a symbol name found after a function body.
NAME_CHARS = frozenset(string.ascii_letters + string.digits + '_')


def flatten_segments(segments):
    """Lay segments out in one flat image, 0x10000 bytes apart.

    *segments* is an iterable of ``(index, data)`` pairs in ascending index
    order.  Each segment is appended at offset ``index * 0x10000``, with zero
    bytes filling any gap before it.  If the image is already longer than
    that offset, no padding is added and the segment is simply appended.

    Returns the image as a bytearray.
    """
    image = bytearray()
    for index, data in segments:
        gap = index * 0x10000 - len(image)
        image.extend(bytes(max(0, gap)))
        image.extend(data)
    return image


def find_macsbug_names(image):
    """Map the offset of each ``NV`` word to the symbol name that follows it.

    Every even offset holding the bytes ``NV`` (link a6, starting a compiled
    function) is taken as a function start.  The words after it are scanned
    until the next ``NV``; the first byte at an even offset whose high nibble
    is 8 is treated as a length byte (low nibble = length) followed by the
    name.  The candidate is rejected, and that function left unnamed, when
    the length is below 2, the name is truncated by the end of the image, it
    contains characters outside letters/digits/underscore, or an even-length
    name is not followed by a zero pad byte.
    """
    names = {}
    for start in range(0, len(image), 2):
        if image[start:start+2] != b'NV':
            continue
        for pos in range(start + 2, len(image), 2):
            if image[pos:pos+2] == b'NV':
                break
            if image[pos] & 0xF0 != 0x80:
                continue
            strlen = image[pos] & 0x0F
            if strlen < 2:
                break
            raw = image[pos+1:pos+1+strlen]
            if len(raw) < strlen:
                break
            text = raw.decode('latin-1')
            if not all(ch in NAME_CHARS for ch in text):
                break
            # An empty slice (name ends exactly at the end of the image)
            # also passes this test.
            if strlen % 2 == 0 and image[pos+1+strlen:pos+2+strlen] not in b'\0':
                break
            names[start] = text
            break
    return names


def segment_label(segnames, segnum):
    """Return the name for segment *segnum*, or ``seg_<hex>`` if it has none."""
    return segnames.get(segnum, f'seg_{segnum:X}')


def write_ida_script(out, image, jt_resource, segnames, rid=0):
    """Write MakeFunction/MakeName/MakeCode/op_man lines for *image* to *out*.

    Args:
        out: text file object receiving one statement pair per line.
        image: flat image as built by ``flatten_segments``.
        jt_resource: bytes-like jump table resource.  The jump table size and
            its A5 offset are read as two big-endian longs at offset 8, and
            8-byte entries start at offset 16.
        segnames: dict mapping segment index (resource ID minus *rid*) to a
            display name; missing segments fall back to ``seg_<hex>``.
        rid: resource ID of the jump table resource.
    """
    namedict = find_macsbug_names(image)

    jt_size, a5_offset_of_jt = struct.unpack_from('>LL', jt_resource, 8)

    for jt_ofs in range(16, 16 + jt_size, 8):
        ofs, be_3f3c, segnum, be_a9f0 = struct.unpack_from('>HHHH', jt_resource, jt_ofs)
        # Stop at the first entry that lacks the expected 3F3C / A9F0 words.
        if be_3f3c != 0x3f3c or be_a9f0 != 0xa9f0:
            break
        # NOTE(review): presumably skips a 4-byte header at the start of each
        # segment, so that ofs becomes an offset from the segment's first byte
        # - confirm against the CODE resource format.
        ofs += 4

        seg_index = segnum - rid
        bigboy_ofs = seg_index * 0x10000 + ofs
        a5_ofs = jt_ofs - 16 + a5_offset_of_jt + 2

        cool_name = f'{segment_label(segnames, seg_index)}_'
        if bigboy_ofs in namedict:
            cool_name += namedict.pop(bigboy_ofs)
        else:
            cool_name += f'{bigboy_ofs:X}'

        print(f'MakeFunction(0x{bigboy_ofs:X}); MakeName(0x{bigboy_ofs:X}, "{cool_name}")', file=out)

        # Label every word-aligned use of this entry's A5 offset that is
        # directly preceded by one of the two opcode words below.
        call_to_me = struct.pack('>H', a5_ofs)
        bb_i = image.find(call_to_me)
        while bb_i != -1:
            if bb_i % 2 == 0 and image[bb_i-2:bb_i] in (b'\x4e\xad', b'\x48\x6d'):  # jsr/pea
                print(f'MakeCode(0x{bb_i-2:X}); op_man(0x{bb_i-2:X}, 0, "{cool_name}")', file=out)
            bb_i = image.find(call_to_me, bb_i + 1)

    # Named functions that no jump table entry pointed at.
    for bigboy_ofs, name in sorted(namedict.items()):
        cool_name = f'{segment_label(segnames, bigboy_ofs >> 16)}_{name}'
        print(f'MakeFunction(0x{bigboy_ofs:X}); MakeName(0x{bigboy_ofs:X}, "{cool_name}")', file=out)


# Guarded so that the helpers above can be imported without running the script.
if __name__ == '__main__':
    resources = [r for r in resources if r.type == args.rtype and r.id >= args.rid]
    resources.sort(key=lambda r: r.id)

    if not resources or resources[0].id != args.rid:
        sys.exit('Resource %s not found in %s' % (args.rt, args.src))

    jt_resource, *other_resources = resources

    # Place each segment by its resource ID, not by its position in the list:
    # the jump table addresses segments by ID, so a gap in the IDs must leave
    # a gap in the image.
    bigboy = flatten_segments((r.id - args.rid, r) for r in resources)

    with open(args.dest, 'wb') as f:
        f.write(bigboy)

    # Make some neat names for the segments...
    segnames = {}
    for r in other_resources:
        fallback = f'seg_{r.id-args.rid:X}'
        if r.name:
            cleaned = ''.join(c for c in r.name if c in (string.ascii_letters + string.digits))
            segnames[r.id - args.rid] = cleaned or fallback
        else:
            segnames[r.id - args.rid] = fallback

    with open(args.dest + '.py', 'w') as idascript:
        write_ida_script(idascript, bigboy, jt_resource, segnames, args.rid)