diff --git a/cfmtool.py b/cfmtool.py new file mode 100755 index 0000000..3546ea7 --- /dev/null +++ b/cfmtool.py @@ -0,0 +1,392 @@ +#!/usr/bin/env python3 + +# This is a single-file library for manipulating Preferred Executable Format files +# A command line-interface is available (just call cfmtool.py --help) + + +import argparse +import datetime +import struct +import os +import textwrap +from os import path +from ast import literal_eval as eval + + +def dump(from_binary_or_path, to_path): + """Dump a CFM/PEF binary to a directory + + Command line usage: cfmtool.py BINARY DIRECTORY + + The first argument can be a bytes-like object, or a path to read from. + """ + + def write_txt(name, text): + with open(path.join(to_path, name + '.txt'), 'w') as f: + f.write(text + '\n') + + try: + bytes(from_binary_or_path) + from_binary = from_binary_or_path + except TypeError: + with open(from_binary_or_path, 'rb') as f: + from_binary = f.read() + + if not from_binary.startswith(b'J o y ! peffpwpc\x00\x00\x00\x01'.replace(b' ', b'')): + raise ValueError('not a pef (PowerPC, v1)') + + os.makedirs(to_path, exist_ok=True) + + dateTimeStamp, *versions = struct.unpack_from('>4L', from_binary, 16) + + write_txt('date', format_mac_date(dateTimeStamp)) + write_txt('version', _fmt_dict(zip(('oldDefVersion', 'oldImpVersion', 'currentVersion'), versions))) + + section_list = [] + section_count, = struct.unpack_from('>H', from_binary, 32) + offset = 40 + for i in range(section_count): + sec = dict(zip( + ('name', 'defaultAddress', 'totalLength', 'unpackedLength', 'containerLength', + 'containerOffset', 'sectionKind', 'shareKind', 'alignment'), + struct.unpack_from('>lLLLLLbbb', from_binary, offset))) + + section_list.append(sec) + + offset += 28 + + # Now offset points to the nasty table of section names + + for i, sec in enumerate(section_list): + if sec['name'] > 0: + name_offset = offset + for j in range(sec['name']): name_offset = from_binary.index(b'\0', name_offset) + 1 + sec['name'] = from_binary[name_offset:from_binary.index(b'\0', name_offset)].decode('mac_roman') + else: + sec['name'] = '' + + for i, sec in enumerate(section_list): + sec['sectionKind'] = ('code', 'data', 'pidata', 'rodata', 'loader', + 'debug', 'codedata', 'exception', 'traceback')[sec['sectionKind']] + + sec['shareKind'] = ('', 'process', '', '', 'global', 'protected')[sec['shareKind']] + + # What to call the final file... + used_basenames = [] + for i, sec in enumerate(section_list): + basename = sec['sectionKind'] + used_basenames.append(basename) + if used_basenames.count(basename) > 1: + basename += '-%d' % used_basenames.count(basename) + + sec['filename'] = basename + + # Now the conversion of sec keys to their readable form is complete + + # Are the damn sections ordered the wrong way? + sorted_section_list = sorted(section_list, key=lambda sec: sec['containerOffset']) + if sorted_section_list != section_list: + for i, sec in enumerate(sorted_section_list): + sec['_hackPackOrder'] = i + + should_end = sorted_section_list[-1]['containerOffset'] + sorted_section_list[-1]['containerLength'] + if should_end < len(from_binary): + sorted_section_list[-1]['_hackPostAlign'] = _possible_intended_alignments(len(from_binary))[-1] + + for i, sec in enumerate(section_list): + raw = from_binary[sec['containerOffset']:sec['containerOffset']+sec['containerLength']] + + possible_aligns = _possible_intended_alignments(sec['containerOffset']) + if possible_aligns[-1] > _sec_kind_min_align(sec['sectionKind']): + sec['_hackUnexpectedAlign'] = possible_aligns[-1] + + # Do we need to keep the packed data around? + unpacked = packed = raw + + if sec['sectionKind'] == 'pidata': + packed = raw + unpacked = unpack_pidata(raw) + else: + packed = None + unpacked = raw + + if unpacked.endswith(b'\0'): + sec['_hackExplicitTrailingZeros'] = len(unpacked) - len(unpacked.rstrip(b'\0')) + + if sec['unpackedLength']: + zeropad = sec['totalLength'] - len(unpacked); unpacked += bytes(zeropad) + + with open(path.join(to_path, sec['filename']), 'wb') as f: f.write(unpacked) + + if packed is not None: + with open(path.join(to_path, 'packed-' + sec['filename']), 'wb') as f: f.write(packed) + + del sec['totalLength'] + del sec['unpackedLength'] + del sec['containerLength'] + del sec['containerOffset'] + + write_txt('sections', _fmt_list(_fmt_dict(d) for d in section_list)) + + +def build(from_path, to_path=None): + """Rebuild a directory into a CFM/PEF binary + + Command line usage: cfmtool.py DIRECTORY BINARY + + If a second argument is supplied, the result will be written to that path + instead of being returned as a bytes object. + """ + + def read_txt(name): + with open(path.join(from_path, name + '.txt')) as f: + return f.read() + + try: + dateTimeStamp = parse_mac_date(read_txt('date')) + except: + raise + dateTimeStamp = 0 + + try: + versions = eval(read_txt('version')) + versions = (versions['oldDefVersion'], versions['oldImpVersion'], versions['currentVersion']) + except: + raise + versions = (0, 0, 0) + + section_list = eval(read_txt('sections')) + + # Hit the ground running + pef = bytearray(b'J o y ! peffpwpc\x00\x00\x00\x01'.replace(b' ', b'')) + + pef.extend(struct.pack('>4L', dateTimeStamp, *versions)) # leaves us at offset 0x20 + instSectionCount = len([sec for sec in section_list if _sec_kind_is_instantiated(sec['sectionKind'])]) + pef.extend(struct.pack('>HHL', len(section_list), instSectionCount, 0)) # leaves us at offset 0x28, ready for the sections + + # Pad the section headers out with zeroes, and fill in a bit later + offset = 40 + for sec in section_list: + sec['_hack_header_offset'] = offset + offset += 28 + pef.extend(bytes(offset - len(pef))) + + # Now do the stupid section name table (yuck) + namecnt = 0 + for sec in section_list: + if sec['name']: + pef.extend(sec['name'].encode('mac_roman') + b'\0') + sec['name'] = namecnt + namecnt += 1 + else: + sec['name'] = -1 + + # Stable sort, so won't do anything if unnecessary + section_list.sort(key=lambda sec: sec.get('_hackPackOrder', 0)) + + # Now put in the section data (easier said than done!) + for sec in section_list: + with open(path.join(from_path, sec['filename']), 'rb') as f: + data_total = f.read() + + data_packed = data_inited = _strip_zeroes_leaving_some(data_total, sec.get('_hackExplicitTrailingZeros', 0)) + + # Special case the damned pidata + if sec['sectionKind'] == 'pidata': + with open(path.join(from_path, 'packed-' + sec['filename']), 'rb') as f: + data_packed = f.read() + data_inited = unpack_pidata(data_packed) + + # Check that we got that right (we cannot pack the data ourselves) + if not data_total.startswith(data_inited) or any(data_total[len(data_inited):]): + data_packed = data_inited = _strip_zeroes_leaving_some(data_total, 0) + sec['sectionKind'] = 'data' + + align_now = max(_sec_kind_min_align(sec['sectionKind']), sec.get('_hackUnexpectedAlign', 1)) + + while len(pef) % align_now != 0: pef.append(0) + + struct.pack_into('>l5L3B', pef, sec['_hack_header_offset'], + sec['name'], + sec['defaultAddress'], + len(data_total) if _sec_kind_is_instantiated(sec['sectionKind']) else 0, + len(data_inited) if _sec_kind_is_instantiated(sec['sectionKind']) else 0, + len(data_packed), + len(pef), + ('code', 'data', 'pidata', 'rodata', 'loader', + 'debug', 'codedata', 'exception', 'traceback').index(sec['sectionKind']), + ('', 'process', '', '', 'global', 'protected').index(sec['shareKind']), + sec['alignment'], + ) + + pef.extend(data_packed) + + post_align = max(sec.get('_hackPostAlign', 1) for sec in section_list) + while len(pef) % post_align != 0: pef.append(0) + + if to_path is None: + return bytes(pef) + else: + with open(to_path, 'wb') as f: + f.write(pef) + + +def unpack_pidata(packed): + """Unpack pattern-initialized (compressed) data + """ + + def pullarg(from_iter): + arg = 0 + for i in range(4): + cont = next(from_iter) + arg <<= 7 + arg |= cont & 0x7f + if not (cont & 0x80): break + else: + raise ValueError('arg spread over too many bytes') + return arg + + packed = iter(packed) + unpacked = bytearray() + + for b in packed: + opcode = b >> 5 + arg = b & 0b11111 or pullarg(packed) + + if opcode == 0b000: # zero + count = arg + unpacked.extend(b'\0' * count) + + elif opcode == 0b001: # blockCopy + blockSize = arg + for i in range(blockSize): + unpacked.append(next(packed)) + + elif opcode == 0b010: # repeatedBlock + blockSize = arg + repeatCount = pullarg(packed) + 1 + rawData = bytes(next(packed) for n in range(blockSize)) + for n in range(repeatCount): + unpacked.extend(rawData) + + elif opcode == 0b011 or opcode == 0b100: # interleaveRepeatBlockWithBlockCopy + commonSize = arg # or interleaveRepeatBlockWithZero + customSize = pullarg(packed) + repeatCount = pullarg(packed) + + if opcode == 0b011: + commonData = bytes(next(packed) for n in range(commonSize)) + else: + commonData = b'\0' * commonSize + + for i in range(repeatCount): + unpacked.extend(commonData) + for j in range(customSize): + unpacked.append(next(packed)) + unpacked.extend(commonData) + + else: + raise ValueError('unknown pidata opcode/arg %s/%d' % (bin(opcode), arg)) + return + + return bytes(unpacked) + + +def format_mac_date(srcint): + """Render a 32-bit MacOS date to ISO 8601 format + """ + + dt = datetime.datetime(1904, 1, 1) + datetime.timedelta(seconds=srcint) + return dt.isoformat().replace('T', ' ') + + +def parse_mac_date(x): + """Pack an ISO 8601 date into a 32-bit MacOS date + """ + + epoch = '19040101000000' # ISO8601 with the non-numerics stripped + + # strip non-numerics and pad out using the epoch (cheeky) + stripped = ''.join(c for c in x if c in '0123456789') + stripped = stripped[:len(epoch)] + epoch[len(stripped):] + + tformat = '%Y%m%d%H%M%S' + + delta = datetime.datetime.strptime(stripped, tformat) - datetime.datetime.strptime(epoch, tformat) + delta = int(delta.total_seconds()) + + delta = min(delta, 0xFFFFFFFF) + delta = max(delta, 0) + + return delta + + +def _fmt_dict(tuple_iterator): + try: + tuple_iterator = tuple_iterator.items() + except AttributeError: + pass + + accum = '{\n' + for k, v in tuple_iterator: + if k == 'defaultAddress': + v = ('0x%08x' % v) + elif k in '_hackUnexpectedAlign _hackPostAlign': + v = hex(v) + else: + v = repr(v) + accum += textwrap.indent('%r: %s,' % (k, v), ' ') + '\n' + accum += '}' + return accum + + +def _fmt_list(iterator): + accum = '[\n' + for el in iterator: + accum += textwrap.indent(el + ',', ' ') + '\n' + accum += ']' + return accum + + +def _sec_kind_is_instantiated(sec_kind): + return sec_kind not in ('loader', 'debug', 'exception', 'traceback') + + +def _strip_zeroes_leaving_some(data, leaving): + stripped = data.rstrip(b'\0') + + while len(stripped) < len(data) and data[len(stripped)] == 0: + stripped += b'\0' + + return stripped + + +def _possible_intended_alignments(offset): + possible = list(1 << n for n in range(32)) + + possible = [p for p in possible if offset % p == 0] + + return possible + + +def _sec_kind_min_align(sec_kind): + if sec_kind in ('code', 'data', 'rodata', 'codedata'): + return 16 + else: + return 4 + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=''' + Convert between a Code Fragment Manager binary and an easily-edited dump directory. + ''') + + parser.add_argument('src', metavar='SOURCE', action='store', help='Binary or directory') + parser.add_argument('dest', metavar='DEST', action='store', help='Directory or binary') + + args = parser.parse_args() + + if path.isdir(args.src): + build(args.src, args.dest) + else: + dump(args.src, args.dest)