mirror of
https://github.com/elliotnunn/tbxi-patches.git
synced 2024-06-09 01:29:30 +00:00
e8a3012b41
Reloc table opcodes are used to filter out coincidental TVector-like things. This problem was identified while trying to debug all the XTOC glue that didn't make sense. We also ignore this glue.
1030 lines
39 KiB
Python
Executable File
1030 lines
39 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
# Copyright (c) 2019 Elliot Nunn
|
|
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
|
|
# The above copyright notice and this permission notice shall be included in all
|
|
# copies or substantial portions of the Software.
|
|
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
# SOFTWARE.
|
|
|
|
|
|
# This is a single-file library for manipulating Preferred Executable Format files
|
|
# A command line-interface is available (just call cfmtool.py --help)
|
|
|
|
|
|
import builtins
|
|
import argparse
|
|
import datetime
|
|
import struct
|
|
import os
|
|
import re
|
|
import textwrap
|
|
import functools
|
|
from os import path
|
|
from ast import literal_eval as eval
|
|
|
|
|
|
def dump(from_binary_or_path, to_path):
    """Dump a CFM/PEF binary to a directory

    Command line usage: cfmtool.py BINARY DIRECTORY

    The first argument can be a bytes-like object, or a path to read from.
    """

    # Accept either bytes-like data or a filesystem path to read from
    try:
        bytes(from_binary_or_path)
        from_binary = from_binary_or_path
    except TypeError:
        with open(from_binary_or_path, 'rb') as f:
            from_binary = f.read()

    # Container magic: 'Joy!peff' + 'pwpc' + format version 1
    if not from_binary.startswith(b'J o y ! peffpwpc\x00\x00\x00\x01'.replace(b' ', b'')):
        raise ValueError('not a pef (PowerPC, v1)')

    os.makedirs(to_path, exist_ok=True)

    # Container header at offset 16: timestamp then three version fields
    dateTimeStamp, *versions = struct.unpack_from('>4L', from_binary, 16)

    write_txt(format_mac_date(dateTimeStamp), to_path, 'date.txt')
    write_txt(repr(dict(zip(('oldDefVersion', 'oldImpVersion', 'currentVersion'), versions))), to_path, 'version.txt')

    # 28-byte section headers start at offset 40
    section_list = []
    section_count, = struct.unpack_from('>H', from_binary, 32)
    offset = 40
    for i in range(section_count):
        sec = dict(zip(
            ('name', 'defaultAddress', 'totalLength', 'unpackedLength', 'containerLength',
            'containerOffset', 'sectionKind', 'shareKind', 'alignment'),
            struct.unpack_from('>lLLLLLbbb', from_binary, offset)))

        section_list.append(sec)

        offset += 28

    # Now offset points to the nasty table of section names

    # 'name' starts life as an index into a run of NUL-terminated strings
    # NOTE(review): index 0 is treated as unnamed here, but PEF uses -1 as the
    # "no name" sentinel — confirm whether a name at index 0 can occur
    for i, sec in enumerate(section_list):
        if sec['name'] > 0:
            name_offset = offset
            for j in range(sec['name']): name_offset = from_binary.index(b'\0', name_offset) + 1
            sec['name'] = from_binary[name_offset:from_binary.index(b'\0', name_offset)].decode('mac_roman')
        else:
            sec['name'] = ''

    # Translate numeric kind codes into readable strings
    for i, sec in enumerate(section_list):
        sec['sectionKind'] = ('code', 'data', 'pidata', 'rodata', 'loader',
            'debug', 'codedata', 'exception', 'traceback')[sec['sectionKind']]

        sec['shareKind'] = ('', 'process', '', '', 'global', 'protected')[sec['shareKind']]

    # What to call the final file...
    # (duplicate kinds get a numeric suffix: data, data-2, data-3...)
    used_basenames = []
    for i, sec in enumerate(section_list):
        basename = sec['sectionKind']
        used_basenames.append(basename)
        if used_basenames.count(basename) > 1:
            basename += '-%d' % used_basenames.count(basename)

        sec['filename'] = basename

    # Now the conversion of sec keys to their readable form is complete

    # Are the damn sections ordered the wrong way?
    # If headers are not in container-offset order, record the packing order
    # so that build() can reproduce the original layout exactly
    sorted_section_list = sorted(section_list, key=lambda sec: sec['containerOffset'])
    if sorted_section_list != section_list:
        for i, sec in enumerate(sorted_section_list):
            sec['_hackPackOrder'] = i

    # Record trailing padding after the last section's container data
    should_end = sorted_section_list[-1]['containerOffset'] + sorted_section_list[-1]['containerLength']
    if should_end < len(from_binary):
        sorted_section_list[-1]['_hackPostAlign'] = _possible_intended_alignments(len(from_binary))[-1]

    for i, sec in enumerate(section_list):
        raw = from_binary[sec['containerOffset']:sec['containerOffset']+sec['containerLength']]

        # Remember any alignment stricter than the kind's minimum
        possible_aligns = _possible_intended_alignments(sec['containerOffset'])
        if possible_aligns[-1] > _sec_kind_min_align(sec['sectionKind']):
            sec['_hackUnexpectedAlign'] = possible_aligns[-1]

        # Do we need to keep the packed data around?
        unpacked = packed = raw

        if sec['sectionKind'] == 'pidata':
            packed = raw
            unpacked = unpack_pidata(raw)
        else:
            packed = None
            unpacked = raw

        # Trailing zeros that were explicitly present (not implied zero-fill)
        if unpacked.endswith(b'\0'):
            sec['_hackExplicitTrailingZeros'] = len(unpacked) - len(unpacked.rstrip(b'\0'))

        # Instantiated sections are zero-extended to their full in-memory size
        if sec['unpackedLength']:
            zeropad = sec['totalLength'] - len(unpacked); unpacked += bytes(zeropad)

        write_bin(unpacked, to_path, sec['filename'])

        if packed is not None:
            write_bin(packed, to_path, 'packed-' + sec['filename'])

        # These fields get recalculated by build(), so don't dump them
        del sec['totalLength']
        del sec['unpackedLength']
        del sec['containerLength']
        del sec['containerOffset']

    write_python(section_list, to_path, 'sections.txt')
    dump_lowlevel(to_path)
    dump_highlevel(to_path)
|
|
|
|
def build(from_path, to_path=None):
    """Rebuild a directory into a CFM/PEF binary

    Command line usage: cfmtool.py DIRECTORY BINARY

    If a second argument is supplied, the result will be written to that path
    instead of being returned as a bytes object.
    """

    # Missing or malformed metadata files fall back to harmless defaults.
    # (Previously a stray `raise` preceded each fallback assignment, making
    # the fallback dead code and re-raising the exception instead.)
    try:
        dateTimeStamp = parse_mac_date(read_txt(from_path, 'date.txt'))
    except Exception:
        dateTimeStamp = 0

    try:
        versions = read_python(from_path, 'version.txt')
        versions = (versions['oldDefVersion'], versions['oldImpVersion'], versions['currentVersion'])
    except Exception:
        versions = (0, 0, 0)

    section_list = read_python(from_path, 'sections.txt')

    # Hit the ground running: container magic + architecture + format version
    pef = bytearray(b'J o y ! peffpwpc\x00\x00\x00\x01'.replace(b' ', b''))

    pef.extend(struct.pack('>4L', dateTimeStamp, *versions)) # leaves us at offset 0x20
    instSectionCount = len([sec for sec in section_list if _sec_kind_is_instantiated(sec['sectionKind'])])
    pef.extend(struct.pack('>HHL', len(section_list), instSectionCount, 0)) # leaves us at offset 0x28, ready for the sections

    # Pad the section headers out with zeroes, and fill in a bit later
    offset = 40
    for sec in section_list:
        sec['_hack_header_offset'] = offset
        offset += 28
    pef.extend(bytes(offset - len(pef)))

    # Now do the stupid section name table (yuck)
    # Each named section's 'name' field becomes its index into this table
    namecnt = 0
    for sec in section_list:
        if sec['name']:
            pef.extend(sec['name'].encode('mac_roman') + b'\0')
            sec['name'] = namecnt
            namecnt += 1
        else:
            sec['name'] = -1

    # Stable sort, so won't do anything if unnecessary
    section_list.sort(key=lambda sec: sec.get('_hackPackOrder', 0))

    # Now put in the section data (easier said than done!)
    for sec in section_list:
        with open(path.join(from_path, sec['filename']), 'rb') as f:
            data_total = f.read()

        # Strip implied trailing zeros, keeping any that were explicit
        data_packed = data_inited = _strip_zeroes_leaving_some(data_total, sec.get('_hackExplicitTrailingZeros', 0))

        # Special case the damned pidata
        if sec['sectionKind'] == 'pidata':
            with open(path.join(from_path, 'packed-' + sec['filename']), 'rb') as f:
                data_packed = f.read()
            data_inited = unpack_pidata(data_packed)

            # Check that we got that right (we cannot pack the data ourselves);
            # on mismatch, demote the section to a plain data section
            if not data_total.startswith(data_inited) or any(data_total[len(data_inited):]):
                data_packed = data_inited = _strip_zeroes_leaving_some(data_total, 0)
                sec['sectionKind'] = 'data'

        align_now = max(_sec_kind_min_align(sec['sectionKind']), sec.get('_hackUnexpectedAlign', 1))

        while len(pef) % align_now != 0: pef.append(0)

        # Backfill this section's 28-byte header now that offsets are known
        struct.pack_into('>l5L3B', pef, sec['_hack_header_offset'],
            sec['name'],
            sec['defaultAddress'],
            len(data_total) if _sec_kind_is_instantiated(sec['sectionKind']) else 0,
            len(data_inited) if _sec_kind_is_instantiated(sec['sectionKind']) else 0,
            len(data_packed),
            len(pef),
            ('code', 'data', 'pidata', 'rodata', 'loader',
                'debug', 'codedata', 'exception', 'traceback').index(sec['sectionKind']),
            ('', 'process', '', '', 'global', 'protected').index(sec['shareKind']),
            sec['alignment'],
        )

        pef.extend(data_packed)

    # Reproduce any trailing padding recorded by dump()
    post_align = max(sec.get('_hackPostAlign', 1) for sec in section_list)
    while len(pef) % post_align != 0: pef.append(0)

    if to_path is None:
        return bytes(pef)
    else:
        with open(to_path, 'wb') as f:
            f.write(pef)
|
|
|
|
|
|
def repr(obj):
    """Custom repr to prettyprint the dicts that we use

    Useful if you want to write out your own edited dumps (but not essential)

    Deliberately shadows the builtin; falls through to builtins.repr for
    types it does not special-case.
    """

    if isinstance(obj, list):
        # Lists: one element per line, indented, with trailing commas
        accum = '[\n'
        for el in obj:
            accum += textwrap.indent(repr(el) + ',', '    ') + '\n'
        accum += ']'
        return accum

    elif isinstance(obj, dict):
        # Small "leaf" dicts (imported symbols, reloc entries) go on one line
        if set(obj) == set(('kind', 'weakFlag', 'name')) or 'offset' in obj:
            oneline = True
        else:
            oneline = False

        try:
            obj = obj.items()
        except AttributeError:
            pass

        accum = []
        for k, v in obj:
            # Fields that read better in hex get formatted at conventional widths
            if k == 'defaultAddress':
                v = hex(v, 8)
            elif k.lower().endswith('align'):
                v = hex(v)
            elif k.lower().endswith('offset'):
                v = hex(v, 5)
            elif k in ('usbVendorID', 'usbProductID', 'usbDeviceReleaseNumber', 'usbDeviceProtocol'):
                v = hex(v, 4)
            elif k in ('usbConfigValue', 'usbInterfaceNum', 'usbInterfaceClass', 'usbInterfaceSubClass', 'usbInterfaceProtocol', 'usbDriverClass', 'usbDriverSubClass'):
                v = hex(v, 2)
            else:
                v = repr(v)
            accum.append('%r: %s' % (k, v))

        if oneline:
            return '{' + ', '.join(accum) + '}'
        else:
            return '{\n' + textwrap.indent(''.join(x + ',\n' for x in accum), '    ') + '}'

    elif isinstance(obj, tuple):
        # Tuples like (offset, ...): hex-format a leading int element
        obj = [hex(el) if (i == 0 and isinstance(el, int)) else repr(el) for (i, el) in enumerate(obj)]
        return '(' + ', '.join(obj) + ')'

    else:
        return builtins.repr(obj)
|
|
|
|
|
|
def hex(obj, num_digits=5):
    """Format an int in hex, zero-padded to num_digits significant digits

    The default of 5 digits covers offsets up to a megabyte, plenty.
    Deliberately shadows the builtin (which it delegates to).
    """

    result = builtins.hex(obj)

    # Pad by inserting zeros just after the 'x', preserving any sign prefix
    digits = result.partition('x')[2]
    if len(digits) < num_digits:
        result = result.replace('x', 'x' + '0' * (num_digits - len(digits)))

    return result
|
|
|
|
|
|
def unpack_pidata(packed):
    """Unpack pattern-initialized (compressed) data

    `packed` is a bytes-like pidata stream; returns the expanded bytes.
    Raises ValueError on a malformed stream.

    (A stray bare `return` formerly sat between the opcode dispatch and the
    final `return bytes(unpacked)`; it has been removed so the whole stream
    is always decoded and the result actually returned.)
    """

    def pullarg(from_iter):
        # Variable-length argument: 7 bits per byte, high bit set = continue,
        # at most 4 bytes
        arg = 0
        for i in range(4):
            cont = next(from_iter)
            arg <<= 7
            arg |= cont & 0x7f
            if not (cont & 0x80): break
        else:
            raise ValueError('arg spread over too many bytes')
        return arg

    packed = iter(packed)
    unpacked = bytearray()

    for b in packed:
        # Opcode in the top 3 bits; first arg in the low 5 bits, or (if those
        # are zero) in following variable-length bytes
        opcode = b >> 5
        arg = b & 0b11111 or pullarg(packed)

        if opcode == 0b000: # zero
            count = arg
            unpacked.extend(b'\0' * count)

        elif opcode == 0b001: # blockCopy
            blockSize = arg
            for i in range(blockSize):
                unpacked.append(next(packed))

        elif opcode == 0b010: # repeatedBlock
            blockSize = arg
            repeatCount = pullarg(packed) + 1
            rawData = bytes(next(packed) for n in range(blockSize))
            for n in range(repeatCount):
                unpacked.extend(rawData)

        elif opcode == 0b011 or opcode == 0b100: # interleaveRepeatBlockWithBlockCopy
            commonSize = arg                     # or interleaveRepeatBlockWithZero
            customSize = pullarg(packed)
            repeatCount = pullarg(packed)

            if opcode == 0b011:
                commonData = bytes(next(packed) for n in range(commonSize))
            else:
                commonData = b'\0' * commonSize

            # common, custom, common, custom, ..., common
            for i in range(repeatCount):
                unpacked.extend(commonData)
                for j in range(customSize):
                    unpacked.append(next(packed))
            unpacked.extend(commonData)

        else:
            raise ValueError('unknown pidata opcode/arg %s/%d' % (bin(opcode), arg))

    return bytes(unpacked)
|
|
|
|
|
|
def dump_lowlevel(basepath):
    """Dump from the loader section: exports.txt, imports.txt, mainvectors.txt, relocations.txt

    Reads the already-dumped sections from basepath and writes the decoded
    loader-section structures into basepath/ldump/.
    """

    section_list = read_python(basepath, 'sections.txt')

    # Find the loader section, which indexes everything else in the container
    for sec in section_list:
        if sec['sectionKind'] == 'loader':
            loader = read_bin(basepath, sec['filename'])
            break
    else:
        return # no loader section

    # Loader header fields (after the 24-byte main/init/term vector area)
    importedLibraryCount, totalImportedSymbolCount, relocSectionCount, relocInstrOffset, loaderStringsOffset, \
        exportHashOffset, exportHashTablePower, exportedSymbolCount = struct.unpack_from('>8L', loader, 24)

    def get_mainvectors():
        # main/init/term vectors sit at the start of the loader section;
        # a section index of -1 means "not present"
        cardinals = {}
        for ofs, knd in [(0, 'main'), (8, 'init'), (16, 'term')]:
            vec_sec_idx, vec_offset = struct.unpack_from('>lL', loader, ofs)
            if vec_sec_idx != -1:
                cardinals[knd] = dict(section=section_list[vec_sec_idx]['filename'], offset=vec_offset)
        return cardinals

    def get_name(offset):
        # NUL-terminated string out of the loader string table
        return loader[loaderStringsOffset+offset:].partition(b'\0')[0].decode('mac_roman')

    def get_imported_symbol(idx):
        # Imported-symbol entry: class nibble + weak flag in the top byte,
        # name offset in the low 24 bits
        ofs = 56 + 24 * importedLibraryCount + 4 * idx
        wideval, = struct.unpack_from('>L', loader, ofs)
        return dict(
            kind = ('code', 'data', 'tvector', 'toc', 'glue')[(wideval >> 24) & 0xF],
            weakFlag = int(bool(wideval & 0x80000000)),
            name = get_name(wideval & 0xFFFFFF),
        )

    def get_imported_library(idx):
        # 24-byte imported-library entries start at loader offset 56
        ofs = 56 + 24 * idx
        nameOffset, oldImpVersion, currentVersion, importedSymbolCount, \
            firstImportedSymbol, options = struct.unpack_from('>5LB', loader, ofs)

        return dict(
            name = get_name(nameOffset),
            oldImpVersion = oldImpVersion,
            currentVersion = currentVersion,
            specialOrderFlag = int(bool(options & 0x80)),
            weakFlag = int(bool(options & 0x40)),
            symbols = [get_imported_symbol(n) for n in
                range(firstImportedSymbol, firstImportedSymbol + importedSymbolCount)],
        )

    def get_relocations():
        # Execute each section's relocation bytecode, flattening it into a
        # list of {section, offset, to} dicts.  `to` is ('section', name)
        # or ('import', index).
        relocations = []

        for idx in range(relocSectionCount):
            # Relocation header entries follow the imported-symbol table
            ofs = 56 + 24 * importedLibraryCount + 4 * totalImportedSymbolCount + 12 * idx
            sectionIndex, _, relocCount, firstRelocOffset, = struct.unpack_from('>HHLL', loader, ofs)

            sectionIndex = section_list[sectionIndex]['filename']

            # The relocation program: a list of big-endian 16-bit blocks
            data = loader[relocInstrOffset+firstRelocOffset:][:2*relocCount]
            data = [struct.unpack_from('>H', data, i)[0] for i in range(0, len(data), 2)]

            # Blocks already consumed (the Repeat opcodes re-queue a slice of this)
            done = []

            # Relocation machine state
            relocAddress = 0
            importIndex = 0

            # sectionC/sectionD default to the first two instantiated sections
            if len(section_list) >= 1 and _sec_kind_is_instantiated(section_list[0]['sectionKind']):
                sectionC = section_list[0]['filename']

            if len(section_list) >= 2 and _sec_kind_is_instantiated(section_list[1]['sectionKind']):
                sectionD = section_list[1]['filename']

            def nextblock():
                # Pop the next 16-bit block, recording it in `done`
                if not data: return None
                x = data.pop(0)
                done.append(x)
                return x

            for short in iter(nextblock, None):
                #print('%04X codeA=%r dataA=%r rSymI=%d rAddr=%08X' % (short, sectionC, sectionD, importIndex, relocAddress), end=' ')

                if short >> 14 == 0b00: # RelocBySectDWithSkip
                    skipCount = (short >> 6) & 0xFF
                    relocCount = short & 0x3F
                    #print('RelocBySectDWithSkip skipCount=%d relocCount=%d' % (skipCount, relocCount))

                    relocAddress += skipCount * 4
                    for i in range(relocCount):
                        relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionD))); relocAddress += 4

                elif short >> 13 == 0b010: # The Relocate Value Group
                    subopcode = (short >> 9) & 0xF
                    runLength = (short & 0x1FF) + 1

                    if subopcode == 0b0000: # RelocBySectC
                        #print('RelocBySectC runLength=%d' % (runLength))
                        for i in range(runLength):
                            relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionC))); relocAddress += 4

                    elif subopcode == 0b0001: # RelocBySectD
                        #print('RelocBySectD runLength=%d' % (runLength))
                        for i in range(runLength):
                            relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionD))); relocAddress += 4

                    elif subopcode == 0b0010: # RelocTVector12
                        #print('RelocTVector12 runLength=%d' % (runLength))
                        # code ptr + TOC ptr + 4 skipped bytes per TVector;
                        # tag real-looking TVectors for the high-level pass
                        for i in range(runLength):
                            relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionC))); relocAddress += 4
                            relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionD))); relocAddress += 4
                            if 'code' in sectionC and 'data' in sectionD: relocations[-2]['likelytv'] = 1
                            relocAddress += 4

                    elif subopcode == 0b0011: # RelocTVector8
                        #print('RelocTVector8 runLength=%d' % (runLength))
                        for i in range(runLength):
                            relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionC))); relocAddress += 4
                            relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionD))); relocAddress += 4
                            if 'code' in sectionC and 'data' in sectionD: relocations[-2]['likelytv'] = 1

                    elif subopcode == 0b0100: # RelocVTable8
                        #print('RelocVTable8 runLength=%d' % (runLength))
                        for i in range(runLength):
                            relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionD))); relocAddress += 4
                            relocAddress += 4

                    elif subopcode == 0b0101: # RelocImportRun
                        #print('RelocImportRun runLength=%d' % (runLength))
                        for i in range(runLength):
                            relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('import', importIndex))); relocAddress += 4; importIndex += 1

                    else:
                        raise ValueError('bad Relocate Value Group subopcode: %s' % bin(subopcode))

                elif short >> 13 == 0b011: # The Relocate By Index Group
                    subopcode = (short >> 9) & 0xF
                    index = short & 0x1FF

                    if subopcode == 0b0000: # RelocSmByImport
                        #print('RelocSmByImport index=%d' % (index))
                        relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('import', index))); relocAddress += 4; importIndex = index + 1

                    elif subopcode == 0b0001: # RelocSmSetSectC
                        #print('RelocSmSetSectC index=%d' % (index))
                        sectionC = section_list[index]['filename']

                    elif subopcode == 0b0010: # RelocSmSetSectD
                        #print('RelocSmSetSectD index=%d' % (index))
                        sectionD = section_list[index]['filename']

                    elif subopcode == 0b0011: # RelocSmBySection
                        #print('RelocSmBySection index=%d' % (index))
                        relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', index))); relocAddress += 4

                    else:
                        raise ValueError('bad Relocate By Index Group subopcode: %s' % bin(subopcode))

                elif short >> 12 == 0b1000: # RelocIncrPosition
                    offset = (short & 0x0FFF) + 1
                    #print('RelocIncrPosition offset=%d' % (offset))

                    relocAddress += offset

                elif short >> 12 == 0b1001: # RelocSmRepeat
                    blockCount = ((short >> 8) & 0xF) + 1
                    repeatCount = (short & 0xFF) + 1
                    #print('RelocSmRepeat blockCount=%d repeatCount=%d' % (blockCount, repeatCount))

                    # Re-queue the blockCount blocks preceding this repeat
                    # instruction (done[-1] is the repeat opcode itself)
                    data[0:0] = done[-blockCount-1:-1] * repeatCount

                elif short >> 10 == 0b101000: # RelocSetPosition
                    offset = ((short & 0x3FF) << 16) + nextblock()
                    #print('RelocSetPosition offset=%d' % (offset))

                    relocAddress = offset

                elif short >> 10 == 0b101001: # RelocLgByImport
                    index = ((short & 0x3FF) << 16) + nextblock()
                    #print('RelocLgByImport index=%d' % (index))

                    relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('import', index))); relocAddress += 4; importIndex = index + 1

                elif short >> 10 == 0b101100: # RelocLgRepeat
                    blockCount = ((short >> 6) & 0xF) + 1
                    repeatCount = ((short & 0x3F) << 16) + nextblock()
                    #print('RelocLgRepeat blockCount=%d repeatCount=%d' % (blockCount, repeatCount))

                    data[0:0] = done[-blockCount-1:-1] * repeatCount

                elif short >> 10 == 0b101101: # RelocLgSetOrBySection
                    subopcode = (short >> 6) & 0xF
                    index = ((short & 0x3F) << 16) + nextblock()

                    if subopcode == 0b0000: # Same as RelocSmBySection
                        #print('~RelocSmBySection index=%d' % (index))
                        relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', index))); relocAddress += 4

                    elif subopcode == 0b0001: # Same as RelocSmSetSectC
                        #print('~RelocSmSetSectC index=%d' % (index))
                        sectionC = section_list[index]['filename']

                    elif subopcode == 0b0010: # Same as RelocSmSetSectD
                        #print('~RelocSmSetSectD index=%d' % (index))
                        sectionD = section_list[index]['filename']

                    else:
                        raise ValueError('bad RelocLgSetOrBySection subopcode: %s' % bin(subopcode))

                else:
                    raise ValueError('bad relocation opcode: 0x%04x' % short)

        return relocations

    def get_exports():
        # Walk the export hash table (chain counts in the top 14 bits of each
        # slot) to learn how many keys follow, then the key and symbol tables
        ofs = exportHashOffset

        num_keys = 0
        for i in range(2 ** exportHashTablePower):
            htab_entry, = struct.unpack_from('>L', loader, ofs)
            num_keys += htab_entry >> 18
            ofs += 4

        lengths = []
        for i in range(num_keys):
            sym_len, sym_hash = struct.unpack_from('>HH', loader, ofs)
            lengths.append(sym_len)
            ofs += 4

        exports = []
        for sym_len in lengths:
            kind_and_name, sym_offset, sec_idx = struct.unpack_from('>LLh', loader, ofs)
            kind = ('code', 'data', 'tvector', 'toc', 'glue')[kind_and_name >> 24]
            name = loader[loaderStringsOffset+(kind_and_name&0xFFFFFF):][:sym_len].decode('mac_roman')
            # NOTE(review): section_list is indexed before the -2/-3 special
            # cases are tested; a negative sec_idx indexes from the end of the
            # list and the result is discarded — looks fragile, confirm
            sec_name = section_list[sec_idx]['filename']
            if sec_idx == -2:
                # absolute address
                pass
            elif sec_idx == -3:
                # re-export
                pass
            else:
                exports.append(dict(section=sec_name, offset=sym_offset, kind=kind, name=name))
            ofs += 10

        exports.sort(key=lambda dct: tuple(dct.values()))
        return exports

    write_python(get_mainvectors(), basepath, 'ldump', 'mainvectors.txt')
    write_python(get_exports(), basepath, 'ldump', 'exports.txt')
    write_python(get_relocations(), basepath, 'ldump', 'relocations.txt')
    write_python([get_imported_library(n) for n in range(importedLibraryCount)],
        basepath, 'ldump', 'imports.txt')
|
|
|
|
|
|
def dump_highlevel(basepath):
    """Create some useful files: glue.txt

    Derives higher-level interpretations (TOC location, code entry points,
    glue, MacsBug names, driver descriptions) from the ldump/ files and
    writes them into basepath/hdump/.
    """

    section_list = read_python(basepath, 'sections.txt')

    # Relocations in lookup-able form
    relocs = read_python(basepath, 'ldump', 'relocations.txt')
    likelytv = set((rl['section'], rl['offset']) for rl in relocs if rl.get('likelytv', False))
    relocs = {(rl['section'], rl['offset']): rl['to'] for rl in relocs}

    # The base of the TOC is not guaranteed to be the base of the data section... what is the TOC of our exported funcs?
    tvectors = [dct for dct in read_python(basepath, 'ldump', 'exports.txt') if dct['kind'] == 'tvector']

    # Failing that, the TOC of our init/main/term funcs
    tvectors.extend(read_python(basepath, 'ldump', 'mainvectors.txt').values())

    tvectors = [(tv['section'], tv['offset']) for tv in tvectors]
    table_of_contents = {}
    for section, offset in tvectors: # (section, offset) tuple
        # A TVector is (code pointer, TOC pointer); the relocation on the
        # second word tells us which section the TOC lives in
        reloc_kind, toc_section = relocs.get((section, offset + 4), (None, None))
        if reloc_kind == 'section':
            secdata = read_bin(basepath, section)
            toc_offset, = struct.unpack_from('>L', secdata, offset + 4)

            table_of_contents = dict(section=toc_section, offset=toc_offset)
            break


    # When we export even a single TVector, the TOC can be easily found as
    # above. But some fragments, e.g. native sifters (nifts) and some USB
    # code, only export some sort of dispatch table in which TVector pointers
    # are difficult to identify. So we scan the entire relocation table to
    # find things that look like TVectors, then try to identify a consensus
    # among the real-looking TVectors.
    if not table_of_contents:
        guesses = []
        for (reloc_sec, reloc_offset), (reloc_kind, reloc_targ_section) in relocs.items():
            if 'data' in reloc_sec and reloc_kind == 'section' and 'code' in reloc_targ_section and (reloc_sec, reloc_offset) in likelytv:
                toc_reloc_kind, toc_reloc_targ_section = relocs.get((reloc_sec, reloc_offset+4), (None, None))
                if toc_reloc_kind == 'section' and 'data' in toc_reloc_targ_section:
                    secdata = read_bin(basepath, reloc_sec)
                    toc_offset, = struct.unpack_from('>L', secdata, reloc_offset + 4)
                    guesses.append(dict(section=toc_reloc_targ_section, offset=toc_offset))

        # Accept a guess that at least half of the candidates agree on
        for x in guesses:
            if guesses.count(x) >= len(guesses)//2:
                table_of_contents = dict(x)
                break

    # Somehow we got the table of contents
    if table_of_contents:
        write_python(table_of_contents, basepath, 'hdump', 'table-of-contents.txt')


    # Exports!
    # Resolve each exported TVector to the code location it points at
    exports = read_python(basepath, 'ldump', 'exports.txt')
    codelocs_exported = []
    # read_bin = functools.lru_cache(read_bin)

    for exp in exports:
        if exp['kind'] == 'tvector':
            reloc_kind, reloc_targ_section = relocs.get((exp['section'], exp['offset']), (None, None))
            if reloc_kind == 'section' and 'code' in reloc_targ_section:
                secdata = read_bin(basepath, exp['section'])
                code_offset, = struct.unpack_from('>L', secdata, exp['offset'])
                codelocs_exported.append(dict(section=reloc_targ_section, offset=code_offset, function=exp['name']))

    codelocs_exported.sort(key=lambda dct: tuple(dct.values()))
    write_python(codelocs_exported, basepath, 'hdump', 'codelocs-exported.txt')


    # Init, term and main functions
    codelocs_main = []
    for kind, dct in read_python(basepath, 'ldump', 'mainvectors.txt').items():
        reloc_kind, reloc_targ_section = relocs.get((dct['section'], dct['offset']), (None, None))
        if reloc_kind == 'section' and 'code' in reloc_targ_section:
            secdata = read_bin(basepath, dct['section'])
            code_offset, = struct.unpack_from('>L', secdata, dct['offset'])
            codelocs_main.append(dict(section=reloc_targ_section, offset=code_offset, function=kind))
    codelocs_main.sort(key=lambda dct: tuple(dct.values()))
    write_python(codelocs_main, basepath, 'hdump', 'codelocs-main.txt')


    # Cross-toc glue
    codelocs_xtocglue = []

    if table_of_contents: # we might not have one if we export no functions!
        imports = read_python(basepath, 'ldump', 'imports.txt')
        imports = [sym['name'] for lib in imports for sym in lib['symbols']]

        # Map TOC-relative offsets to the imported symbol each slot holds
        toc_imports = {}
        for (reloc_sec, reloc_offset), (reloc_kind, reloc_import_num) in relocs.items():
            if reloc_sec == table_of_contents['section'] and reloc_kind == 'import':
                toc_imports[reloc_offset - table_of_contents['offset']] = imports[reloc_import_num]

        for sec in section_list:
            if 'code' not in sec['filename']: continue
            code = read_bin(basepath, sec['filename'])

            gluescan = []
            for ofs in range(0, len(code) - 23, 4):
                # Match the canonical 24-byte cross-TOC glue sequence;
                # 0xFF bytes in the template are wildcards (the TOC offset)
                for a, b in zip(code[ofs:ofs+24], b'\x81\x82\xff\xff\x90\x41\x00\x14\x80\x0c\x00\x00\x80\x4c\x00\x04\x7c\x09\x03\xa6\x4e\x80\x04\x20'):
                    if a != b and b != 0xFF: break
                else:
                    toc_ofs, = struct.unpack_from('>h', code, ofs+2)
                    try:
                        codelocs_xtocglue.append(dict(section=sec['filename'], offset=ofs, function=toc_imports[toc_ofs]))
                    except KeyError:
                        # The glue points inwards. This is quite rare, so just ignore it
                        pass


    codelocs_xtocglue.sort(key=lambda dct: tuple(dct.values()))
    write_python(codelocs_xtocglue, basepath, 'hdump', 'codelocs-xtocglue.txt')


    # MacsBug symbol locations
    # Scan for embedded end-of-function name records and work backwards
    codelocs_macsbug = []

    for idx, sec in enumerate(section_list):
        if sec['sectionKind'] != 'code': continue

        code = read_bin(basepath, sec['filename'])

        end_offset = 0
        for i in range(0, len(code) - 17, 4):
            guts = struct.unpack_from('>IIIIxB', code, i)

            # First word of the record must be zero
            if guts[0] != 0: continue

            # Name (length-prefixed) must fit inside the section
            if len(code) < i + 18 + guts[-1]: continue
            name = code[i + 18:][:guts[-1]]

            # Claimed code length must not overlap the previous function,
            # and must be 4-byte aligned
            if i - guts[3] < end_offset: continue
            if guts[3] % 4 != 0: continue

            # Name must be a plausible identifier
            if not re.match(rb'^\w+$', name): continue

            end_offset = i + 18 # whatever

            # now interpret properly
            code_ofs = i - guts[3]
            code_len = guts[3]

            codelocs_macsbug.append(dict(section=sec['filename'], offset=code_ofs, function=name.decode('ascii')))

    codelocs_macsbug.sort(key=lambda dct: tuple(dct.values()))
    write_python(codelocs_macsbug, basepath, 'hdump', 'codelocs-macsbug.txt')


    # Driver description
    for exp in exports:
        if exp['kind'] == 'data' and exp['name'] == 'TheDriverDescription':
            secdata = read_bin(basepath, exp['section'])
            ofs = exp['offset']

            # DriverDescription header: signature, version, type, runtime info
            desc = list(struct.unpack_from('>4s L 32s L L 32s 32x L', secdata, ofs))

            known_bits = {
                0x1: 'kDriverIsLoadedUponDiscovery',
                0x2: 'kDriverIsOpenedUponLoad',
                0x4: 'kDriverIsUnderExpertControl',
                0x8: 'kDriverIsConcurrent',
                0x10: 'kDriverQueuesIOPB',
                0x20: 'kDriverIsLoadedAtBoot',
                0x40: 'kDriverIsForVirtualDevice',
            }

            # Render the runtime-options bitmask symbolically
            bits = []
            for i in range(32):
                if desc[4] & (1 << i):
                    bits.append(known_bits.get(1 << i, hex(1 << i)))
            bits = '|'.join(bits) or '0'

            # 12-byte service records follow the fixed header
            ofs += 0x74
            services = []
            for i in range(desc[6]): # nServices
                svc = struct.unpack_from('>4s 4s L', secdata, ofs)
                services.append({
                    'serviceCategory': svc[0].decode('mac_roman'),
                    'serviceType': svc[1].decode('mac_roman'),
                    'serviceVersion': parse_mac_version(svc[2]),
                })
                ofs += 12

            desc = {
                'driverDescSignature': desc[0].decode('mac_roman'),
                'driverDescVersion': desc[1],
                'driverType': {
                    'nameInfoStr': pstring_or_cstring(desc[2]).decode('mac_roman'),
                    'version': parse_mac_version(desc[3]),
                },
                'driverOSRuntimeInfo': {
                    'driverRuntime': bits,
                    'driverName': pstring_or_cstring(desc[5]).decode('mac_roman'),
                },
                'driverServices': services,
            }

            write_python(desc, basepath, 'hdump', 'driver-description.txt')
            break


    # USB driver description
    for exp in exports:
        if exp['kind'] == 'data' and exp['name'] == 'TheUSBDriverDescription':
            secdata = read_bin(basepath, exp['section'])
            ofs = exp['offset']

            desc = list(struct.unpack_from('>4sL HHHH BBBBBx 32sBBL L', secdata, ofs))

            known_bits = {
                0x1: 'kUSBDoNotMatchGenericDevice',
                0x2: 'kUSBDoNotMatchInterface',
                0x4: 'kUSBProtocolMustMatch',
                0x8: 'kUSBInterfaceMatchOnly',
            }

            # Render the loading-options bitmask symbolically
            bits = []
            for i in range(32):
                if desc[15] & (1 << i):
                    bits.append(known_bits.get(1 << i, hex(1 << i)))
            bits = '|'.join(bits) or '0'

            desc = {
                'usbDriverDescSignature': desc[0].decode('mac_roman'),
                'usbDriverDescVersion': desc[1],
                'usbDeviceInfo': {
                    'usbVendorID': desc[2],
                    'usbProductID': desc[3],
                    'usbDeviceReleaseNumber': desc[4],
                    'usbDeviceProtocol': desc[5],
                },
                'usbInterfaceInfo': {
                    'usbConfigValue': desc[6],
                    'usbInterfaceNum': desc[7],
                    'usbInterfaceClass': desc[8],
                    'usbInterfaceSubClass': desc[9],
                    'usbInterfaceProtocol': desc[10],
                },
                'usbDriverType': {
                    'nameInfoStr': pstring_or_cstring(desc[11]).decode('mac_roman'),
                    'usbDriverClass': desc[12],
                    'usbDriverSubClass': desc[13],
                    'usbDriverVersion': parse_mac_version(desc[14]),
                },
                'usbDriverLoadingOptions': bits,
            }

            write_python(desc, basepath, 'hdump', 'usb-driver-description.txt')
            break
|
|
|
|
|
|
def format_mac_date(srcint):
    """Render a 32-bit MacOS date (seconds since 1904-01-01) to ISO 8601 format
    """

    mac_epoch = datetime.datetime(1904, 1, 1)
    moment = mac_epoch + datetime.timedelta(seconds=srcint)
    # Space separator instead of the default 'T'
    return moment.isoformat(sep=' ')
|
|
|
|
|
|
def parse_mac_date(x):
    """Pack an ISO 8601 date into a 32-bit MacOS date (seconds since 1904-01-01)

    Missing trailing fields default to the epoch's; the result is clamped
    into the unsigned 32-bit range.
    """

    epoch = '19040101000000' # ISO8601 with the non-numerics stripped

    # strip non-numerics and pad out using the epoch (cheeky)
    digits = ''.join(filter('0123456789'.__contains__, x))
    digits = (digits + epoch[len(digits):])[:len(epoch)]

    tformat = '%Y%m%d%H%M%S'

    seconds = int((datetime.datetime.strptime(digits, tformat)
                   - datetime.datetime.strptime(epoch, tformat)).total_seconds())

    # Clamp into the representable range
    return max(0, min(seconds, 0xFFFFFFFF))
|
|
|
|
|
|
def parse_mac_version(num):
    """Decode a 32-bit Mac OS NumVersion into a readable string (e.g. '1.2.3b1')

    Byte layout (big-endian): major (BCD), minor/bugfix (one BCD nibble each),
    release stage code, non-release revision.
    """

    maj, minbug, stage, unreleased = num.to_bytes(4, byteorder='big')

    maj = '%x' % maj
    minor, bugfix = '%02x' % minbug

    # Stage codes: final/beta/alpha/development; anything else is unknown
    stage = {0x80: 'f', 0x60: 'b', 0x40: 'a', 0x20: 'd'}.get(stage, '?')

    unreleased = '%d' % unreleased

    vers = maj + '.' + minor

    # The bugfix digit is conventionally omitted when zero
    if bugfix != '0':
        vers += '.' + bugfix

    # Final, revision 0 is the quiet default; anything else is spelled out
    if (stage, unreleased) != ('f', '0'):
        vers += stage + unreleased

    return vers
|
|
|
|
|
|
def pstring_or_cstring(s):
    """Interpret a fixed-size buffer as either a Pascal or a C string

    Prefers the Pascal reading; falls back to the C reading when the Pascal
    length byte overruns the buffer or the Pascal body contains a NUL.
    """

    pascal_len = s[0]
    pascal_body = s[1:1 + pascal_len]
    c_body = s.rstrip(b'\0')

    looks_pascal = b'\0' not in pascal_body and pascal_len + 1 <= len(s)
    return pascal_body if looks_pascal else c_body
|
|
|
|
|
|
def _sec_kind_is_instantiated(sec_kind):
|
|
return sec_kind not in ('loader', 'debug', 'exception', 'traceback')
|
|
|
|
|
|
def _strip_zeroes_leaving_some(data, leaving):
|
|
stripped = data.rstrip(b'\0')
|
|
|
|
while len(stripped) < len(data) and data[len(stripped)] == 0:
|
|
stripped += b'\0'
|
|
|
|
return stripped
|
|
|
|
|
|
def _possible_intended_alignments(offset):
|
|
possible = list(1 << n for n in range(32))
|
|
|
|
possible = [p for p in possible if offset % p == 0]
|
|
|
|
return possible
|
|
|
|
|
|
def _sec_kind_min_align(sec_kind):
|
|
if sec_kind in ('code', 'data', 'rodata', 'codedata'):
|
|
return 16
|
|
else:
|
|
return 4
|
|
|
|
|
|
def read_python(*path_parts):
    """Read a text file and safely evaluate it as a Python literal

    (`eval` is ast.literal_eval in this module, so this is safe on
    untrusted input.)
    """
    text = read_txt(*path_parts)
    return eval(text)
|
|
|
|
def read_txt(*path_parts):
    """Read a text file (path components are joined), dropping trailing newlines"""
    fullpath = path.join(*path_parts)
    with open(fullpath) as f:
        content = f.read()
    return content.rstrip('\n')
|
|
|
|
def read_bin(*path_parts):
    """Read a whole file (path components are joined) as bytes"""
    fullpath = path.join(*path_parts)
    with open(fullpath, 'rb') as f:
        return f.read()
|
|
|
|
|
|
def write_python(python, *path_parts):
    """Serialize an object with this module's pretty repr and write it as text"""
    pretty = repr(python)
    write_txt(pretty, *path_parts)
|
|
|
|
def write_txt(txt, *path_parts):
    """Write text as UTF-8 with a single trailing newline"""
    encoded = (txt + '\n').encode('utf-8')
    write_bin(encoded, *path_parts)
|
|
|
|
def write_bin(bin, *path_parts):
    """Write bytes to a path (components joined), creating parent directories,
    and touching the file only if its contents would actually change"""

    dest = path.join(*path_parts)
    os.makedirs(path.dirname(dest), exist_ok=True)

    # Write only if changed (slightly hacky): compare size first (cheap),
    # then contents; any failure — including a missing file — falls through
    # to an unconditional write
    try:
        if path.getsize(dest) != len(bin): raise Exception
        with open(dest, 'rb') as f:
            if f.read() != bin: raise Exception
    except:
        with open(dest, 'wb') as f:
            f.write(bin)
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: SOURCE DEST, direction inferred from SOURCE
    parser = argparse.ArgumentParser(description='''
        Convert between a Code Fragment Manager binary and an easily-edited dump directory.
        The extra info (low/high-level) in ldump/ and hdump/ is ignored when rebuilding.
    ''')

    # parser.add_argument('--gather', action='store_true', help='Binary or directory')
    parser.add_argument('src', metavar='SOURCE', action='store', help='Binary or directory')
    parser.add_argument('dest', metavar='DEST', action='store', help='Directory or binary')

    args = parser.parse_args()

    # A directory source means "rebuild into a binary"; a file means "dump"
    if path.isdir(args.src):
        build(args.src, args.dest)
    else:
        dump(args.src, args.dest)