From 9a320c71f7898c74a09fc480cdc15ae9e05da1c2 Mon Sep 17 00:00:00 2001 From: Elliot Nunn Date: Tue, 24 Sep 2019 16:40:26 +0800 Subject: [PATCH] Use the actual names of ndrv parcels, don't guess --- tbxi/parcels_dump.py | 143 +++++++++++-------------------- tbxi/pef_info.py | 194 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 243 insertions(+), 94 deletions(-) create mode 100644 tbxi/pef_info.py diff --git a/tbxi/parcels_dump.py b/tbxi/parcels_dump.py index 4ee6eb2..bef4d0b 100644 --- a/tbxi/parcels_dump.py +++ b/tbxi/parcels_dump.py @@ -3,11 +3,13 @@ import os from os import path from shlex import quote import struct +import hashlib from . import dispatcher from .slow_lzss import decompress from .lowlevel import PrclNodeStruct, PrclChildStruct +from .pef_info import suggest_name HEADER_COMMENT = """ @@ -44,6 +46,10 @@ HEADER_COMMENT = """ """.strip() +def quickhash(foo): + return hashlib.sha512(foo).hexdigest() + + def walk_tree(binary): """Get low level representation of tree @@ -71,83 +77,28 @@ def unique_binary_tpl(prclchild): return (prclchild.ptr, prclchild.packedlen, prclchild.compress) -def suggest_names_to_dump(parent, child, code_name): - # We yield heaps of suggested filenames, and the shortest non-empty unique one gets chosen +def guess_binary_name(parent_struct, child_struct, adjacent_name, data): + # 4 MB ROM-in-RAM image + if parent_struct.ostype == child_struct.ostype == 'rom ': + return 'MacROM' - if parent.ostype == child.ostype == 'rom ': - yield 'MacROM' - return + # Native (PCI) driver with an embedded name and version + ndrv_name = suggest_name(data) + if ndrv_name: return ndrv_name - if 'AAPL,MacOS,PowerPC' in child.name and code_name == 'PowerMgrPlugin': - if parent.a == 'cuda' and parent.b == 'via-cuda': - yield 'PowerMgrPlugin.CUDA' - elif parent.a == 'pmu' and parent.b == 'power-mgt': - yield 'PowerMgrPlugin.PMU' - elif parent.a == 'via-pmu-99' and parent.b == 'power-mgt': - yield 'PowerMgrPlugin.PMU99' - elif parent.a == 'via-pmu-2000' and parent.b == 'power-mgt': - yield 'PowerMgrPlugin.PMU2000' - elif parent.a == 'bal' and parent.b == 'power-mgt': - yield 'PowerMgrPlugin.BlueBox' + # A "special" property called by its actual name + if parent_struct.flags & 0xF0000 or child_struct.flags & 0x80: + return child_struct.name - if ',' not in child.name: # All property names except driver,AAPL,MacOS,pef et al - yield child.name + # A driver property with an adjacent name property + if 'AAPL,MacOS,PowerPC' in child_struct.name and adjacent_name: + return adjacent_name - if child.flags & 0x80: # special-node stuff - yield child.name - yield squish_name(child.name, parent.a, parent.b) + # A lanLib (for netbooting) + if child_struct.name == 'lanLib,AAPL,MacOS,PowerPC': + return parent_struct.a - if 'AAPL,MacOS,PowerPC' in child.name: - if code_name: - yield squish_name(code_name, parent.a, parent.b) - else: - yield squish_name(parent.a, parent.b) - - -def squish_name(*parts): - squeeze = lambda x: x.lower().replace('-', '').replace('_', '') - - parts = list(parts) - keepmask = [True] * len(parts) - - for i in range(len(parts)): - for j in range(len(parts)): - if i == j: continue - if squeeze(parts[j]) == squeeze(parts[i]): - if j > i: keepmask[j] = False - elif squeeze(parts[j]) in squeeze(parts[i]): - keepmask[j] = False - - truelist = [] - for i in range(len(parts)): - if keepmask[i]: truelist.append(parts[i]) - - return '.'.join(truelist) - - -def settle_name_votes(vote_dict): - # Forbid duplicate names - duplicate_names = set(['']) - for ka, va in vote_dict.items(): - for kb, vb in vote_dict.items(): - if ka is kb: continue - - for x in va: - if x in vb: - duplicate_names.add(x) - - # Pick the shortest non-duplicate name - decision = {} - for k, v in vote_dict.items(): - allowed_names = [x for x in v if x not in duplicate_names] - if allowed_names: - decision[k] = min(allowed_names, key=len) - - return decision - - -def is_parcels(binary): - return binary.startswith(b'prcl') + return '' def dump(binary, dest_dir): @@ -159,6 +110,7 @@ def dump(binary, dest_dir): # Decompress everything unpacked_dict = {} + binary_of = lambda child: unpacked_dict[unique_binary_tpl(child)] binary_counts = Counter() for prclnode, children in basic_structure: for prclchild in children: @@ -169,36 +121,39 @@ def dump(binary, dest_dir): unpacked_dict[unique_binary_tpl(prclchild)] = data - # Suggest possible filenames for each blob - name_vote_dict = defaultdict(list) + filename_dict = {} # maps binary data to a filename for prclnode, children in basic_structure: - # is there a prop that gives contextual name information? + # A fragment prop may have an adjacent prop giving it a name, get this ready + adjacent_name = None for check_child in children: if check_child.name == 'code,AAPL,MacOS,name': - code_name = unpacked_dict[unique_binary_tpl(check_child)].rstrip(b'\0').decode('ascii') - break - else: - code_name = None + adjacent_name = unpacked_dict[unique_binary_tpl(check_child)].rstrip(b'\0').decode('ascii') - # now use that name to suggest names for all the children + # Best guess original-ish name for this binary for prclchild in children: - if prclchild.ostype in ('cstr', 'csta'): continue - votes = suggest_names_to_dump(prclnode, prclchild, code_name) - if unpacked_dict[unique_binary_tpl(prclchild)].startswith(b'Joy!'): - votes = [v + '.pef' for v in votes] - name_vote_dict[unique_binary_tpl(prclchild)].extend(votes) + if prclchild.ostype not in ('cstr', 'csta'): + base = guess_binary_name( + parent_struct=prclnode, + child_struct=prclchild, + adjacent_name=adjacent_name, + data=binary_of(prclchild), + ) + filename_dict[binary_of(prclchild)] = base - # Decide on filenames - decision = settle_name_votes(name_vote_dict) + # Post-process to ensure that all names are unique + used_names = Counter(filename_dict.values()) + for binary, filename in list(filename_dict.items()): + if used_names[filename] > 1: + if filename: filename += '-' + filename += quickhash(binary) + filename_dict[binary] = filename + + filename_dict = {b: (fn+'.pef' if b.startswith(b'Joy!peff') else fn) for (b, fn) in filename_dict.items()} # Dump blobs to disk - for tpl, filename in decision.items(): - keep_this = True - - data = unpacked_dict[tpl] + for data, filename in filename_dict.items(): dispatcher.dump(data, path.join(dest_dir, filename)) - # Get printing!!! with open(path.join(dest_dir, 'Parcelfile'), 'w') as f: f.write(HEADER_COMMENT + '\n\n') @@ -217,9 +172,9 @@ def dump(binary, dest_dir): if prclchild.name: line += ' name=%s' % quote(prclchild.name) if prclchild.ostype not in ('cstr', 'csta'): - filename = decision[unique_binary_tpl(prclchild)] + filename = filename_dict[binary_of(prclchild)] if prclchild.compress == 'lzss': filename += '.lzss' - line += ' src=%s' % filename + line += ' src=%s' % quote(filename) if binary_counts[unique_binary_tpl(prclchild)] > 1: line += ' deduplicate=1' diff --git a/tbxi/pef_info.py b/tbxi/pef_info.py new file mode 100644 index 0000000..3a99282 --- /dev/null +++ b/tbxi/pef_info.py @@ -0,0 +1,194 @@ +# Some scrounged code to give name/version suggestions for NDRVs + + +import struct + + +MAGIC = b'Joy!peff' + + +class PEF: + CONT_HEAD_FMT = '>4s4s4s5I2HI' + CONT_HEAD_LEN = struct.calcsize(CONT_HEAD_FMT) + + SEC_HEAD_FMT = '>i5I4B' + SEC_HED_LEN = struct.calcsize(SEC_HEAD_FMT) + + def __init__(self, data): + if not data.startswith(MAGIC): raise ValueError('not a pef') + + (magic, fourcc, arch, ver, + timestamp, old_def_ver, old_imp_ver, cur_ver, + sec_count, inst_sec_count, reserv) = struct.unpack_from(self.CONT_HEAD_FMT, data) + + sec_earliest = len(data) + sec_latest = 0 + + self.sections = [] + self.sectypes = [] + self.headeroffsets = [] + + self.code = None + + for i in range(sec_count): + sh_offset = self.CONT_HEAD_LEN + self.SEC_HED_LEN*i + + (sectionName, sectionAddress, execSize, + initSize, rawSize, containerOffset, + regionKind, shareKind, alignment, reserved) = struct.unpack_from(self.SEC_HEAD_FMT, data, sh_offset) + + the_sec = data[containerOffset : containerOffset + rawSize] + + if regionKind == 0 and execSize == initSize == rawSize: + the_sec = bytearray(the_sec) + self.code = the_sec + + self.sections.append(the_sec) + self.sectypes.append(regionKind) + self.headeroffsets.append(sh_offset) + + sec_earliest = min(sec_earliest, containerOffset) + sec_latest = max(sec_latest, containerOffset + rawSize) + + if any(data[sec_latest:]): + print('nonzero trailing data from', hex(sec_latest), 'to', hex(len(data)), ' ... will cause incorrect output') + + self.padmult = 1 + while len(data) % (self.padmult * 2) == 0: + self.padmult *= 2 + + self.header = data[:sec_earliest] + + def __bytes__(self): + accum = bytearray(self.header) + + for i in range(len(self.sections)): + the_sec = self.sections[i] + hoff = self.headeroffsets[i] + + while len(accum) % 16: + accum.append(0) + + new_off = len(accum) + new_len = len(the_sec) + + accum.extend(the_sec) + + struct.pack_into('>I', accum, hoff + 20, new_off) + + if the_sec is self.code: + for i in range(8, 20, 4): + struct.pack_into('>I', accum, hoff + i, new_len) + + while len(accum) % self.padmult != 0: + accum.extend(b'\x00') + + return bytes(accum) + + +def pidata(packed): + def pullarg(from_iter): + arg = 0 + for i in range(4): + cont = next(from_iter) + arg <<= 7 + arg |= cont & 0x7f + if not (cont & 0x80): break + else: + raise ValueError('arg spread over too many bytes') + return arg + + packed = iter(packed) + unpacked = bytearray() + + for b in packed: + opcode = b >> 5 + arg = b & 0b11111 or pullarg(packed) + + if opcode == 0b000: # zero + count = arg + unpacked.extend(b'\0' * count) + + elif opcode == 0b001: # blockCopy + blockSize = arg + for i in range(blockSize): + unpacked.append(next(packed)) + + elif opcode == 0b010: # repeatedBlock + blockSize = arg + repeatCount = pullarg(packed) + 1 + rawData = bytes(next(packed) for n in range(blockSize)) + for n in range(repeatCount): + unpacked.extend(rawData) + + elif opcode == 0b011 or opcode == 0b100: # interleaveRepeatBlockWithBlockCopy + commonSize = arg # or interleaveRepeatBlockWithZero + customSize = pullarg(packed) + repeatCount = pullarg(packed) + + if opcode == 0b011: + commonData = bytes(next(packed) for n in range(commonSize)) + else: + commonData = b'\0' * commonSize + + for i in range(repeatCount): + unpacked.extend(commonData) + for j in range(customSize): + unpacked.append(next(packed)) + unpacked.extend(commonData) + + else: + raise ValueError('unknown pidata opcode/arg %s/%d' % (bin(opcode), arg)) + return + + return bytes(unpacked) + + +def parse_version(num): + maj, minbug, stage, unreleased = num.to_bytes(4, byteorder='big') + + maj = '%x' % maj + minor, bugfix = '%02x' % minbug + + if stage == 0x80: + stage = 'f' + elif stage == 0x60: + stage = 'b' + elif stage == 0x40: + stage = 'a' + elif stage == 0x20: + stage = 'd' + else: + stage = '?' + + unreleased = '%0x' % unreleased + + vers = maj + '.' + minor + + if bugfix != '0': + vers += '.' + bugfix + + if (stage, unreleased) != ('f', '0'): + vers += stage + unreleased + + return vers + + +def suggest_name(pef): + if not pef.startswith(b'Joy!peff'): return + + try: + pef = PEF(pef) + + for sectype, section in zip(pef.sectypes, pef.sections): + if sectype == 2: section = pidata(section) + + if section and sectype in (1, 2): + hdr_ofs = section.find(b'mtej') + if hdr_ofs != -1: + sig, strvers, devnam, drvvers = struct.unpack_from('>4s L 32p L', section, hdr_ofs) + + sugg = devnam.decode('mac_roman') + '-' + parse_version(drvvers) + return sugg + except: + pass # do not complain about corrupt PEFs