Use the actual names of ndrv parcels, don't guess

This commit is contained in:
Elliot Nunn 2019-09-24 16:40:26 +08:00
parent 6ea035cf52
commit 9a320c71f7
2 changed files with 243 additions and 94 deletions

View File

@ -3,11 +3,13 @@ import os
from os import path
from shlex import quote
import struct
import hashlib
from . import dispatcher
from .slow_lzss import decompress
from .lowlevel import PrclNodeStruct, PrclChildStruct
from .pef_info import suggest_name
HEADER_COMMENT = """
@ -44,6 +46,10 @@ HEADER_COMMENT = """
""".strip()
def quickhash(foo):
    """Return the SHA-512 digest of ``foo`` as a hexadecimal string.

    ``foo`` is passed straight to ``hashlib.sha512``, so it must be a
    bytes-like object.
    """
    return hashlib.sha512(foo).hexdigest()
def walk_tree(binary):
"""Get low level representation of tree
@ -71,83 +77,28 @@ def unique_binary_tpl(prclchild):
return (prclchild.ptr, prclchild.packedlen, prclchild.compress)
def suggest_names_to_dump(parent, child, code_name):
# We yield heaps of suggested filenames, and the shortest non-empty unique one gets chosen
def guess_binary_name(parent_struct, child_struct, adjacent_name, data):
# 4 MB ROM-in-RAM image
if parent_struct.ostype == child_struct.ostype == 'rom ':
return 'MacROM'
if parent.ostype == child.ostype == 'rom ':
yield 'MacROM'
return
# Native (PCI) driver with an embedded name and version
ndrv_name = suggest_name(data)
if ndrv_name: return ndrv_name
if 'AAPL,MacOS,PowerPC' in child.name and code_name == 'PowerMgrPlugin':
if parent.a == 'cuda' and parent.b == 'via-cuda':
yield 'PowerMgrPlugin.CUDA'
elif parent.a == 'pmu' and parent.b == 'power-mgt':
yield 'PowerMgrPlugin.PMU'
elif parent.a == 'via-pmu-99' and parent.b == 'power-mgt':
yield 'PowerMgrPlugin.PMU99'
elif parent.a == 'via-pmu-2000' and parent.b == 'power-mgt':
yield 'PowerMgrPlugin.PMU2000'
elif parent.a == 'bal' and parent.b == 'power-mgt':
yield 'PowerMgrPlugin.BlueBox'
# A "special" property called by its actual name
if parent_struct.flags & 0xF0000 or child_struct.flags & 0x80:
return child_struct.name
if ',' not in child.name: # All property names except driver,AAPL,MacOS,pef et al
yield child.name
# A driver property with an adjacent name property
if 'AAPL,MacOS,PowerPC' in child_struct.name and adjacent_name:
return adjacent_name
if child.flags & 0x80: # special-node stuff
yield child.name
yield squish_name(child.name, parent.a, parent.b)
# A lanLib (for netbooting)
if child_struct.name == 'lanLib,AAPL,MacOS,PowerPC':
return parent_struct.a
if 'AAPL,MacOS,PowerPC' in child.name:
if code_name:
yield squish_name(code_name, parent.a, parent.b)
else:
yield squish_name(parent.a, parent.b)
def squish_name(*parts):
    """Join ``parts`` with '.', leaving out parts that add no information.

    Parts are compared in a normalised form: lower-cased, with '-' and '_'
    removed.  A part is left out when

    * its normalised form differs from, but is contained in, the normalised
      form of any other part; or
    * an earlier part has exactly the same normalised form (so the first of
      several equal parts is the one that survives).

    The surviving parts keep their original spelling and order.
    """
    def squeeze(text):
        return text.lower().replace('-', '').replace('_', '')

    # Normalise each part once rather than on every pairwise comparison
    squeezed = [squeeze(part) for part in parts]

    def redundant(j):
        key = squeezed[j]
        return any(
            (i < j) if key == other else (key in other)
            for i, other in enumerate(squeezed)
            if i != j
        )

    return '.'.join(part for j, part in enumerate(parts) if not redundant(j))
def settle_name_votes(vote_dict):
    """Choose one unambiguous name per key from lists of candidate names.

    ``vote_dict`` maps each key to a list of candidate names.  A name is
    rejected if it is the empty string or if it was suggested for more than
    one key.  Each key is then mapped to its shortest remaining candidate
    (the earliest one on ties).  Keys left with no acceptable candidate are
    absent from the returned dict.
    """
    # Count, for every name, how many distinct keys proposed it.  One pass
    # over the votes replaces comparing every pair of keys.
    proposers = {}
    for votes in vote_dict.values():
        for name in set(votes):
            proposers[name] = proposers.get(name, 0) + 1

    decision = {}
    for key, votes in vote_dict.items():
        allowed_names = [
            name for name in votes
            if name != '' and proposers[name] == 1
        ]
        if allowed_names:
            decision[key] = min(allowed_names, key=len)

    return decision
def is_parcels(binary):
    """Return True if ``binary`` starts with the four bytes ``b'prcl'``."""
    return binary.startswith(b'prcl')
return ''
def dump(binary, dest_dir):
@ -159,6 +110,7 @@ def dump(binary, dest_dir):
# Decompress everything
unpacked_dict = {}
binary_of = lambda child: unpacked_dict[unique_binary_tpl(child)]
binary_counts = Counter()
for prclnode, children in basic_structure:
for prclchild in children:
@ -169,36 +121,39 @@ def dump(binary, dest_dir):
unpacked_dict[unique_binary_tpl(prclchild)] = data
# Suggest possible filenames for each blob
name_vote_dict = defaultdict(list)
filename_dict = {} # maps binary data to a filename
for prclnode, children in basic_structure:
# is there a prop that gives contextual name information?
# A fragment prop may have an adjacent prop giving it a name, get this ready
adjacent_name = None
for check_child in children:
if check_child.name == 'code,AAPL,MacOS,name':
code_name = unpacked_dict[unique_binary_tpl(check_child)].rstrip(b'\0').decode('ascii')
break
else:
code_name = None
adjacent_name = unpacked_dict[unique_binary_tpl(check_child)].rstrip(b'\0').decode('ascii')
# now use that name to suggest names for all the children
# Best guess original-ish name for this binary
for prclchild in children:
if prclchild.ostype in ('cstr', 'csta'): continue
votes = suggest_names_to_dump(prclnode, prclchild, code_name)
if unpacked_dict[unique_binary_tpl(prclchild)].startswith(b'Joy!'):
votes = [v + '.pef' for v in votes]
name_vote_dict[unique_binary_tpl(prclchild)].extend(votes)
if prclchild.ostype not in ('cstr', 'csta'):
base = guess_binary_name(
parent_struct=prclnode,
child_struct=prclchild,
adjacent_name=adjacent_name,
data=binary_of(prclchild),
)
filename_dict[binary_of(prclchild)] = base
# Decide on filenames
decision = settle_name_votes(name_vote_dict)
# Post-process to ensure that all names are unique
used_names = Counter(filename_dict.values())
for binary, filename in list(filename_dict.items()):
if used_names[filename] > 1:
if filename: filename += '-'
filename += quickhash(binary)
filename_dict[binary] = filename
filename_dict = {b: (fn+'.pef' if b.startswith(b'Joy!peff') else fn) for (b, fn) in filename_dict.items()}
# Dump blobs to disk
for tpl, filename in decision.items():
keep_this = True
data = unpacked_dict[tpl]
for data, filename in filename_dict.items():
dispatcher.dump(data, path.join(dest_dir, filename))
# Get printing!!!
with open(path.join(dest_dir, 'Parcelfile'), 'w') as f:
f.write(HEADER_COMMENT + '\n\n')
@ -217,9 +172,9 @@ def dump(binary, dest_dir):
if prclchild.name: line += ' name=%s' % quote(prclchild.name)
if prclchild.ostype not in ('cstr', 'csta'):
filename = decision[unique_binary_tpl(prclchild)]
filename = filename_dict[binary_of(prclchild)]
if prclchild.compress == 'lzss': filename += '.lzss'
line += ' src=%s' % filename
line += ' src=%s' % quote(filename)
if binary_counts[unique_binary_tpl(prclchild)] > 1:
line += ' deduplicate=1'

194
tbxi/pef_info.py Normal file
View File

@ -0,0 +1,194 @@
# Some scrounged code to give name/version suggestions for NDRVs
import struct
MAGIC = b'Joy!peff'


class PEF:
    """Split a PEF container into its header and sections, and rebuild it.

    After construction:

    * ``sections``      -- the raw bytes of each section, in header order
    * ``sectypes``      -- the ``regionKind`` byte of each section
    * ``headeroffsets`` -- file offset of each section's header
    * ``code``          -- a mutable ``bytearray`` for the (last) section
      with ``regionKind == 0`` whose three size fields agree, else ``None``
    * ``header``        -- everything before the earliest section's data
    * ``padmult``       -- largest power of two dividing the input length

    ``bytes(pef)`` lays the sections out again, so ``code`` may be edited or
    resized in place before serialising.
    """

    CONT_HEAD_FMT = '>4s4s4s5I2HI'
    CONT_HEAD_LEN = struct.calcsize(CONT_HEAD_FMT)

    SEC_HEAD_FMT = '>i5I4B'
    SEC_HED_LEN = struct.calcsize(SEC_HEAD_FMT)  # (sic) name kept for callers

    def __init__(self, data):
        """Parse ``data``; raise ``ValueError`` if it lacks the PEF magic."""
        if not data.startswith(MAGIC):
            raise ValueError('not a pef')

        (magic, fourcc, arch, ver,
         timestamp, old_def_ver, old_imp_ver, cur_ver,
         sec_count, inst_sec_count, reserv) = struct.unpack_from(self.CONT_HEAD_FMT, data)

        sec_earliest = len(data)
        sec_latest = 0

        self.sections = []
        self.sectypes = []
        self.headeroffsets = []

        self.code = None

        for i in range(sec_count):
            sh_offset = self.CONT_HEAD_LEN + self.SEC_HED_LEN * i

            (sectionName, sectionAddress, execSize,
             initSize, rawSize, containerOffset,
             regionKind, shareKind, alignment, reserved) = struct.unpack_from(self.SEC_HEAD_FMT, data, sh_offset)

            the_sec = data[containerOffset : containerOffset + rawSize]

            if regionKind == 0 and execSize == initSize == rawSize:
                # Mutable copy so callers can patch it; __bytes__ recognises
                # this exact object by identity
                the_sec = bytearray(the_sec)
                self.code = the_sec

            self.sections.append(the_sec)
            self.sectypes.append(regionKind)
            self.headeroffsets.append(sh_offset)

            sec_earliest = min(sec_earliest, containerOffset)
            sec_latest = max(sec_latest, containerOffset + rawSize)

        if any(data[sec_latest:]):
            print('nonzero trailing data from', hex(sec_latest), 'to', hex(len(data)), ' ... will cause incorrect output')

        # Largest power of two that divides len(data), i.e. its lowest set
        # bit.  The length is nonzero here: the header unpack above succeeded.
        self.padmult = len(data) & -len(data)

        self.header = data[:sec_earliest]

    def __bytes__(self):
        """Serialise the header followed by every section.

        Each section starts on a 16-byte boundary and its header's
        containerOffset field (byte 20 of the section header) is rewritten
        to match.  For ``self.code`` the three size fields (bytes 8, 12 and
        16: execSize, initSize, rawSize) are set to its current length.  The
        result is zero-padded to a multiple of ``self.padmult``.
        """
        accum = bytearray(self.header)

        for the_sec, hoff in zip(self.sections, self.headeroffsets):
            accum.extend(bytes(-len(accum) % 16))

            new_off = len(accum)
            new_len = len(the_sec)

            accum.extend(the_sec)

            struct.pack_into('>I', accum, hoff + 20, new_off)

            if the_sec is self.code:
                for size_field in range(8, 20, 4):
                    struct.pack_into('>I', accum, hoff + size_field, new_len)

        accum.extend(bytes(-len(accum) % self.padmult))

        return bytes(accum)
def pidata(packed):
    """Expand a pattern-initialized data stream and return the raw bytes.

    ``packed`` is an iterable of byte values.  Each instruction starts with
    a byte holding a 3-bit opcode (high bits) and a 5-bit count (low bits);
    a count of zero means the real count follows as a variable-length
    argument.  Opcodes handled:

    * 0b000  emit ``count`` zero bytes
    * 0b001  copy ``count`` literal bytes
    * 0b010  repeat a ``count``-byte block (argument + 1) times
    * 0b011  a common block interleaved with custom literal blocks
    * 0b100  as 0b011, but the common block is all zeros

    Raises ``ValueError`` for an unknown opcode, for an argument spread over
    more than four bytes, or if the stream ends part-way through an
    instruction.
    """
    stream = iter(packed)

    def take(count):
        # Read exactly `count` bytes.  Running out of input is reported as
        # ValueError rather than letting StopIteration escape (inside a
        # generator expression it would surface as RuntimeError instead).
        chunk = bytearray()
        for _ in range(count):
            byte = next(stream, None)
            if byte is None:
                raise ValueError('pidata ends in the middle of an instruction')
            chunk.append(byte)
        return bytes(chunk)

    def pullarg():
        # 7 data bits per byte, most significant group first; the top bit
        # of each byte says whether another byte follows.
        arg = 0
        for _ in range(4):
            cont = take(1)[0]
            arg <<= 7
            arg |= cont & 0x7f
            if not (cont & 0x80):
                return arg
        raise ValueError('arg spread over too many bytes')

    unpacked = bytearray()

    for b in stream:
        opcode = b >> 5
        arg = b & 0b11111 or pullarg()

        if opcode == 0b000:  # zero
            unpacked.extend(bytes(arg))

        elif opcode == 0b001:  # blockCopy
            unpacked.extend(take(arg))

        elif opcode == 0b010:  # repeatedBlock
            blockSize = arg
            repeatCount = pullarg() + 1
            unpacked.extend(take(blockSize) * repeatCount)

        elif opcode == 0b011 or opcode == 0b100:
            # 0b011: interleaveRepeatBlockWithBlockCopy
            # 0b100: interleaveRepeatBlockWithZero
            commonSize = arg
            customSize = pullarg()
            repeatCount = pullarg()

            if opcode == 0b011:
                commonData = take(commonSize)
            else:
                commonData = bytes(commonSize)

            # common, custom, common, custom, ..., common
            for _ in range(repeatCount):
                unpacked.extend(commonData)
                unpacked.extend(take(customSize))
            unpacked.extend(commonData)

        else:
            raise ValueError('unknown pidata opcode/arg %s/%d' % (bin(opcode), arg))

    return bytes(unpacked)
def parse_version(num):
    """Format a packed 32-bit version number as a string like '1.2.3b4'.

    ``num`` is split into four big-endian bytes:

    1. major version, printed as hex digits
    2. minor and bug-fix revisions, one hex digit (nibble) each
    3. stage: 0x80 -> 'f', 0x60 -> 'b', 0x40 -> 'a', 0x20 -> 'd', anything
       else -> '?' (presumably final/beta/alpha/development -- confirm
       against the format this number comes from)
    4. a pre-release revision, printed as hex digits

    The bug-fix digit is omitted when it is 0, and the stage/revision suffix
    is omitted when it would be exactly 'f0'.

    Raises ``OverflowError`` (from ``int.to_bytes``) if ``num`` does not fit
    in four unsigned bytes.
    """
    stage_letters = {0x80: 'f', 0x60: 'b', 0x40: 'a', 0x20: 'd'}

    maj, minbug, stage, unreleased = num.to_bytes(4, byteorder='big')

    # '%02x' always yields exactly two characters: minor, then bug-fix
    minor, bugfix = '%02x' % minbug
    stage = stage_letters.get(stage, '?')
    unreleased = '%x' % unreleased

    vers = '%x.%s' % (maj, minor)

    if bugfix != '0':
        vers += '.' + bugfix

    if (stage, unreleased) != ('f', '0'):
        vers += stage + unreleased

    return vers
def suggest_name(pef):
    """Suggest a '<name>-<version>' string for a PEF binary, or None.

    ``pef`` is the raw file contents.  Sections with type 1 or 2 are
    searched, in order, for the 4-byte marker ``b'mtej'`` (type 2 sections
    are first expanded with ``pidata``).  At the first hit, the structure
    starting there is read as: marker, a 32-bit value, a 32-byte Pascal
    string, and a 32-bit version number.  The string (decoded as Mac Roman)
    and the version formatted by ``parse_version`` make up the result.

    Returns None when ``pef`` does not start with the PEF magic, when no
    marker is found, or when the file is too corrupt to parse.
    """
    if not pef.startswith(b'Joy!peff'):
        return None

    try:
        container = PEF(pef)

        for sectype, section in zip(container.sectypes, container.sections):
            if sectype == 2:
                section = pidata(section)

            if section and sectype in (1, 2):
                hdr_ofs = section.find(b'mtej')
                if hdr_ofs != -1:
                    sig, strvers, devnam, drvvers = struct.unpack_from('>4s L 32p L', section, hdr_ofs)
                    return devnam.decode('mac_roman') + '-' + parse_version(drvvers)
    except Exception:
        # Deliberately best-effort: do not complain about corrupt PEFs.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        pass

    return None