diff --git a/machfs/directory.py b/machfs/directory.py index 73c0606..ad8d421 100644 --- a/machfs/directory.py +++ b/machfs/directory.py @@ -9,7 +9,17 @@ TEXT_TYPES = [b'TEXT', b'ttro'] # Teach Text read-only def _unsyncability(name): # files named '_' reserved for directory Finder info - return name.endswith(('.rdump', '.idump')) or name.startswith('.') or name == '_' + if path.splitext(name)[1].lower() in ('.rdump', '.idump'): return True + if name.startswith('.'): return True + if name == '_': return True + if len(name) > 31: return True + + try: + name.encode('mac_roman') + except UnicodeEncodeError: + return True + + return False def _fuss_if_unsyncable(name): if _unsyncability(name): @@ -21,6 +31,37 @@ def _try_delete(name): except FileNotFoundError: pass +def _symlink_rel(src, dst): + rel_path_src = path.relpath(src, path.dirname(dst)) + os.symlink(rel_path_src, dst) + +def _get_datafork_paths(base): + """Symlinks are NOT GOOD""" + base = path.abspath(path.realpath(base)) + for dirpath, dirnames, filenames in os.walk(base): + dirnames[:] = [x for x in dirnames if not _unsyncability(x)] + filenames[:] = [x for x in filenames if not _unsyncability(x)] + + for kindcode, the_list in ((0, filenames), (1, dirnames)): + for fname in the_list: + nativepath = path.join(dirpath, fname) + hfspath = tuple(_swapsep(c) for c in path.relpath(nativepath, base).split(path.sep)) + + hfslink = kindcode # if not a link then default to this + + if path.islink(nativepath): + nativelink = path.realpath(nativepath) + if len(path.commonpath((nativelink, base))) < len(base): continue + + hfslink = tuple(_swapsep(c) for c in path.relpath(nativelink, base).split(path.sep)) + if hfslink == (path.relpath('x', 'x'),): hfslink = () # nasty special case + + yield nativepath, hfspath, hfslink + +def _swapsep(n): + return n.replace(':', path.sep) + + class AbstractFolder(MutableMapping): def __init__(self, from_dict=()): @@ -32,8 +73,12 @@ class AbstractFolder(MutableMapping): if isinstance(key, tuple): if len(key) == 1: self[key[0]] = value + return + elif len(key) == 0: + raise KeyError else: self[key[0]][key[1:]] = value + return try: key = key.decode('mac_roman') @@ -50,6 +95,8 @@ class AbstractFolder(MutableMapping): if isinstance(key, tuple): if len(key) == 1: return self[key[0]] + elif len(key) == 0: + return self else: return self[key[0]][key[1:]] @@ -65,8 +112,12 @@ class AbstractFolder(MutableMapping): if isinstance(key, tuple): if len(key) == 1: del self[key[0]] + return + elif len(key) == 0: + raise KeyError else: del self[key[0]][key[1:]] + return try: key = key.decode('mac_roman') @@ -131,78 +182,57 @@ class AbstractFolder(MutableMapping): yield from self[dn]._recursive_walk(my_path=my_path+(dn,), topdown=topdown) def read_folder(self, folder_path, date=0, mpw_dates=False): - def includefilter(n): - if n.startswith('.'): return False - if n.endswith('.rdump'): return True - if n.endswith('.idump'): return True - return True - - def swapsep(n): - return n.replace(':', path.sep) - - def mkbasename(n): - base, ext = path.splitext(n) - if ext in ('.rdump', '.idump'): - return base - else: - return n - self.crdate = self.mddate = self.bkdate = date - tmptree = {folder_path: self} + deferred_aliases = [] + for nativepath, hfspath, hfslink in _get_datafork_paths(folder_path): + if hfslink == 0: # file + thefile = File(); self[hfspath] = thefile + thefile.crdate = thefile.mddate = thefile.bkdate = date - for dirpath, dirnames, filenames in os.walk(folder_path): - dirnames[:] = [swapsep(x) for x in dirnames if includefilter(x)] - filenames[:] = [swapsep(x) for x in filenames if includefilter(x)] - - for dn in dirnames: - _fuss_if_unsyncable(dn) - - newdir = Folder() - newdir.crdate = newdir.mddate = newdir.bkdate = date - tmptree[dirpath][dn] = newdir - tmptree[path.join(dirpath, dn)] = newdir - - for fn in filenames: - basename = mkbasename(fn) - _fuss_if_unsyncable(basename) - - fullbase = path.join(dirpath, basename) - fullpath = path.join(dirpath, fn) + if mpw_dates: thefile.real_t = 0 try: - thefile = tmptree[fullbase] - except KeyError: - thefile = File() - thefile.real_t = 0 # for the MPW hack - thefile.crdate = thefile.mddate = thefile.bkdate = date - thefile.contributors = [] - tmptree[fullbase] = thefile - - if fn.endswith('.idump'): - with open(fullpath, 'rb') as f: + with open(nativepath + '.idump', 'rb') as f: + if mpw_dates: thefile.real_t = max(thefile.real_t, path.getmtime(f.name)) thefile.type = f.read(4) thefile.creator = f.read(4) - elif fn.endswith('rdump'): - rez = open(fullpath, 'rb').read() - resources = parse_rez_code(rez) - resfork = make_file(resources, align=4) - thefile.rsrc = resfork - else: - thefile.data = open(fullpath, 'rb').read() + except FileNotFoundError: + pass - thefile.contributors.append(fullpath) - if mpw_dates: - thefile.real_t = max(thefile.real_t, path.getmtime(fullpath)) + try: + with open(nativepath + '.rdump', 'rb') as f: + if mpw_dates: thefile.real_t = max(thefile.real_t, path.getmtime(f.name)) + thefile.rsrc = make_file(parse_rez_code(f.read()), align=4) + except FileNotFoundError: + pass - tmptree[dirpath][basename] = thefile + with open(nativepath, 'rb') as f: + if mpw_dates: thefile.real_t = max(thefile.real_t, path.getmtime(f.name)) + thefile.data = f.read() - for pathtpl, obj in self.iter_paths(): + if thefile.type in TEXT_TYPES: + thefile.data = thefile.data.replace(b'\r\n', b'\r').replace(b'\n', b'\r') + try: + thefile.data = thefile.data.decode('utf8').encode('mac_roman') + except UnicodeEncodeError: + pass # not happy, but whatever... + + elif hfslink == 1: # folder + thedir = Folder(); self[hfspath] = thedir + thedir.crdate = thedir.mddate = thedir.bkdate = date + + else: # symlink, i.e. alias + deferred_aliases.append((hfspath, hfslink)) # alias, targetpath + + for aliaspath, targetpath in deferred_aliases: try: - if obj.type in TEXT_TYPES: - obj.data = obj.data.decode('utf8').replace('\r\n', '\r').replace('\n', '\r').encode('mac_roman') - except AttributeError: - pass + alias = File() + alias.flags |= 0x8000 + alias.aliastarget = self[targetpath] + self[aliaspath] = alias + except (KeyError, ValueError): + raise if mpw_dates: all_real_times = set() @@ -231,6 +261,8 @@ class AbstractFolder(MutableMapping): written = [] blacklist = list() + alias_fixups = list() + valid_alias_targets = dict() for p, obj in self.iter_paths(): blacklist_test = ':'.join(p) + ':' if blacklist_test.startswith(tuple(blacklist)): continue @@ -243,10 +275,15 @@ class AbstractFolder(MutableMapping): info_path = nativepath + '.idump' rsrc_path = nativepath + '.rdump' + valid_alias_targets[id(obj)] = nativepath + if isinstance(obj, Folder): os.makedirs(nativepath, exist_ok=True) elif obj.mddate != obj.bkdate or not any_exists(nativepath): + if obj.aliastarget is not None: + alias_fixups.append((nativepath, id(obj.aliastarget))) + # always write the data fork data = obj.data if obj.type in TEXT_TYPES: @@ -275,6 +312,21 @@ class AbstractFolder(MutableMapping): for w in written: os.utime(w, (t, t)) + for alias_path, target_id in alias_fixups: + try: + target_path = valid_alias_targets[target_id] + except KeyError: + pass + else: + _try_delete(alias_path) + _try_delete(alias_path + '.idump') + _try_delete(alias_path + '.rdump') + + _symlink_rel(target_path, alias_path) + for ext in ('.idump', '.rdump'): + if path.exists(target_path + ext): + _symlink_rel(target_path + ext, alias_path + ext) + class Folder(AbstractFolder): def __init__(self): @@ -298,10 +350,17 @@ class File: self.locked = False self.crdate = self.mddate = self.bkdate = 0 + self.aliastarget = None + self.rsrc = bytearray() self.data = bytearray() def __str__(self): + if isinstance(self.aliastarget, File): + return '[alias] ' + str(self.aliastarget) + elif self.aliastarget is not None: + return '[alias to folder]' + typestr, creatorstr = (x.decode('mac_roman') for x in (self.type, self.creator)) dstr, rstr = (repr(bytes(x)) if 1 <= len(x) <= 32 else '%db' % len(x) for x in (self.data, self.rsrc)) return '[%s/%s] data=%s rsrc=%s' % (typestr, creatorstr, dstr, rstr) diff --git a/machfs/main.py b/machfs/main.py index e41790c..4faa760 100644 --- a/machfs/main.py +++ b/machfs/main.py @@ -99,6 +99,85 @@ def _bb_name(name): return bitmanip.pstring(_encode_name(name)).ljust(16) +def _common_prefix(*tuples): + for i in range(min(len(t) for t in tuples)): + for t in tuples[1:]: + if t[i] != tuples[0][i]: + return i + + return 0 + + +def _link_aliases(vol_cr_date, cnid_dict): # vol creation date confirms within-volume alias + for cnid, obj in cnid_dict.items(): + try: + if obj.flags & 0x8000: + alis_rsrc = next(r.data for r in parse_file(obj.rsrc) if r.type == b'alis') + + # print(hex(obj.flags)) + # print(obj) + # open('/tmp/interpreting' + hex(cnid),'wb').write(alis_rsrc) + + userType, aliasSize, aliasVersion, \ + thisAliasKind, volumeName, volumeCrDate, \ + volumeSig, volumeType, parDirID, fileName, \ + fileNum, fileCrDate, fileType, fdCreator, \ + nlvlFrom, nlvlTo, volumeAttributes, volumeFSID \ + = struct.unpack_from('>4s H hh 28p L 2s hL 64p LL 4s4s HHLh', alis_rsrc) + + # print(userType, aliasSize, aliasVersion, + # thisAliasKind, volumeName, volumeCrDate, + # volumeSig, volumeType, parDirID, fileName, + # fileNum, fileCrDate, fileType, fdCreator, + # nlvlFrom, nlvlTo, volumeAttributes, volumeFSID) + + if volumeCrDate != vol_cr_date: raise ValueError + + obj.aliastarget = cnid_dict[fileNum] + + except (AttributeError, KeyError, StopIteration, ValueError): + pass + + +def _defer_special_files(iter_paths): + """Defer special files (aliases) to late CNIDs, and resolve aliases""" + approved_dict = dict() + unapproved = [] + + for path, obj in iter_paths: + if isinstance(obj, File) and obj.aliastarget is not None: + unapproved.append((path, obj)) + else: + yield path, obj, None + approved_dict[id(obj)] = path + + while unapproved: + made_progress = False + + for i in reversed(range(len(unapproved))): + path, obj = unapproved[i] + + try: + targetpath = approved_dict[id(obj.aliastarget)] + except KeyError: + continue + + yield path, obj, targetpath + approved_dict[id(obj)] = path + + unapproved.pop(i) + made_progress = True + + if not made_progress: break + + +def _alis_append(alis, kind, data): + if len(alis) % 2: alis.append(0) + alis.extend(struct.pack('>hH', kind, len(data))) + alis.extend(data) + if len(alis) % 2: alis.append(0) + + class _TempWrapper: """Volume uses this to store metadata while serialising""" def __init__(self, of): @@ -225,6 +304,8 @@ class Volume(AbstractFolder): self.pop('Desktop DB', None) self.pop('Desktop DF', None) + _link_aliases(drCrDate, cnids) + def write(self, size=800*1024, align=512, desktopdb=True, bootable=True, startapp=None, sparse=False): if align < 512 or align % 512: raise ValueError('align must be multiple of 512') @@ -232,7 +313,11 @@ class Volume(AbstractFolder): if size < 400 * 1024 or size % 512: raise ValueError('size must be a multiple of 512b and >= 400K') + # These are declared up here because they are needed for aliases drVN = _encode_name(self.name, 'vol') + drSigWord = b'BD' + drAtrb = 1<<8 # volume attributes (hwlock, swlock, CLEANUNMOUNT, badblocks) + drCrDate, drLsMod, drVolBkUp = self.crdate, self.mddate, self.bkdate # overall layout: # 1. two boot blocks (offset=0) @@ -306,7 +391,7 @@ class Volume(AbstractFolder): path2wrap = {(): godwrap, (self.name,): topwrap} drNxtCNID = 16 - for path, obj in self.iter_paths(): + for path, obj, aliastarget in _defer_special_files(self.iter_paths()): path = (self.name,) + path wrap = _TempWrapper(obj) path2wrap[path] = wrap @@ -338,15 +423,83 @@ class Volume(AbstractFolder): if isinstance(obj, File) and startapp and path[1:] == tuple(startapp): startapp_folder_cnid = path2wrap[path[:-1]].cnid + if isinstance(obj, File): + wrap.data, wrap.rsrc = obj.data, obj.rsrc + wrap.type, wrap.creator = obj.type, obj.creator + + # This is the place to manage your special files (aliases for now) + if aliastarget is not None: + aliastarget = (self.name,) + aliastarget # match the convention for this function + targetobj = path2wrap[aliastarget].of # probe the target to set some metadata + + if isinstance(targetobj, Folder): + wrap.creator = b'MACS' + wrap.type = b'fdrp' + + elif isinstance(targetobj, Volume): + wrap.creator = b'MACS' + wrap.type = b'hdsk' if size > 1440*1024 else b'flpy' + + elif isinstance(targetobj, File): + wrap.creator = targetobj.creator + + if targetobj.type == b'APPL': + wrap.type = b'adrp' + else: + wrap.type = targetobj.type + + wrap.data = b'' + + userType = b'' + aliasSize = 9999 # fill this short at offset 4 + aliasVersion = 2 + thisAliasKind = 1 if isinstance(targetobj, Folder) else 0 + volumeName = drVN + volumeCrDate = drCrDate + volumeSig = drSigWord + volumeType = 5 #2 if size == 400*1024 else 3 if size == 800*1024 else 4 if size == 1440*1024 else 1 + parDirID = path2wrap[aliastarget[:-1]].cnid + fileName = _encode_name(aliastarget[-1]) + fileNum = path2wrap[aliastarget].cnid + fileCrDate = path2wrap[aliastarget].of.crdate + fileType = targetobj.type if isinstance(targetobj, File) else b'' + fdCreator = targetobj.creator if isinstance(targetobj, File) else b'' + nlvlFrom = len(path) - _common_prefix(path, aliastarget) + nlvlTo = len(aliastarget) - _common_prefix(path, aliastarget) + volumeAttributes = 0 # this is aliasmgr-specific + volumeFSID = 0 + + # Stress test: find file by name, not CNID + # fileNum = 0 + + alis = Resource(b'alis', 0, name=path[-1]) + alis.data[:] = struct.pack('>4s H hh 28p L 2s hL 64p LL 4s4s HHLh', + userType, aliasSize, aliasVersion, \ + thisAliasKind, volumeName, volumeCrDate, \ + volumeSig, volumeType, parDirID, fileName, \ + fileNum, fileCrDate, fileType, fdCreator, \ + nlvlFrom, nlvlTo, volumeAttributes, volumeFSID \ + ) + bytes(10) # reserved stuff + + _alis_append(alis.data, 0, aliastarget[-2].encode('mac_roman')) + _alis_append(alis.data, 2, ':'.join(aliastarget).encode('mac_roman')) + _alis_append(alis.data, -1, b'') + + struct.pack_into('>H', alis.data, 4, len(alis.data)) + + # open('/tmp/creating','wb').write(alis.data) + + wrap.rsrc = make_file([alis]) + if isinstance(obj, File): wrap.dfrk = wrap.rfrk = (0, 0) - if obj.data: + if wrap.data: pre = len(blkaccum) - accumulate(bitmanip.chunkify(obj.data, drAlBlkSiz)) + accumulate(bitmanip.chunkify(wrap.data, drAlBlkSiz)) wrap.dfrk = (pre, len(blkaccum)-pre) - if obj.rsrc: + if wrap.rsrc: pre = len(blkaccum) - accumulate(bitmanip.chunkify(obj.rsrc, drAlBlkSiz)) + accumulate(bitmanip.chunkify(wrap.rsrc, drAlBlkSiz)) wrap.rfrk = (pre, len(blkaccum)-pre) self._prefdict = root_dict_backup @@ -370,10 +523,10 @@ class Volume(AbstractFolder): cdrType = 2 filFlags = 1 << 1 # file thread record exists, but is not locked, nor "file record is used" filTyp = 0 - filUsrWds = struct.pack('>4s4sHHHxxxxxx', obj.type, obj.creator, obj.flags, obj.x, obj.y) + filUsrWds = struct.pack('>4s4sHHHxxxxxx', wrap.type, wrap.creator, obj.flags, obj.x, obj.y) filFlNum = wrap.cnid - filStBlk, filLgLen, filPyLen = wrap.dfrk[0], len(obj.data), bitmanip.pad_up(len(obj.data), drAlBlkSiz) - filRStBlk, filRLgLen, filRPyLen = wrap.rfrk[0], len(obj.rsrc), bitmanip.pad_up(len(obj.rsrc), drAlBlkSiz) + filStBlk, filLgLen, filPyLen = wrap.dfrk[0], len(wrap.data), bitmanip.pad_up(len(wrap.data), drAlBlkSiz) + filRStBlk, filRLgLen, filRPyLen = wrap.rfrk[0], len(wrap.rsrc), bitmanip.pad_up(len(wrap.rsrc), drAlBlkSiz) filCrDat, filMdDat, filBkDat = obj.crdate, obj.mddate, obj.bkdate filFndrInfo = bytes(16) # todo must fix filClpSize = 0 # todo must fix @@ -438,7 +591,6 @@ class Volume(AbstractFolder): startapp_folder_cnid = 0 # Create the Volume Information Block - drSigWord = b'BD' drNmFls = sum(isinstance(x, File) for x in self.values()) drNmRtDirs = sum(not isinstance(x, File) for x in self.values()) drVBMSt = 3 # first block of volume bitmap @@ -448,11 +600,9 @@ class Volume(AbstractFolder): drFreeBks = drNmAlBlks - len(blkaccum) drWrCnt = 0 # ????volume write count drVCSize = drVBMCSize = drCtlCSize = 0 - drAtrb = 1<<8 # volume attributes (hwlock, swlock, CLEANUNMOUNT, badblocks) drVolBkUp = 0 # date and time of last backup drVSeqNum = 0 # volume backup sequence number drFndrInfo = struct.pack('>LLL28x', system_folder_cnid, startapp_folder_cnid, startapp_folder_cnid) - drCrDate, drLsMod, drVolBkUp = self.crdate, self.mddate, self.bkdate vib = struct.pack('>2sLLHHHHHLLHLH28pLHLLLHLL32sHHHLHHxxxxxxxxLHHxxxxxxxx', drSigWord, drCrDate, drLsMod, drAtrb, drNmFls,