Remove speed hacks from rfx

This is practical now that the Rez code is fast
Elliot Nunn 2020-01-19 14:52:06 +08:00
parent 8413c99f74
commit aa185339bc

bin/rfx (272 changes)

@@ -7,108 +7,152 @@ import os
from os import path
import re
import subprocess
import textwrap
HELP = '''Usage: rfx [-c] command [arg | arg//type/id | arg//type | arg// ...]
Expose MacOS resource forks to a command
Resources specified as filename//type/id are converted to tempfiles
before the command runs, then back after it returns. Truncated //
arguments are wildcards.
Supports .rdump Rez files and .hqx BinHex files. Other names get .rdump
appended implicitly.
Examples:
rfx mv Doc.rdump//STR/0 Doc.rdump//STR/1
rfx cp App.hqx//PICT allpictures/
rfx rm System/..namedfork/rsrc//vers/2'''
if len(sys.argv) < 2 or sys.argv[1].startswith('-'):
sys.exit(textwrap.dedent('''
usage: rfx command [arg | arg// | arg//type | arg//type/id ...]
Shell command wrapper for accessing resources inside a Rez textfile
Resources specified as filename.rdump//type/id are converted to tempfiles before
the command is run, and back to resources after the command returns. Truncated
// arguments are wildcards.
examples:
rfx mv Doc.rdump//STR/0 Doc.rdump//STR/1
rfx cp App.rdump//PICT allpictures/
rfx rm System.rdump//vers/2
''').strip())
sys.exit(HELP)
bytearray_cache = {}
original_cache = {}
def is_rez(the_path):
return path.splitext(the_path)[1].lower() == '.rdump'
def is_hqx(the_path):
return path.splitext(the_path)[1].lower() == '.hqx'
def is_fork(the_path):
return the_path.lower().endswith('/..namedfork/rsrc') or path.splitext(the_path)[1].lower() == '.rsrc'
resourcefork_cache = {} # the_path -> mutable list of resources
inodes = {} # deduplicates file paths so we don't screw it up
hqx_saved_data = {} # stores data fork and Finder info so we don't strip it
def get_cached_file(the_path):
# Different paths to the same file are unlikely, but just in case:
the_path = path.abspath(the_path)
path_user_entered = the_path # only for error messages
if not (is_rez(the_path) or is_fork(the_path) or is_hqx(the_path)):
the_path += '.rdump' # will cause is_rez to return true
# The path is already in the cache! Hooray!
try: return resourcefork_cache[the_path]
except KeyError: pass
# Hack to stop us being fooled by the same file with multiple names
# (Doesn't help if the file doesn't exist yet -- oh well)
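# (st_dev, st_ino) identifies the underlying file, so duplicate paths and hard links share one cache entry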
try:
stat = os.stat(the_path)
stat = (stat.st_dev, stat.st_ino)
the_path = inodes.setdefault(stat, the_path)
# Have one more crack at the main cache
try: return resourcefork_cache[the_path]
except KeyError: pass
except FileNotFoundError:
pass
try:
return bytearray_cache[the_path]
except KeyError:
try:
with open(the_path, 'rb') as f:
d = f.read()
except FileNotFoundError:
d = bytes()
with open(the_path, 'rb') as f:
raw = f.read()
try:
if is_rez(the_path):
resources = list(macresources.parse_rez_code(raw))
elif is_fork(the_path):
resources = list(macresources.parse_file(raw))
elif is_hqx(the_path):
from macresources import binhex
hb = binhex.HexBin(raw)
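# Save the data fork and Finder info now, before the resource fork is parsed, so flush_cache can write them back unchanged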
hqx_saved_data[the_path] = (hb.FName, hb.FInfo, hb.read())
rsrc = hb.read_rsrc()
resources = list(macresources.parse_file(rsrc)) # a list, because flush_cache may mutate and append
except:
sys.exit('Corrupt: ' + repr(path_user_entered))
except FileNotFoundError: # Treat as empty resource fork
if is_rez(the_path):
resources = []
elif is_fork(the_path):
resources = []
elif is_hqx(the_path):
try:
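# A brand-new BinHex needs a valid classic Mac OS filename: MacRoman-encodable and at most 31 characters (':' is the Mac path separator)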
valid_filename = path.basename(the_path)[:-4].replace(':', path.sep)
valid_filename.encode('mac_roman')
if len(valid_filename) > 31: raise ValueError
except:
sys.exit('Name not suitable for a new BinHex: ' + repr(path_user_entered))
hqx_saved_data[the_path] = (valid_filename, None, b'')
resources = []
resourcefork_cache[the_path] = resources
return resources
original_cache[the_path] = d
bytearray_cache[the_path] = bytearray(d)
return bytearray_cache[the_path]
def flush_cache():
for the_path, the_data in bytearray_cache.items():
if original_cache[the_path] != the_data:
for the_path, resources in resourcefork_cache.items():
# No change, do not write the file
if not any(getattr(res, '__rfx_dirty', False) for res in resources): continue
# Weed out the ghost resources
resources = [res for res in resources if not getattr(res, '__rfx_ghost', False)]
# Support commands that pack/unpack GreggyBits etc (mistake here very rare!)
for res in resources:
if getattr(res, '__rfx_dirty', False):
is_compressed = (res.startswith(b'\xA8\x9F\x65\x72') and
len(res) >= 6 and
len(res) >= int.from_bytes(res[4:6], 'big')) # bytes 4-5 hold the compressed header length
res.attribs = (res.attribs & ~1) | int(is_compressed)
if is_rez(the_path):
# For BASE.rdump to be valid, BASE must exist (my rule)
try:
with open(path.splitext(the_path)[0], 'x'): pass
except FileExistsError:
pass
with open(the_path, 'wb') as f:
f.write(the_data)
f.write(macresources.make_rez_code(resources))
elif is_fork(the_path):
# For BASE/..namedfork/rsrc to be openable by macOS, BASE must exist
if the_path.lower().endswith('/..namedfork/rsrc'):
try:
with open(the_path[:-17], 'x'): pass
except FileExistsError:
pass
def rez_resource_range(the_data, the_type, the_id):
if not the_data: return (0, 0)
with open(the_path, 'wb') as f:
f.write(macresources.make_file(resources))
# Hack... do a text search instead of Rezzing the whole file!
search = macresources.make_rez_code([macresources.Resource(the_type, the_id)], ascii_clean=True)
search = search.rpartition(b')')[0]
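# Keep only the text before the final ')', so the header also matches when it carries a name or attribute arguments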
start = 0
while True:
start = the_data.find(search, start)
if start == -1: return (0, 0)
if (the_data[start-1:start] in b'\n') and (the_data[start+len(search):start+len(search)+1] in (b',', b')')):
break
start += len(search)
stop = the_data.index(b'\n};\n\n', start) + 5
return (start, stop)
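# Narrow a (start, stop) range to just the hex data lines between the braces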
def rez_shrink_range(the_data, start, stop):
start = the_data.index(b'\n', start) + 1
while the_data[stop:stop+1] != b'}': stop -= 1
return (start, stop)
def rez_get_resource(the_path, the_type, the_id):
the_file = get_cached_file(the_path)
start, stop = rez_resource_range(the_file, the_type, the_id)
if start == stop == 0: return None
return next(macresources.parse_rez_code(the_file[start:stop])).data
def rez_set_resource(the_path, the_type, the_id, the_data):
the_file = get_cached_file(the_path)
newdata = macresources.make_rez_code([macresources.Resource(the_type, the_id, data=the_data)], ascii_clean=True)
start, stop = rez_resource_range(the_file, the_type, the_id)
if start == stop == 0:
the_file.extend(newdata)
else:
start, stop = rez_shrink_range(the_file, start, stop)
istart, istop = rez_shrink_range(newdata, 0, len(newdata))
the_file[start:stop] = newdata[istart:istop]
def rez_delete_resource(the_path, the_type, the_id):
the_file = get_cached_file(the_path)
start, stop = rez_resource_range(the_file, the_type, the_id)
del the_file[start:stop]
elif is_hqx(the_path):
# Get back the non-resource-fork stuff for the BinHex file
from macresources import binhex
fname, finfo, data = hqx_saved_data[the_path]
rsrc = macresources.make_file(resources)
bh = binhex.BinHex((fname, finfo, len(data), len(rsrc)), the_path)
bh.write(data) # write back the saved data fork; its length was declared in the header above
bh.write_rsrc(rsrc)
bh.close()
def escape_ostype(ostype):
@@ -132,54 +176,64 @@ with tempfile.TemporaryDirectory() as backup_tmp_dir:
# Do not expand this argument
new_argv.append(arg)
else:
# Expand arg into 1+ fake-resource tempfiles. This is a (filename, type, id) list.
res_specs = []
# Expand arg into 1+ fake-resource tempfiles, each backed by a Resource object
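# The type code is space-padded or truncated to exactly four MacRoman bytes; the ID is a decimal integer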
res_file = m.group(1)
res_type = m.group(2).encode('mac_roman').ljust(4)[:4] if m.group(2) else None
res_id = int(m.group(3)) if m.group(3) else None
if res_type is None:
# File// = every resource
for found_res in macresources.parse_rez_code(get_cached_file(res_file)):
res_specs.append((res_file, found_res.type, found_res.id))
arg_resources = get_cached_file(res_file)
elif res_id is None:
# File//Type/ = resources of type (can omit trailing slash)
for found_res in macresources.parse_rez_code(get_cached_file(res_file)):
if found_res.type == res_type:
res_specs.append((res_file, res_type, found_res.id))
arg_resources = [foundres for foundres in get_cached_file(res_file) if foundres.type == res_type]
else:
# File//Type/ID = 1 resource
res_specs.append((res_file, res_type, res_id))
for foundres in get_cached_file(res_file):
if foundres.type == res_type and foundres.id == res_id:
arg_resources = [foundres]
break
else:
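# No match (for-else): create a 'ghost' placeholder, which flush_cache will drop unless the command actually writes to it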
arg_resources = [macresources.Resource(res_type, res_id)]
arg_resources[0].__rfx_ghost = True
arg_resources[0].__rfx_dirty = False
get_cached_file(res_file).append(arg_resources[0])
if not res_specs:
if not arg_resources:
# Failed to expand so leave unchanged
new_argv.append(arg)
else:
# Expand!
tmp_subdir = path.join(backup_tmp_dir, str(i))
os.mkdir(tmp_subdir)
for res_spec in res_specs:
res_file, res_type, res_id = res_spec
tmp_file = path.join(tmp_subdir, '%s.%d' % (escape_ostype(res_type), res_id))
for j, res in enumerate(arg_resources, 1):
enclosing_dir = path.join(backup_tmp_dir, '%d.%d' % (i,j))
os.mkdir(enclosing_dir)
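# Each tempfile gets its own numbered subdirectory (arg index, resource index)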
to_retrieve.append((tmp_file, res_spec))
res_data = rez_get_resource(*res_spec)
if res_data is not None:
tmp_file = path.join(enclosing_dir, '%s.%d' % (escape_ostype(res.type), res.id))
if not getattr(res, '__rfx_ghost', False):
with open(tmp_file, 'wb') as f:
f.write(res_data)
f.write(res)
to_retrieve.append((tmp_file, res))
new_argv.append(tmp_file)
result = subprocess.run(new_argv)
for tmp_file, res_spec in to_retrieve:
for tmp_file, res in to_retrieve:
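# If the tempfile still exists, copy its bytes back into the Resource; if the command deleted it, mark the resource as a ghost so flush_cache drops it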
try:
with open(tmp_file, 'rb') as f:
rez_set_resource(*res_spec, f.read())
d = f.read()
if getattr(res, '__rfx_ghost', False) or d != res:
res[:] = d
res.__rfx_dirty = True
res.__rfx_ghost = False
except FileNotFoundError:
rez_delete_resource(*res_spec)
if not getattr(res, '__rfx_ghost', False):
res.__rfx_dirty = True
res.__rfx_ghost = True
flush_cache()