From 4dab2dfd143b1658c20d4a7ec1ecf8493fb4bbfa Mon Sep 17 00:00:00 2001 From: Elliot Nunn Date: Thu, 2 Jan 2020 17:59:10 +0800 Subject: [PATCH] Ready for 1.0! Neaten up the command line interface, rip out some bad ideas, and push with some documentation. --- README.md | 76 ++++++++++++++++++++++++++- bin/Rget | 76 --------------------------- bin/Rhqx | 80 ---------------------------- bin/hexrez | 70 +++++++++++++++++++++++++ bin/rezhex | 72 ++++++++++++++++++++++++++ bin/rfx | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++ bin/sortrez | 68 ++++++++++++++++++++++++ setup.py | 4 +- 8 files changed, 433 insertions(+), 159 deletions(-) delete mode 100755 bin/Rget delete mode 100644 bin/Rhqx create mode 100755 bin/hexrez create mode 100755 bin/rezhex create mode 100755 bin/rfx create mode 100755 bin/sortrez diff --git a/README.md b/README.md index 8258039..7412c9a 100644 --- a/README.md +++ b/README.md @@ -1 +1,75 @@ -`macresources` readme +# macresources + +A Python library and command line tools to work with Classic MacOS [resource +forks](https://en.wikipedia.org/wiki/Resource_fork) on a modern machine. + + +## Data Format + +First, `macresources` and its sister package +[`machfs`](https://pypi.org/project/machfs/) have a preferred representation for +Macintosh files, where Macintosh-specific information is stored in separate text +files. + +1. The data fork is stored inside a file with the original name. This must be +present for the following two files to be recognised. + +2. The resource fork is stored in a 'Rez-style' textfile with `.rdump` appended +to the original name. The format is slightly different from a vanilla 'DeRez' +dump: non-ASCII characters are escaped, giving an ASCII-clean output: + + data '\0x96tbl' (0) { + $"0000 0001 0000 0000 0000 0010 0669 4D61" /* .............iMa */ + ... + }; + +3. 
The four-character type and creator codes are concatenated (like a `PkgInfo` +file inside an app bundle) in a file with `.idump` appended to the original +name. If the type is `TEXT` or `ttro`, then the data fork is converted to UTF-8 +with Unix (LF) line endings. + +Several other formats exist to store this Macintosh-specific data in flat files, +the best known being +[AppleSingle/AppleDouble](https://en.wikipedia.org/wiki/AppleSingle_and_AppleDouble_formats), +[MacBinary](https://en.wikipedia.org/wiki/MacBinary) and +[BinHex 4](https://en.wikipedia.org/wiki/BinHex). The data format described here +instead adapts text-friendly formats (`Rez` and `PkgInfo`). The result is +especially useful for placing legacy Macintosh source code under modern version +control. + +The role of `macresources` is to produce and parse Rez-style `.rdump` files, and +to produce and parse raw resource forks for `machfs` disk images. + + +## Command Line Interface + +`rfx` is a shell command wrapper for accessing resources inside a `.rdump` file. +Command line arguments are passed through to the command, but resources +specified as `filename.rdump//type/id` are converted to tempfiles before the +command is run, and back to resources after the command returns. This approach +even enables `cp`, `mv` and `rm` to work on individual resources. + +`rezhex` and `hexrez` convert between +[BinHex](https://en.wikipedia.org/wiki/BinHex) (`.hqx`) format and +`macresources`/`machfs` format. + +`SimpleRez` and `SimpleDeRez` are very simple reimplementations of the +deprecated `Rez` and `DeRez` utilities. They convert between raw resource forks +and Rez-style `.rdump` files. To access a raw resource fork under Mac OS X, you +can append `/..namedfork/rsrc` to a filename. + +All utilities have online help. + + +## API + +The Python API is pretty spartan. It exists mainly to support `machfs` and the command line interface.
+ + from macresources import * + + make_rez_code(from_iter, ascii_clean=False) # Takes an iterator of Resource objects, returns Rez code + parse_rez_code(from_code) # Takes Rez code, returns an iterator of Resource objects + make_file(from_iter) # Takes an iterator of Resource objects, returns a raw resource fork + parse_file(from_file) # Takes a raw resource fork, returns an iterator of Resource objects + +The `Resource` class inherits from bytearray. diff --git a/bin/Rget b/bin/Rget deleted file mode 100755 index adae75c..0000000 --- a/bin/Rget +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import macresources -import sys -import tempfile -from os import path - -def fourcc(s): - b = s.encode('mac_roman').ljust(4, b' ') - if len(b) != 4: - raise ValueError('wrong length') - return b - -def seemshex(s): - s = s.lower() - if not s: return False - return all(c in '0123456789abcdef' for c in s) - -def seemsdec(s): - if not s: return False - return all(c in '0123456789' for c in s) - -def resid(s): - s = s.lower() - if s.startswith('0x') and seemshex(s[2:]): - thenum = int(s[2:], 16) - if thenum > 0x7fff: thenum -= 0x8000 - elif s.startswith('$') and seemshex(s[1:]): - thenum = int(s[1:], 16) - if thenum > 0x7fff: thenum -= 0x8000 - else: - thenum = int(s) - if not (-0x8000 <= thenum <= 0x7fff): raise ValueError - return thenum - -parser = argparse.ArgumentParser(description=''' - Copy a single MacOS resource from a source file to the standard - output. If the source filename ends with `.r' or `.rdump', then it - is parsed using the `SimpleRez' subset of the Rez language. TODO: - find a way to specify null characters so that desk accessories can - be looked up by name. 
-''') - -parser.add_argument('srcfile', help='resource file or Rez file') -parser.add_argument('type', type=fourcc, help='four-byte type of resource (converted to Mac Roman pre-lookup)') -parser.add_argument('id', type=resid, help='ID number of resource (-32768 to 32767)') -parser.add_argument('-f', dest='tofile', action='store_true', help='copy to a tempfile instead') - -args = parser.parse_args() - -with open(args.srcfile, 'rb') as f: - raw = f.read() - -if args.srcfile.endswith('.r') or args.srcfile.endswith('.rdump'): - resources = macresources.parse_rez_code(raw) -else: - resources = macresources.parse_file(raw) - -for r in resources: - if r.type == args.type and r.id == args.id: - myres = r - break -else: - raise ValueError(args.type, args.id) - -if args.tofile: - tmpname = '-%s-%s-%d' % (path.basename(args.srcfile), myres.type.decode('mac_roman'), myres.id) - with tempfile.NamedTemporaryFile(suffix=tmpname, delete=False, mode='wb') as f: - f.write(myres.data) - print(f.name) -else: - try: - sys.stdout.buffer.write(myres.data) - except BrokenPipeError: - pass diff --git a/bin/Rhqx b/bin/Rhqx deleted file mode 100644 index bca917d..0000000 --- a/bin/Rhqx +++ /dev/null @@ -1,80 +0,0 @@ -import os -from os import path -import argparse -import macresources -from macresources import binhex - -parser = argparse.ArgumentParser(description=''' - Supply base name to convert datafork+rdump+idump to HQX. - Supply base.hqx name to convert HQX to datafork+rdump+idump. 
-''') - -parser.add_argument('srcfile', nargs='*', help='base or base.hqx') - -args = parser.parse_args() - - -for srcfile in args.srcfile: - # Case 1: from BinHex - if path.splitext(path.basename(srcfile))[1].lower() == '.hqx': - hb = binhex.HexBin(srcfile) - - base = path.splitext(srcfile)[0] - - if hb.FInfo.Type == hb.FInfo.Creator == b'????': - try: - os.remove(base + '.idump') - except FileNotFoundError: - pass - else: - with open(base + '.idump', 'wb') as f: - f.write(hb.FInfo.Type + hb.FInfo.Creator) - - data = hb.read() - if hb.FInfo.Type in [b'TEXT', b'ttro']: - data = data.replace(b'\r', b'\n').decode('mac_roman').encode('utf-8') - with open(base, 'wb') as f: - f.write(data) - - rsrc = hb.read_rsrc() - if rsrc: - with open(base + '.rdump', 'wb') as f: - f.write(macresources.make_rez_code(macresources.parse_file(rsrc), ascii_clean=True)) - else: - try: - os.remove(base + '.rdump') - except FileNotFoundError: - pass - - # Case 2: to BinHex - else: - finfo = binhex.FInfo() - finfo.Flags = 0 - - try: - info = open(srcfile + '.idump', 'rb').read(8) - assert len(info) == 8 - finfo.Type = info[:4] - finfo.Creator = info[4:] - except: - pass - - try: - data = open(srcfile, 'rb').read() - if finfo.Type in [b'TEXT', b'ttro']: - data = data.replace(b'\n', b'\r').decode('utf-8').encode('mac_roman') - except: - data = b'' - - try: - rsrc = open(srcfile + '.rdump', 'rb').read() - rsrc = macresources.make_file(macresources.parse_rez_code(rsrc)) - except: - rsrc = b'' - - bh = binhex.BinHex((path.basename(srcfile), finfo, len(data), len(rsrc)), srcfile + '.hqx') - - bh.write(data) - bh.write_rsrc(rsrc) - - bh.close() diff --git a/bin/hexrez b/bin/hexrez new file mode 100755 index 0000000..3adf1f0 --- /dev/null +++ b/bin/hexrez @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +import os +from os import path +import argparse +import macresources +from macresources import binhex + + +def do_file(the_path): + base_path = path.splitext(the_path)[0] # known to have hqx extension + 
hb = binhex.HexBin(the_path) + + if hb.FInfo.Type == hb.FInfo.Creator == b'????': + try: + os.remove(base_path + '.idump') + except FileNotFoundError: + pass + else: + with open(base_path + '.idump', 'wb') as f: + f.write(hb.FInfo.Type + hb.FInfo.Creator) + + data = hb.read() + if hb.FInfo.Type in [b'TEXT', b'ttro']: + data = data.replace(b'\r', b'\n').decode('mac_roman').encode('utf-8') + with open(base_path, 'wb') as f: + f.write(data) + + rsrc = hb.read_rsrc() + if rsrc: + with open(base_path + '.rdump', 'wb') as f: + f.write(macresources.make_rez_code(macresources.parse_file(rsrc), ascii_clean=True)) + else: + try: + os.remove(base_path + '.rdump') + except FileNotFoundError: + pass + + +def is_hqx_name(the_path): + name = path.basename(the_path) + base, ext = path.splitext(name) + if ext.lower() == '.hqx': + return True + else: + return False + + +parser = argparse.ArgumentParser(description=''' + UnBinHex (BASE.hqx) into (BASE + BASE.rdump + BASE.idump) +''') + +parser.add_argument('hqx', metavar='BASE.hqx', nargs='+', help='file or directory') + +args = parser.parse_args() + +for hqx in args.hqx: + if path.isdir(hqx): + for hqx, dirlist, filelist in os.walk(hqx): + dirlist[:] = [d for d in dirlist if not d.startswith('.')]; dirlist.sort() + filelist[:] = [f for f in filelist if not f.startswith('.')]; filelist.sort() + + for f in filelist: + if is_hqx_name(f): + do_file(path.join(hqx, f)) + else: + if not is_hqx_name(hqx): + exit('Not a BinHex file') + + do_file(hqx) diff --git a/bin/rezhex b/bin/rezhex new file mode 100755 index 0000000..97ee724 --- /dev/null +++ b/bin/rezhex @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +import os +from os import path +import argparse +import macresources +from macresources import binhex + + +def do_file(the_path): + finfo = binhex.FInfo() + finfo.Flags = 0 + + try: + info = open(the_path + '.idump', 'rb').read(8) + assert len(info) == 8 + finfo.Type = info[:4] + finfo.Creator = info[4:] + except: + pass + + try: + data = 
open(the_path, 'rb').read() + if finfo.Type in [b'TEXT', b'ttro']: + data = data.replace(b'\n', b'\r').decode('utf-8').encode('mac_roman') + except: + data = b'' + + try: + rsrc = open(the_path + '.rdump', 'rb').read() + rsrc = macresources.make_file(macresources.parse_rez_code(rsrc)) + except: + rsrc = b'' + + bh = binhex.BinHex((path.basename(the_path), finfo, len(data), len(rsrc)), the_path + '.hqx') + + bh.write(data) + bh.write_rsrc(rsrc) + + bh.close() + + +def is_valid_base(the_path): + name = path.basename(the_path) + base, ext = path.splitext(name) + if ext.lower() in ('.hqx', '.idump', '.rdump'): return False + return True + + +parser = argparse.ArgumentParser(description=''' + BinHex (BASE + BASE.rdump + BASE.idump) into (BASE.hqx) +''') + +parser.add_argument('base', metavar='BASE', nargs='+', help='file or directory') + +args = parser.parse_args() + +for base in args.base: + if path.isdir(base): + for base, dirlist, filelist in os.walk(base): + dirlist[:] = [d for d in dirlist if not d.startswith('.')]; dirlist.sort() + filelist[:] = [f for f in filelist if not f.startswith('.')]; filelist.sort() + + for f in filelist: + if is_valid_base(f): + do_file(path.join(base, f)) + + else: + if not is_valid_base(base): + exit('Base names cannot have a .hqx/.idump/.rdump extension') + + do_file(base) diff --git a/bin/rfx b/bin/rfx new file mode 100755 index 0000000..0d48e8a --- /dev/null +++ b/bin/rfx @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 + +import macresources +import sys +import tempfile +from os import path +import re +import subprocess +import textwrap + + +if len(sys.argv) < 2 or sys.argv[1].startswith('-'): + sys.exit(textwrap.dedent(''' + usage: rfx command [arg[//type/id] ...] + + Shell command wrapper for accessing resources inside a Rez textfile + + Resources specified as filename.rdump//type/id are converted to tempfiles before + the command is run, and back to resources after the command returns.
+ + examples: + rfx mv Doc.rdump//STR/0 Doc.rdump//STR/1 + rfx cp App.rdump//PICT/2000 2000.pict + rfx rm System.rdump//vers/2 + ''').strip()) + + +bytearray_cache = {} +original_cache = {} + +def get_cached_file(the_path): + # Different paths to the same file are unlikely, but just in case: + the_path = path.abspath(the_path) + + try: + return bytearray_cache[the_path] + except KeyError: + try: + with open(the_path, 'rb') as f: + d = f.read() + except FileNotFoundError: + d = bytes() + + original_cache[the_path] = d + bytearray_cache[the_path] = bytearray(d) + return bytearray_cache[the_path] + +def flush_cache(): + for the_path, the_data in bytearray_cache.items(): + if original_cache[the_path] != the_data: + with open(the_path, 'wb') as f: + f.write(the_data) + + +def rez_resource_range(the_data, the_type, the_id): + if not the_data: return (0, 0) + + # Hack... do a text search instead of Rezzing the whole file! + search = macresources.make_rez_code([macresources.Resource(the_type, the_id)], ascii_clean=True) + search = search.rpartition(b')')[0] + + start = 0 + while True: + start = the_data.find(search, start) + if start == -1: return (0, 0) + if (the_data[start-1:start] in b'\n') and (the_data[start+len(search):start+len(search)+1] in (b',', b')')): + break + start += len(search) + + stop = the_data.index(b'\n};\n\n', start) + 5 + + return (start, stop) + + +def rez_shrink_range(the_data, start, stop): + start = the_data.index(b'\n', start) + 1 + while the_data[stop:stop+1] != b'}': stop -= 1 + + return (start, stop) + + +def rez_get_resource(the_path, the_type, the_id): + the_file = get_cached_file(the_path) + + start, stop = rez_resource_range(the_file, the_type, the_id) + if start == stop == 0: return None + return next(macresources.parse_rez_code(the_file[start:stop])).data + + +def rez_set_resource(the_path, the_type, the_id, the_data): + the_file = get_cached_file(the_path) + + newdata = macresources.make_rez_code([macresources.Resource(the_type, 
the_id, data=the_data)], ascii_clean=True) + + start, stop = rez_resource_range(the_file, the_type, the_id) + if start == stop == 0: + the_file.extend(newdata) + else: + start, stop = rez_shrink_range(the_file, start, stop) + istart, istop = rez_shrink_range(newdata, 0, len(newdata)) + + the_file[start:stop] = newdata[istart:istop] + + +def rez_delete_resource(the_path, the_type, the_id): + the_file = get_cached_file(the_path) + + start, stop = rez_resource_range(the_file, the_type, the_id) + del the_file[start:stop] + + +with tempfile.TemporaryDirectory() as backup_tmp_dir: + new_argv = [sys.argv[1]] + to_retrieve = [] + + for i, arg in enumerate(sys.argv[2:], 1): + m = re.match(r'(.*[^/])//([^/]{1,4})/(-?\d+)$'.replace('/', re.escape(path.sep)), arg) + + if m: + res_spec = (m.group(1), m.group(2).encode('mac_roman').ljust(4)[:4], int(m.group(3))) + tmp_file = path.join(backup_tmp_dir, str(i)) + + to_retrieve.append((tmp_file, res_spec)) + + res_data = rez_get_resource(*res_spec) + if res_data is not None: + with open(tmp_file, 'wb') as f: + f.write(res_data) + + new_argv.append(tmp_file) + + else: + new_argv.append(arg) + + result = subprocess.run(new_argv) + + for tmp_file, res_spec in to_retrieve: + try: + with open(tmp_file, 'rb') as f: + rez_set_resource(*res_spec, f.read()) + except FileNotFoundError: + rez_delete_resource(*res_spec) + +flush_cache() + +sys.exit(result.returncode) diff --git a/bin/sortrez b/bin/sortrez new file mode 100755 index 0000000..adc4bfd --- /dev/null +++ b/bin/sortrez @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 + +import argparse +import macresources + + +MACROMAN_SORT = [ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + + 0x20, 0x22, 0x23, 0x28, 0x29, 0x2a, 0x2b, 0x2c, + 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, + 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, + 0x3f, 0x40, 
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, + + 0x47, 0x48, 0x58, 0x5a, 0x5e, 0x60, 0x67, 0x69, + 0x6b, 0x6d, 0x73, 0x75, 0x77, 0x79, 0x7b, 0x7f, + 0x8d, 0x8f, 0x91, 0x93, 0x96, 0x98, 0x9f, 0xa1, + 0xa3, 0xa5, 0xa8, 0xaa, 0xab, 0xac, 0xad, 0xae, + + 0x54, 0x48, 0x58, 0x5a, 0x5e, 0x60, 0x67, 0x69, + 0x6b, 0x6d, 0x73, 0x75, 0x77, 0x79, 0x7b, 0x7f, + 0x8d, 0x8f, 0x91, 0x93, 0x96, 0x98, 0x9f, 0xa1, + 0xa3, 0xa5, 0xa8, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, + + 0x4c, 0x50, 0x5c, 0x62, 0x7d, 0x81, 0x9a, 0x55, + 0x4a, 0x56, 0x4c, 0x4e, 0x50, 0x5c, 0x62, 0x64, + 0x65, 0x66, 0x6f, 0x70, 0x71, 0x72, 0x7d, 0x89, + 0x8a, 0x8b, 0x81, 0x83, 0x9c, 0x9d, 0x9e, 0x9a, + + 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0x95, + 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0x52, 0x85, + 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, + 0xc9, 0xca, 0xcb, 0x57, 0x8c, 0xcc, 0x52, 0x85, + + 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0x26, + 0x27, 0xd4, 0x20, 0x4a, 0x4e, 0x83, 0x87, 0x87, + 0xd5, 0xd6, 0x24, 0x25, 0x2d, 0x2e, 0xd7, 0xd8, + 0xa7, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +] + + +def sortkey(resource): + return (*(MACROMAN_SORT[char] for char in resource.type), resource.id) + + +parser = argparse.ArgumentParser(description=''' + Sort the resources in a Rez file (for diffing). 
+''') + +parser.add_argument('src', nargs='*', help='Rez files') +args = parser.parse_args() + +for srcfile in args.src: + with open(srcfile, 'r+b') as f: + raw = f.read() + resources = list(macresources.parse_rez_code(raw)) + resources.sort(key=sortkey) + f.seek(0) + f.truncate(0) + f.write(macresources.make_rez_code(resources, ascii_clean=True)) diff --git a/setup.py b/setup.py index 35d40e7..8de1b86 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup setup( name='macresources', - version='0.1dev', + version='1.0', author='Elliot Nunn', author_email='elliotnunn@me.com', description='Library for working with legacy Macintosh resource forks', @@ -18,5 +18,5 @@ setup( 'Development Status :: 3 - Alpha', ], packages=['macresources'], - scripts=['bin/SimpleRez', 'bin/SimpleDeRez', 'bin/Rget', 'bin/Rhqx'], + scripts=['bin/SimpleRez', 'bin/SimpleDeRez', 'bin/hexrez', 'bin/rezhex', 'bin/sortrez', 'bin/rfx'], )