Ready for 1.0!

Neaten up the command line interface, rip out some bad ideas, and push
with some documentation.
This commit is contained in:
Elliot Nunn 2020-01-02 17:59:10 +08:00
parent 762922aa5a
commit 4dab2dfd14
8 changed files with 433 additions and 159 deletions

View File

@ -1 +1,75 @@
`macresources` readme
# macresources
A Python library and command line tools to work with Classic MacOS [resource
forks](https://en.wikipedia.org/wiki/Resource_fork) on a modern machine.
## Data Format
First, `macresources` and its sister package
[`machfs`](https://pypi.org/project/machfs/) have a preferred representation for
Macintosh files, where Macintosh-specific information is stored in separate text
files.
1. The data fork is stored inside a file with the original name. This must be
present for the following two files to be recognised.
2. The resource fork is stored in a 'Rez-style' textfile with `.rdump` appended
to the original name. The format is slightly different from a vanilla 'DeRez'
dump: non-ASCII characters are escaped, giving an ASCII-clean output:
data '\0x96tbl' (0) {
$"0000 0001 0000 0000 0000 0010 0669 4D61" /* .............iMa */
...
};
3. The four-character type and creator codes are concatenated (like a `PkgInfo`
file inside an app bundle) in a file with `.idump` appended to the original
name. If the type is `TEXT` or `ttro`, then the data fork is converted to UTF-8
with Unix (LF) line endings.
Several other formats exist to store this Macintosh-specific data in flat files,
the best known being
[AppleSingle/AppleDouble](https://en.wikipedia.org/wiki/AppleSingle_and_AppleDouble_formats),
[MacBinary](https://en.wikipedia.org/wiki/MacBinary) and
[BinHex 4](https://en.wikipedia.org/wiki/BinHex). The data format described here
instead adapts text-friendly formats (`Rez` and `PkgInfo`). The result is
especially useful for placing legacy Macintosh source code under modern version
control.
The role of `macresources` is to produce and parse Rez-style `.rdump` files, and
to produce and parse raw resource forks for `machfs` disk images.
## Command Line Interface
`rfx` is a shell command wrapper for accessing resources inside a `.rdump` file.
Command line arguments are passed through to the command, but resources
specified as `filename.rdump//type/id` are converted to tempfiles before the
command is run, and back to resources after the command returns. This approach
even enables `cp`, `mv` and `rm` to work on individual resources.
`rezhex` and `hexrez` convert between
[BinHex](https://en.wikipedia.org/wiki/BinHex) (`.hqx`) format and
the `macresources`/`machfs` format described above.
`SimpleRez` and `SimpleDeRez` are very simple reimplementations of the
deprecated `Rez` and `DeRez` utilities. They convert between raw resource forks
and Rez-style `.rdump` files. To access a raw resource fork under Mac OS X, you
can append `/..namedfork/rsrc` to a filename.
All utilities have online help.
## API
The Python API is pretty spartan. It exists mainly to support `machfs` and the command line interface.
from macresources import *
make_rez_code(from_iter, ascii_clean=False) # Takes an iterator of Resource objects, returns Rez code
parse_rez_code(from_code) # Takes Rez code, returns an iterator of Resource objects
make_file(from_iter) # Takes an iterator of Resource objects, returns a raw resource fork
parse_file(from_file) # Takes a raw resource fork, returns an iterator of Resource objects
The `Resource` class inherits from bytearray.

View File

@ -1,76 +0,0 @@
#!/usr/bin/env python3
import argparse
import macresources
import sys
import tempfile
from os import path
def fourcc(s):
    """Encode *s* as a four-byte Mac Roman code, space-padded on the right.

    Raises ValueError('wrong length') when the encoded text is longer than
    four bytes.  Characters outside Mac Roman raise UnicodeEncodeError.
    """
    encoded = s.encode('mac_roman')
    # A negative repeat count yields b'', so over-long input is left as-is
    # and rejected by the length check below.
    padded = encoded + b' ' * (4 - len(encoded))
    if len(padded) == 4:
        return padded
    raise ValueError('wrong length')
def seemshex(s):
    """Return True if *s* is non-empty and consists only of hex digits (any case)."""
    lowered = s.lower()
    return bool(lowered) and set(lowered) <= set('0123456789abcdef')
def seemsdec(s):
    """Return True if *s* is non-empty and consists only of the digits 0-9."""
    return bool(s) and set(s) <= set('0123456789')
def resid(s):
    """Parse a resource ID written in decimal, ``0x`` hex or ``$`` hex.

    Hex input is read as an unsigned 16-bit word and reinterpreted as a
    signed one, so ``0xFFFF`` means -1 and ``0x8000`` means -32768.  Decimal
    input is taken at face value and may carry a sign.

    Returns an int in the range -32768..32767.

    Raises ValueError for unparseable text, for hex values wider than 16
    bits, and for decimal values outside the signed 16-bit range.
    """
    s = s.lower()

    if s.startswith('0x') and seemshex(s[2:]):
        digits = s[2:]
    elif s.startswith('$') and seemshex(s[1:]):
        digits = s[1:]
    else:
        digits = None

    if digits is None:
        # int() itself raises ValueError for anything that is not decimal
        thenum = int(s)
    else:
        thenum = int(digits, 16)
        if thenum > 0xffff:
            raise ValueError('hex resource ID does not fit in 16 bits')
        if thenum > 0x7fff:
            # Two's complement: subtract 2**16 (not 2**15) to get the
            # negative value that this 16-bit pattern represents.
            thenum -= 0x10000

    if not -0x8000 <= thenum <= 0x7fff:
        raise ValueError('resource ID out of range')
    return thenum
# Command line: SRCFILE TYPE ID [-f]
parser = argparse.ArgumentParser(description='''
Copy a single MacOS resource from a source file to the standard
output. If the source filename ends with `.r' or `.rdump', then it
is parsed using the `SimpleRez' subset of the Rez language. TODO:
find a way to specify null characters so that desk accessories can
be looked up by name.
''')
parser.add_argument('srcfile', help='resource file or Rez file')
parser.add_argument('type', type=fourcc, help='four-byte type of resource (converted to Mac Roman pre-lookup)')
parser.add_argument('id', type=resid, help='ID number of resource (-32768 to 32767)')
parser.add_argument('-f', dest='tofile', action='store_true', help='copy to a tempfile instead')
args = parser.parse_args()

with open(args.srcfile, 'rb') as f:
    raw = f.read()

# The input format is chosen purely from the file extension.
if args.srcfile.endswith(('.r', '.rdump')):
    resources = macresources.parse_rez_code(raw)
else:
    resources = macresources.parse_file(raw)

# Take the first resource matching both type and ID.
myres = next((r for r in resources if r.type == args.type and r.id == args.id), None)
if myres is None:
    raise ValueError(args.type, args.id)

if not args.tofile:
    # Stream the raw bytes; a reader closing the pipe early is not an error.
    try:
        sys.stdout.buffer.write(myres.data)
    except BrokenPipeError:
        pass
else:
    # Keep the tempfile (delete=False) and report its name on stdout.
    tmpname = '-%s-%s-%d' % (path.basename(args.srcfile), myres.type.decode('mac_roman'), myres.id)
    with tempfile.NamedTemporaryFile(suffix=tmpname, delete=False, mode='wb') as f:
        f.write(myres.data)
        print(f.name)

View File

@ -1,80 +0,0 @@
import os
from os import path
import argparse
import macresources
from macresources import binhex
parser = argparse.ArgumentParser(description='''
Supply base name to convert datafork+rdump+idump to HQX.
Supply base.hqx name to convert HQX to datafork+rdump+idump.
''')
parser.add_argument('srcfile', nargs='*', help='base or base.hqx')
args = parser.parse_args()


def _remove_if_present(the_path):
    """Delete the_path; a file that is already absent is not an error."""
    try:
        os.remove(the_path)
    except FileNotFoundError:
        pass


def _read_if_present(the_path):
    """Return the bytes of the_path, or None when the file does not exist."""
    try:
        with open(the_path, 'rb') as f:
            return f.read()
    except FileNotFoundError:
        return None


for srcfile in args.srcfile:
    # Case 1: from BinHex
    if path.splitext(path.basename(srcfile))[1].lower() == '.hqx':
        hb = binhex.HexBin(srcfile)
        base = path.splitext(srcfile)[0]

        # A type/creator of all question marks carries no information, so
        # make sure no stale .idump is left behind instead of writing one.
        if hb.FInfo.Type == hb.FInfo.Creator == b'????':
            _remove_if_present(base + '.idump')
        else:
            with open(base + '.idump', 'wb') as f:
                f.write(hb.FInfo.Type + hb.FInfo.Creator)

        data = hb.read()
        if hb.FInfo.Type in [b'TEXT', b'ttro']:
            # Text files: CR -> LF and Mac Roman -> UTF-8
            data = data.replace(b'\r', b'\n').decode('mac_roman').encode('utf-8')
        with open(base, 'wb') as f:
            f.write(data)

        rsrc = hb.read_rsrc()
        if rsrc:
            with open(base + '.rdump', 'wb') as f:
                f.write(macresources.make_rez_code(macresources.parse_file(rsrc), ascii_clean=True))
        else:
            _remove_if_present(base + '.rdump')

    # Case 2: to BinHex
    else:
        finfo = binhex.FInfo()
        finfo.Flags = 0

        # Only the first eight bytes matter; a missing or short .idump
        # leaves the FInfo defaults untouched.
        info = _read_if_present(srcfile + '.idump')
        if info is not None and len(info) >= 8:
            finfo.Type = info[:4]
            finfo.Creator = info[4:8]

        # Only a *missing* file means "empty fork".  Conversion or parse
        # errors now propagate rather than silently yielding an empty fork.
        data = _read_if_present(srcfile)
        if data is None:
            data = b''
        elif finfo.Type in [b'TEXT', b'ttro']:
            # Text files: LF -> CR and UTF-8 -> Mac Roman
            data = data.replace(b'\n', b'\r').decode('utf-8').encode('mac_roman')

        rez = _read_if_present(srcfile + '.rdump')
        if rez is None:
            rsrc = b''
        else:
            rsrc = macresources.make_file(macresources.parse_rez_code(rez))

        bh = binhex.BinHex((path.basename(srcfile), finfo, len(data), len(rsrc)), srcfile + '.hqx')
        bh.write(data)
        bh.write_rsrc(rsrc)
        bh.close()

70
bin/hexrez Executable file
View File

@ -0,0 +1,70 @@
#!/usr/bin/env python3
import os
from os import path
import argparse
import macresources
from macresources import binhex
def do_file(the_path):
    """Unpack one BinHex file into BASE, BASE.idump and BASE.rdump.

    BASE is the_path minus its extension.  The .idump is written only when
    the type/creator are not both b'????', and the .rdump only when the
    resource fork is non-empty; otherwise any existing file of that name is
    removed.  TEXT and ttro data forks are converted from Mac Roman with CR
    line endings to UTF-8 with LF line endings.
    """
    stem = path.splitext(the_path)[0]  # known to have hqx extension
    idump_path = stem + '.idump'
    rdump_path = stem + '.rdump'

    hb = binhex.HexBin(the_path)
    type_code = hb.FInfo.Type
    creator_code = hb.FInfo.Creator

    if not (type_code == creator_code == b'????'):
        with open(idump_path, 'wb') as f:
            f.write(type_code + creator_code)
    else:
        try:
            os.remove(idump_path)
        except FileNotFoundError:
            pass

    # The data fork is read before the resource fork.
    data_fork = hb.read()
    if type_code in [b'TEXT', b'ttro']:
        as_text = data_fork.replace(b'\r', b'\n').decode('mac_roman')
        data_fork = as_text.encode('utf-8')
    with open(stem, 'wb') as f:
        f.write(data_fork)

    rsrc_fork = hb.read_rsrc()
    if not rsrc_fork:
        try:
            os.remove(rdump_path)
        except FileNotFoundError:
            pass
        return

    with open(rdump_path, 'wb') as f:
        f.write(macresources.make_rez_code(macresources.parse_file(rsrc_fork), ascii_clean=True))
def is_hqx_name(the_path):
    """Return True when the last path component has a .hqx extension (any case)."""
    _, extension = path.splitext(path.basename(the_path))
    return extension.lower() == '.hqx'
parser = argparse.ArgumentParser(description='''
UnBinHex (BASE.hqx) into (BASE + BASE.rdump + BASE.idump)
''')
parser.add_argument('hqx', metavar='BASE.hqx', nargs='+', help='file or directory')
args = parser.parse_args()

for hqx in args.hqx:
    if not path.isdir(hqx):
        # A single file must itself be named *.hqx
        if not is_hqx_name(hqx):
            exit('Not a BinHex file')
        do_file(hqx)
        continue

    # Directory: convert every visible *.hqx beneath it, in sorted order.
    for dirpath, dirlist, filelist in os.walk(hqx):
        # Assigning through dirlist[:] prunes hidden directories from the walk.
        dirlist[:] = sorted(d for d in dirlist if not d.startswith('.'))
        for f in sorted(filelist):
            if f.startswith('.'):
                continue
            if is_hqx_name(f):
                do_file(path.join(dirpath, f))

72
bin/rezhex Executable file
View File

@ -0,0 +1,72 @@
#!/usr/bin/env python3
import os
from os import path
import argparse
import macresources
from macresources import binhex
def _read_if_present(the_path):
    """Return the bytes of the_path, or None when the file does not exist."""
    try:
        with open(the_path, 'rb') as f:
            return f.read()
    except FileNotFoundError:
        return None


def do_file(the_path):
    """BinHex the_path (+ .rdump + .idump) into the_path + '.hqx'.

    Each of the three inputs is optional: a missing data fork or .rdump
    gives an empty fork, and a missing or short (< 8 bytes) .idump leaves
    the FInfo type/creator defaults in place.  TEXT and ttro data forks are
    converted from UTF-8 with LF line endings to Mac Roman with CR.

    Only "file not found" is treated as "fork absent".  Other failures --
    unreadable files, text that cannot be encoded as Mac Roman, or a
    malformed .rdump -- now propagate instead of silently producing an
    empty fork in the archive.
    """
    finfo = binhex.FInfo()
    finfo.Flags = 0

    # First four bytes are the type, next four the creator.
    info = _read_if_present(the_path + '.idump')
    if info is not None and len(info) >= 8:
        finfo.Type = info[:4]
        finfo.Creator = info[4:8]

    data = _read_if_present(the_path)
    if data is None:
        data = b''
    elif finfo.Type in [b'TEXT', b'ttro']:
        data = data.replace(b'\n', b'\r').decode('utf-8').encode('mac_roman')

    rez = _read_if_present(the_path + '.rdump')
    if rez is None:
        rsrc = b''
    else:
        rsrc = macresources.make_file(macresources.parse_rez_code(rez))

    bh = binhex.BinHex((path.basename(the_path), finfo, len(data), len(rsrc)), the_path + '.hqx')
    bh.write(data)
    bh.write_rsrc(rsrc)
    bh.close()
def is_valid_base(the_path):
    """Return False for names ending in .hqx, .idump or .rdump (any case), else True."""
    _, extension = path.splitext(path.basename(the_path))
    return extension.lower() not in ('.hqx', '.idump', '.rdump')
parser = argparse.ArgumentParser(description='''
BinHex (BASE + BASE.rdump + BASE.idump) into (BASE.hqx)
''')
parser.add_argument('base', metavar='BASE', nargs='+', help='file or directory')
args = parser.parse_args()

for base in args.base:
    if path.isdir(base):
        # Directory: BinHex every visible base file beneath it, in sorted order.
        for dirpath, dirlist, filelist in os.walk(base):
            # Assigning through dirlist[:] prunes hidden directories from the walk.
            dirlist[:] = sorted(d for d in dirlist if not d.startswith('.'))
            for f in sorted(filelist):
                if f.startswith('.'):
                    continue
                if is_valid_base(f):
                    do_file(path.join(dirpath, f))
    else:
        # Validate the argument itself.  The original tested an undefined
        # name (`hqx`) here and so raised NameError for every plain file.
        if not is_valid_base(base):
            raise SystemExit('Base names cannot have a .hqx/.idump/.rdump extension')
        do_file(base)

146
bin/rfx Executable file
View File

@ -0,0 +1,146 @@
#!/usr/bin/env python3
import macresources
import sys
import tempfile
from os import path
import re
import subprocess
import textwrap
USAGE = textwrap.dedent('''
usage: rfx command [arg[//type/id] ...]
Shell command wrapper for accessing resources inside a Rez textfile
Resources specified as filename.rdump//type/id are converted to tempfiles before
the command is run, and back to resources after the command returns.
examples:
rfx mv Doc.rdump//STR/0 Doc.rdump//STR/1
rfx cp App.rdump//PICT/2000 2000.pict
rfx rm System.rdump//vers/2
''').strip()

# No command given, or what looks like an option: print usage and exit.
if not sys.argv[1:] or sys.argv[1].startswith('-'):
    sys.exit(USAGE)

# Both caches are keyed by absolute path.
bytearray_cache = dict()  # editable working copy of each file
original_cache = dict()   # bytes as first read, for change detection
def get_cached_file(the_path):
    """Return the shared, mutable bytearray holding the contents of the_path.

    The file is read at most once; a file that does not exist starts out
    empty.  The bytes as first read are kept in original_cache.
    """
    # Different paths to the same file are unlikely, but just in case:
    key = path.abspath(the_path)

    cached = bytearray_cache.get(key)
    if cached is not None:
        return cached

    try:
        with open(key, 'rb') as f:
            on_disk = f.read()
    except FileNotFoundError:
        on_disk = bytes()

    original_cache[key] = on_disk
    working_copy = bytearray(on_disk)
    bytearray_cache[key] = working_copy
    return working_copy
def flush_cache():
    """Write back every cached file whose contents differ from what was first read."""
    for cached_path, working_copy in bytearray_cache.items():
        if original_cache[cached_path] == working_copy:
            continue
        with open(cached_path, 'wb') as f:
            f.write(working_copy)
def rez_resource_range(the_data, the_type, the_id):
    """Find the byte span of one resource inside Rez text.

    Returns (start, stop), running from the resource's header through the
    blank line after its closing brace.  Returns (0, 0) when the_data is
    empty or the resource is not found.
    """
    if not the_data:
        return (0, 0)

    # Hack... do a text search instead of Rezzing the whole file!
    # Render an empty resource and keep everything before the last ')'.
    rendered = macresources.make_rez_code([macresources.Resource(the_type, the_id)], ascii_clean=True)
    header = rendered.rpartition(b')')[0]
    header_len = len(header)

    start = the_data.find(header)
    while start != -1:
        after = start + header_len
        # An empty slice is "in" b'\n' too, so offset 0 counts as a line start.
        at_line_start = the_data[start-1:start] in b'\n'
        # The ID must end here, or a longer ID with the same prefix would match.
        if at_line_start and the_data[after:after+1] in (b',', b')'):
            stop = the_data.index(b'\n};\n\n', start) + 5
            return (start, stop)
        start = the_data.find(header, after)

    return (0, 0)
def rez_shrink_range(the_data, start, stop):
    """Narrow a whole-resource span down to the lines between its braces.

    the_data is bytes or bytearray holding Rez text; start and stop delimit
    one resource.  Returns (body_start, body_stop), where body_start is the
    offset just past the first newline at or after start, and body_stop is
    the offset of the last '}' at or before stop.

    Raises ValueError when there is no newline after start, or no '}'
    between the body start and stop.  The original scanned backwards one
    byte at a time and never terminated when the brace was missing.
    """
    body_start = the_data.index(b'\n', start) + 1
    # rfind's end bound is exclusive, so stop + 1 lets the byte at `stop`
    # itself be the closing brace.
    body_stop = the_data.rfind(b'}', body_start, stop + 1)
    if body_stop == -1:
        raise ValueError('no closing brace found in resource text')
    return (body_start, body_stop)
def rez_get_resource(the_path, the_type, the_id):
    """Return the data of one resource in the Rez file at the_path, or None if absent."""
    rez_text = get_cached_file(the_path)
    span = rez_resource_range(rez_text, the_type, the_id)
    if span == (0, 0):
        return None
    first, last = span
    # Parse only the text of this one resource.
    parsed = macresources.parse_rez_code(rez_text[first:last])
    return next(parsed).data
def rez_set_resource(the_path, the_type, the_id, the_data):
    """Store the_data as resource (the_type, the_id) in the cached Rez file.

    An existing resource keeps its header line and has only its body
    replaced; a new resource is appended to the end of the file.
    """
    rez_text = get_cached_file(the_path)
    replacement = macresources.make_rez_code([macresources.Resource(the_type, the_id, data=the_data)], ascii_clean=True)

    old_start, old_stop = rez_resource_range(rez_text, the_type, the_id)
    if (old_start, old_stop) == (0, 0):
        rez_text.extend(replacement)
        return

    # Swap body for body, leaving the existing header line untouched.
    old_start, old_stop = rez_shrink_range(rez_text, old_start, old_stop)
    new_start, new_stop = rez_shrink_range(replacement, 0, len(replacement))
    rez_text[old_start:old_stop] = replacement[new_start:new_stop]
def rez_delete_resource(the_path, the_type, the_id):
    """Remove resource (the_type, the_id) from the cached Rez file; no-op if absent."""
    rez_text = get_cached_file(the_path)
    first, last = rez_resource_range(rez_text, the_type, the_id)
    # A not-found span of (0, 0) replaces nothing with nothing.
    rez_text[first:last] = b''
with tempfile.TemporaryDirectory() as backup_tmp_dir:
    # PATH//TYPE/ID, written with the platform's own path separator.
    spec_pattern = re.compile(r'(.*[^/])//([^/]{1,4})/(-?\d+)$'.replace('/', re.escape(path.sep)))

    new_argv = [sys.argv[1]]
    to_retrieve = []

    for i, arg in enumerate(sys.argv[2:], 1):
        m = spec_pattern.match(arg)
        if m is None:
            # Ordinary argument: pass through unchanged.
            new_argv.append(arg)
            continue

        rez_path, type_text, id_text = m.groups()
        res_spec = (rez_path, type_text.encode('mac_roman').ljust(4)[:4], int(id_text))

        # Each resource argument gets a tempfile named after its position.
        tmp_file = path.join(backup_tmp_dir, str(i))
        to_retrieve.append((tmp_file, res_spec))

        # An absent resource gets no tempfile, though its path is still passed.
        res_data = rez_get_resource(*res_spec)
        if res_data is not None:
            with open(tmp_file, 'wb') as f:
                f.write(res_data)
        new_argv.append(tmp_file)

    result = subprocess.run(new_argv)

    # A tempfile that still exists becomes the resource's new contents;
    # one that is gone means the resource is deleted.
    for tmp_file, res_spec in to_retrieve:
        try:
            with open(tmp_file, 'rb') as f:
                rez_set_resource(*res_spec, f.read())
        except FileNotFoundError:
            rez_delete_resource(*res_spec)

flush_cache()
sys.exit(result.returncode)

68
bin/sortrez Executable file
View File

@ -0,0 +1,68 @@
#!/usr/bin/env python3
import argparse
import macresources
# Sort-weight lookup table with 256 entries, indexed by byte value.  sortkey()
# below maps every byte of a resource type through it.  Several byte values
# share a weight: for example ASCII 'A' (0x41) and 'a' (0x61) both map to
# 0x48, so the resulting order ignores ASCII letter case.
# NOTE(review): presumably mirrors the classic Mac OS string comparison order
# for Mac Roman text -- confirm the provenance of these values before editing.
MACROMAN_SORT = [
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x22, 0x23, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e,
0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46,
0x47, 0x48, 0x58, 0x5a, 0x5e, 0x60, 0x67, 0x69,
0x6b, 0x6d, 0x73, 0x75, 0x77, 0x79, 0x7b, 0x7f,
0x8d, 0x8f, 0x91, 0x93, 0x96, 0x98, 0x9f, 0xa1,
0xa3, 0xa5, 0xa8, 0xaa, 0xab, 0xac, 0xad, 0xae,
0x54, 0x48, 0x58, 0x5a, 0x5e, 0x60, 0x67, 0x69,
0x6b, 0x6d, 0x73, 0x75, 0x77, 0x79, 0x7b, 0x7f,
0x8d, 0x8f, 0x91, 0x93, 0x96, 0x98, 0x9f, 0xa1,
0xa3, 0xa5, 0xa8, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3,
0x4c, 0x50, 0x5c, 0x62, 0x7d, 0x81, 0x9a, 0x55,
0x4a, 0x56, 0x4c, 0x4e, 0x50, 0x5c, 0x62, 0x64,
0x65, 0x66, 0x6f, 0x70, 0x71, 0x72, 0x7d, 0x89,
0x8a, 0x8b, 0x81, 0x83, 0x9c, 0x9d, 0x9e, 0x9a,
0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0x95,
0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0x52, 0x85,
0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
0xc9, 0xca, 0xcb, 0x57, 0x8c, 0xcc, 0x52, 0x85,
0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0x26,
0x27, 0xd4, 0x20, 0x4a, 0x4e, 0x83, 0x87, 0x87,
0xd5, 0xd6, 0x24, 0x25, 0x2d, 0x2e, 0xd7, 0xd8,
0xa7, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
]
def sortkey(resource):
    """Return a sort key: the MACROMAN_SORT weight of each type byte, then the ID."""
    type_weights = tuple(MACROMAN_SORT[byte] for byte in resource.type)
    return type_weights + (resource.id,)
parser = argparse.ArgumentParser(description='''
Sort the resources in a Rez file (for diffing).
''')
parser.add_argument('src', nargs='*', help='Rez files')
args = parser.parse_args()

# Rewrite each file in place with its resources ordered by sortkey().
for srcfile in args.src:
    with open(srcfile, 'r+b') as f:
        raw = f.read()
        resources = list(macresources.parse_rez_code(raw))
        resources.sort(key=sortkey)

        # Render the output *before* truncating.  The original truncated
        # first, so an exception from make_rez_code left the file empty.
        sorted_code = macresources.make_rez_code(resources, ascii_clean=True)

        f.seek(0)
        f.truncate(0)
        f.write(sorted_code)

View File

@ -2,7 +2,7 @@ from setuptools import setup
setup(
name='macresources',
version='0.1dev',
version='1.0',
author='Elliot Nunn',
author_email='elliotnunn@me.com',
description='Library for working with legacy Macintosh resource forks',
@ -18,5 +18,5 @@ setup(
'Development Status :: 3 - Alpha',
],
packages=['macresources'],
scripts=['bin/SimpleRez', 'bin/SimpleDeRez', 'bin/Rget', 'bin/Rhqx'],
scripts=['bin/SimpleRez', 'bin/SimpleDeRez', 'bin/hexrez', 'bin/rezhex', 'bin/sortrez', 'bin/rfx'],
)