commit 56d16589e1aa73c00ab0c5e3de88253747093942
Author: Elliot Nunn
Date:   Sun Oct 21 18:43:26 2018 +0800

    Initial commit (works, not documented)

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fecb83d
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+`macresources` readme
diff --git a/bin/SimpleDeRez b/bin/SimpleDeRez
new file mode 100755
index 0000000..3a9fb51
--- /dev/null
+++ b/bin/SimpleDeRez
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+
+import argparse
+import macresources
+import sys
+
+parser = argparse.ArgumentParser(description='''
+    Decompile legacy Mac resources to the Rez language. Unless the
+    `-ascii' flag is used, the output will match Apple's deprecated
+    DeRez utility. No attempt is made to access the native Mac resource
+    fork, but this can be worked around by appending `/..namedfork/rsrc'
+    to the name of the input file.
+''')
+
+parser.add_argument('resourceFile', help='file to be decompiled')
+parser.add_argument('-ascii', action='store_true', help='guarantee ASCII output')
+parser.add_argument('-useDF', action='store_true', help='ignored: data fork is always used')
+
+args = parser.parse_args()
+
+with open(args.resourceFile, 'rb') as f:
+    resources = macresources.parse_file(f.read())
+
+try:
+    sys.stdout.buffer.write(macresources.make_rez_code(resources, ascii_clean=args.ascii))
+except BrokenPipeError:
+    pass # e.g. when the output is piped into head
diff --git a/bin/SimpleRez b/bin/SimpleRez
new file mode 100755
index 0000000..9ccc36b
--- /dev/null
+++ b/bin/SimpleRez
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+import argparse
+import macresources
+
+parser = argparse.ArgumentParser(description='''
+    Compile legacy Mac resources from a subset of the Rez language. Only
+    data blocks and $"" lines are supported. No attempt is made to
+    output to the native Mac resource fork, but this can be worked
+    around by appending `/..namedfork/rsrc' to the name of an existing
+    output file.
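+
+    A typical invocation might look like this (the file names are
+    purely illustrative):  SimpleRez MyThing.r -o MyThing.rsrc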
+''')
+
+parser.add_argument('rezFile', nargs='+', help='resource description files')
+parser.add_argument('-o', metavar='outputFile', default='Rez.out', help='default: Rez.out')
+parser.add_argument('-useDF', action='store_true', help='ignored: data fork is always used')
+
+args = parser.parse_args()
+
+resources = []
+for in_path in args.rezFile:
+    with open(in_path, 'rb') as f:
+        resources.extend(macresources.parse_rez_code(f.read()))
+
+with open(args.o, 'wb') as f:
+    f.write(macresources.make_file(resources))
diff --git a/macresources/__init__.py b/macresources/__init__.py
new file mode 100644
index 0000000..614819a
--- /dev/null
+++ b/macresources/__init__.py
@@ -0,0 +1 @@
+from .main import parse_rez_code, parse_file, make_rez_code, make_file, Resource, ResourceAttrs
diff --git a/macresources/main.py b/macresources/main.py
new file mode 100644
index 0000000..677c522
--- /dev/null
+++ b/macresources/main.py
@@ -0,0 +1,352 @@
+import collections
+import struct
+import enum
+
+
+MAP = bytearray(range(256))
+for i in range(32): MAP[i] = ord('.')
+MAP[127] = ord('.')
+MAP[9] = 0xC6 # tab -> greek delta
+MAP[10] = 0xC2 # lf -> logical not
+
+CLEANMAP = bytearray(MAP)
+for i in range(256):
+    if CLEANMAP[i] >= 128:
+        CLEANMAP[i] = ord('.')
+
+
+def _rez_escape(src, singlequote=False, ascii_clean=False):
+    if singlequote:
+        the_quote = b"'"
+    else:
+        the_quote = b'"'
+
+    chars = [the_quote]
+    for ch in src:
+        if 8 <= ch <= 13:
+            nu = b'\\' + b'btrvfn'[ch-8:ch-7]
+        elif ch < 32 or (ascii_clean and ch >= 128):
+            nu = b'\\0x%02X' % ch
+        elif ch == ord('\\'):
+            nu = b'\\\\' # two backslashes
+        elif ch == 127: # DEL character
+            nu = b'\\?'
+        elif ch == ord("'") and singlequote:
+            nu = b"\\'"
+        elif ch == ord('"') and not singlequote:
+            nu = b'\\"'
+        else:
+            nu = bytes([ch])
+        chars.append(nu)
+    chars.append(the_quote)
+
+    return b''.join(chars)
+
+
+def _rez_unescape(src):
+    the_quote = src[0:1]
+    src = src[1:]
+
+    backslash_dict = {
+        b'b': 8,
+        b't': 9,
+        b'r': 10,
+        b'v': 11,
+        b'f': 12,
+        b'n': 13,
+        b'?': 127,
+    }
+
+    chars = []
+    while not src.startswith(the_quote):
+        if src.startswith(b'\\'):
+            src = src[1:]
+            if src.startswith(b'0x'):
+                ch = int(src[2:4].decode('ascii'), 16)
+                src = src[4:]
+            else:
+                ch = backslash_dict.get(src[0:1], src[0])
+                src = src[1:]
+        else:
+            ch = src[0]
+            src = src[1:]
+        chars.append(ch)
+    src = src[1:] # cut off the final quote
+    chars = bytes(chars)
+    return chars, src # return (unescaped string, unparsed remainder)
+
+
+class ResourceAttrs(enum.IntFlag):
+    """Resource attributes byte."""
+
+    _sysref = 0x80 # "reference to system/local reference" (unclear significance)
+    sysheap = 0x40 # load into System heap instead of app heap
+    purgeable = 0x20 # Memory Mgr may remove from heap to free up memory
+    locked = 0x10 # Memory Mgr may not move the block to reduce fragmentation
+    protected = 0x08 # prevents app from changing resource
+    preload = 0x04 # causes resource to be read into heap as soon as file is opened
+    _changed = 0x02 # marks a resource that has been changed since loading from file (should not be seen on disk)
+    _compressed = 0x01 # "indicates that the resource data is compressed" (only documented in https://github.com/kreativekorp/ksfl/wiki/Macintosh-Resource-File-Format)
+
+    def _for_derez(self):
+        for possible in self.__class__:
+            if not possible.name.startswith('_') and self & possible:
+                yield possible.name
+
+
+class Resource:
+    """
+    A single Mac resource. A four-byte type, a numeric id and some
+    binary data are essential. Extra attributes and a name string are
+    optional.
+    """
+
+    ALL_ATTRIBS = [
+        'sysheap',
+        'purgeable',
+        'locked',
+        'protected',
+        'preload',
+    ]
+
+    def __init__(self, type, id, name=None, attribs=0, data=None):
+        self.type = type
+        self.id = id
+        self.data = data or bytearray()
+        self.name = name
+        self.attribs = ResourceAttrs(0)
+        self.attribs |= attribs
+
+    def __repr__(self):
+        datarep = repr(bytes(self.data[:4]))
+        if len(self.data) > len(datarep): datarep += '...%sb' % len(self.data)
+        return '%s(type=%r, id=%r, name=%r, attribs=%r, data=%s)' % (self.__class__.__name__, self.type, self.id, self.name, self.attribs, datarep)
+
+
+def parse_file(from_resfile):
+    """Get an iterator of Resource objects from a binary resource file."""
+
+    if not from_resfile: # empty resource forks are fine
+        return
+
+    data_offset, map_offset, data_len, map_len = struct.unpack_from('>4L', from_resfile)
+
+    typelist_offset, namelist_offset, numtypes = struct.unpack_from('>24xHHH', from_resfile, map_offset)
+    typelist_offset += map_offset # these offsets are stored relative to the start of the resource map
+    namelist_offset += map_offset
+
+    if numtypes == 0xFFFF: return
+    numtypes += 1
+
+    typelist = []
+    for i in range(numtypes):
+        rtype, rtypen, reflist_offset = struct.unpack_from('>4sHH', from_resfile, typelist_offset + 2 + 8*i)
+        rtypen += 1
+        reflist_offset += typelist_offset
+        typelist.append((rtype, rtypen, reflist_offset))
+
+    for rtype, rtypen, reflist_offset in typelist:
+        for i in range(rtypen):
+            rid, name_offset, mixedfield = struct.unpack_from('>hHL', from_resfile, reflist_offset + 12*i)
+            rdata_offset = mixedfield & 0xFFFFFF
+            rattribs = mixedfield >> 24
+
+            rdata_offset += data_offset
+
+            rdata_len, = struct.unpack_from('>L', from_resfile, rdata_offset)
+            rdata = from_resfile[rdata_offset+4:rdata_offset+4+rdata_len]
+
+            if name_offset == 0xFFFF:
+                name = None
+            else:
+                name_offset += namelist_offset
+                name_len = from_resfile[name_offset]
+                name = from_resfile[name_offset+1:name_offset+1+name_len].decode('mac_roman')
+
+            yield Resource(type=rtype, id=rid, name=name, attribs=rattribs, data=bytearray(rdata))
+
+
+def parse_rez_code(from_rezcode):
+    """Get an iterator of Resource objects from code in a subset of the Rez language (bytes or str)."""
+
+    try:
+        from_rezcode = from_rezcode.encode('mac_roman')
+    except AttributeError:
+        pass
+
+    from_rezcode = from_rezcode.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
+
+    for line in from_rezcode.split(b'\n'):
+        line = line.lstrip()
+        if line.startswith(b'data '):
+            try:
+                yield cur_resource
+            except NameError:
+                pass
+
+            _, _, line = line.partition(b' ')
+            rsrctype, line = _rez_unescape(line)
+            _, _, line = line.partition(b'(')
+
+            args = []
+            while True:
+                line = line.lstrip(b' ,\t')
+                if line.startswith(b')'): break
+                if line.startswith(b'"'):
+                    arg, line = _rez_unescape(line)
+                    args.append(('string', arg))
+                else:
+                    arg = bytearray()
+                    while line and line[0:1] not in b' ,\t)':
+                        arg.append(line[0])
+                        line = line[1:]
+                    args.append(('nonstring', arg))
+
+            rsrcname = None
+            rsrcattrs = ResourceAttrs(0)
+
+            for i, (argtype, arg) in enumerate(args):
+                if i == 0 and argtype == 'nonstring':
+                    rsrcid = int(arg)
+
+                elif i > 0:
+                    if argtype == 'string':
+                        rsrcname = arg.decode('mac_roman')
+                    else:
+                        rsrcattrs |= getattr(ResourceAttrs, arg.decode('ascii'))
+
+            cur_resource = Resource(type=rsrctype, id=rsrcid, name=rsrcname, attribs=rsrcattrs)
+
+        elif line.startswith(b'$"'):
+            hexdat = line[2:].partition(b'"')[0]
+            bindat = bytes.fromhex(hexdat.decode('ascii'))
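+            # each $"..." line simply appends its bytes to the resource currently being built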
+            cur_resource.data.extend(bindat)
+
+    try:
+        yield cur_resource
+    except NameError:
+        pass
+
+
+def make_file(from_iter):
+    """Pack an iterator of Resource objects into a binary resource file."""
+
+    class wrap:
+        def __init__(self, from_obj):
+            self.obj = from_obj
+
+    accum = bytearray(256) # defer the header; resource data traditionally starts at offset 256
+
+    data_offset = len(accum)
+    bigdict = collections.OrderedDict() # group resources by type, preserving the order types first appear
+    for r in from_iter:
+        wrapped = wrap(r)
+
+        wrapped.data_offset = len(accum)
+        accum.extend(struct.pack('>L', len(r.data)))
+        accum.extend(r.data)
+
+        if r.type not in bigdict:
+            bigdict[r.type] = []
+        bigdict[r.type].append(wrapped)
+
+    map_offset = len(accum)
+    accum.extend(bytes(28))
+
+    typelist_offset = len(accum)
+    accum.extend(bytes(2 + 8 * len(bigdict)))
+
+    reflist_offset = len(accum)
+    resource_count = sum(len(idlist) for idlist in bigdict.values())
+    accum.extend(bytes(12 * resource_count))
+
+    namelist_offset = len(accum)
+    for rtype, idlist in bigdict.items():
+        for res in idlist:
+            if res.obj.name is not None:
+                res.name_offset = len(accum)
+                as_bytes = res.obj.name.encode('mac_roman')
+                accum.append(len(as_bytes))
+                accum.extend(as_bytes)
+
+    # all right, now populate the reference lists...
+    counter = reflist_offset
+    for rtype, idlist in bigdict.items():
+        for res in idlist:
+            res.ref_offset = counter
+            if res.obj.name is None:
+                this_name_offset = 0xFFFF
+            else:
+                this_name_offset = res.name_offset - namelist_offset
+            attribs = int(res.obj.attribs)
+            this_data_offset = res.data_offset - data_offset
+            mixedfield = (attribs << 24) | this_data_offset
+            struct.pack_into('>hHL', accum, counter, res.obj.id, this_name_offset, mixedfield)
+
+            counter += 12
+
+    # all right, now populate the type list
+    struct.pack_into('>H', accum, typelist_offset, (len(bigdict) - 1) & 0xFFFF)
+    counter = typelist_offset + 2
+    for rtype, idlist in bigdict.items():
+        this_type = idlist[0].obj.type
+        ref_count = len(idlist)
+        firstref_offset = idlist[0].ref_offset - typelist_offset
+        struct.pack_into('>4sHH', accum, counter, this_type, ref_count - 1, firstref_offset)
+
+        counter += 8
+
+    # all right, now populate the map
+    struct.pack_into('>24xHH', accum, map_offset, typelist_offset - map_offset, namelist_offset - map_offset)
+
+    # all right, now populate the header
+    data_len = map_offset - data_offset
+    map_len = len(accum) - map_offset
+    struct.pack_into('>LLLL', accum, 0, data_offset, map_offset, data_len, map_len)
+
+    return bytes(accum)
+
+
+def make_rez_code(from_iter, ascii_clean=False):
+    """Express an iterator of Resource objects as Rez code (bytes).
+
+    This will match the output of the deprecated DeRez utility, unless the
+    `ascii_clean` argument is used to get a 7-bit-only code block.
+    """
+
+    from_iter = list(from_iter)
+    from_iter.sort(key=lambda res: res.type)
+
+    if ascii_clean:
+        themap = CLEANMAP
+    else:
+        themap = MAP
+
+    lines = []
+    for resource in from_iter:
+        args = []
+        args.append(str(resource.id).encode('ascii'))
+        if resource.name: args.append(_rez_escape(resource.name.encode('mac_roman'), singlequote=False, ascii_clean=ascii_clean))
+        args.extend(x.encode('ascii') for x in resource.attribs._for_derez())
+        args = b', '.join(args)
+
+        fourcc = _rez_escape(resource.type, singlequote=True, ascii_clean=ascii_clean)
+
+        lines.append(b'data %s (%s) {' % (fourcc, args))
+
+        step = 16
+        for ofs in range(0, len(resource.data), step):
+            linedat = resource.data[ofs:ofs+step]
+            line = ' '.join(linedat[i:i+2].hex() for i in range(0, len(linedat), 2)).encode('ascii')
+            line = line.upper()
+            line = b'\t$"%s"' % line
+            prevstr = bytes(themap[ch] for ch in linedat).replace(b'*/', b'*.')
+            line = line.ljust(55)
+            line += b'/* %s */' % prevstr
+            lines.append(line)
+
+        lines.append(b'};')
+        lines.append(b'')
+    if lines: lines.append(b'') # so the joined output ends with a trailing newline, as POSIX text files should
+
+    return b'\n'.join(lines)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..17436a0
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,22 @@
+from setuptools import setup
+
+setup(
+    name='macresources',
+    version='0.1dev',
+    author='Elliot Nunn',
+    author_email='elliotnunn@me.com',
+    description='Library for working with legacy Macintosh resource forks',
+    long_description=open('README.md').read(),
+    long_description_content_type='text/markdown',
+    license='MIT',
+    url='https://github.com/elliotnunn/macresources',
+    classifiers=[
+        'Programming Language :: Python :: 3 :: Only',
+        'Operating System :: OS Independent',
+        'License :: OSI Approved :: MIT License',
+        'Topic :: System :: Filesystems',
+        'Development Status :: 3 - Alpha',
+    ],
+    packages=['macresources'],
+    scripts=['bin/SimpleRez', 'bin/SimpleDeRez'],
+)
diff --git a/test_all.py b/test_all.py
new file mode 100644
index 0000000..b63986f
--- /dev/null
+++ b/test_all.py
@@ -0,0 +1,42 @@
+from macresources import *
+
+RF = (b'\x00\x00\x01\x00\x00\x00\x01\x08\x00\x00\x00\x08\x00\x00\x00;' + bytes(240) # header, then padding up to the data section
+      + b'\x00\x00\x00\x04\x124Vx' + bytes(24) # one 4-byte resource, then the reserved part of the map
+      + b'\x00\x1c\x002\x00\x00elmo\x00\x00\x00\n\x00{\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08lamename')
+RZF = b"""
+data 'elmo' (123, "lamename") {
+	$"12345678"
+};
+""".strip()
+
+def test_enum():
+    r = ResourceAttrs
+    r1 = ResourceAttrs.sysheap
+    r2 = ResourceAttrs.purgeable
+    assert int(r1 | r2) == 0x60
+    assert bool((r1 | r2) & r1)
+
+def test_parse_file():
+    l = list(parse_file(RF))
+
+    assert l[0].type == b'elmo'
+    assert l[0].name == 'lamename'
+    assert l[0].data == b'\x12\x34\x56\x78'
+
+def test_parse_rez_code():
+    l = list(parse_rez_code(RZF))
+
+    assert l[0].type == b'elmo'
+    assert l[0].name == 'lamename'
+    assert l[0].data == b'\x12\x34\x56\x78'
+
+def test_make_file():
+    l = list(parse_file(RF))
+
+    fork = make_file(l)
+
+    assert b'elmo' in fork
+
+def test_make_rez_code():
+    l = list(parse_file(RF))
+
+    rez = make_rez_code(l)
+    assert b'1234 5678' in rez
diff --git a/upload.sh b/upload.sh
new file mode 100755
index 0000000..a918fc5
--- /dev/null
+++ b/upload.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+rm -rf dist
+python3 setup.py sdist bdist_wheel
+twine upload dist/*