diff --git a/README.rst b/README.rst index c41e609..ab475c0 100644 --- a/README.rst +++ b/README.rst @@ -61,6 +61,33 @@ Automatic selection of data/resource fork >>> resourcerf._stream <_io.BufferedReader name='/Users/Shared/Test.textClipping/..namedfork/rsrc'> +Command-line interface +`````````````````````` + +.. code-block:: sh + $ python3 -m rsrcfork /Users/Shared/Test.textClipping + No header system data + No header application data + No file attributes + 4 resource types: + 'utxt': 1 resources: + (256), unnamed, no attributes, 34 bytes + + 'utf8': 1 resources: + (256), unnamed, no attributes, 17 bytes + + 'TEXT': 1 resources: + (256), unnamed, no attributes, 17 bytes + + 'drag': 1 resources: + (128), unnamed, no attributes, 64 bytes + + $ python3 -m rsrcfork /Users/Shared/Test.textClipping "'TEXT' (256)" + Resource 'TEXT' (256), unnamed, no attributes, 17 bytes: + 00000000 48 65 72 65 20 69 73 20 73 6f 6d 65 20 74 65 78 |Here is some tex| + 00000010 74 |t | + + Limitations ----------- @@ -107,3 +134,16 @@ __ https://archive.org/web/ __ http://archive.is/ __ https://archive.fo/ + +Changelog +--------- + +Version 1.1.0 +````````````` + +* Added a command-line interface - run ``python3 -m rsrcfork --help`` for more info + +Version 1.0.0 +````````````` + +* Initial version diff --git a/rsrcfork.py b/rsrcfork.py index 072b64e..498961d 100644 --- a/rsrcfork.py +++ b/rsrcfork.py @@ -11,6 +11,7 @@ import enum import io import os import struct +import sys import typing __all__ = [ @@ -21,6 +22,11 @@ __all__ = [ "open", ] +__version__ = "1.1.0" + +# Translation table to replace ASCII non-printable characters with periods. +_TRANSLATE_NONPRINTABLES = {k: "." for k in [*range(0x20), 0x7f]} + # The formats of all following structures is as described in the Inside Macintosh book (see module docstring). # Signedness and byte order of the integers is never stated explicitly in IM. 
# All integers are big-endian, as this is the native byte order of the 68k and PowerPC processors used in old Macs. @@ -92,7 +98,24 @@ class ResourceAttrs(enum.Flag): resProtected = 8 # "Protected/not protected", "Protected?" resPreload = 4 # "Read in at OpenResource?", "Load in on OpenResFile?" resChanged = 2 # "Existing resource changed since last update", "Resource changed?" - _UNKNWON_1 = 1 + resCompressed = 1 # "indicates that the resource data is compressed" (only documented in https://github.com/kreativekorp/ksfl/wiki/Macintosh-Resource-File-Format) + +_REZ_ATTR_NAMES = { + ResourceAttrs.resSysRef: None, # "Illegal or reserved attribute" + ResourceAttrs.resSysHeap: "sysheap", + ResourceAttrs.resPurgeable: "purgeable", + ResourceAttrs.resLocked: "locked", + ResourceAttrs.resProtected: "protected", + ResourceAttrs.resPreload: "preload", + ResourceAttrs.resChanged: None, # "Illegal or reserved attribute" + ResourceAttrs.resCompressed: None, # "Extended Header resource attribute" +} + +F = typing.TypeVar("F", bound=enum.Flag, covariant=True) +def _decompose_flags(value: F) -> typing.Sequence[F]: + """Decompose an enum.Flag instance into separate enum constants.""" + + return [bit for bit in type(value) if bit in value] class Resource(object): """A single resource from a resource file.""" @@ -179,7 +202,7 @@ class ResourceFile(collections.abc.Mapping): return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing {len(self)} resources with IDs: {list(self)}>" @classmethod - def open(cls, filename: typing.Union[str, bytes, os.PathLike], *, rsrcfork: typing.Optional[bool]=None) -> "ResourceFile": + def open(cls, filename: typing.Union[str, bytes, os.PathLike], *, rsrcfork: typing.Optional[bool]=None, **kwargs) -> "ResourceFile": """Open the file at the given path as a ResourceFile. If rsrcfork is not None, it is treated as boolean and controls whether the data or resource fork of the file should be opened. 
(On systems other than macOS, opening resource forks will not work of course, since they don't exist.) @@ -211,7 +234,7 @@ class ResourceFile(collections.abc.Mapping): f = io.open(filename, "rb") # Use the selected fork to build a ResourceFile. - return cls(f) + return cls(f, **kwargs) def __init__(self, stream: typing.io.BinaryIO, *, allow_seek: typing.Optional[bool]=None, close: bool=True): """Create a ResourceFile wrapping the given byte stream. @@ -284,13 +307,6 @@ class ResourceFile(collections.abc.Mapping): self.header_application_data, ) = self._stream_unpack(STRUCT_RESOURCE_HEADER) - if __debug__: - if self.header_system_data != bytes(len(self.header_system_data)): - print("Header system data is not all null bytes. This may be of interest.") - - if self.header_application_data != bytes(len(self.header_application_data)): - print("Header application data is not all null bytes. This may be of interest.") - assert self._tell() == self.data_offset def _read_all_resource_data(self): @@ -432,3 +448,324 @@ class ResourceFile(collections.abc.Mapping): return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>" open = ResourceFile.open + +# The following internal functions are only used by the main function. + +def _bytes_unescape(string: str) -> bytes: + """Convert a string containing ASCII characters and hex escapes to a bytestring. + + (We implement our own unescaping mechanism here to not depend on any of Python's string/bytes escape syntax.) 
+ """ + + out = [] + it = iter(string) + n = 0 + for char in it: + if char == "\\": + try: + esc = next(it) + if esc in "\\\'\"": + out.append(ord(esc)) # bytes() only accepts ints, so store the code point rather than the str + elif esc == "x": + x1, x2 = next(it), next(it) + out.append(int(x1+x2, 16)) + else: + raise ValueError(f"Unknown escape character: {esc}") + except StopIteration: + raise ValueError("End of string in escape sequence") + else: + out.append(ord(char)) + n += 1 + + return bytes(out) + +def _bytes_escape(bs: bytes, *, quote: typing.Optional[str]=None) -> str: + """Convert a bytestring to a string, with non-printable and non-ASCII bytes hex-escaped. + + (We implement our own escaping mechanism here to not depend on Python's str or bytes repr.) + """ + + out = [] + for byte in bs: + c = chr(byte) + if c in {quote, "\\"}: + out.append(f"\\{c}") + elif 0x20 <= byte < 0x7f: + out.append(c) + else: + out.append(f"\\x{byte:02x}") + + return "".join(out) + +def _filter_resources(rf: ResourceFile, filters: typing.Sequence[str]) -> typing.Sequence[Resource]: + matching = collections.OrderedDict() + + for filter in filters: + if len(filter) == 4: + try: + resources = rf[filter.encode("ascii")] + except KeyError: + continue + + for res in resources.values(): + matching[res.resource_type, res.resource_id] = res + elif filter[0] == filter[-1] == "'": + try: + resources = rf[_bytes_unescape(filter[1:-1])] + except KeyError: + continue + + for res in resources.values(): + matching[res.resource_type, res.resource_id] = res + else: + pos = filter.find("'", 1) + if pos == -1: + raise ValueError(f"Invalid filter {filter!r}: Resource type must be single-quoted") + elif filter[pos + 1] != " ": + raise ValueError(f"Invalid filter {filter!r}: Resource type and ID must be separated by a space") + + restype, resid = filter[:pos + 1], filter[pos + 2:] + + if not restype[0] == restype[-1] == "'": + raise ValueError( + f"Invalid filter {filter!r}: Resource type is not a single-quoted type identifier: {restype!r}") + restype = _bytes_unescape(restype[1:-1]) + + if len(restype) != 4: + 
raise ValueError( + f"Invalid filter {filter!r}: Type identifier must be 4 bytes after replacing escapes, got {len(restype)} bytes: {restype!r}") + + if resid[0] != "(" or resid[-1] != ")": + raise ValueError(f"Invalid filter {filter!r}: Resource ID must be parenthesized") + resid = resid[1:-1] + + try: + resources = rf[restype] + except KeyError: + continue + + if resid[0] == resid[-1] == '"': + name = _bytes_unescape(resid[1:-1]) + + for res in resources.values(): + if res.name == name: + matching[res.resource_type, res.resource_id] = res + break + elif ":" in resid: + if resid.count(":") > 1: + raise ValueError(f"Invalid filter {filter!r}: Too many colons in ID range expression: {resid!r}") + start, end = resid.split(":") + start, end = int(start), int(end) + + for res in resources.values(): + if start <= res.resource_id <= end: + matching[res.resource_type, res.resource_id] = res + else: + resid = int(resid) + try: + res = resources[resid] + except KeyError: + continue + matching[res.resource_type, res.resource_id] = res + + return list(matching.values()) + +def _hexdump(data: bytes): + for i in range(0, len(data), 16): + line = data[i:i + 16] + line_hex = " ".join(f"{byte:02x}" for byte in line) + line_char = line.decode("MacRoman").translate(_TRANSLATE_NONPRINTABLES) + print(f"{i:08x} {line_hex:<{16*2+15}} |{line_char:<16}|") + +def _raw_hexdump(data: bytes): + for i in range(0, len(data), 16): + print(" ".join(f"{byte:02x}" for byte in data[i:i + 16])) + +def main(args: typing.Sequence[str]): + import argparse + import textwrap + + ap = argparse.ArgumentParser( + add_help=False, + fromfile_prefix_chars="@", + formatter_class=argparse.RawDescriptionHelpFormatter, description=textwrap.dedent(""" + Read and display resources from a file's resource or data fork. 
+ + When specifying resource filters, each one may be of one of the + following forms: + + An unquoted type name (without escapes): TYPE + A quoted type name: 'TYPE' + A quoted type name and an ID: 'TYPE' (42) + A quoted type name and an ID range: 'TYPE' (24:42) + A quoted type name and a resource name: 'TYPE' ("foobar") + + When multiple filters are specified, all resources matching any of them + are displayed. + """), + ) + + ap.add_argument("--help", action="help", help="Display this help message and exit") + ap.add_argument("--version", action="version", version=__version__, help="Display version information and exit") + ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview") + ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)") + ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)") + ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else") + ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else") + ap.add_argument("--read-mode", choices=["auto", "stream", "seek"], default="auto", help="Whether to read the data sequentially (stream) or on-demand (seek), or auto to use seeking when possible (default: %(default)s)") + + ap.add_argument("file", help="The file to read, or - for stdin") + ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources") + + ns = ap.parse_args(args) + + ns.fork = {"auto": None, "data": False, "rsrc": 
True}[ns.fork] + ns.read_mode = {"auto": None, "stream": False, "seek": True}[ns.read_mode] + + if ns.file == "-": + if ns.fork is not None: + print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr) + sys.exit(1) + + rf = ResourceFile(sys.stdin.buffer, allow_seek=ns.read_mode) + else: + rf = ResourceFile.open(ns.file, rsrcfork=ns.fork, allow_seek=ns.read_mode) + + with rf: + if ns.header_system or ns.header_application: + if ns.header_system: + data = rf.header_system_data + else: + data = rf.header_application_data + + if ns.format == "dump": + _hexdump(data) + elif ns.format == "hex": + _raw_hexdump(data) + elif ns.format == "raw": + sys.stdout.buffer.write(data) + elif ns.format == "derez": + print("Cannot output file header data in derez format", file=sys.stderr) + sys.exit(1) + else: + raise ValueError(f"Unhandled output format: {ns.format}") + elif ns.filter or ns.all: + if ns.filter: + resources = _filter_resources(rf, ns.filter) + else: + resources = [] + for reses in rf.values(): + resources.extend(reses.values()) + + if ns.format in ("hex", "raw") and len(resources) != 1: + print(f"Format {ns.format} only supports exactly one resource, but found {len(resources)}", file=sys.stderr) + sys.exit(1) + + for res in resources: + if ns.format == "dump": + # Human-readable info and hex dump + + if res.name is None: + name = "unnamed" + else: + name = _bytes_escape(res.name, quote='"') + name = f'name "{name}"' + + attrs = _decompose_flags(res.attributes) + if attrs: + attrdesc = "attributes: " + " | ".join(attr.name for attr in attrs) + else: + attrdesc = "no attributes" + + restype = _bytes_escape(res.resource_type, quote="'") + print(f"Resource '{restype}' ({res.resource_id}), {name}, {attrdesc}, {len(res.data)} bytes:") + _hexdump(res.data) + print() + elif ns.format == "hex": + # Data only as hex + + _raw_hexdump(res.data) + elif ns.format == "raw": + # Data only as raw bytes + + sys.stdout.buffer.write(res.data) + elif ns.format == 
"derez": + # Like DeRez with no resource definitions + + attrs = [_REZ_ATTR_NAMES[attr] for attr in _decompose_flags(res.attributes)] + if None in attrs: + attrs[:] = [f"${res.attributes.value:02X}"] + + parts = [str(res.resource_id)] + + if res.name is not None: + name = _bytes_escape(res.name, quote='"') + parts.append(f'"{name}"') + + parts += attrs + + restype = _bytes_escape(res.resource_type, quote="'") + print(f"data '{restype}' ({', '.join(parts)}) {{") + + for i in range(0, len(res.data), 16): + # Two-byte grouping is really annoying to implement. + groups = [] + for j in range(0, 16, 2): + if i+j >= len(res.data): + break + elif i+j+1 >= len(res.data): + groups.append(f"{res.data[i+j]:02X}") + else: + groups.append(f"{res.data[i+j]:02X}{res.data[i+j+1]:02X}") + + s = f'$"{" ".join(groups)}"' + comment = "/* " + res.data[i:i + 16].decode("MacRoman").translate(_TRANSLATE_NONPRINTABLES) + " */" + print(f"\t{s:<54s}{comment}") + + print("};") + print() + else: + raise ValueError(f"Unhandled output format: {ns.format}") + else: + if rf.header_system_data != bytes(len(rf.header_system_data)): + print("Header system data:") + _hexdump(rf.header_system_data) + else: + print("No header system data") + + if rf.header_application_data != bytes(len(rf.header_application_data)): + print("Header application data:") + _hexdump(rf.header_application_data) + else: + print("No header application data") + + attrs = _decompose_flags(rf.file_attributes) + if attrs: + print("File attributes: " + " | ".join(attr.name for attr in attrs)) + else: + print("No file attributes") + + print(f"{len(rf)} resource types:") + for typecode, resources in rf.items(): + restype = _bytes_escape(typecode, quote="'") + print(f"'{restype}': {len(resources)} resources:") + for resid, res in rf[typecode].items(): + if res.name is None: + name = "unnamed" + else: + name = _bytes_escape(res.name, quote='"') + name = f'name "{name}"' + + attrs = _decompose_flags(res.attributes) + if attrs: + attrdesc 
= " | ".join(attr.name for attr in attrs) + else: + attrdesc = "no attributes" + + print(f"({resid}), {name}, {attrdesc}, {len(res.data)} bytes") + print() + + sys.exit(0) + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/setup.py b/setup.py index cbdd062..0baf3ea 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ with open("README.rst", "r", encoding="utf-8") as f: setuptools.setup( name="rsrcfork", - version="1.0.0", + version="1.1.0", description="A pure Python library for reading old Macintosh resource manager data", long_description=long_description, url="https://github.com/dgelessus/python-rsrcfork", @@ -17,6 +17,7 @@ setuptools.setup( "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Utilities", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python",