2 Commits

Author SHA1 Message Date
d6de63e17b Add a command-line interface for reading resources 2016-12-26 23:44:49 +01:00
e741fb063f Fix nested bullet list in README.rst 2016-12-24 21:28:17 +01:00
3 changed files with 408 additions and 13 deletions

View File

@ -8,6 +8,23 @@ Requirements
Python 3.6 or later. No other libraries are required.
Installation
------------
``rsrcfork`` is available `on PyPI`__ and can be installed using ``pip``:
.. code-block:: sh
python3 -m pip install rsrcfork
Alternatively you can run the ``setup.py`` script manually:
.. code-block:: sh
python3 setup.py install
__ https://pypi.python.org/pypi/rsrcfork
Features
--------
@ -44,6 +61,33 @@ Automatic selection of data/resource fork
>>> resourcerf._stream
<_io.BufferedReader name='/Users/Shared/Test.textClipping/..namedfork/rsrc'>
Command-line interface
``````````````````````
.. code-block:: sh
$ python3 -m rsrcfork /Users/Shared/Test.textClipping
No header system data
No header application data
No file attributes
4 resource types:
'utxt': 1 resources:
(256), unnamed, no attributes, 34 bytes
'utf8': 1 resources:
(256), unnamed, no attributes, 17 bytes
'TEXT': 1 resources:
(256), unnamed, no attributes, 17 bytes
'drag': 1 resources:
(128), unnamed, no attributes, 64 bytes
$ python3 -m rsrcfork /Users/Shared/Test.textClipping "'TEXT' (256)"
Resource 'TEXT' (256), unnamed, no attributes, 17 bytes:
00000000 48 65 72 65 20 69 73 20 73 6f 6d 65 20 74 65 78 |Here is some tex|
00000010 74 |t |
Limitations
-----------
@ -59,8 +103,8 @@ Further info on resource files
Sources of information about the resource fork data format, and the structure of common resource types:
* Inside Macintosh, Volume I, Chapter 5 "The Resource Manager". This book can probably be obtained in physical form somewhere, but the relevant chapter/book is also available in a few places online:
* `Apple's legacy documentation`__
* pagetable.com, a site that happened to have a copy of the book: `info blog post`__, `direct download`__
- `Apple's legacy documentation`__
- pagetable.com, a site that happened to have a copy of the book: `info blog post`__, `direct download`__
* `Wikipedia`__, of course
* The `Resource Fork`__ article on "Just Solve the File Format Problem" (despite the title, this is a decent site and not clickbait)
* The `KSFL`__ library (and `its wiki`__), written in Java, which supports reading and writing resource files
@ -90,3 +134,16 @@ __ https://archive.org/web/
__ http://archive.is/
__ https://archive.fo/
Changelog
---------
Version 1.1.0
`````````````
* Added a command-line interface - run ``python3 -m rsrcfork --help`` for more info
Version 1.0.0
`````````````
* Initial version

View File

@ -11,6 +11,7 @@ import enum
import io
import os
import struct
import sys
import typing
__all__ = [
@ -21,6 +22,11 @@ __all__ = [
"open",
]
__version__ = "1.1.0"
# Translation table to replace ASCII non-printable characters with periods.
# The keys are code points, which is the form str.translate expects: the control characters 0x00-0x1f and DEL (0x7f).
# It is applied to text decoded from resource data before that text is printed next to a hex dump.
_TRANSLATE_NONPRINTABLES = {k: "." for k in [*range(0x20), 0x7f]}
# The formats of all following structures are as described in the Inside Macintosh book (see module docstring).
# Signedness and byte order of the integers are never stated explicitly in IM.
# All integers are big-endian, as this is the native byte order of the 68k and PowerPC processors used in old Macs.
@ -92,7 +98,24 @@ class ResourceAttrs(enum.Flag):
resProtected = 8 # "Protected/not protected", "Protected?"
resPreload = 4 # "Read in at OpenResource?", "Load in on OpenResFile?"
resChanged = 2 # "Existing resource changed since last update", "Resource changed?"
_UNKNWON_1 = 1
resCompressed = 1 # "indicates that the resource data is compressed" (only documented in https://github.com/kreativekorp/ksfl/wiki/Macintosh-Resource-File-Format)
# Names under which the individual resource attribute flags are written in derez-format output.
# A value of None marks a flag that has no name in this table. When a resource has any such flag set,
# the derez output in main() prints the complete attribute value as a hex number instead of a list of names.
_REZ_ATTR_NAMES = {
ResourceAttrs.resSysRef: None, # "Illegal or reserved attribute"
ResourceAttrs.resSysHeap: "sysheap",
ResourceAttrs.resPurgeable: "purgeable",
ResourceAttrs.resLocked: "locked",
ResourceAttrs.resProtected: "protected",
ResourceAttrs.resPreload: "preload",
ResourceAttrs.resChanged: None, # "Illegal or reserved attribute"
ResourceAttrs.resCompressed: None, # "Extended Header resource attribute"
}
F = typing.TypeVar("F", bound=enum.Flag, covariant=True)
def _decompose_flags(value: F) -> typing.Sequence[F]:
"""Decompose an enum.Flags instance into separate enum constants."""
return [bit for bit in type(value) if bit in value]
class Resource(object):
"""A single resource from a resource file."""
@ -179,7 +202,7 @@ class ResourceFile(collections.abc.Mapping):
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing {len(self)} resources with IDs: {list(self)}>"
@classmethod
def open(cls, filename: typing.Union[str, bytes, os.PathLike], *, rsrcfork: typing.Optional[bool]=None) -> "ResourceFile":
def open(cls, filename: typing.Union[str, bytes, os.PathLike], *, rsrcfork: typing.Optional[bool]=None, **kwargs) -> "ResourceFile":
"""Open the file at the given path as a ResourceFile.
If rsrcfork is not None, it is treated as boolean and controls whether the data or resource fork of the file should be opened. (On systems other than macOS, opening resource forks will not work of course, since they don't exist.)
@ -211,7 +234,7 @@ class ResourceFile(collections.abc.Mapping):
f = io.open(filename, "rb")
# Use the selected fork to build a ResourceFile.
return cls(f)
return cls(f, **kwargs)
def __init__(self, stream: typing.io.BinaryIO, *, allow_seek: typing.Optional[bool]=None, close: bool=True):
"""Create a ResourceFile wrapping the given byte stream.
@ -284,13 +307,6 @@ class ResourceFile(collections.abc.Mapping):
self.header_application_data,
) = self._stream_unpack(STRUCT_RESOURCE_HEADER)
if __debug__:
if self.header_system_data != bytes(len(self.header_system_data)):
print("Header system data is not all null bytes. This may be of interest.")
if self.header_application_data != bytes(len(self.header_application_data)):
print("Header application data is not all null bytes. This may be of interest.")
assert self._tell() == self.data_offset
def _read_all_resource_data(self):
@ -432,3 +448,324 @@ class ResourceFile(collections.abc.Mapping):
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>"
open = ResourceFile.open
# The following internal functions are only used by the main function.
def _bytes_unescape(string: str) -> bytes:
"""Convert a string containing ASCII characters and hex escapes to a bytestring.
(We implement our own unescaping mechanism here to not depend on any of Python's string/bytes escape syntax.)
"""
out = []
it = iter(string)
n = 0
for char in it:
if char == "\\":
try:
esc = next(it)
if esc in "\\\'\"":
out.append(esc)
elif esc == "x":
x1, x2 = next(it), next(it)
out.append(int(x1+x2, 16))
else:
raise ValueError(f"Unknown escape character: {esc}")
except StopIteration:
raise ValueError("End of string in escape sequence")
else:
out.append(ord(char))
n += 1
return bytes(out)
def _bytes_escape(bs: bytes, *, quote: str=None) -> str:
"""Convert a bytestring to a string, with non-ASCII bytes hex-escaped.
(We implement our own escaping mechanism here to not depend on Python's str or bytes repr.)
"""
out = []
for byte in bs:
c = chr(byte)
if c in {quote, "\\"}:
out.append(f"\\{c}")
elif 0x20 <= byte < 0x7f:
out.append(c)
else:
out.append(f"\\x{byte:02x}")
return "".join(out)
def _filter_resources(rf: ResourceFile, filters: typing.Sequence[str]) -> typing.Sequence[Resource]:
matching = collections.OrderedDict()
for filter in filters:
if len(filter) == 4:
try:
resources = rf[filter.encode("ascii")]
except KeyError:
continue
for res in resources.values():
matching[res.resource_type, res.resource_id] = res
elif filter[0] == filter[-1] == "'":
try:
resources = rf[_bytes_unescape(filter[1:-1])]
except KeyError:
continue
for res in resources.values():
matching[res.resource_type, res.resource_id] = res
else:
pos = filter.find("'", 1)
if pos == -1:
raise ValueError(f"Invalid filter {filter!r}: Resource type must be single-quoted")
elif filter[pos + 1] != " ":
raise ValueError(f"Invalid filter {filter!r}: Resource type and ID must be separated by a space")
restype, resid = filter[:pos + 1], filter[pos + 2:]
if not restype[0] == restype[-1] == "'":
raise ValueError(
f"Invalid filter {filter!r}: Resource type is not a single-quoted type identifier: {restype!r}")
restype = _bytes_unescape(restype[1:-1])
if len(restype) != 4:
raise ValueError(
f"Invalid filter {filter!r}: Type identifier must be 4 bytes after replacing escapes, got {len(restype)} bytes: {restype!r}")
if resid[0] != "(" or resid[-1] != ")":
raise ValueError(f"Invalid filter {filter!r}: Resource ID must be parenthesized")
resid = resid[1:-1]
try:
resources = rf[restype]
except KeyError:
continue
if resid[0] == resid[-1] == '"':
name = _bytes_unescape(resid[1:-1])
for res in resources.values():
if res.name == name:
matching[res.resource_type, res.resource_id] = res
break
elif ":" in resid:
if resid.count(":") > 1:
raise ValueError(f"Invalid filter {filter!r}: Too many colons in ID range expression: {resid!r}")
start, end = resid.split(":")
start, end = int(start), int(end)
for res in resources.values():
if start <= res.resource_id <= end:
matching[res.resource_type, res.resource_id] = res
else:
resid = int(resid)
try:
res = resources[resid]
except KeyError:
continue
matching[res.resource_type, res.resource_id] = res
return list(matching.values())
def _hexdump(data: bytes):
for i in range(0, len(data), 16):
line = data[i:i + 16]
line_hex = " ".join(f"{byte:02x}" for byte in line)
line_char = line.decode("MacRoman").translate(_TRANSLATE_NONPRINTABLES)
print(f"{i:08x} {line_hex:<{16*2+15}} |{line_char:<16}|")
def _raw_hexdump(data: bytes):
for i in range(0, len(data), 16):
print(" ".join(f"{byte:02x}" for byte in data[i:i + 16]))
def _describe_resource_name(res: "Resource") -> str:
    """Return the name part of a human-readable resource description."""

    if res.name is None:
        return "unnamed"
    escaped = _bytes_escape(res.name, quote='"')
    return f'name "{escaped}"'


def _join_flag_names(flags: enum.Flag) -> str:
    """Return the names of all flags contained in the given value, joined with " | " (an empty string if there are none)."""

    return " | ".join(flag.name for flag in _decompose_flags(flags))


def _show_header_data(data: bytes, output_format: str):
    """Output one of the file header data fields in the given format. The derez format is not supported here and exits with status 1."""

    if output_format == "dump":
        _hexdump(data)
    elif output_format == "hex":
        _raw_hexdump(data)
    elif output_format == "raw":
        sys.stdout.buffer.write(data)
    elif output_format == "derez":
        print("Cannot output file header data in derez format", file=sys.stderr)
        sys.exit(1)
    else:
        raise ValueError(f"Unhandled output format: {output_format}")


def _show_resource_dump(res: "Resource"):
    """Print a one-line description of the resource, followed by a hex dump of its data and an empty line."""

    names = _join_flag_names(res.attributes)
    attrdesc = ("attributes: " + names) if names else "no attributes"
    restype = _bytes_escape(res.resource_type, quote="'")
    print(f"Resource '{restype}' ({res.resource_id}), {_describe_resource_name(res)}, {attrdesc}, {len(res.data)} bytes:")
    _hexdump(res.data)
    print()


def _show_resource_derez(res: "Resource"):
    """Print the resource like DeRez does when it has no resource definitions, followed by an empty line."""

    attrs = [_REZ_ATTR_NAMES[attr] for attr in _decompose_flags(res.attributes)]
    if None in attrs:
        # At least one flag has no name, so the whole attribute value is written as a hex number instead.
        attrs[:] = [f"${res.attributes.value:02X}"]

    parts = [str(res.resource_id)]
    if res.name is not None:
        name = _bytes_escape(res.name, quote='"')
        parts.append(f'"{name}"')
    parts += attrs

    restype = _bytes_escape(res.resource_type, quote="'")
    print(f"data '{restype}' ({', '.join(parts)}) {{")

    for i in range(0, len(res.data), 16):
        row = res.data[i:i + 16]
        # The bytes are written in groups of two; the last group of the data may consist of a single byte.
        groups = [row[j:j + 2].hex().upper() for j in range(0, len(row), 2)]
        s = f'$"{" ".join(groups)}"'
        comment = "/* " + row.decode("MacRoman").translate(_TRANSLATE_NONPRINTABLES) + " */"
        print(f"\t{s:<54s}{comment}")

    print("};")
    print()


def _show_overview(rf: "ResourceFile"):
    """Print the header data, the file attributes, and a one-line description of every resource in the file."""

    for label, data in (("system", rf.header_system_data), ("application", rf.header_application_data)):
        # Header data that consists only of null bytes is treated as not present.
        if any(data):
            print(f"Header {label} data:")
            _hexdump(data)
        else:
            print(f"No header {label} data")

    names = _join_flag_names(rf.file_attributes)
    print(("File attributes: " + names) if names else "No file attributes")

    print(f"{len(rf)} resource types:")
    for typecode, resources in rf.items():
        restype = _bytes_escape(typecode, quote="'")
        print(f"'{restype}': {len(resources)} resources:")
        for resid, res in resources.items():
            attrdesc = _join_flag_names(res.attributes) or "no attributes"
            print(f"({resid}), {_describe_resource_name(res)}, {attrdesc}, {len(res.data)} bytes")
        print()


def main(args: typing.Sequence[str]):
    """Run the command-line interface.

    Depending on the arguments, this outputs one of the file header data fields, the resources selected by the filters (or all resources if --all is given), or an overview of the whole file.

    This function does not return: it always ends by calling sys.exit, with status 0 on success and status 1 if the arguments cannot be processed.

    :param args: The command-line arguments, not including the program name.
    """

    import argparse
    import textwrap

    ap = argparse.ArgumentParser(
        add_help=False,
        fromfile_prefix_chars="@",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
        Read and display resources from a file's resource or data fork.
        When specifying resource filters, each one may be of one of the
        following forms:
        An unquoted type name (without escapes): TYPE
        A quoted type name: 'TYPE'
        A quoted type name and an ID: 'TYPE' (42)
        A quoted type name and an ID range: 'TYPE' (24:42)
        A quoted type name and a resource name: 'TYPE' ("foobar")
        When multiple filters are specified, all resources matching any of them
        are displayed.
        """),
    )

    ap.add_argument("--help", action="help", help="Display this help message and exit")
    ap.add_argument("--version", action="version", version=__version__, help="Display version information and exit")
    ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview")
    ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)")
    ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
    ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
    ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")
    ap.add_argument("--read-mode", choices=["auto", "stream", "seek"], default="auto", help="Whether to read the data sequentially (stream) or on-demand (seek), or auto to use seeking when possible (default: %(default)s)")
    ap.add_argument("file", help="The file to read, or - for stdin")
    ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources")

    ns = ap.parse_args(args)

    # Convert the option strings to the tri-state values that ResourceFile expects (None means automatic).
    ns.fork = {"auto": None, "data": False, "rsrc": True}[ns.fork]
    ns.read_mode = {"auto": None, "stream": False, "seek": True}[ns.read_mode]

    if ns.file == "-":
        if ns.fork is not None:
            print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr)
            sys.exit(1)

        rf = ResourceFile(sys.stdin.buffer, allow_seek=ns.read_mode)
    else:
        rf = ResourceFile.open(ns.file, rsrcfork=ns.fork, allow_seek=ns.read_mode)

    with rf:
        if ns.header_system or ns.header_application:
            # If both header options are given, the system data takes precedence.
            if ns.header_system:
                data = rf.header_system_data
            else:
                data = rf.header_application_data

            _show_header_data(data, ns.format)
        elif ns.filter or ns.all:
            if ns.filter:
                resources = _filter_resources(rf, ns.filter)
            else:
                resources = [res for reses in rf.values() for res in reses.values()]

            if ns.format in ("hex", "raw") and len(resources) != 1:
                print(f"Format {ns.format} only supports exactly one resource, but found {len(resources)}", file=sys.stderr)
                sys.exit(1)

            for res in resources:
                if ns.format == "dump":
                    # Human-readable info and hex dump
                    _show_resource_dump(res)
                elif ns.format == "hex":
                    # Data only as hex
                    _raw_hexdump(res.data)
                elif ns.format == "raw":
                    # Data only as raw bytes
                    sys.stdout.buffer.write(res.data)
                elif ns.format == "derez":
                    # Like DeRez with no resource definitions
                    _show_resource_derez(res)
                else:
                    raise ValueError(f"Unhandled output format: {ns.format}")
        else:
            _show_overview(rf)

    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])

View File

@ -7,7 +7,7 @@ with open("README.rst", "r", encoding="utf-8") as f:
setuptools.setup(
name="rsrcfork",
version="1.0.0",
version="1.1.0",
description="A pure Python library for reading old Macintosh resource manager data",
long_description=long_description,
url="https://github.com/dgelessus/python-rsrcfork",
@ -17,6 +17,7 @@ setuptools.setup(
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Utilities",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python",