8 Commits

Author SHA1 Message Date
bc4bad678a Bump version to 1.1.2 2018-02-01 11:06:22 +01:00
ee796d0eb1 Support additional resource file attributes
Added the attributes "Resources Locked" and "Printer Driver MultiFinder
Compatible" from ResEdit, as well as more dummy constants (512-16384)
for attributes with no known meaning.
2018-02-01 11:05:25 +01:00
3d802c570f Add python_requires to setup.py 2017-07-14 18:01:38 +02:00
f6e424674d Bump version to 1.1.1 2017-07-14 17:56:07 +02:00
5dbec5d905 Make _hexdump behave more like "hexdump -C" 2017-07-14 14:53:55 +02:00
c207703c9f Fix overflow with empty resource files or empty resource type entries 2017-07-14 13:50:21 +02:00
d6de63e17b Add a command-line interface for reading resources 2016-12-26 23:44:49 +01:00
e741fb063f Fix nested bullet list in README.rst 2016-12-24 21:28:17 +01:00
3 changed files with 440 additions and 15 deletions

View File

@ -8,6 +8,23 @@ Requirements
Python 3.6 or later. No other libraries are required. Python 3.6 or later. No other libraries are required.
Installation
------------
``rsrcfork`` is available `on PyPI`__ and can be installed using ``pip``:
.. code-block:: sh
python3 -m pip install rsrcfork
Alternatively you can run the ``setup.py`` script manually:
.. code-block:: sh
python3 setup.py install
__ https://pypi.python.org/pypi/rsrcfork
Features Features
-------- --------
@ -44,6 +61,34 @@ Automatic selection of data/resource fork
>>> resourcerf._stream >>> resourcerf._stream
<_io.BufferedReader name='/Users/Shared/Test.textClipping/..namedfork/rsrc'> <_io.BufferedReader name='/Users/Shared/Test.textClipping/..namedfork/rsrc'>
Command-line interface
``````````````````````
.. code-block:: sh
$ python3 -m rsrcfork /Users/Shared/Test.textClipping
No header system data
No header application data
No file attributes
4 resource types:
'utxt': 1 resources:
(256), unnamed, no attributes, 34 bytes
'utf8': 1 resources:
(256), unnamed, no attributes, 17 bytes
'TEXT': 1 resources:
(256), unnamed, no attributes, 17 bytes
'drag': 1 resources:
(128), unnamed, no attributes, 64 bytes
$ python3 -m rsrcfork /Users/Shared/Test.textClipping "'TEXT' (256)"
Resource 'TEXT' (256), unnamed, no attributes, 17 bytes:
00000000 48 65 72 65 20 69 73 20 73 6f 6d 65 20 74 65 78 |Here is some tex|
00000010 74 |t|
00000011
Limitations Limitations
----------- -----------
@ -59,8 +104,8 @@ Further info on resource files
Sources of information about the resource fork data format, and the structure of common resource types: Sources of information about the resource fork data format, and the structure of common resource types:
* Inside Macintosh, Volume I, Chapter 5 "The Resource Manager". This book can probably be obtained in physical form somewhere, but the relevant chapter/book is also available in a few places online: * Inside Macintosh, Volume I, Chapter 5 "The Resource Manager". This book can probably be obtained in physical form somewhere, but the relevant chapter/book is also available in a few places online:
* `Apple's legacy documentation`__ - `Apple's legacy documentation`__
* pagetable.com, a site that happened to have a copy of the book: `info blog post`__, `direct download`__ - pagetable.com, a site that happened to have a copy of the book: `info blog post`__, `direct download`__
* `Wikipedia`__, of course * `Wikipedia`__, of course
* The `Resource Fork`__ article on "Just Solve the File Format Problem" (despite the title, this is a decent site and not clickbait) * The `Resource Fork`__ article on "Just Solve the File Format Problem" (despite the title, this is a decent site and not clickbait)
* The `KSFL`__ library (and `its wiki`__), written in Java, which supports reading and writing resource files * The `KSFL`__ library (and `its wiki`__), written in Java, which supports reading and writing resource files
@ -90,3 +135,28 @@ __ https://archive.org/web/
__ http://archive.is/ __ http://archive.is/
__ https://archive.fo/ __ https://archive.fo/
Changelog
---------
Version 1.1.2
`````````````
* Added support for the resource file attributes "Resources Locked" and "Printer Driver MultiFinder Compatible" from ResEdit.
* Added more dummy constants for resource attributes with unknown meaning, so that resource files containing such attributes can be loaded without errors.
Version 1.1.1
`````````````
* Fixed overflow issue with empty resource files or empty resource type entries
* Changed ``_hexdump`` to behave more like ``hexdump -C``
Version 1.1.0
`````````````
* Added a command-line interface - run ``python3 -m rsrcfork --help`` for more info
Version 1.0.0
`````````````
* Initial version

View File

@ -11,6 +11,7 @@ import enum
import io import io
import os import os
import struct import struct
import sys
import typing import typing
__all__ = [ __all__ = [
@ -21,6 +22,11 @@ __all__ = [
"open", "open",
] ]
# Version of this module; the command-line interface prints it for --version.
__version__ = "1.1.2"
# Translation table to replace ASCII non-printable characters with periods.
# The keys are the code points 0x00-0x1f and 0x7f, which is the key form that str.translate() looks up.
_TRANSLATE_NONPRINTABLES = {k: "." for k in [*range(0x20), 0x7f]}
# The formats of all following structures are as described in the Inside Macintosh book (see module docstring). # The formats of all following structures are as described in the Inside Macintosh book (see module docstring).
# Signedness and byte order of the integers is never stated explicitly in IM. # Signedness and byte order of the integers is never stated explicitly in IM.
# All integers are big-endian, as this is the native byte order of the 68k and PowerPC processors used in old Macs. # All integers are big-endian, as this is the native byte order of the 68k and PowerPC processors used in old Macs.
@ -73,6 +79,14 @@ STRUCT_RESOURCE_NAME_HEADER = struct.Struct(">B")
class ResourceFileAttrs(enum.Flag): class ResourceFileAttrs(enum.Flag):
"""Resource file attribute flags. The descriptions for these flags are taken from comments on the map*Bit and map* enum constants in <CarbonCore/Resources.h>.""" """Resource file attribute flags. The descriptions for these flags are taken from comments on the map*Bit and map* enum constants in <CarbonCore/Resources.h>."""
mapResourcesLocked = 32768 # "Resources Locked" (undocumented, but available as a checkbox in ResEdit)
_UNKNOWN_16384 = 16384
_UNKNOWN_8192 = 8192
_UNKNOWN_4096 = 4096
_UNKNOWN_2048 = 2048
_UNKNOWN_1024 = 1024
_UNKNOWN_512 = 512
mapPrinterDriverMultiFinderCompatible = 256 # "Printer Driver MultiFinder Compatible" (undocumented, but available as a checkbox in ResEdit)
mapReadOnly = 128 # "is this file read-only?", "Resource file read-only" mapReadOnly = 128 # "is this file read-only?", "Resource file read-only"
mapCompact = 64 # "Is a compact necessary?", "Compact resource file" mapCompact = 64 # "Is a compact necessary?", "Compact resource file"
mapChanged = 32 # "Is it necessary to write map?", "Write map out at update" mapChanged = 32 # "Is it necessary to write map?", "Write map out at update"
@ -92,7 +106,24 @@ class ResourceAttrs(enum.Flag):
resProtected = 8 # "Protected/not protected", "Protected?" resProtected = 8 # "Protected/not protected", "Protected?"
resPreload = 4 # "Read in at OpenResource?", "Load in on OpenResFile?" resPreload = 4 # "Read in at OpenResource?", "Load in on OpenResFile?"
resChanged = 2 # "Existing resource changed since last update", "Resource changed?" resChanged = 2 # "Existing resource changed since last update", "Resource changed?"
_UNKNWON_1 = 1 resCompressed = 1 # "indicates that the resource data is compressed" (only documented in https://github.com/kreativekorp/ksfl/wiki/Macintosh-Resource-File-Format)
# Attribute names used by the "derez" output format of the command-line interface, keyed by resource attribute flag.
# A value of None marks a flag that has no name in that format (the quoted text next to each None entry gives the reason).
# If a resource has any such flag set, main() prints the whole attribute value as a hex number instead of a list of names.
_REZ_ATTR_NAMES = {
    ResourceAttrs.resSysRef: None, # "Illegal or reserved attribute"
    ResourceAttrs.resSysHeap: "sysheap",
    ResourceAttrs.resPurgeable: "purgeable",
    ResourceAttrs.resLocked: "locked",
    ResourceAttrs.resProtected: "protected",
    ResourceAttrs.resPreload: "preload",
    ResourceAttrs.resChanged: None, # "Illegal or reserved attribute"
    ResourceAttrs.resCompressed: None, # "Extended Header resource attribute"
}
# Type variable standing for any concrete enum.Flag subclass.
# It is intentionally invariant: variance is only meaningful for generic classes, and type checkers reject a covariant type variable that is used as a function parameter type.
F = typing.TypeVar("F", bound=enum.Flag)


def _decompose_flags(value: F) -> typing.Sequence[F]:
    """Decompose an enum.Flag instance into separate enum constants.

    The members of value's own enum class are tested in the class's iteration order, and every member that is contained in value is returned.

    :param value: The (possibly combined) flag value to decompose.
    :return: A list of the members of type(value) that are contained in value. The list is empty if none are.
    """

    return [bit for bit in type(value) if bit in value]
class Resource(object): class Resource(object):
"""A single resource from a resource file.""" """A single resource from a resource file."""
@ -179,7 +210,7 @@ class ResourceFile(collections.abc.Mapping):
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing {len(self)} resources with IDs: {list(self)}>" return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing {len(self)} resources with IDs: {list(self)}>"
@classmethod @classmethod
def open(cls, filename: typing.Union[str, bytes, os.PathLike], *, rsrcfork: typing.Optional[bool]=None) -> "ResourceFile": def open(cls, filename: typing.Union[str, bytes, os.PathLike], *, rsrcfork: typing.Optional[bool]=None, **kwargs) -> "ResourceFile":
"""Open the file at the given path as a ResourceFile. """Open the file at the given path as a ResourceFile.
If rsrcfork is not None, it is treated as boolean and controls whether the data or resource fork of the file should be opened. (On systems other than macOS, opening resource forks will not work of course, since they don't exist.) If rsrcfork is not None, it is treated as boolean and controls whether the data or resource fork of the file should be opened. (On systems other than macOS, opening resource forks will not work of course, since they don't exist.)
@ -211,7 +242,7 @@ class ResourceFile(collections.abc.Mapping):
f = io.open(filename, "rb") f = io.open(filename, "rb")
# Use the selected fork to build a ResourceFile. # Use the selected fork to build a ResourceFile.
return cls(f) return cls(f, **kwargs)
def __init__(self, stream: typing.io.BinaryIO, *, allow_seek: typing.Optional[bool]=None, close: bool=True): def __init__(self, stream: typing.io.BinaryIO, *, allow_seek: typing.Optional[bool]=None, close: bool=True):
"""Create a ResourceFile wrapping the given byte stream. """Create a ResourceFile wrapping the given byte stream.
@ -284,13 +315,6 @@ class ResourceFile(collections.abc.Mapping):
self.header_application_data, self.header_application_data,
) = self._stream_unpack(STRUCT_RESOURCE_HEADER) ) = self._stream_unpack(STRUCT_RESOURCE_HEADER)
if __debug__:
if self.header_system_data != bytes(len(self.header_system_data)):
print("Header system data is not all null bytes. This may be of interest.")
if self.header_application_data != bytes(len(self.header_application_data)):
print("Header application data is not all null bytes. This may be of interest.")
assert self._tell() == self.data_offset assert self._tell() == self.data_offset
def _read_all_resource_data(self): def _read_all_resource_data(self):
@ -329,14 +353,16 @@ class ResourceFile(collections.abc.Mapping):
self._reference_counts: typing.MutableMapping[bytes, int] = collections.OrderedDict() self._reference_counts: typing.MutableMapping[bytes, int] = collections.OrderedDict()
(type_list_length_m1,) = self._stream_unpack(STRUCT_RESOURCE_TYPE_LIST_HEADER) (type_list_length_m1,) = self._stream_unpack(STRUCT_RESOURCE_TYPE_LIST_HEADER)
type_list_length = (type_list_length_m1 + 1) % 0x10000
for _ in range(type_list_length_m1 + 1): for _ in range(type_list_length):
( (
resource_type, resource_type,
count_m1, count_m1,
reflist_offset, reflist_offset,
) = self._stream_unpack(STRUCT_RESOURCE_TYPE) ) = self._stream_unpack(STRUCT_RESOURCE_TYPE)
self._reference_counts[resource_type] = count_m1 + 1 count = (count_m1 + 1) % 0x10000
self._reference_counts[resource_type] = count
def _read_all_references(self): def _read_all_references(self):
"""Read all resource references, starting at the current stream position.""" """Read all resource references, starting at the current stream position."""
@ -432,3 +458,330 @@ class ResourceFile(collections.abc.Mapping):
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>" return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>"
open = ResourceFile.open open = ResourceFile.open
# The following internal functions are only used by the main function.
def _bytes_unescape(string: str) -> bytes:
    """Convert a string containing ASCII characters and hex escapes to a bytestring.

    (We implement our own unescaping mechanism here to not depend on any of Python's string/bytes escape syntax.)

    The recognized escape sequences are ``\\\\``, ``\\'`` and ``\\"`` (which produce the escaped character itself) and ``\\xNN`` (which produces the byte with the given two-digit hex value). Every other character is converted to the byte with the same code point.

    :param string: The escaped text to convert.
    :return: The unescaped bytes.
    :raises ValueError: If an escape sequence is unknown, incomplete, or contains invalid hex digits, or if a character's code point does not fit into a single byte.
    """

    out = []
    it = iter(string)
    for char in it:
        if char != "\\":
            out.append(ord(char))
            continue

        try:
            esc = next(it)
            if esc in "\\\'\"":
                # Store the code point, not the character itself: bytes() only accepts a sequence of integers.
                out.append(ord(esc))
            elif esc == "x":
                digits = next(it) + next(it)
                # Check the digits explicitly, because int() would also accept a sign or whitespace in place of a digit.
                if not all(digit in "0123456789abcdefABCDEF" for digit in digits):
                    raise ValueError(f"Invalid hex digits in escape sequence: \\x{digits}")
                out.append(int(digits, 16))
            else:
                raise ValueError(f"Unknown escape character: {esc}")
        except StopIteration:
            # The string ended before the escape sequence was complete.
            raise ValueError("End of string in escape sequence") from None

    return bytes(out)
def _bytes_escape(bs: bytes, *, quote: str=None) -> str:
    """Render a bytestring as printable ASCII text, hex-escaping every byte that is not printable ASCII.

    (This is a custom escaping scheme, so that the output does not depend on the format of Python's str or bytes repr.)

    :param bs: The bytes to escape.
    :param quote: An optional quote character that is escaped with a backslash wherever it occurs, in addition to the backslash itself.
    :return: The escaped text.
    """

    backslashed = frozenset((quote, "\\"))

    def escape_byte(value: int) -> str:
        char = chr(value)
        if char in backslashed:
            return "\\" + char
        if value < 0x20 or value >= 0x7f:
            # Control characters, DEL and all non-ASCII bytes are written as hex escapes.
            return f"\\x{value:02x}"
        return char

    return "".join(map(escape_byte, bs))
def _find_type_end(text: str) -> int:
    """Return the index of the first unescaped single quote after the first character of text, or -1 if there is none.

    A backslash skips the character following it, so that an escaped quote inside a quoted type name does not end the type name.
    """

    pos = 1
    while pos < len(text):
        if text[pos] == "\\":
            pos += 2
        elif text[pos] == "'":
            return pos
        else:
            pos += 1
    return -1


def _filter_resources(rf: ResourceFile, filters: typing.Sequence[str]) -> typing.Sequence[Resource]:
    """Collect all resources from rf that match at least one of the given filters.

    Each filter must have one of the following forms:

    * ``TYPE``: an unquoted type name of exactly four ASCII characters
    * ``'TYPE'``: a single-quoted type name, which may contain escapes
    * ``'TYPE' (42)``: a quoted type name and a resource ID
    * ``'TYPE' (24:42)``: a quoted type name and an inclusive resource ID range
    * ``'TYPE' ("foobar")``: a quoted type name and a resource name (only the first resource with that name is selected)

    Filters that refer to a resource type or ID that does not exist in rf are skipped silently.

    :param rf: The resource file to search. It is accessed as a mapping from type code to a mapping from resource ID to resource.
    :param filters: The filter strings to apply.
    :return: The matching resources without duplicates, in the order in which they were first matched.
    :raises ValueError: If a filter is malformed.
    """

    matching = collections.OrderedDict()

    for spec in filters:
        if not spec:
            raise ValueError(f"Invalid filter {spec!r}: Filter must not be empty")

        if len(spec) == 4 or spec[0] == spec[-1] == "'":
            # Only a type name (unquoted or quoted): select every resource of that type.
            try:
                if len(spec) == 4:
                    resources = rf[spec.encode("ascii")]
                else:
                    resources = rf[_bytes_unescape(spec[1:-1])]
            except KeyError:
                continue

            for res in resources.values():
                matching[res.resource_type, res.resource_id] = res
            continue

        # Otherwise the filter must be a quoted type name, a space, and a parenthesized ID expression.
        pos = _find_type_end(spec)
        if pos == -1:
            raise ValueError(f"Invalid filter {spec!r}: Resource type must be single-quoted")
        elif spec[pos + 1:pos + 2] != " ":
            # Slicing (rather than indexing) also covers a closing quote at the very end of the filter.
            raise ValueError(f"Invalid filter {spec!r}: Resource type and ID must be separated by a space")

        restype, resid = spec[:pos + 1], spec[pos + 2:]

        if not restype[0] == restype[-1] == "'":
            raise ValueError(
                f"Invalid filter {spec!r}: Resource type is not a single-quoted type identifier: {restype!r}")
        restype = _bytes_unescape(restype[1:-1])

        if len(restype) != 4:
            raise ValueError(
                f"Invalid filter {spec!r}: Type identifier must be 4 bytes after replacing escapes, got {len(restype)} bytes: {restype!r}")

        if len(resid) < 2 or resid[0] != "(" or resid[-1] != ")":
            raise ValueError(f"Invalid filter {spec!r}: Resource ID must be parenthesized")
        resid = resid[1:-1]

        try:
            resources = rf[restype]
        except KeyError:
            continue

        if not resid:
            raise ValueError(f"Invalid filter {spec!r}: Resource ID must not be empty")

        if len(resid) >= 2 and resid[0] == resid[-1] == '"':
            # Resource name: select the first resource of this type that has the given name.
            name = _bytes_unescape(resid[1:-1])

            for res in resources.values():
                if res.name == name:
                    matching[res.resource_type, res.resource_id] = res
                    break
        elif ":" in resid:
            # Inclusive ID range.
            if resid.count(":") > 1:
                raise ValueError(f"Invalid filter {spec!r}: Too many colons in ID range expression: {resid!r}")
            start, end = resid.split(":")
            try:
                start, end = int(start), int(end)
            except ValueError:
                raise ValueError(f"Invalid filter {spec!r}: ID range bounds must be integers: {resid!r}") from None

            for res in resources.values():
                if start <= res.resource_id <= end:
                    matching[res.resource_type, res.resource_id] = res
        else:
            # Single ID.
            try:
                resid = int(resid)
            except ValueError:
                raise ValueError(f"Invalid filter {spec!r}: Resource ID must be an integer: {resid!r}") from None

            try:
                res = resources[resid]
            except KeyError:
                continue
            matching[res.resource_type, res.resource_id] = res

    return list(matching.values())
def _hexdump(data: bytes):
    """Print a hex dump of data, in a format modeled on the output of ``hexdump -C``.

    Every output line covers up to 16 bytes and consists of the offset of its first byte, the bytes in hex, and the bytes decoded as MacRoman text (with non-printable ASCII characters shown as periods). For non-empty data, a final line containing only the total length is printed.
    """

    total = len(data)

    for offset in range(0, total, 16):
        chunk = data[offset:offset + 16]
        hex_column = " ".join(format(value, "02x") for value in chunk)
        text_column = chunk.decode("MacRoman").translate(_TRANSLATE_NONPRINTABLES)
        # Pad the hex column to the width of a full line (16 two-digit bytes and 15 separators), so that the text column stays aligned on a short last line.
        print(f"{offset:08x} {hex_column.ljust(16*2+15)} |{text_column}|")

    if total > 0:
        print(f"{total:08x}")
def _raw_hexdump(data: bytes):
    """Print data as plain hex, 16 space-separated two-digit bytes per line, without offsets or a text column."""

    for start in range(0, len(data), 16):
        row = data[start:start + 16]
        # print() separates its arguments with single spaces, which is exactly the required layout.
        print(*(f"{value:02x}" for value in row))
def main(args: typing.Sequence[str]):
    """Run the command-line interface.

    Depending on the options, this prints one of three things to standard output:

    * with --header-system or --header-application, only the corresponding header data
    * with at least one filter or with --all, the selected resources in the format chosen with --format
    * otherwise, an overview of the header data, the file attributes, and all resources

    This function does not return normally: it always ends by calling sys.exit(), with status 0 on success and status 1 if the options cannot be combined as requested. Malformed filters are reported as a ValueError raised by _filter_resources.

    :param args: The command-line arguments, without the program name.
    """
    import argparse
    import textwrap

    # The automatic -h/--help option is disabled, and a --help option without a short form is added manually below.
    # The description's line breaks must be kept exactly as written, hence the raw description formatter.
    ap = argparse.ArgumentParser(
        add_help=False,
        fromfile_prefix_chars="@",
        formatter_class=argparse.RawDescriptionHelpFormatter, description=textwrap.dedent("""
Read and display resources from a file's resource or data fork.
When specifying resource filters, each one may be of one of the
following forms:
An unquoted type name (without escapes): TYPE
A quoted type name: 'TYPE'
A quoted type name and an ID: 'TYPE' (42)
A quoted type name and an ID range: 'TYPE' (24:42)
A quoted type name and a resource name: 'TYPE' ("foobar")
When multiple filters are specified, all resources matching any of them
are displayed.
"""),
    )
    ap.add_argument("--help", action="help", help="Display this help message and exit")
    ap.add_argument("--version", action="version", version=__version__, help="Display version information and exit")
    ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview")
    ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)")
    ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
    ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
    ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")
    ap.add_argument("--read-mode", choices=["auto", "stream", "seek"], default="auto", help="Whether to read the data sequentially (stream) or on-demand (seek), or auto to use seeking when possible (default: %(default)s)")
    ap.add_argument("file", help="The file to read, or - for stdin")
    ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources")
    ns = ap.parse_args(args)
    # Translate the option strings into the tri-state values (None/False/True) that are passed on as rsrcfork and allow_seek.
    ns.fork = {"auto": None, "data": False, "rsrc": True}[ns.fork]
    ns.read_mode = {"auto": None, "stream": False, "seek": True}[ns.read_mode]
    if ns.file == "-":
        # Standard input is a plain byte stream, so a fork cannot be selected for it.
        if ns.fork is not None:
            print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr)
            sys.exit(1)
        rf = ResourceFile(sys.stdin.buffer, allow_seek=ns.read_mode)
    else:
        rf = ResourceFile.open(ns.file, rsrcfork=ns.fork, allow_seek=ns.read_mode)
    # NOTE(review): rf is used as a context manager here, presumably so that the underlying stream is closed afterwards - confirm against ResourceFile.__exit__.
    with rf:
        if ns.header_system or ns.header_application:
            # Header data only. If both header options are given, the system data takes precedence.
            if ns.header_system:
                data = rf.header_system_data
            else:
                data = rf.header_application_data
            if ns.format == "dump":
                _hexdump(data)
            elif ns.format == "hex":
                _raw_hexdump(data)
            elif ns.format == "raw":
                sys.stdout.buffer.write(data)
            elif ns.format == "derez":
                print("Cannot output file header data in derez format", file=sys.stderr)
                sys.exit(1)
            else:
                raise ValueError(f"Unhandled output format: {ns.format}")
        elif ns.filter or ns.all:
            # Full output of the resources selected by the filters, or of all resources if --all is given without filters.
            if ns.filter:
                resources = _filter_resources(rf, ns.filter)
            else:
                resources = []
                for reses in rf.values():
                    resources.extend(reses.values())
            # The data-only formats print no separators or headings, so they are limited to a single resource.
            if ns.format in ("hex", "raw") and len(resources) != 1:
                print(f"Format {ns.format} only supports exactly one resource, but found {len(resources)}", file=sys.stderr)
                sys.exit(1)
            for res in resources:
                if ns.format == "dump":
                    # Human-readable info and hex dump
                    if res.name is None:
                        name = "unnamed"
                    else:
                        name = _bytes_escape(res.name, quote='"')
                        name = f'name "{name}"'
                    attrs = _decompose_flags(res.attributes)
                    if attrs:
                        attrdesc = "attributes: " + " | ".join(attr.name for attr in attrs)
                    else:
                        attrdesc = "no attributes"
                    restype = _bytes_escape(res.resource_type, quote="'")
                    print(f"Resource '{restype}' ({res.resource_id}), {name}, {attrdesc}, {len(res.data)} bytes:")
                    _hexdump(res.data)
                    print()
                elif ns.format == "hex":
                    # Data only as hex
                    _raw_hexdump(res.data)
                elif ns.format == "raw":
                    # Data only as raw bytes
                    sys.stdout.buffer.write(res.data)
                elif ns.format == "derez":
                    # Like DeRez with no resource definitions
                    attrs = [_REZ_ATTR_NAMES[attr] for attr in _decompose_flags(res.attributes)]
                    # If any of the set flags has no name, the attributes are written as a single hex number instead of a list of names.
                    if None in attrs:
                        attrs[:] = [f"${res.attributes.value:02X}"]
                    parts = [str(res.resource_id)]
                    if res.name is not None:
                        name = _bytes_escape(res.name, quote='"')
                        parts.append(f'"{name}"')
                    parts += attrs
                    restype = _bytes_escape(res.resource_type, quote="'")
                    print(f"data '{restype}' ({', '.join(parts)}) {{")
                    # Each output line holds up to 16 bytes of data.
                    for i in range(0, len(res.data), 16):
                        # Two-byte grouping is really annoying to implement.
                        groups = []
                        for j in range(0, 16, 2):
                            if i+j >= len(res.data):
                                # No data left.
                                break
                            elif i+j+1 >= len(res.data):
                                # Only a single byte left, which forms a group on its own.
                                groups.append(f"{res.data[i+j]:02X}")
                            else:
                                groups.append(f"{res.data[i+j]:02X}{res.data[i+j+1]:02X}")
                        s = f'$"{" ".join(groups)}"'
                        comment = "/* " + res.data[i:i + 16].decode("MacRoman").translate(_TRANSLATE_NONPRINTABLES) + " */"
                        print(f"\t{s:<54s}{comment}")
                    print("};")
                    print()
                else:
                    raise ValueError(f"Unhandled output format: {ns.format}")
        else:
            # Overview of the entire file.
            # The header data fields are only dumped if they contain anything other than null bytes.
            if rf.header_system_data != bytes(len(rf.header_system_data)):
                print("Header system data:")
                _hexdump(rf.header_system_data)
            else:
                print("No header system data")
            if rf.header_application_data != bytes(len(rf.header_application_data)):
                print("Header application data:")
                _hexdump(rf.header_application_data)
            else:
                print("No header application data")
            attrs = _decompose_flags(rf.file_attributes)
            if attrs:
                print("File attributes: " + " | ".join(attr.name for attr in attrs))
            else:
                print("No file attributes")
            if len(rf) > 0:
                print(f"{len(rf)} resource types:")
                for typecode, resources in rf.items():
                    restype = _bytes_escape(typecode, quote="'")
                    print(f"'{restype}': {len(resources)} resources:")
                    for resid, res in rf[typecode].items():
                        if res.name is None:
                            name = "unnamed"
                        else:
                            name = _bytes_escape(res.name, quote='"')
                            name = f'name "{name}"'
                        attrs = _decompose_flags(res.attributes)
                        if attrs:
                            attrdesc = " | ".join(attr.name for attr in attrs)
                        else:
                            attrdesc = "no attributes"
                        print(f"({resid}), {name}, {attrdesc}, {len(res.data)} bytes")
                    print()
            else:
                print("No resource types (empty resource file)")
    sys.exit(0)
# When this module is executed as a script, run the command-line interface with the command-line arguments (without the program name).
if __name__ == "__main__":
    main(sys.argv[1:])

View File

@ -7,7 +7,7 @@ with open("README.rst", "r", encoding="utf-8") as f:
setuptools.setup( setuptools.setup(
name="rsrcfork", name="rsrcfork",
version="1.0.0", version="1.1.2",
description="A pure Python library for reading old Macintosh resource manager data", description="A pure Python library for reading old Macintosh resource manager data",
long_description=long_description, long_description=long_description,
url="https://github.com/dgelessus/python-rsrcfork", url="https://github.com/dgelessus/python-rsrcfork",
@ -17,6 +17,7 @@ setuptools.setup(
"Development Status :: 4 - Beta", "Development Status :: 4 - Beta",
"Intended Audience :: Developers", "Intended Audience :: Developers",
"Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Utilities",
"License :: OSI Approved :: MIT License", "License :: OSI Approved :: MIT License",
"Operating System :: OS Independent", "Operating System :: OS Independent",
"Programming Language :: Python", "Programming Language :: Python",
@ -25,5 +26,6 @@ setuptools.setup(
"Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.6",
], ],
keywords="rsrc fork resource manager macintosh mac macos", keywords="rsrc fork resource manager macintosh mac macos",
python_requires=">=3.6",
py_modules=["rsrcfork"], py_modules=["rsrcfork"],
) )