11 Commits
1.6 ... v1.7.0

Author SHA1 Message Date
158ca4884b Release version 1.7.0 2019-12-17 11:28:26 +01:00
8568f355c4 Remove incorrect outdated paragraph from list subcommand help 2019-12-10 16:15:18 +01:00
97d2dbe1b3 Change formatting of command help strings in source code
The automatic textwrap.dedent makes it impossible to cleanly extract
parts of the help strings into separate constants.
2019-12-10 15:58:20 +01:00
a4b6328782 Fix 'dcmp' (0) jump table decompression for large segment numbers 2019-12-04 23:36:57 +01:00
393160b5da Add raw-decompress subcommand (#6) 2019-12-04 23:36:56 +01:00
476eaecd17 Fix typo in the help text for rsrcfork read 2019-12-04 21:16:29 +01:00
546edbc31a Update and improve resource and resource map reprs 2019-12-04 02:01:40 +01:00
cf6ce3c2a6 Move _LazyResourceMap out of ResourceFile 2019-12-04 02:01:40 +01:00
af2ac70676 Simplify ResourceFile._references and ._LazyResourceMap
The _references map now stores Resource objects directly, instead of
constructing them only when they are looked up. Resource objects are
now lazy themselves, so the previous lazy resource creation mechanism
is redundant.

_LazyResourceMap is now a simple read-only wrapper around an existing
map. The custom class is now only used to provide a specialized repr.
2019-12-04 02:01:40 +01:00
5af455992b Refactor resource reading internals
The reading of resource name and data is now performed in the Resource
class (lazily, when the respective attributes are accessed) instead of
in ResourceFile._LazyResourceMap.
2019-12-04 02:01:40 +01:00
2193c81518 Bump version to 1.6.1.dev 2019-12-04 01:45:15 +01:00
5 changed files with 213 additions and 125 deletions

View File

@ -113,6 +113,16 @@ For technical info and documentation about resource files and resources, see the
Changelog
---------
Version 1.7
^^^^^^^^^^^
* Added a ``raw-decompress`` subcommand to decompress compressed resource data stored in a standalone file rather than as a resource.
* Optimized lazy loading of ``Resource`` objects. Previously, resource data would be read from disk whenever a ``Resource`` object was looked up, even if the data itself was never used. Now the resource data is only loaded once the ``data`` (or ``data_raw``) attribute is accessed.
* The same optimization applies to the ``name`` attribute, although this is unlikely to make a difference in practice.
* As a result, it is no longer possible to construct ``Resource`` objects without a resource file. This was previously possible, but had no practical use.
* Fixed a small error in the ``'dcmp' (0)`` decompression implementation.
Version 1.6.0
^^^^^^^^^^^^^

View File

@ -20,7 +20,7 @@
# * Add a new empty section for the next version to the README.rst changelog.
# * Commit and push the changes to master.
__version__ = "1.6.0"
__version__ = "1.7.0"
__all__ = [
"Resource",

View File

@ -483,7 +483,7 @@ def make_argument_parser(*, description: str, **kwargs: typing.Any) -> argparse.
ap = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent(description),
description=description,
allow_abbrev=False,
add_help=False,
**kwargs,
@ -521,21 +521,21 @@ def do_read_header(prog: str, args: typing.List[str]) -> typing.NoReturn:
ap = make_argument_parser(
prog=prog,
description="""
Read and output a resource file's header data.
The header data consists of two parts:
The system-reserved data is 112 bytes long and used by the Classic Mac OS
Finder as temporary storage space. It usually contains parts of the
file metadata (name, type/creator code, etc.).
The application-specific data is 128 bytes long and is available for use by
applications. In practice it usually contains junk data that happened to be in
memory when the resource file was written.
Mac OS X does not use the header data fields anymore. Resource files written
on Mac OS X normally have both parts of the header data set to all zero bytes.
""",
Read and output a resource file's header data.
The header data consists of two parts:
The system-reserved data is 112 bytes long and used by the Classic Mac OS
Finder as temporary storage space. It usually contains parts of the
file metadata (name, type/creator code, etc.).
The application-specific data is 128 bytes long and is available for use by
applications. In practice it usually contains junk data that happened to be in
memory when the resource file was written.
Mac OS X does not use the header data fields anymore. Resource files written
on Mac OS X normally have both parts of the header data set to all zero bytes.
""",
)
ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw"], default="dump", help="How to output the header data: human-readable info with hex dump (dump) (default), human-readable info with newline-translated data (dump-text), data only as hex (hex), or data only as raw bytes (raw). Default: %(default)s")
@ -586,8 +586,8 @@ def do_info(prog: str, args: typing.List[str]) -> typing.NoReturn:
ap = make_argument_parser(
prog=prog,
description="""
Display technical information and stats about the resource file.
""",
Display technical information and stats about the resource file.
""",
)
add_resource_file_args(ap)
@ -618,16 +618,13 @@ def do_list(prog: str, args: typing.List[str]) -> typing.NoReturn:
ap = make_argument_parser(
prog=prog,
description="""
List the resources stored in a resource file.
Each resource's type, ID, name (if any), attributes (if any), and data length
are displayed. For compressed resources, the compressed and decompressed data
length are displayed, as well as the ID of the 'dcmp' resource used to
decompress the resource data.
If the resource file has any global (resource map) attributes or non-zero
header data, they are displayed before the list of resources.
""",
List the resources stored in a resource file.
Each resource's type, ID, name (if any), attributes (if any), and data length
are displayed. For compressed resources, the compressed and decompressed data
length are displayed, as well as the ID of the 'dcmp' resource used to
decompress the resource data.
""",
)
ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not parse the data header of compressed resources and only output their compressed length.")
@ -646,29 +643,29 @@ def do_read(prog: str, args: typing.List[str]) -> typing.NoReturn:
ap = make_argument_parser(
prog=prog,
description="""
Read the data of one or more resources.
The resource filters use syntax similar to Rez (resource definition) files.
Each filter can have one of the following forms:
An unquoted type name (without escapes): TYPE
A quoted type name: 'TYPE'
A quoted type name and an ID: 'TYPE' (42)
A quoted type name and an ID range: 'TYPE' (24:42)
A quoted type name and a resource name: 'TYPE' ("foobar")
Note that the resource filter syntax uses quotes, parentheses and spaces,
which have special meanings in most shells. It is recommended to quote each
resource filter (using double quotes) to ensure that it is not interpreted
or rewritten by the shell.
""",
Read the data of one or more resources.
The resource filters use syntax similar to Rez (resource definition) files.
Each filter can have one of the following forms:
An unquoted type name (without escapes): TYPE
A quoted type name: 'TYPE'
A quoted type name and an ID: 'TYPE' (42)
A quoted type name and an ID range: 'TYPE' (24:42)
A quoted type name and a resource name: 'TYPE' ("foobar")
Note that the resource filter syntax uses quotes, parentheses and spaces,
which have special meanings in most shells. It is recommended to quote each
resource filter (using double quotes) to ensure that it is not interpreted
or rewritten by the shell.
""",
)
ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not decompress compressed resources, output the raw compressed resource data.")
ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw", "derez"], default="dump", help="How to output the resources: human-readable info with hex dump (dump), human-readable info with newline-translated data (dump-text), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez). Default: %(default)s")
ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID.")
add_resource_file_args(ap)
ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to read. If no filters ae specified, all resources are read.")
ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to read. If no filters are specified, all resources are read.")
ns = ap.parse_args(args)
@ -685,12 +682,73 @@ def do_read(prog: str, args: typing.List[str]) -> typing.NoReturn:
show_filtered_resources(resources, format=ns.format, decompress=ns.decompress)
def do_raw_decompress(prog: str, args: typing.List[str]) -> typing.NoReturn:
    """Decompress raw compressed resource data."""
    # NOTE(review): the one-line docstring above is kept verbatim in case it is surfaced as subcommand help text elsewhere in this file - confirm before rewording it.

    parser = make_argument_parser(
        prog=prog,
        description="""
Decompress raw compressed resource data that is stored in a standalone file
and not as a resource in a resource file.
This subcommand can be used in a shell pipeline by passing - as the input and
output file name, i. e. "%(prog)s - -".
Note: All other rsrcfork subcommands natively support compressed resources and
will automatically decompress them as needed. This subcommand is only needed
to decompress resource data that has been read from a resource file in
compressed form (e. g. using --no-decompress or another tool that does not
handle resource compression).
""",
    )
    parser.add_argument("--debug", action="store_true", help="Display debugging output from the decompressor on stdout. Cannot be used if the output file is - (stdout).")
    parser.add_argument("input_file", help="The file from which to read the compressed resource data, or - for stdin.")
    parser.add_argument("output_file", help="The file to which to write the decompressed resource data, or - for stdout.")
    options = parser.parse_args(args)

    # A file name of "-" selects the corresponding standard stream.
    # Standard streams are borrowed, so only streams opened here are closed again.
    input_is_stdin = options.input_file == "-"
    output_is_stdout = options.output_file == "-"

    in_stream = sys.stdin.buffer if input_is_stdin else open(options.input_file, "rb")
    try:
        header_info = compress.CompressedHeaderInfo.parse_stream(in_stream)

        # The output is opened only after the header has been parsed, so that the output file is not created (or truncated) when the input turns out to be invalid.
        if output_is_stdout:
            if options.debug:
                # Debug output and decompressed data would both end up on stdout.
                print("Cannot use --debug if the decompression output file is - (stdout).", file=sys.stderr)
                print("The debug output goes to stdout and would conflict with the decompressed data.", file=sys.stderr)
                sys.exit(2)
            out_stream = sys.stdout.buffer
        else:
            out_stream = open(options.output_file, "wb")

        try:
            # Decompressed data is produced in chunks and written as it arrives.
            for chunk in compress.decompress_stream_parsed(header_info, in_stream, debug=options.debug):
                out_stream.write(chunk)
        finally:
            if not output_is_stdout:
                out_stream.close()
    finally:
        if not input_is_stdin:
            in_stream.close()
# Dispatch table mapping each subcommand name to the do_* function that implements it.
# Every listed function takes the program name and the list of remaining arguments.
SUBCOMMANDS = {
"read-header": do_read_header,
"info": do_info,
"list": do_list,
"read": do_read,
"raw-decompress": do_raw_decompress,
}
@ -743,16 +801,16 @@ def main() -> typing.NoReturn:
# Custom usage string to make "subcommand ..." show up in the usage, but not as "positional arguments" in the main help text.
usage=f"{prog} (--help | --version | subcommand ...)",
description="""
%(prog)s is a tool for working with Classic Mac OS resource files.
Currently this tool can only read resource files; modifying/writing resource
files is not supported yet.
Note: This tool is intended for human users. The output format is not
machine-readable and may change at any time. The command-line syntax usually
does not change much across versions, but this should not be relied on.
Automated scripts and programs should use the Python API provided by the
rsrcfork library, which this tool is a part of.
""",
%(prog)s is a tool for working with Classic Mac OS resource files.
Currently this tool can only read resource files; modifying/writing resource
files is not supported yet.
Note: This tool is intended for human users. The output format is not
machine-readable and may change at any time. The command-line syntax usually
does not change much across versions, but this should not be relied on.
Automated scripts and programs should use the Python API provided by the
rsrcfork library, which this tool is a part of.
""",
# The list of subcommands is shown in the epilog so that it appears under the list of optional arguments.
epilog=format_subcommands_help(),
)

View File

@ -97,24 +97,31 @@ class ResourceAttrs(enum.Flag):
class Resource(object):
"""A single resource from a resource file."""
_resfile: "ResourceFile"
type: bytes
id: int
name: typing.Optional[bytes]
name_offset: int
_name: typing.Optional[bytes]
attributes: ResourceAttrs
data_raw: bytes
data_raw_offset: int
_data_raw: bytes
_compressed_info: compress.common.CompressedHeaderInfo
_data_decompressed: bytes
def __init__(self, resource_type: bytes, resource_id: int, name: typing.Optional[bytes], attributes: ResourceAttrs, data_raw: bytes) -> None:
"""Create a new resource with the given type code, ID, name, attributes, and data."""
def __init__(self, resfile: "ResourceFile", resource_type: bytes, resource_id: int, name_offset: int, attributes: ResourceAttrs, data_raw_offset: int) -> None:
"""Create a resource object representing a resource stored in a resource file.
External code should not call this constructor manually. Resources should be looked up through a ResourceFile object instead.
"""
super().__init__()
self._resfile = resfile
self.type = resource_type
self.id = resource_id
self.name = name
self.name_offset = name_offset
self.attributes = attributes
self.data_raw = data_raw
self.data_raw_offset = data_raw_offset
def __repr__(self) -> str:
try:
@ -133,7 +140,7 @@ class Resource(object):
if not decompress_ok:
data_repr = f"<decompression failed - compressed data: {data_repr}>"
return f"{type(self).__module__}.{type(self).__qualname__}(type={self.type}, id={self.id}, name={self.name}, attributes={self.attributes}, data={data_repr})"
return f"<{type(self).__qualname__} type {self.type}, id {self.id}, name {self.name}, attributes {self.attributes}, data {data_repr}>"
@property
def resource_type(self) -> bytes:
@ -145,6 +152,30 @@ class Resource(object):
warnings.warn(DeprecationWarning("The resource_id attribute has been deprecated and will be removed in a future version. Please use the id attribute instead."))
return self.id
@property
def name(self) -> typing.Optional[bytes]:
    """The name of this resource, or None if no name is stored for it.

    The name is read from the resource file the first time this attribute is accessed and is cached on the object afterwards.
    """
    try:
        return self._name
    except AttributeError:
        # Nothing cached yet. A name offset of 0xffff means there is no name to read.
        loaded_name: typing.Optional[bytes] = None
        if self.name_offset != 0xffff:
            resfile = self._resfile
            # The name offset is relative to the start of the name list inside the resource map.
            resfile._stream.seek(resfile.map_offset + resfile.map_name_list_offset + self.name_offset)
            (length,) = resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
            loaded_name = resfile._read_exact(length)
        self._name = loaded_name
        return self._name
@property
def data_raw(self) -> bytes:
    """The data of this resource exactly as read from the resource file.

    No decompression is performed here. The data is read the first time this attribute is accessed and is cached on the object afterwards.
    """
    try:
        return self._data_raw
    except AttributeError:
        # Nothing cached yet: read the length header at the data offset, then exactly that many bytes.
        resfile = self._resfile
        start = resfile.data_offset + self.data_raw_offset
        resfile._stream.seek(start)
        (length,) = resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
        self._data_raw = resfile._read_exact(length)
        return self._data_raw
@property
def compressed_info(self) -> typing.Optional[compress.common.CompressedHeaderInfo]:
"""The compressed resource header information, or None if this resource is not compressed.
@ -198,65 +229,54 @@ class Resource(object):
else:
return self.data_raw
class _LazyResourceMap(typing.Mapping[int, Resource]):
    """Internal class: Read-only view of a mapping from resource IDs to resource objects.

    All mapping operations are forwarded to the wrapped mapping. The only thing this class adds is a compact repr, which keeps REPL output short.
    """

    # The resource type code shared by all resources in this map.
    type: bytes
    # The wrapped mapping that all lookups are delegated to.
    _submap: typing.Mapping[int, Resource]

    def __init__(self, resource_type: bytes, submap: typing.Mapping[int, Resource]) -> None:
        """Wrap submap, which holds the resources with the type code resource_type."""

        super().__init__()

        self.type = resource_type
        self._submap = submap

    def __len__(self) -> int:
        """Return how many resources have this type code."""

        return len(self._submap)

    def __iter__(self) -> typing.Iterator[int]:
        """Return an iterator over the IDs of the resources with this type code."""

        return iter(self._submap)

    def __contains__(self, key: object) -> bool:
        """Return whether a resource with the ID key exists for this type code."""

        return key in self._submap

    def __getitem__(self, key: int) -> Resource:
        """Return the resource with the ID key for this type code."""

        return self._submap[key]

    def __repr__(self) -> str:
        # A single resource is shown in full; otherwise only the count and the IDs are listed.
        if len(self) != 1:
            contents = f"{len(self)} resources with IDs {list(self)}"
        else:
            contents = f"one resource: {next(iter(self.values()))}"

        return f"<Resource map for type {self.type}, containing {contents}>"
class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.ContextManager["ResourceFile"]):
"""A resource file reader operating on a byte stream."""
# noinspection PyProtectedMember
class _LazyResourceMap(typing.Mapping[int, Resource]):
"""Internal class: Lazy mapping of resource IDs to resource objects, returned when subscripting a ResourceFile."""
_resfile: "ResourceFile"
_restype: bytes
_submap: typing.Mapping[int, typing.Tuple[int, ResourceAttrs, int]]
def __init__(self, resfile: "ResourceFile", restype: bytes) -> None:
"""Create a new _LazyResourceMap "containing" all resources in resfile that have the type code restype."""
super().__init__()
self._resfile = resfile
self._restype = restype
self._submap = self._resfile._references[self._restype]
def __len__(self) -> int:
"""Get the number of resources with this type code."""
return len(self._submap)
def __iter__(self) -> typing.Iterator[int]:
"""Iterate over the IDs of all resources with this type code."""
return iter(self._submap)
def __contains__(self, key: object) -> bool:
"""Check if a resource with the given ID exists for this type code."""
return key in self._submap
def __getitem__(self, key: int) -> Resource:
"""Get a resource with the given ID for this type code."""
name_offset, attributes, data_offset = self._submap[key]
if name_offset == 0xffff:
name = None
else:
self._resfile._stream.seek(self._resfile.map_offset + self._resfile.map_name_list_offset + name_offset)
(name_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
name = self._resfile._read_exact(name_length)
self._resfile._stream.seek(self._resfile.data_offset + data_offset)
(data_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
data = self._resfile._read_exact(data_length)
return Resource(self._restype, key, name, attributes, data)
def __repr__(self) -> str:
if len(self) == 1:
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing one resource: {next(iter(self.values()))}>"
else:
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing {len(self)} resources with IDs: {list(self)}>"
_close_stream: bool
_stream: typing.BinaryIO
@ -272,7 +292,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
file_attributes: ResourceFileAttrs
_reference_counts: typing.MutableMapping[bytes, int]
_references: typing.MutableMapping[bytes, typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]]]
_references: typing.MutableMapping[bytes, typing.MutableMapping[int, Resource]]
@classmethod
def open(cls, filename: typing.Union[str, os.PathLike], *, fork: str="auto", **kwargs: typing.Any) -> "ResourceFile":
@ -433,7 +453,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
self._references = collections.OrderedDict()
for resource_type, count in self._reference_counts.items():
resmap: typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]] = collections.OrderedDict()
resmap: typing.MutableMapping[int, Resource] = collections.OrderedDict()
self._references[resource_type] = resmap
for _ in range(count):
(
@ -445,7 +465,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
attributes = attributes_and_data_offset >> 24
data_offset = attributes_and_data_offset & ((1 << 24) - 1)
resmap[resource_id] = (name_offset, ResourceAttrs(attributes), data_offset)
resmap[resource_id] = Resource(self, resource_type, resource_id, name_offset, ResourceAttrs(attributes), data_offset)
def close(self) -> None:
"""Close this ResourceFile.
@ -483,10 +503,10 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
return key in self._references
def __getitem__(self, key: bytes) -> "ResourceFile._LazyResourceMap":
def __getitem__(self, key: bytes) -> "_LazyResourceMap":
"""Get a lazy mapping of all resources with the given type in this ResourceFile."""
return ResourceFile._LazyResourceMap(self, key)
return _LazyResourceMap(key, self._references[key])
def __repr__(self) -> str:
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>"

View File

@ -119,7 +119,7 @@ def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: ty
print(f"\t-> segment number: {segment_number_int:#x}")
# The tail part of all jump table entries (i. e. everything except for the address).
entry_tail = b"?<" + segment_number_int.to_bytes(2, "big", signed=True) + b"\xa9\xf0"
entry_tail = b"?<" + segment_number_int.to_bytes(2, "big", signed=False) + b"\xa9\xf0"
# The tail is output once *without* an address in front, i. e. the first entry's address must be generated manually by a previous code.
yield entry_tail