11 Commits
1.6 ... v1.7.0

Author SHA1 Message Date
158ca4884b Release version 1.7.0 2019-12-17 11:28:26 +01:00
8568f355c4 Remove incorrect outdated paragraph from list subcommand help 2019-12-10 16:15:18 +01:00
97d2dbe1b3 Change formatting of command help strings in source code
The automatic textwrap.dedent makes it impossible to cleanly extract
parts of the help strings into separate constants.
2019-12-10 15:58:20 +01:00
a4b6328782 Fix 'dcmp' (0) jump table decompression for large segment numbers 2019-12-04 23:36:57 +01:00
393160b5da Add raw-decompress subcommand (#6) 2019-12-04 23:36:56 +01:00
476eaecd17 Fix typo in the help text for rsrcfork read 2019-12-04 21:16:29 +01:00
546edbc31a Update and improve resource and resource map reprs 2019-12-04 02:01:40 +01:00
cf6ce3c2a6 Move _LazyResourceMap out of ResourceFile 2019-12-04 02:01:40 +01:00
af2ac70676 Simplify ResourceFile._references and ._LazyResourceMap
The _references map now stores Resource objects directly, instead of
constructing them only when they are looked up. Resource objects are
now lazy themselves, so the previous lazy resource creation mechanism
is redundant.

_LazyResourceMap is now a simple read-only wrapper around an existing
map. The custom class is now only used to provide a specialized repr.
2019-12-04 02:01:40 +01:00
5af455992b Refactor resource reading internals
The reading of resource name and data is now performed in the Resource
class (lazily, when the respective attributes are accessed) instead of
in ResourceFile._LazyResourceMap.
2019-12-04 02:01:40 +01:00
2193c81518 Bump version to 1.6.1.dev 2019-12-04 01:45:15 +01:00
5 changed files with 213 additions and 125 deletions

View File

@ -113,6 +113,16 @@ For technical info and documentation about resource files and resources, see the
Changelog Changelog
--------- ---------
Version 1.7
^^^^^^^^^^^
* Added a ``raw-decompress`` subcommand to decompress compressed resource data stored in a standalone file rather than as a resource.
* Optimized lazy loading of ``Resource`` objects. Previously, resource data would be read from disk whenever a ``Resource`` object was looked up, even if the data itself was never used. Now the resource data is only loaded once the ``data`` (or ``data_raw``) attribute is accessed.
* The same optimization applies to the ``name`` attribute, although this is unlikely to make a difference in practice.
* As a result, it is no longer possible to construct ``Resource`` objects without a resource file. This was previously possible, but had no practical use.
* Fixed a small error in the ``'dcmp' (0)`` decompression implementation.
Version 1.6.0 Version 1.6.0
^^^^^^^^^^^^^ ^^^^^^^^^^^^^

View File

@ -20,7 +20,7 @@
# * Add a new empty section for the next version to the README.rst changelog. # * Add a new empty section for the next version to the README.rst changelog.
# * Commit and push the changes to master. # * Commit and push the changes to master.
__version__ = "1.6.0" __version__ = "1.7.0"
__all__ = [ __all__ = [
"Resource", "Resource",

View File

@ -483,7 +483,7 @@ def make_argument_parser(*, description: str, **kwargs: typing.Any) -> argparse.
ap = argparse.ArgumentParser( ap = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent(description), description=description,
allow_abbrev=False, allow_abbrev=False,
add_help=False, add_help=False,
**kwargs, **kwargs,
@ -624,9 +624,6 @@ def do_list(prog: str, args: typing.List[str]) -> typing.NoReturn:
are displayed. For compressed resources, the compressed and decompressed data are displayed. For compressed resources, the compressed and decompressed data
length are displayed, as well as the ID of the 'dcmp' resource used to length are displayed, as well as the ID of the 'dcmp' resource used to
decompress the resource data. decompress the resource data.
If the resource file has any global (resource map) attributes or non-zero
header data, they are displayed before the list of resources.
""", """,
) )
@ -668,7 +665,7 @@ def do_read(prog: str, args: typing.List[str]) -> typing.NoReturn:
ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw", "derez"], default="dump", help="How to output the resources: human-readable info with hex dump (dump), human-readable info with newline-translated data (dump-text), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez). Default: %(default)s") ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw", "derez"], default="dump", help="How to output the resources: human-readable info with hex dump (dump), human-readable info with newline-translated data (dump-text), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez). Default: %(default)s")
ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID.") ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID.")
add_resource_file_args(ap) add_resource_file_args(ap)
ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to read. If no filters ae specified, all resources are read.") ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to read. If no filters are specified, all resources are read.")
ns = ap.parse_args(args) ns = ap.parse_args(args)
@ -685,12 +682,73 @@ def do_read(prog: str, args: typing.List[str]) -> typing.NoReturn:
show_filtered_resources(resources, format=ns.format, decompress=ns.decompress) show_filtered_resources(resources, format=ns.format, decompress=ns.decompress)
def do_raw_decompress(prog: str, args: typing.List[str]) -> typing.NoReturn:
    """Decompress raw compressed resource data.

    Reads compressed resource data from the input file (or stdin if the
    input file name is "-"), and writes the decompressed data to the output
    file (or stdout if the output file name is "-").

    :param prog: The program name to show in the help/usage text.
    :param args: The command-line arguments for this subcommand.
    """
    # NOTE(review): annotated NoReturn like the other subcommand functions,
    # but as visible here the function returns normally on success -
    # confirm that the caller is responsible for exiting.

    # Imported locally so that this function does not depend on a
    # module-level import that may not exist.
    import contextlib

    ap = make_argument_parser(
        prog=prog,
        description="""
Decompress raw compressed resource data that is stored in a standalone file
and not as a resource in a resource file.
This subcommand can be used in a shell pipeline by passing - as the input and
output file name, i. e. "%(prog)s - -".
Note: All other rsrcfork subcommands natively support compressed resources and
will automatically decompress them as needed. This subcommand is only needed
to decompress resource data that has been read from a resource file in
compressed form (e. g. using --no-decompress or another tool that does not
handle resource compression).
""",
    )

    ap.add_argument("--debug", action="store_true", help="Display debugging output from the decompressor on stdout. Cannot be used if the output file is - (stdout).")
    ap.add_argument("input_file", help="The file from which to read the compressed resource data, or - for stdin.")
    ap.add_argument("output_file", help="The file to which to write the decompressed resource data, or - for stdout.")

    ns = ap.parse_args(args)

    # Validate the argument combination before touching any stream, so that
    # the usage error is reported immediately (in particular without first
    # blocking on stdin or requiring a valid input file).
    if ns.debug and ns.output_file == "-":
        print("Cannot use --debug if the decompression output file is - (stdout).", file=sys.stderr)
        print("The debug output goes to stdout and would conflict with the decompressed data.", file=sys.stderr)
        sys.exit(2)

    # The ExitStack closes only the files opened here (never the standard
    # streams), in reverse order of opening: output first, then input.
    with contextlib.ExitStack() as stack:
        if ns.input_file == "-":
            in_stream = sys.stdin.buffer
        else:
            in_stream = stack.enter_context(open(ns.input_file, "rb"))

        header_info = compress.CompressedHeaderInfo.parse_stream(in_stream)

        # Open the output file only after parsing the header, so that the file is only created (or its existing contents deleted) if the input file is valid.
        if ns.output_file == "-":
            out_stream = sys.stdout.buffer
        else:
            out_stream = stack.enter_context(open(ns.output_file, "wb"))

        for chunk in compress.decompress_stream_parsed(header_info, in_stream, debug=ns.debug):
            out_stream.write(chunk)
SUBCOMMANDS = { SUBCOMMANDS = {
"read-header": do_read_header, "read-header": do_read_header,
"info": do_info, "info": do_info,
"list": do_list, "list": do_list,
"read": do_read, "read": do_read,
"raw-decompress": do_raw_decompress,
} }

View File

@ -97,24 +97,31 @@ class ResourceAttrs(enum.Flag):
class Resource(object): class Resource(object):
"""A single resource from a resource file.""" """A single resource from a resource file."""
_resfile: "ResourceFile"
type: bytes type: bytes
id: int id: int
name: typing.Optional[bytes] name_offset: int
_name: typing.Optional[bytes]
attributes: ResourceAttrs attributes: ResourceAttrs
data_raw: bytes data_raw_offset: int
_data_raw: bytes
_compressed_info: compress.common.CompressedHeaderInfo _compressed_info: compress.common.CompressedHeaderInfo
_data_decompressed: bytes _data_decompressed: bytes
def __init__(self, resource_type: bytes, resource_id: int, name: typing.Optional[bytes], attributes: ResourceAttrs, data_raw: bytes) -> None: def __init__(self, resfile: "ResourceFile", resource_type: bytes, resource_id: int, name_offset: int, attributes: ResourceAttrs, data_raw_offset: int) -> None:
"""Create a new resource with the given type code, ID, name, attributes, and data.""" """Create a resource object representing a resource stored in a resource file.
External code should not call this constructor manually. Resources should be looked up through a ResourceFile object instead.
"""
super().__init__() super().__init__()
self._resfile = resfile
self.type = resource_type self.type = resource_type
self.id = resource_id self.id = resource_id
self.name = name self.name_offset = name_offset
self.attributes = attributes self.attributes = attributes
self.data_raw = data_raw self.data_raw_offset = data_raw_offset
def __repr__(self) -> str: def __repr__(self) -> str:
try: try:
@ -133,7 +140,7 @@ class Resource(object):
if not decompress_ok: if not decompress_ok:
data_repr = f"<decompression failed - compressed data: {data_repr}>" data_repr = f"<decompression failed - compressed data: {data_repr}>"
return f"{type(self).__module__}.{type(self).__qualname__}(type={self.type}, id={self.id}, name={self.name}, attributes={self.attributes}, data={data_repr})" return f"<{type(self).__qualname__} type {self.type}, id {self.id}, name {self.name}, attributes {self.attributes}, data {data_repr}>"
@property @property
def resource_type(self) -> bytes: def resource_type(self) -> bytes:
@ -145,6 +152,30 @@ class Resource(object):
warnings.warn(DeprecationWarning("The resource_id attribute has been deprecated and will be removed in a future version. Please use the id attribute instead.")) warnings.warn(DeprecationWarning("The resource_id attribute has been deprecated and will be removed in a future version. Please use the id attribute instead."))
return self.id return self.id
@property
def name(self) -> typing.Optional[bytes]:
    """The name of this resource, or None if name_offset is 0xffff.

    The name is read from the resource file's stream the first time this
    attribute is accessed, and is cached on the object afterwards.
    """
    # The load is deliberately performed outside of any "except
    # AttributeError" handler, so that an error raised while reading is not
    # implicitly chained to an unrelated AttributeError.
    if not hasattr(self, "_name"):
        if self.name_offset == 0xffff:
            self._name = None
        else:
            resfile = self._resfile
            resfile._stream.seek(resfile.map_offset + resfile.map_name_list_offset + self.name_offset)
            (name_length,) = resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
            self._name = resfile._read_exact(name_length)

    return self._name
@property
def data_raw(self) -> bytes:
    """The raw data of this resource, as read from the resource file.

    The data is read from the resource file's stream the first time this
    attribute is accessed, and is cached on the object afterwards.
    """
    # The load is deliberately performed outside of any "except
    # AttributeError" handler, so that an error raised while reading is not
    # implicitly chained to an unrelated AttributeError.
    if not hasattr(self, "_data_raw"):
        resfile = self._resfile
        resfile._stream.seek(resfile.data_offset + self.data_raw_offset)
        (data_raw_length,) = resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
        self._data_raw = resfile._read_exact(data_raw_length)

    return self._data_raw
@property @property
def compressed_info(self) -> typing.Optional[compress.common.CompressedHeaderInfo]: def compressed_info(self) -> typing.Optional[compress.common.CompressedHeaderInfo]:
"""The compressed resource header information, or None if this resource is not compressed. """The compressed resource header information, or None if this resource is not compressed.
@ -198,25 +229,22 @@ class Resource(object):
else: else:
return self.data_raw return self.data_raw
class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.ContextManager["ResourceFile"]):
"""A resource file reader operating on a byte stream."""
# noinspection PyProtectedMember
class _LazyResourceMap(typing.Mapping[int, Resource]): class _LazyResourceMap(typing.Mapping[int, Resource]):
"""Internal class: Lazy mapping of resource IDs to resource objects, returned when subscripting a ResourceFile.""" """Internal class: Read-only wrapper for a mapping of resource IDs to resource objects.
_resfile: "ResourceFile" This class behaves like a normal read-only mapping. The main difference to a plain dict (or similar mapping) is that this mapping has a specialized repr to avoid excessive output when working in the REPL.
_restype: bytes """
_submap: typing.Mapping[int, typing.Tuple[int, ResourceAttrs, int]]
def __init__(self, resfile: "ResourceFile", restype: bytes) -> None: type: bytes
"""Create a new _LazyResourceMap "containing" all resources in resfile that have the type code restype.""" _submap: typing.Mapping[int, Resource]
def __init__(self, resource_type: bytes, submap: typing.Mapping[int, Resource]) -> None:
"""Create a new _LazyResourceMap that wraps the given mapping."""
super().__init__() super().__init__()
self._resfile = resfile self.type = resource_type
self._restype = restype self._submap = submap
self._submap = self._resfile._references[self._restype]
def __len__(self) -> int: def __len__(self) -> int:
"""Get the number of resources with this type code.""" """Get the number of resources with this type code."""
@ -236,26 +264,18 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
def __getitem__(self, key: int) -> Resource: def __getitem__(self, key: int) -> Resource:
"""Get a resource with the given ID for this type code.""" """Get a resource with the given ID for this type code."""
name_offset, attributes, data_offset = self._submap[key] return self._submap[key]
if name_offset == 0xffff:
name = None
else:
self._resfile._stream.seek(self._resfile.map_offset + self._resfile.map_name_list_offset + name_offset)
(name_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
name = self._resfile._read_exact(name_length)
self._resfile._stream.seek(self._resfile.data_offset + data_offset)
(data_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
data = self._resfile._read_exact(data_length)
return Resource(self._restype, key, name, attributes, data)
def __repr__(self) -> str: def __repr__(self) -> str:
if len(self) == 1: if len(self) == 1:
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing one resource: {next(iter(self.values()))}>" contents = f"one resource: {next(iter(self.values()))}"
else: else:
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing {len(self)} resources with IDs: {list(self)}>" contents = f"{len(self)} resources with IDs {list(self)}"
return f"<Resource map for type {self.type}, containing {contents}>"
class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.ContextManager["ResourceFile"]):
"""A resource file reader operating on a byte stream."""
_close_stream: bool _close_stream: bool
_stream: typing.BinaryIO _stream: typing.BinaryIO
@ -272,7 +292,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
file_attributes: ResourceFileAttrs file_attributes: ResourceFileAttrs
_reference_counts: typing.MutableMapping[bytes, int] _reference_counts: typing.MutableMapping[bytes, int]
_references: typing.MutableMapping[bytes, typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]]] _references: typing.MutableMapping[bytes, typing.MutableMapping[int, Resource]]
@classmethod @classmethod
def open(cls, filename: typing.Union[str, os.PathLike], *, fork: str="auto", **kwargs: typing.Any) -> "ResourceFile": def open(cls, filename: typing.Union[str, os.PathLike], *, fork: str="auto", **kwargs: typing.Any) -> "ResourceFile":
@ -433,7 +453,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
self._references = collections.OrderedDict() self._references = collections.OrderedDict()
for resource_type, count in self._reference_counts.items(): for resource_type, count in self._reference_counts.items():
resmap: typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]] = collections.OrderedDict() resmap: typing.MutableMapping[int, Resource] = collections.OrderedDict()
self._references[resource_type] = resmap self._references[resource_type] = resmap
for _ in range(count): for _ in range(count):
( (
@ -445,7 +465,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
attributes = attributes_and_data_offset >> 24 attributes = attributes_and_data_offset >> 24
data_offset = attributes_and_data_offset & ((1 << 24) - 1) data_offset = attributes_and_data_offset & ((1 << 24) - 1)
resmap[resource_id] = (name_offset, ResourceAttrs(attributes), data_offset) resmap[resource_id] = Resource(self, resource_type, resource_id, name_offset, ResourceAttrs(attributes), data_offset)
def close(self) -> None: def close(self) -> None:
"""Close this ResourceFile. """Close this ResourceFile.
@ -483,10 +503,10 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
return key in self._references return key in self._references
def __getitem__(self, key: bytes) -> "ResourceFile._LazyResourceMap": def __getitem__(self, key: bytes) -> "_LazyResourceMap":
"""Get a lazy mapping of all resources with the given type in this ResourceFile.""" """Get a lazy mapping of all resources with the given type in this ResourceFile."""
return ResourceFile._LazyResourceMap(self, key) return _LazyResourceMap(key, self._references[key])
def __repr__(self) -> str: def __repr__(self) -> str:
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>" return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>"

View File

@ -119,7 +119,7 @@ def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: ty
print(f"\t-> segment number: {segment_number_int:#x}") print(f"\t-> segment number: {segment_number_int:#x}")
# The tail part of all jump table entries (i. e. everything except for the address). # The tail part of all jump table entries (i. e. everything except for the address).
entry_tail = b"?<" + segment_number_int.to_bytes(2, "big", signed=True) + b"\xa9\xf0" entry_tail = b"?<" + segment_number_int.to_bytes(2, "big", signed=False) + b"\xa9\xf0"
# The tail is output once *without* an address in front, i. e. the first entry's address must be generated manually by a previous code. # The tail is output once *without* an address in front, i. e. the first entry's address must be generated manually by a previous code.
yield entry_tail yield entry_tail