Mirror of https://github.com/dgelessus/python-rsrcfork.git, synced 2025-07-04 07:23:54 +00:00

Compare commits: 1 commit

Commit c650ec1fb8
.gitignore (vendored): 3 changed lines
@@ -6,6 +6,3 @@ __pycache__/
 *.egg-info/
 build/
 dist/
-
-# mypy
-.mypy_cache/
README.rst: 80 changed lines
@@ -56,7 +56,7 @@ Simple example
 >>> rf
 <rsrcfork.ResourceFile at 0x1046e6048, attributes ResourceFileAttrs.0, containing 4 resource types: [b'utxt', b'utf8', b'TEXT', b'drag']>
 >>> rf[b"TEXT"]
-<rsrcfork.ResourceFile._LazyResourceMap at 0x10470ed30 containing one resource: rsrcfork.Resource(type=b'TEXT', id=256, name=None, attributes=ResourceAttrs.0, data=b'Here is some text')>
+<rsrcfork.ResourceFile._LazyResourceMap at 0x10470ed30 containing one resource: rsrcfork.Resource(resource_type=b'TEXT', resource_id=256, name=None, attributes=ResourceAttrs.0, data=b'Here is some text')>

 Automatic selection of data/resource fork
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
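The change above only renames the keyword arguments shown in the ``Resource`` repr; the usage in the simple example is otherwise the same on both sides. A minimal sketch of that usage, based on the ``ResourceFile`` API that appears in the code changes below (the file path is a hypothetical example)::

    from rsrcfork.api import ResourceFile

    # fork="auto" guesses between the data fork and the resource fork,
    # matching the CLI's default below.
    rf = ResourceFile.open("/Users/Shared/Test.textClipping", fork="auto")
    print(rf)  # the repr lists the contained resource types, as shown above

    res = rf[b"TEXT"][256]  # look up the 'TEXT' resource with ID 256
    print(res.data)         # b'Here is some text'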
@@ -108,78 +108,30 @@ Writing resource data is not supported at all.
 Further info on resource files
 ------------------------------

-For technical info and documentation about resource files and resources, see the `"resource forks" section of the mac_file_format_docs repo <https://github.com/dgelessus/mac_file_format_docs/blob/master/README.md#resource-forks>`_.
+Sources of information about the resource fork data format, and the structure of common resource types:

+* Inside Macintosh, Volume I, Chapter 5 "The Resource Manager". This book can probably be obtained in physical form somewhere, but the relevant chapter/book is also available in a few places online:
+
+  * `Apple's legacy documentation <https://developer.apple.com/legacy/library/documentation/mac/pdf/MoreMacintoshToolbox.pdf>`_
+  * pagetable.com, a site that happened to have a copy of the book: `info blog post <http://www.pagetable.com/?p=50>`_, `direct download <http://www.weihenstephan.org/~michaste/pagetable/mac/Inside_Macintosh.pdf>`_
+
+* `Wikipedia <https://en.wikipedia.org/wiki/Resource_fork>`_, of course
+* The `Resource Fork <http://fileformats.archiveteam.org/wiki/Resource_Fork>`_ article on "Just Solve the File Format Problem" (despite the title, this is a decent site and not clickbait)
+* The `KSFL <https://github.com/kreativekorp/ksfl>`_ library (and `its wiki <https://github.com/kreativekorp/ksfl/wiki/Macintosh-Resource-File-Format>`_), written in Java, which supports reading and writing resource files
+* Alysis Software Corporation's article on resource compression (found on `the company's website <http://www.alysis.us/arctechnology.htm>`_ and in `MacTech Magazine's online archive <http://preserve.mactech.com/articles/mactech/Vol.09/09.01/ResCompression/index.html>`_) has some information on the structure of certain kinds of compressed resources.
+* Apple's macOS SDK, which is distributed with Xcode. The latest version of Xcode is available for free from the Mac App Store. Current and previous versions can be downloaded from `the Apple Developer download page <https://developer.apple.com/download/more/>`_. Accessing these downloads requires an Apple ID with (at least) a free developer program membership.
+* Apple's MPW (Macintosh Programmer's Workshop) and related developer tools. These were previously available from Apple's FTP server at ftp://ftp.apple.com/, which is no longer functional. Because of this, these downloads are only available on mirror sites, such as http://staticky.com/mirrors/ftp.apple.com/.
+
+If these links are no longer functional, some are archived in the `Internet Archive Wayback Machine <https://archive.org/web/>`_ or `archive.is <http://archive.is/>`_ aka `archive.fo <https://archive.fo/>`_.
+
 Changelog
 ---------

-Version 1.7
-^^^^^^^^^^^
-
-* Added a ``raw-decompress`` subcommand to decompress compressed resource data stored in a standalone file rather than as a resource.
-* Optimized lazy loading of ``Resource`` objects. Previously, resource data would be read from disk whenever a ``Resource`` object was looked up, even if the data itself is never used. Now the resource data is only loaded once the ``data`` (or ``data_raw``) attribute is accessed.
-
-  * The same optimization applies to the ``name`` attribute, although this is unlikely to make a difference in practice.
-  * As a result, it is no longer possible to construct ``Resource`` objects without a resource file. This was previously possible, but had no practical use.
-* Fixed a small error in the ``'dcmp' (0)`` decompression implementation.
-
-Version 1.6.0
-^^^^^^^^^^^^^
-
-* Added a new subcommand-based command-line syntax to the ``rsrcfork`` tool, similar to other CLI tools such as ``git`` or ``diskutil``.
-
-  * This subcommand-based syntax is meant to replace the old CLI options, as the subcommand structure is easier to understand and more extensible in the future.
-  * Currently there are three subcommands: ``list`` to list resources in a file, ``read`` to read/display resource data, and ``read-header`` to read a resource file's header data. These subcommands can be used to perform all operations that were also available with the old CLI syntax.
-  * The old CLI syntax is still supported for now, but it will be removed soon.
-  * The new syntax no longer supports reading CLI arguments from a file (using ``@args_file.txt``), abbreviating long options (e. g. ``--no-d`` instead of ``--no-decompress``), or the short option ``-f`` instead of ``--fork``. If you have a need for any of these features, please open an issue.
-
-Version 1.5.0
-^^^^^^^^^^^^^
-
-* Added stream-based decompression methods to the ``rsrcfork.compress`` module.
-
-  * The internal decompressor implementations have been refactored to use streams.
-  * This allows for incremental decompression of compressed resource data. In practice this has no noticeable effect yet, because the main ``rsrcfork`` API doesn't support incremental reading of resource data.
-
-* Fixed the command line tool always displaying an incorrect error "Cannot specify an explicit fork when reading from stdin" when using ``-`` (stdin) as the input file.
-
-Version 1.4.0
-^^^^^^^^^^^^^
-
-* Added ``length`` and ``length_raw`` attributes to ``Resource``. These attributes are equivalent to the ``len`` of ``data`` and ``data_raw`` respectively, but may be faster to access.
-
-  * Currently, the only optimized case is ``length`` for compressed resources, but more optimizations may be added in the future.
-
-* Added a ``compressed_info`` attribute to ``Resource`` that provides access to the header information of compressed resources.
-* Improved handling of compressed resources when listing resource files with the command line tool.
-
-  * Metadata of compressed resources is now displayed even if no decompressor implementation is available (as long as the compressed data header can be parsed).
-  * Performance has been improved - the data no longer needs to be fully decompressed to get its length, this information is now read from the header.
-  * The ``'dcmp'`` ID used to decompress each resource is displayed.
-
-* Fixed an incorrect ``options.packages`` in ``setup.cfg``, which made the library unusable except when installing from source using ``--editable``.
-* Fixed ``ResourceFile.__enter__`` returning ``None``, which made it impossible to use ``ResourceFile`` properly in a ``with`` statement.
-* Fixed various minor errors reported by type checking with ``mypy``.
-
-Version 1.3.0.post1
-^^^^^^^^^^^^^^^^^^^
-
-* Fixed an incorrect ``options.packages`` in ``setup.cfg``, which made the library unusable except when installing from source using ``--editable``.
-
 Version 1.2.0.post1
 ^^^^^^^^^^^^^^^^^^^

 * Fixed an incorrect ``options.packages`` in ``setup.cfg``, which made the library unusable except when installing from source using ``--editable``.

-Version 1.3.0
-^^^^^^^^^^^^^
-
-* Added a ``--group`` command line option to group resources in list format by type (the default), ID, or with no grouping.
-* Added a ``dump-text`` output format to the command line tool. This format is identical to ``dump``, but instead of a hex dump, it outputs the resource data as text. The data is decoded as MacRoman and classic Mac newlines (``\r``) are translated. This is useful for examining resources that contain mostly plain text.
-* Changed the command line tool to sort resources by type and ID, and added a ``--no-sort`` option to disable sorting and output resources in file order (which was the previous behavior).
-* Renamed the ``rsrcfork.Resource`` attributes ``resource_type`` and ``resource_id`` to ``type`` and ``id``, respectively. The old names have been deprecated and will be removed in the future, but are still supported for now.
-* Changed ``--format=dump`` output to match ``hexdump -C``'s format - spacing has been adjusted, and multiple subsequent identical lines are collapsed into a single ``*``.
-
 Version 1.2.0
 ^^^^^^^^^^^^^

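The changelog entries removed above describe concrete API behavior. A hedged sketch of the 1.7-era attributes they mention (``length``, ``length_raw``, ``compressed_info``, and the lazily loaded ``data``); the file name is hypothetical::

    from rsrcfork.api import ResourceFile

    with ResourceFile.open("Example.rsrc", fork="auto") as rf:
        res = rf[b"TEXT"][256]
        # Since 1.7, looking up a Resource does not read its data from disk yet;
        # the data is only loaded once .data (or .data_raw) is accessed.
        print(res.length, res.length_raw)  # lengths without full decompression (1.4.0)
        if res.compressed_info is not None:
            print(res.compressed_info.dcmp_id)  # 'dcmp' ID used to decompress (1.4.0)
        data = res.data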
rsrcfork/__init__.py:

@@ -1,26 +1,6 @@
 """A pure Python, cross-platform library/tool for reading Macintosh resource data, as stored in resource forks and ``.rsrc`` files."""

-# To release a new version:
-# * Remove the .dev suffix from the version number in this file.
-# * Update the changelog in the README.rst (rename the "next version" section to the correct version number).
-# * Remove the ``dist`` directory (if it exists) to clean up any old release files.
-# * Run ``python3 setup.py sdist bdist_wheel`` to build the release files.
-# * Run ``python3 -m twine check dist/*`` to check the release files.
-# * Fix any errors reported by the build and/or check steps.
-# * Commit the changes to master.
-# * Tag the release commit with the version number, prefixed with a "v" (e. g. version 1.2.3 is tagged as v1.2.3).
-# * Fast-forward the release branch to the new release commit.
-# * Push the master and release branches.
-# * Upload the release files to PyPI using ``python3 -m twine upload dist/*``.
-# * On the GitHub repo's Releases page, edit the new release tag and add the relevant changelog section from the README.rst. (Note: The README is in reStructuredText format, but GitHub's release notes use Markdown, so it may be necessary to adjust the markup syntax.)
-
-# After releasing:
-# * (optional) Remove the build and dist directories from the previous release as they are no longer needed.
-# * Bump the version number in this file to the next version and add a .dev suffix.
-# * Add a new empty section for the next version to the README.rst changelog.
-# * Commit and push the changes to master.
-
-__version__ = "1.7.0"
+__version__ = "1.2.0.post1"

 __all__ = [
     "Resource",
rsrcfork/__main__.py:

@@ -1,8 +1,6 @@
 import argparse
 import collections
 import enum
-import itertools
-import pathlib
 import sys
 import textwrap
 import typing
@@ -26,13 +24,13 @@ _REZ_ATTR_NAMES = {
     api.ResourceAttrs.resCompressed: None, # "Extended Header resource attribute"
 }

-F = typing.TypeVar("F", bound=enum.Flag)
-def decompose_flags(value: F) -> typing.Sequence[F]:
+F = typing.TypeVar("F", bound=enum.Flag, covariant=True)
+def _decompose_flags(value: F) -> typing.Sequence[F]:
     """Decompose an enum.Flags instance into separate enum constants."""

     return [bit for bit in type(value) if bit in value]

-def is_printable(char: str) -> bool:
+def _is_printable(char: str) -> bool:
     """Determine whether a character is printable for our purposes.

     We mainly use Python's definition of printable (i. e. everything that Unicode does not consider a separator or "other" character). However, we also treat U+F8FF as printable, which is the private use codepoint used for the Apple logo character.
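``_decompose_flags`` (named ``decompose_flags`` on the left-hand side) is fully shown above and easy to exercise on its own; a small self-contained sketch, with illustrative flag names and values::

    import enum

    class Attrs(enum.Flag):
        # Hypothetical flags for illustration; the real code passes
        # api.ResourceAttrs / api.ResourceFileAttrs values.
        resCompressed = 1
        resPreload = 4

    value = Attrs.resCompressed | Attrs.resPreload
    # The same expression as the function body above:
    print([bit for bit in type(value) if bit in value])
    # -> [<Attrs.resCompressed: 1>, <Attrs.resPreload: 4>]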
@@ -40,20 +38,20 @@ def is_printable(char: str) -> bool:

     return char.isprintable() or char == "\uf8ff"

-def bytes_unescape(string: str) -> bytes:
+def _bytes_unescape(string: str) -> bytes:
     """Convert a string containing text (in _TEXT_ENCODING) and hex escapes to a bytestring.

     (We implement our own unescaping mechanism here to not depend on any of Python's string/bytes escape syntax.)
     """

-    out: typing.List[int] = []
+    out = []
     it = iter(string)
     for char in it:
         if char == "\\":
             try:
                 esc = next(it)
                 if esc in "\\\'\"":
-                    out.extend(esc.encode(_TEXT_ENCODING))
+                    out.append(esc)
                 elif esc == "x":
                     x1, x2 = next(it), next(it)
                     out.append(int(x1+x2, 16))
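The escape-branch change above matters more than it looks: ``out`` collects integer byte values that are finally passed to ``bytes(out)``, so the left-hand ``out.extend(esc.encode(_TEXT_ENCODING))`` is the type-correct form, while appending the character as a ``str`` would make ``bytes(out)`` raise a ``TypeError``. A self-contained sketch (the non-escape branch and the MacRoman encoding are assumptions based on the surrounding code and README)::

    _TEXT_ENCODING = "mac_roman"  # assumed; the README mentions MacRoman decoding

    def bytes_unescape_sketch(string: str) -> bytes:
        out = []  # list of ints, consumed by bytes(out) below
        it = iter(string)
        for char in it:
            if char == "\\":
                esc = next(it)
                if esc in "\\'\"":
                    out.extend(esc.encode(_TEXT_ENCODING))  # byte values, not a str
                elif esc == "x":
                    x1, x2 = next(it), next(it)
                    out.append(int(x1 + x2, 16))
            else:
                out.extend(char.encode(_TEXT_ENCODING))
        return bytes(out)

    print(bytes_unescape_sketch(r"AB\x00C"))  # b'AB\x00C'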
@@ -66,7 +64,7 @@ def bytes_unescape(string: str) -> bytes:

     return bytes(out)

-def bytes_escape(bs: bytes, *, quote: typing.Optional[str]=None) -> str:
+def _bytes_escape(bs: bytes, *, quote: str=None) -> str:
     """Convert a bytestring to a string (using _TEXT_ENCODING), with non-printable characters hex-escaped.

     (We implement our own escaping mechanism here to not depend on Python's str or bytes repr.)
@@ -76,15 +74,15 @@ def bytes_escape(bs: bytes, *, quote: typing.Optional[str]=None) -> str:
     for byte, char in zip(bs, bs.decode(_TEXT_ENCODING)):
         if char in {quote, "\\"}:
             out.append(f"\\{char}")
-        elif is_printable(char):
+        elif _is_printable(char):
             out.append(char)
         else:
             out.append(f"\\x{byte:02x}")

     return "".join(out)

-def filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> typing.List[api.Resource]:
-    matching: typing.MutableMapping[typing.Tuple[bytes, int], api.Resource] = collections.OrderedDict()
+def _filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> typing.Sequence[api.Resource]:
+    matching = collections.OrderedDict()

     for filter in filters:
         if len(filter) == 4:
@@ -94,15 +92,15 @@ def filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> typ
                 continue

             for res in resources.values():
-                matching[res.type, res.id] = res
+                matching[res.resource_type, res.resource_id] = res
         elif filter[0] == filter[-1] == "'":
             try:
-                resources = rf[bytes_unescape(filter[1:-1])]
+                resources = rf[_bytes_unescape(filter[1:-1])]
             except KeyError:
                 continue

             for res in resources.values():
-                matching[res.type, res.id] = res
+                matching[res.resource_type, res.resource_id] = res
         else:
             pos = filter.find("'", 1)
             if pos == -1:
@@ -110,86 +108,71 @@ def filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> typ
             elif filter[pos + 1] != " ":
                 raise ValueError(f"Invalid filter {filter!r}: Resource type and ID must be separated by a space")

-            restype_str, resid_str = filter[:pos + 1], filter[pos + 2:]
+            restype, resid = filter[:pos + 1], filter[pos + 2:]

-            if not restype_str[0] == restype_str[-1] == "'":
+            if not restype[0] == restype[-1] == "'":
                 raise ValueError(
-                    f"Invalid filter {filter!r}: Resource type is not a single-quoted type identifier: {restype_str!r}")
-            restype = bytes_unescape(restype_str[1:-1])
+                    f"Invalid filter {filter!r}: Resource type is not a single-quoted type identifier: {restype!r}")
+            restype = _bytes_unescape(restype[1:-1])

             if len(restype) != 4:
                 raise ValueError(
                     f"Invalid filter {filter!r}: Type identifier must be 4 bytes after replacing escapes, got {len(restype)} bytes: {restype!r}")

-            if resid_str[0] != "(" or resid_str[-1] != ")":
+            if resid[0] != "(" or resid[-1] != ")":
                 raise ValueError(f"Invalid filter {filter!r}: Resource ID must be parenthesized")
-            resid_str = resid_str[1:-1]
+            resid = resid[1:-1]

             try:
                 resources = rf[restype]
             except KeyError:
                 continue

-            if resid_str[0] == resid_str[-1] == '"':
-                name = bytes_unescape(resid_str[1:-1])
+            if resid[0] == resid[-1] == '"':
+                name = _bytes_unescape(resid[1:-1])

                 for res in resources.values():
                     if res.name == name:
-                        matching[res.type, res.id] = res
+                        matching[res.resource_type, res.resource_id] = res
                         break
-            elif ":" in resid_str:
-                if resid_str.count(":") > 1:
-                    raise ValueError(f"Invalid filter {filter!r}: Too many colons in ID range expression: {resid_str!r}")
-                start_str, end_str = resid_str.split(":")
-                start, end = int(start_str), int(end_str)
+            elif ":" in resid:
+                if resid.count(":") > 1:
+                    raise ValueError(f"Invalid filter {filter!r}: Too many colons in ID range expression: {resid!r}")
+                start, end = resid.split(":")
+                start, end = int(start), int(end)

                 for res in resources.values():
-                    if start <= res.id <= end:
-                        matching[res.type, res.id] = res
+                    if start <= res.resource_id <= end:
+                        matching[res.resource_type, res.resource_id] = res
             else:
-                resid = int(resid_str)
+                resid = int(resid)
                 try:
                     res = resources[resid]
                 except KeyError:
                     continue
-                matching[res.type, res.id] = res
+                matching[res.resource_type, res.resource_id] = res

     return list(matching.values())

-def hexdump(data: bytes) -> None:
-    last_line = None
-    asterisk_shown = False
+def _hexdump(data: bytes):
     for i in range(0, len(data), 16):
         line = data[i:i + 16]
-        # If the same 16-byte lines appear multiple times, print only the first one, and replace all further lines with a single line with an asterisk.
-        # This is unambiguous - to find out how many lines were collapsed this way, the user can compare the addresses of the lines before and after the asterisk.
-        if line == last_line:
-            if not asterisk_shown:
-                print("*")
-                asterisk_shown = True
-        else:
-            line_hex_left = " ".join(f"{byte:02x}" for byte in line[:8])
-            line_hex_right = " ".join(f"{byte:02x}" for byte in line[8:])
-            line_char = line.decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES)
-            print(f"{i:08x} {line_hex_left:<{8*2+7}} {line_hex_right:<{8*2+7}} |{line_char}|")
-            asterisk_shown = False
-        last_line = line
+        line_hex = " ".join(f"{byte:02x}" for byte in line)
+        line_char = line.decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES)
+        print(f"{i:08x} {line_hex:<{16*2+15}} |{line_char}|")

     if data:
         print(f"{len(data):08x}")

-def raw_hexdump(data: bytes) -> None:
+def _raw_hexdump(data: bytes):
     for i in range(0, len(data), 16):
         print(" ".join(f"{byte:02x}" for byte in data[i:i + 16]))

-def translate_text(data: bytes) -> str:
-    return data.decode(_TEXT_ENCODING).replace("\r", "\n")
-
-def describe_resource(res: api.Resource, *, include_type: bool, decompress: bool) -> str:
-    id_desc_parts = [f"{res.id}"]
+def _describe_resource(res: api.Resource, *, include_type: bool, decompress: bool) -> str:
+    id_desc_parts = [f"{res.resource_id}"]

     if res.name is not None:
-        name = bytes_escape(res.name, quote='"')
+        name = _bytes_escape(res.name, quote='"')
         id_desc_parts.append(f'"{name}"')

     id_desc = ", ".join(id_desc_parts)
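The comments deleted from the left-hand ``hexdump`` explain its line-collapsing behavior. A compact, runnable sketch of that idea (output formatting simplified relative to the real function)::

    def hexdump_sketch(data: bytes) -> None:
        last_line = None
        asterisk_shown = False
        for i in range(0, len(data), 16):
            line = data[i:i + 16]
            if line == last_line:
                # Repeated 16-byte lines collapse into a single "*";
                # the next differing line's address shows how much was skipped.
                if not asterisk_shown:
                    print("*")
                    asterisk_shown = True
            else:
                print(f"{i:08x}  " + " ".join(f"{byte:02x}" for byte in line))
                asterisk_shown = False
            last_line = line
        if data:
            print(f"{len(data):08x}")

    hexdump_sketch(bytes(48) + b"end")  # one line of zeros, "*", then the tail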
@@ -198,18 +181,16 @@ def describe_resource(res: api.Resource, *, include_type: bool, decompress: bool

     if decompress and api.ResourceAttrs.resCompressed in res.attributes:
         try:
-            res.compressed_info
+            res.data
         except compress.DecompressError:
-            length_desc = f"unparseable compressed data header ({res.length_raw} bytes compressed)"
+            length_desc = f"decompression failed ({len(res.data_raw)} bytes compressed)"
         else:
-            assert res.compressed_info is not None
-            length_desc = f"{res.length} bytes ({res.length_raw} bytes compressed, 'dcmp' ({res.compressed_info.dcmp_id}) format)"
+            length_desc = f"{len(res.data)} bytes ({len(res.data_raw)} bytes compressed)"
     else:
-        assert res.compressed_info is None
-        length_desc = f"{res.length_raw} bytes"
+        length_desc = f"{len(res.data_raw)} bytes"
     content_desc_parts.append(length_desc)

-    attrs = decompose_flags(res.attributes)
+    attrs = _decompose_flags(res.attributes)
     if attrs:
         content_desc_parts.append(" | ".join(attr.name for attr in attrs))

@@ -217,11 +198,11 @@ def describe_resource(res: api.Resource, *, include_type: bool, decompress: bool

     desc = f"({id_desc}): {content_desc}"
     if include_type:
-        restype = bytes_escape(res.type, quote="'")
+        restype = _bytes_escape(res.resource_type, quote="'")
         desc = f"'{restype}' {desc}"
     return desc

-def parse_args_old(args: typing.List[str]) -> argparse.Namespace:
+def main():
     ap = argparse.ArgumentParser(
         add_help=False,
         fromfile_prefix_chars="@",
@@ -248,99 +229,108 @@ def parse_args_old(args: typing.List[str]) -> argparse.Namespace:
     ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview")
     ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)")
     ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not decompress compressed resources, output compressed resource data as-is")
-    ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump) (default), human-readable info with newline-translated data (dump-text), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
-    ap.add_argument("--group", action="store", choices=["none", "type", "id"], default="type", help="Group resources in list view by type or ID, or disable grouping (default: type)")
-    ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID")
+    ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
     ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
     ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")

     ap.add_argument("file", help="The file to read, or - for stdin")
     ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources")

-    ns = ap.parse_args(args)
-    return ns
-
-def show_header_data(data: bytes, *, format: str) -> None:
-    if format == "dump":
-        hexdump(data)
-    elif format == "dump-text":
-        print(translate_text(data))
-    elif format == "hex":
-        raw_hexdump(data)
-    elif format == "raw":
-        sys.stdout.buffer.write(data)
-    elif format == "derez":
-        print("Cannot output file header data in derez format", file=sys.stderr)
-        sys.exit(1)
-    else:
-        raise ValueError(f"Unhandled output format: {format}")
-
-def show_filtered_resources(resources: typing.Sequence[api.Resource], format: str, decompress: bool) -> None:
-    if not resources:
-        if format in ("dump", "dump-text"):
-            print("No resources matched the filter")
-        elif format in ("hex", "raw"):
-            print("No resources matched the filter", file=sys.stderr)
-            sys.exit(1)
-        elif format == "derez":
-            print("/* No resources matched the filter */")
-        else:
-            raise AssertionError(f"Unhandled output format: {format}")
-    elif format in ("hex", "raw") and len(resources) != 1:
-        print(f"Format {format} can only output a single resource, but the filter matched {len(resources)} resources", file=sys.stderr)
-        sys.exit(1)
-    
-    for res in resources:
-        if decompress:
-            data = res.data
-        else:
-            data = res.data_raw
-        
-        if format in ("dump", "dump-text"):
-            # Human-readable info and hex or text dump
-            desc = describe_resource(res, include_type=True, decompress=decompress)
-            print(f"Resource {desc}:")
-            if format == "dump":
-                hexdump(data)
-            elif format == "dump-text":
-                print(translate_text(data))
-            else:
-                raise AssertionError(f"Unhandled format: {format!r}")
-            print()
-        elif format == "hex":
-            # Data only as hex
-            
-            raw_hexdump(data)
-        elif format == "raw":
-            # Data only as raw bytes
-            
-            sys.stdout.buffer.write(data)
-        elif format == "derez":
-            # Like DeRez with no resource definitions
-            
-            attrs = list(decompose_flags(res.attributes))
-            
-            if decompress and api.ResourceAttrs.resCompressed in attrs:
-                attrs.remove(api.ResourceAttrs.resCompressed)
-                attrs_comment = " /* was compressed */"
-            else:
-                attrs_comment = ""
-            
-            attr_descs_with_none = [_REZ_ATTR_NAMES[attr] for attr in attrs]
-            if None in attr_descs_with_none:
-                attr_descs = [f"${res.attributes.value:02X}"]
-            else:
-                attr_descs = typing.cast(typing.List[str], attr_descs_with_none)
-            
-            parts = [str(res.id)]
-            
-            if res.name is not None:
-                name = bytes_escape(res.name, quote='"')
-                parts.append(f'"{name}"')
-            
-            parts += attr_descs
-            
-            restype = bytes_escape(res.type, quote="'")
-            print(f"data '{restype}' ({', '.join(parts)}{attrs_comment}) {{")
-            
-            for i in range(0, len(data), 16):
+    ns = ap.parse_args()
+    
+    if ns.file == "-":
+        if ns.fork is not None:
+            print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr)
+            sys.exit(1)
+        
+        rf = api.ResourceFile(sys.stdin.buffer)
+    else:
+        rf = api.ResourceFile.open(ns.file, fork=ns.fork)
+    
+    with rf:
+        if ns.header_system or ns.header_application:
+            if ns.header_system:
+                data = rf.header_system_data
+            else:
+                data = rf.header_application_data
+            
+            if ns.format == "dump":
+                _hexdump(data)
+            elif ns.format == "hex":
+                _raw_hexdump(data)
+            elif ns.format == "raw":
+                sys.stdout.buffer.write(data)
+            elif ns.format == "derez":
+                print("Cannot output file header data in derez format", file=sys.stderr)
+                sys.exit(1)
+            else:
+                raise ValueError(f"Unhandled output format: {ns.format}")
+        elif ns.filter or ns.all:
+            if ns.filter:
+                resources = _filter_resources(rf, ns.filter)
+            else:
+                resources = []
+                for reses in rf.values():
+                    resources.extend(reses.values())
+            
+            if not resources:
+                if ns.format == "dump":
+                    print("No resources matched the filter")
+                elif ns.format in ("hex", "raw"):
+                    print("No resources matched the filter", file=sys.stderr)
+                    sys.exit(1)
+                elif ns.format == "derez":
+                    print("/* No resources matched the filter */")
+                else:
+                    raise AssertionError(f"Unhandled output format: {ns.format}")
+            elif ns.format in ("hex", "raw") and len(resources) != 1:
+                print(f"Format {ns.format} can only output a single resource, but the filter matched {len(resources)} resources", file=sys.stderr)
+                sys.exit(1)
+            
+            for res in resources:
+                if ns.decompress:
+                    data = res.data
+                else:
+                    data = res.data_raw
+                
+                if ns.format == "dump":
+                    # Human-readable info and hex dump
+                    desc = _describe_resource(res, include_type=True, decompress=ns.decompress)
+                    print(f"Resource {desc}:")
+                    _hexdump(data)
+                    print()
+                elif ns.format == "hex":
+                    # Data only as hex
+                    
+                    _raw_hexdump(data)
+                elif ns.format == "raw":
+                    # Data only as raw bytes
+                    
+                    sys.stdout.buffer.write(data)
+                elif ns.format == "derez":
+                    # Like DeRez with no resource definitions
+                    
+                    attrs = list(_decompose_flags(res.attributes))
+                    
+                    if ns.decompress and api.ResourceAttrs.resCompressed in attrs:
+                        attrs.remove(api.ResourceAttrs.resCompressed)
+                        attrs_comment = " /* was compressed */"
+                    else:
+                        attrs_comment = ""
+                    
+                    attr_descs = [_REZ_ATTR_NAMES[attr] for attr in attrs]
+                    if None in attr_descs:
+                        attr_descs[:] = [f"${res.attributes.value:02X}"]
+                    
+                    parts = [str(res.resource_id)]
+                    
+                    if res.name is not None:
+                        name = _bytes_escape(res.name, quote='"')
+                        parts.append(f'"{name}"')
+                    
+                    parts += attr_descs
+                    
+                    restype = _bytes_escape(res.resource_type, quote="'")
+                    print(f"data '{restype}' ({', '.join(parts)}{attrs_comment}) {{")
+                    
+                    for i in range(0, len(data), 16):
@@ -361,494 +351,32 @@ def show_filtered_resources(resources: typing.Sequence[api.Resource], format: st
-            print("};")
-            print()
-        else:
-            raise ValueError(f"Unhandled output format: {format}")
+                    print("};")
+                    print()
+                else:
+                    raise ValueError(f"Unhandled output format: {ns.format}")

-def list_resource_file(rf: api.ResourceFile, *, sort: bool, group: str, decompress: bool) -> None:
-    if len(rf) == 0:
-        print("No resources (empty resource file)")
-        return
-    
-    if group == "none":
-        all_resources: typing.List[api.Resource] = []
-        for reses in rf.values():
-            all_resources.extend(reses.values())
-        if sort:
-            all_resources.sort(key=lambda res: (res.type, res.id))
-        print(f"{len(all_resources)} resources:")
-        for res in all_resources:
-            print(describe_resource(res, include_type=True, decompress=decompress))
-    elif group == "type":
-        print(f"{len(rf)} resource types:")
-        restype_items: typing.Collection[typing.Tuple[bytes, typing.Mapping[int, api.Resource]]] = rf.items()
-        if sort:
-            restype_items = sorted(restype_items, key=lambda item: item[0])
-        for typecode, resources_map in restype_items:
-            restype = bytes_escape(typecode, quote="'")
-            print(f"'{restype}': {len(resources_map)} resources:")
-            resources_items: typing.Collection[typing.Tuple[int, api.Resource]] = resources_map.items()
-            if sort:
-                resources_items = sorted(resources_items, key=lambda item: item[0])
-            for resid, res in resources_items:
-                print(describe_resource(res, include_type=False, decompress=decompress))
-            print()
-    elif group == "id":
-        all_resources = []
-        for reses in rf.values():
-            all_resources.extend(reses.values())
-        all_resources.sort(key=lambda res: res.id)
-        resources_by_id = {resid: list(reses) for resid, reses in itertools.groupby(all_resources, key=lambda res: res.id)}
-        print(f"{len(resources_by_id)} resource IDs:")
-        for resid, resources in resources_by_id.items():
-            print(f"({resid}): {len(resources)} resources:")
-            if sort:
-                resources.sort(key=lambda res: res.type)
-            for res in resources:
-                print(describe_resource(res, include_type=True, decompress=decompress))
-            print()
-    else:
-        raise AssertionError(f"Unhandled group mode: {group!r}")
-
-def main_old(args: typing.List[str]) -> typing.NoReturn:
-    ns = parse_args_old(args)
-    
-    if ns.file == "-":
-        if ns.fork != "auto":
-            print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr)
-            sys.exit(1)
-        
-        rf = api.ResourceFile(sys.stdin.buffer)
-    else:
-        rf = api.ResourceFile.open(ns.file, fork=ns.fork)
-    
-    with rf:
-        print("Warning: The syntax of the rsrcfork command has changed.", file=sys.stderr)
-        
-        if ns.header_system or ns.header_application:
-            if ns.header_system:
-                print('Please use "rsrcfork read-header --part=system <file>" instead of "rsrcfork --header-system <file>".', file=sys.stderr)
-                print(file=sys.stderr)
-                
-                data = rf.header_system_data
-            else:
-                print('Please use "rsrcfork read-header --part=application <file>" instead of "rsrcfork --header-application <file>".', file=sys.stderr)
-                print(file=sys.stderr)
-                
-                data = rf.header_application_data
-            
-            show_header_data(data, format=ns.format)
-        elif ns.filter or ns.all:
-            if ns.filter:
-                print('Please use "rsrcfork read <file> <filters...>" instead of "rsrcfork <file> <filters...>".', file=sys.stderr)
-                print(file=sys.stderr)
-                
-                resources = filter_resources(rf, ns.filter)
-            else:
-                print('Please use "rsrcfork read <file>" instead of "rsrcfork <file> --all".', file=sys.stderr)
-                print(file=sys.stderr)
-                
-                resources = []
-                for reses in rf.values():
-                    resources.extend(reses.values())
-            
-            if ns.sort:
-                resources.sort(key=lambda res: (res.type, res.id))
-            
-            show_filtered_resources(resources, format=ns.format, decompress=ns.decompress)
         else:
-            print('Please use "rsrcfork list <file>" instead of "rsrcfork <file>".', file=sys.stderr)
-            print(file=sys.stderr)
-            
             if rf.header_system_data != bytes(len(rf.header_system_data)):
                 print("Header system data:")
-                hexdump(rf.header_system_data)
+                _hexdump(rf.header_system_data)

             if rf.header_application_data != bytes(len(rf.header_application_data)):
                 print("Header application data:")
-                hexdump(rf.header_application_data)
+                _hexdump(rf.header_application_data)

-            attrs = decompose_flags(rf.file_attributes)
+            attrs = _decompose_flags(rf.file_attributes)
             if attrs:
                 print("File attributes: " + " | ".join(attr.name for attr in attrs))

-            list_resource_file(rf, sort=ns.sort, group=ns.group, decompress=ns.decompress)
+            if len(rf) > 0:
+                print(f"{len(rf)} resource types:")
+                for typecode, resources in rf.items():
+                    restype = _bytes_escape(typecode, quote="'")
+                    print(f"'{restype}': {len(resources)} resources:")
+                    for resid, res in rf[typecode].items():
+                        print(_describe_resource(res, include_type=False, decompress=ns.decompress))
+                    print()
+            else:
+                print("No resource types (empty resource file)")

     sys.exit(0)

-def make_argument_parser(*, description: str, **kwargs: typing.Any) -> argparse.ArgumentParser:
-    """Create an argparse.ArgumentParser with some slightly modified defaults.
-    
-    This function is used to ensure that all subcommands use the same base configuration for their ArgumentParser.
-    """
-    
-    ap = argparse.ArgumentParser(
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        description=description,
-        allow_abbrev=False,
-        add_help=False,
-        **kwargs,
-    )
-    
-    ap.add_argument("--help", action="help", help="Display this help message and exit")
-    
-    return ap
-
-def add_resource_file_args(ap: argparse.ArgumentParser) -> None:
-    """Define common options/arguments for specifying an input resource file.
-    
-    This includes a positional argument for the resource file's path, and the ``--fork`` option to select which fork of the file to use.
-    """
-    
-    ap.add_argument("--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource file data, or auto to guess. Default: %(default)s")
-    ap.add_argument("file", help="The file from which to read resources, or - for stdin.")
-
-def open_resource_file(file: str, *, fork: str = None) -> api.ResourceFile:
-    """Open a resource file at the given path, using the specified fork."""
-    
-    if file == "-":
-        if fork != "auto":
-            print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr)
-            sys.exit(1)
-        
-        return api.ResourceFile(sys.stdin.buffer)
-    else:
-        return api.ResourceFile.open(file, fork=fork)
-
-def do_read_header(prog: str, args: typing.List[str]) -> typing.NoReturn:
-    """Read the header data from a resource file."""
-    
-    ap = make_argument_parser(
-        prog=prog,
-        description="""
-Read and output a resource file's header data.
-
-The header data consists of two parts:
-
-The system-reserved data is 112 bytes long and used by the Classic Mac OS
-Finder as temporary storage space. It usually contains parts of the
-file metadata (name, type/creator code, etc.).
-
-The application-specific data is 128 bytes long and is available for use by
-applications. In practice it usually contains junk data that happened to be in
-memory when the resource file was written.
-
-Mac OS X does not use the header data fields anymore. Resource files written
-on Mac OS X normally have both parts of the header data set to all zero bytes.
-""",
-    )
-    
-    ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw"], default="dump", help="How to output the header data: human-readable info with hex dump (dump) (default), human-readable info with newline-translated data (dump-text), data only as hex (hex), or data only as raw bytes (raw). Default: %(default)s")
-    ap.add_argument("--part", choices=["system", "application", "all"], default="all", help="Which part of the header to read. Default: %(default)s")
-    add_resource_file_args(ap)
-    
-    ns = ap.parse_args(args)
-    
-    with open_resource_file(ns.file, fork=ns.fork) as rf:
-        if ns.format in {"dump", "dump-text"}:
-            if ns.format == "dump":
-                dump_func = hexdump
-            elif ns.format == "dump-text":
-                def dump_func(d):
-                    print(translate_text(d))
-            else:
-                raise AssertionError(f"Unhandled --format: {ns.format!r}")
-            
-            if ns.part in {"system", "all"}:
-                print("System-reserved header data:")
-                dump_func(rf.header_system_data)
-            
-            if ns.part in {"application", "all"}:
-                print("Application-specific header data:")
-                dump_func(rf.header_application_data)
-        elif ns.format in {"hex", "raw"}:
-            if ns.part == "system":
-                data = rf.header_system_data
-            elif ns.part == "application":
-                data = rf.header_application_data
-            elif ns.part == "all":
-                data = rf.header_system_data + rf.header_application_data
-            else:
-                raise AssertionError(f"Unhandled --part: {ns.part!r}")
-            
-            if ns.format == "hex":
-                raw_hexdump(data)
-            elif ns.format == "raw":
-                sys.stdout.buffer.write(data)
-            else:
-                raise AssertionError(f"Unhandled --format: {ns.format!r}")
-        else:
-            raise AssertionError(f"Unhandled --format: {ns.format!r}")
-
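The description block in the removed ``do_read_header`` documents the two header parts (112 system-reserved bytes, 128 application-specific bytes). A short sketch reading them through the same attributes the code uses (the file name is hypothetical)::

    from rsrcfork.api import ResourceFile

    with ResourceFile.open("Example.rsrc", fork="auto") as rf:
        print(len(rf.header_system_data))       # 112: system-reserved part
        print(len(rf.header_application_data))  # 128: application-specific part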
-def do_info(prog: str, args: typing.List[str]) -> typing.NoReturn:
-    """Display technical information about the resource file."""
-    
-    ap = make_argument_parser(
-        prog=prog,
-        description="""
-Display technical information and stats about the resource file.
-""",
-    )
-    add_resource_file_args(ap)
-    
-    ns = ap.parse_args(args)
-    
-    with open_resource_file(ns.file, fork=ns.fork) as rf:
-        print("System-reserved header data:")
-        hexdump(rf.header_system_data)
-        print()
-        print("Application-specific header data:")
-        hexdump(rf.header_application_data)
-        print()
-        
-        print(f"Resource data starts at {rf.data_offset:#x} and is {rf.data_length:#x} bytes long")
-        print(f"Resource map starts at {rf.map_offset:#x} and is {rf.map_length:#x} bytes long")
-        attrs = decompose_flags(rf.file_attributes)
-        if attrs:
-            attrs_desc = " | ".join(attr.name for attr in attrs)
-        else:
-            attrs_desc = "(none)"
-        print(f"Resource map attributes: {attrs_desc}")
-        print(f"Resource map type list starts at {rf.map_type_list_offset:#x} (relative to map start) and contains {len(rf)} types")
-        print(f"Resource map name list starts at {rf.map_name_list_offset:#x} (relative to map start)")
-
-def do_list(prog: str, args: typing.List[str]) -> typing.NoReturn:
-    """List the resources in a file."""
-    
-    ap = make_argument_parser(
-        prog=prog,
-        description="""
-List the resources stored in a resource file.
-
-Each resource's type, ID, name (if any), attributes (if any), and data length
-are displayed. For compressed resources, the compressed and decompressed data
-length are displayed, as well as the ID of the 'dcmp' resource used to
-decompress the resource data.
-""",
-    )
-    
-    ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not parse the data header of compressed resources and only output their compressed length.")
-    ap.add_argument("--group", action="store", choices=["none", "type", "id"], default="type", help="Group resources by type or ID, or disable grouping. Default: %(default)s")
-    ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID.")
-    add_resource_file_args(ap)
-    
-    ns = ap.parse_args(args)
-    
-    with open_resource_file(ns.file, fork=ns.fork) as rf:
-        list_resource_file(rf, sort=ns.sort, group=ns.group, decompress=ns.decompress)
-
-def do_read(prog: str, args: typing.List[str]) -> typing.NoReturn:
-    """Read data from resources."""
-    
-    ap = make_argument_parser(
-        prog=prog,
-        description="""
-Read the data of one or more resources.
-
-The resource filters use syntax similar to Rez (resource definition) files.
-Each filter can have one of the following forms:
-
-An unquoted type name (without escapes): TYPE
-A quoted type name: 'TYPE'
-A quoted type name and an ID: 'TYPE' (42)
-A quoted type name and an ID range: 'TYPE' (24:42)
-A quoted type name and a resource name: 'TYPE' ("foobar")
-
-Note that the resource filter syntax uses quotes, parentheses and spaces,
-which have special meanings in most shells. It is recommended to quote each
-resource filter (using double quotes) to ensure that it is not interpreted
-or rewritten by the shell.
-""",
-    )
-    
-    ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not decompress compressed resources, output the raw compressed resource data.")
-    ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw", "derez"], default="dump", help="How to output the resources: human-readable info with hex dump (dump), human-readable info with newline-translated data (dump-text), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez). Default: %(default)s")
-    ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID.")
-    add_resource_file_args(ap)
-    ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to read. If no filters are specified, all resources are read.")
-    
-    ns = ap.parse_args(args)
-    
-    with open_resource_file(ns.file, fork=ns.fork) as rf:
-        if ns.filter:
-            resources = filter_resources(rf, ns.filter)
-        else:
-            resources = []
-            for reses in rf.values():
-                resources.extend(reses.values())
-        
-        if ns.sort:
-            resources.sort(key=lambda res: (res.type, res.id))
-        
-        show_filtered_resources(resources, format=ns.format, decompress=ns.decompress)
-
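The filter forms listed in the removed description map directly onto the ``filter_resources`` helper from earlier in this diff; a sketch of all five forms (the values are examples only)::

    filters = [
        "TEXT",                  # unquoted type name (no escapes)
        "'TEXT'",                # quoted type name
        "'TEXT' (42)",           # quoted type name and ID
        "'TEXT' (24:42)",        # quoted type name and ID range
        "'TEXT' (\"foobar\")",   # quoted type name and resource name
    ]
    # rf is an open api.ResourceFile, e.g. from open_resource_file(...):
    resources = filter_resources(rf, filters)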
-def do_raw_decompress(prog: str, args: typing.List[str]) -> typing.NoReturn:
-    """Decompress raw compressed resource data."""
-    
-    ap = make_argument_parser(
-        prog=prog,
-        description="""
-Decompress raw compressed resource data that is stored in a standalone file
-and not as a resource in a resource file.
-
-This subcommand can be used in a shell pipeline by passing - as the input and
-output file name, i. e. "%(prog)s - -".
-
-Note: All other rsrcfork subcommands natively support compressed resources and
-will automatically decompress them as needed. This subcommand is only needed
-to decompress resource data that has been read from a resource file in
-compressed form (e. g. using --no-decompress or another tool that does not
-handle resource compression).
-""",
-    )
-    
-    ap.add_argument("--debug", action="store_true", help="Display debugging output from the decompressor on stdout. Cannot be used if the output file is - (stdout).")
-    
-    ap.add_argument("input_file", help="The file from which to read the compressed resource data, or - for stdin.")
-    ap.add_argument("output_file", help="The file to which to write the decompressed resource data, or - for stdout.")
-    
-    ns = ap.parse_args(args)
-    
-    if ns.input_file == "-":
-        in_stream = sys.stdin.buffer
-        close_in_stream = False
-    else:
-        in_stream = open(ns.input_file, "rb")
-        close_in_stream = True
-    
-    try:
-        header_info = compress.CompressedHeaderInfo.parse_stream(in_stream)
-        
-        # Open the output file only after parsing the header, so that the file is only created (or its existing contents deleted) if the input file is valid.
-        if ns.output_file == "-":
-            if ns.debug:
-                print("Cannot use --debug if the decompression output file is - (stdout).", file=sys.stderr)
-                print("The debug output goes to stdout and would conflict with the decompressed data.", file=sys.stderr)
-                sys.exit(2)
-            
-            out_stream = sys.stdout.buffer
-            close_out_stream = False
-        else:
-            out_stream = open(ns.output_file, "wb")
-            close_out_stream = True
-        
-        try:
-            for chunk in compress.decompress_stream_parsed(header_info, in_stream, debug=ns.debug):
-                out_stream.write(chunk)
-        finally:
-            if close_out_stream:
-                out_stream.close()
-    finally:
-        if close_in_stream:
-            in_stream.close()
-
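Stripped of argument parsing and stream bookkeeping, the removed ``do_raw_decompress`` reduces to a small pipeline around the two ``compress`` calls used above (file names are hypothetical; the optional ``debug`` flag is omitted and assumed to default to off)::

    from rsrcfork import compress

    with open("data.compressed", "rb") as in_stream, open("data.bin", "wb") as out_stream:
        # Parse the compressed data header first, so invalid input fails
        # before the output file is touched.
        header_info = compress.CompressedHeaderInfo.parse_stream(in_stream)
        for chunk in compress.decompress_stream_parsed(header_info, in_stream):
            out_stream.write(chunk)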
-SUBCOMMANDS = {
-    "read-header": do_read_header,
-    "info": do_info,
-    "list": do_list,
-    "read": do_read,
-    "raw-decompress": do_raw_decompress,
-}
-
-def format_subcommands_help() -> str:
-    """Return a formatted help text describing the available subcommands.
-    
-    Because we do not use argparse's native support for subcommands (see comments in main function), the main ArgumentParser's help does not include any information about the subcommands by default, so we have to format and add it ourselves.
-    """
-    
-    # The list of subcommands is formatted using a "fake" ArgumentParser, which is never actually used to parse any arguments.
-    # The options are chosen so that the help text will only include the subcommands list and epilog, but no usage or any other arguments.
-    fake_ap = argparse.ArgumentParser(
-        usage=argparse.SUPPRESS,
-        epilog=textwrap.dedent("""
-        Most of the above subcommands take additional arguments. Run a subcommand with
-        the option --help for help about the options understood by that subcommand.
-        """),
-        add_help=False,
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-    )
-    
-    # The subcommands are added as positional arguments to a custom group with the title "subcommands".
-    # Semantically this makes no sense, but it looks right in the formatted help text:
-    # the result is a section "subcommands" with an aligned list of command names and short descriptions.
-    fake_group = fake_ap.add_argument_group(title="subcommands")
-    
-    for name, func in SUBCOMMANDS.items():
-        # Each command's short description is taken from the implementation function's docstring.
-        fake_group.add_argument(name, help=func.__doc__)
-    
-    return fake_ap.format_help()
-
def main() -> typing.NoReturn:
|
|
||||||
"""Main function of the CLI.
|
|
||||||
|
|
||||||
This function is a valid setuptools entry point. Arguments are passed in sys.argv, and every execution path ends with a sys.exit call. (setuptools entry points are also permitted to return an integer, which will be treated as an exit code. We do not use this feature and instead always call sys.exit ourselves.)
|
|
||||||
"""
|
|
||||||
|
|
||||||
prog = pathlib.PurePath(sys.argv[0]).name
|
|
||||||
args = sys.argv[1:]
|
|
||||||
|
|
||||||
# The rsrcfork CLI is structured into subcommands, each implemented in a separate function.
|
|
||||||
# The main function parses the command-line arguments enough to determine which subcommand to call, but leaves parsing of the rest of the arguments to the subcommand itself.
|
|
||||||
# In addition, it detects use of the old, non-subcommand-based CLI syntax, and delegates to the old main function in that case.
|
|
||||||
# This backwards compatibility handling is one of the reasons why we cannot use the subcommand support of argparse or other CLI parsing libraries, so we have to implement most of the subcommand handling ourselves.
|
|
||||||
|
|
||||||
ap = make_argument_parser(
|
|
||||||
prog=prog,
|
|
||||||
# Custom usage string to make "subcommand ..." show up in the usage, but not as "positional arguments" in the main help text.
|
|
||||||
usage=f"{prog} (--help | --version | subcommand ...)",
|
|
||||||
description="""
|
|
||||||
%(prog)s is a tool for working with Classic Mac OS resource files.
|
|
||||||
Currently this tool can only read resource files; modifying/writing resource
|
|
||||||
files is not supported yet.
|
|
||||||
|
|
||||||
Note: This tool is intended for human users. The output format is not
|
|
||||||
machine-readable and may change at any time. The command-line syntax usually
|
|
||||||
does not change much across versions, but this should not be relied on.
|
|
||||||
Automated scripts and programs should use the Python API provided by the
|
|
||||||
rsrcfork library, which this tool is a part of.
|
|
||||||
""",
|
|
||||||
# The list of subcommands is shown in the epilog so that it appears under the list of optional arguments.
|
|
||||||
epilog=format_subcommands_help(),
|
|
||||||
)
|
|
||||||
|
|
||||||
ap.add_argument("--version", action="version", version=__version__, help="Display version information and exit.")
|
|
||||||
|
|
||||||
# The help of these arguments is set to argparse.SUPPRESS so that they do not cause a mostly useless "positional arguments" list to appear.
|
|
||||||
# If the old, non-subcommand syntax is used, the subcommand argument can actually be a file name.
|
|
||||||
ap.add_argument("subcommand", help=argparse.SUPPRESS)
|
|
||||||
ap.add_argument("args", nargs=argparse.REMAINDER, help=argparse.SUPPRESS)
|
|
||||||
|
|
||||||
if not args:
|
|
||||||
print(f"{prog}: Missing subcommand.", file=sys.stderr)
|
|
||||||
ap.print_help()
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
# First, parse only known arguments from the CLI.
|
|
||||||
# This is so that we can extract the subcommand/file to check if the old CLI syntax was used, without causing CLI syntax errors because of unknown options before the subcommand/file.
|
|
||||||
ns, _ = ap.parse_known_args(args)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Check if the subcommand is valid.
|
|
||||||
subcommand_func = SUBCOMMANDS[ns.subcommand]
|
|
||||||
except KeyError:
|
|
||||||
if ns.subcommand == "-" or pathlib.Path(ns.subcommand).exists():
|
|
||||||
# Subcommand is actually a file path.
|
|
||||||
# Call the old main function with the entire unparsed argument list, so that it can be reparsed and handled like in previous versions.
|
|
||||||
main_old(args)
|
|
||||||
else:
|
|
||||||
# Subcommand is invalid and also not a path to an existing file. Display an error.
|
|
||||||
print(f"{prog}: Unknown subcommand: {ns.subcommand}", file=sys.stderr)
|
|
||||||
print(f"Run {prog} --help for a list of available subcommands.", file=sys.stderr)
|
|
||||||
sys.exit(2)
|
|
||||||
else:
|
|
||||||
# Subcommand is valid. Parse the arguments again, this time without allowing unknown arguments before the subcommand.
|
|
||||||
ns = ap.parse_args(args)
|
|
||||||
# Call the looked up subcommand and pass on further arguments.
|
|
||||||
subcommand_func(f"{prog} {ns.subcommand}", ns.args)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
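
The subcommand removed above was essentially a thin CLI wrapper around the streaming decompression API that this commit also removes. A minimal sketch of the equivalent library usage, valid only against the outgoing API shown here (the file paths are hypothetical):

from rsrcfork import compress

with open("compressed.bin", "rb") as in_stream, open("out.bin", "wb") as out_stream:
	# Parse the compressed resource header, then stream out decompressed chunks.
	header_info = compress.CompressedHeaderInfo.parse_stream(in_stream)
	for chunk in compress.decompress_stream_parsed(header_info, in_stream):
		out_stream.write(chunk)
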
rsrcfork/api.py (236 changes)

@@ -4,7 +4,6 @@ import enum
 import io
 import os
 import struct
-import types
 import typing
 import warnings
 
@@ -97,33 +96,20 @@ class ResourceAttrs(enum.Flag):
 class Resource(object):
 	"""A single resource from a resource file."""
 	
-	_resfile: "ResourceFile"
-	type: bytes
-	id: int
-	name_offset: int
-	_name: typing.Optional[bytes]
-	attributes: ResourceAttrs
-	data_raw_offset: int
-	_data_raw: bytes
-	_compressed_info: compress.common.CompressedHeaderInfo
-	_data_decompressed: bytes
+	__slots__ = ("resource_type", "resource_id", "name", "attributes", "data_raw", "_data_decompressed")
 	
-	def __init__(self, resfile: "ResourceFile", resource_type: bytes, resource_id: int, name_offset: int, attributes: ResourceAttrs, data_raw_offset: int) -> None:
-		"""Create a resource object representing a resource stored in a resource file.
-		
-		External code should not call this constructor manually. Resources should be looked up through a ResourceFile object instead.
-		"""
+	def __init__(self, resource_type: bytes, resource_id: int, name: typing.Optional[bytes], attributes: ResourceAttrs, data_raw: bytes):
+		"""Create a new resource with the given type code, ID, name, attributes, and data."""
 		
 		super().__init__()
 		
-		self._resfile = resfile
-		self.type = resource_type
-		self.id = resource_id
-		self.name_offset = name_offset
-		self.attributes = attributes
-		self.data_raw_offset = data_raw_offset
+		self.resource_type: bytes = resource_type
+		self.resource_id: int = resource_id
+		self.name: typing.Optional[bytes] = name
+		self.attributes: ResourceAttrs = attributes
+		self.data_raw: bytes = data_raw
 	
-	def __repr__(self) -> str:
+	def __repr__(self):
 		try:
 			data = self.data
 		except compress.DecompressError:
@@ -140,78 +126,7 @@ class Resource(object):
 		if not decompress_ok:
 			data_repr = f"<decompression failed - compressed data: {data_repr}>"
 		
-		return f"<{type(self).__qualname__} type {self.type}, id {self.id}, name {self.name}, attributes {self.attributes}, data {data_repr}>"
-	
-	@property
-	def resource_type(self) -> bytes:
-		warnings.warn(DeprecationWarning("The resource_type attribute has been deprecated and will be removed in a future version. Please use the type attribute instead."))
-		return self.type
-	
-	@property
-	def resource_id(self) -> int:
-		warnings.warn(DeprecationWarning("The resource_id attribute has been deprecated and will be removed in a future version. Please use the id attribute instead."))
-		return self.id
-	
-	@property
-	def name(self) -> typing.Optional[bytes]:
-		try:
-			return self._name
-		except AttributeError:
-			if self.name_offset == 0xffff:
-				self._name = None
-			else:
-				self._resfile._stream.seek(self._resfile.map_offset + self._resfile.map_name_list_offset + self.name_offset)
-				(name_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
-				self._name = self._resfile._read_exact(name_length)
-			
-			return self._name
-	
-	@property
-	def data_raw(self) -> bytes:
-		try:
-			return self._data_raw
-		except AttributeError:
-			self._resfile._stream.seek(self._resfile.data_offset + self.data_raw_offset)
-			(data_raw_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
-			self._data_raw = self._resfile._read_exact(data_raw_length)
-			return self._data_raw
-	
-	@property
-	def compressed_info(self) -> typing.Optional[compress.common.CompressedHeaderInfo]:
-		"""The compressed resource header information, or None if this resource is not compressed.
-		
-		Accessing this attribute may raise a DecompressError if the resource data is compressed and the header could not be parsed. To access the unparsed header data, use the data_raw attribute.
-		"""
-		
-		if ResourceAttrs.resCompressed in self.attributes:
-			try:
-				return self._compressed_info
-			except AttributeError:
-				self._compressed_info = compress.common.CompressedHeaderInfo.parse(self.data_raw)
-				return self._compressed_info
-		else:
-			return None
-	
-	@property
-	def length_raw(self) -> int:
-		"""The length of the raw resource data, which may be compressed.
-		
-		Accessing this attribute may be faster than computing len(self.data_raw) manually.
-		"""
-		
-		return len(self.data_raw)
-	
-	@property
-	def length(self) -> int:
-		"""The length of the resource data. If the resource data is compressed, this is the length of the data after decompression.
-		
-		Accessing this attribute may be faster than computing len(self.data) manually.
-		"""
-		
-		if self.compressed_info is not None:
-			return self.compressed_info.decompressed_length
-		else:
-			return self.length_raw
+		return f"{type(self).__module__}.{type(self).__qualname__}(resource_type={self.resource_type}, resource_id={self.resource_id}, name={self.name}, attributes={self.attributes}, data={data_repr})"
 	
 	@property
 	def data(self) -> bytes:
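
The properties deleted in the hunk above (name, data_raw, compressed_info) all rely on the same lazy-caching idiom: compute the value on first access, memoize it in a private attribute, and use AttributeError as the "not computed yet" marker. A minimal standalone sketch of the pattern (the class and names here are illustrative, not part of rsrcfork):

class LazyExample:
	def __init__(self, raw: bytes) -> None:
		self._raw = raw
	
	@property
	def parsed(self) -> str:
		try:
			# Fast path: the value was already computed on an earlier access.
			return self._parsed
		except AttributeError:
			# Slow path, taken exactly once: compute and cache the value.
			self._parsed = self._raw.decode("ascii")
			return self._parsed

Note that the incoming Resource.__slots__ above reserves _data_decompressed for exactly this purpose, since slotted classes must declare every attribute they cache.
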
@@ -220,43 +135,42 @@ class Resource(object):
 		Accessing this attribute may raise a DecompressError if the resource data is compressed and could not be decompressed. To access the compressed resource data, use the data_raw attribute.
 		"""
 		
-		if self.compressed_info is not None:
+		if ResourceAttrs.resCompressed in self.attributes:
 			try:
 				return self._data_decompressed
 			except AttributeError:
-				self._data_decompressed = compress.decompress_parsed(self.compressed_info, self.data_raw[self.compressed_info.header_length:])
+				self._data_decompressed = compress.decompress(self.data_raw)
 				return self._data_decompressed
 		else:
 			return self.data_raw
 
-class _LazyResourceMap(typing.Mapping[int, Resource]):
-	"""Internal class: Read-only wrapper for a mapping of resource IDs to resource objects.
-	
-	This class behaves like a normal read-only mapping. The main difference to a plain dict (or similar mapping) is that this mapping has a specialized repr to avoid excessive output when working in the REPL.
-	"""
-	
-	type: bytes
-	_submap: typing.Mapping[int, Resource]
-	
-	def __init__(self, resource_type: bytes, submap: typing.Mapping[int, Resource]) -> None:
-		"""Create a new _LazyResourceMap that wraps the given mapping."""
-		
-		super().__init__()
-		
-		self.type = resource_type
-		self._submap = submap
-	
-	def __len__(self) -> int:
-		"""Get the number of resources with this type code."""
-		
-		return len(self._submap)
-	
-	def __iter__(self) -> typing.Iterator[int]:
-		"""Iterate over the IDs of all resources with this type code."""
-		
-		return iter(self._submap)
-	
-	def __contains__(self, key: object) -> bool:
-		"""Check if a resource with the given ID exists for this type code."""
-		
-		return key in self._submap
+class ResourceFile(collections.abc.Mapping):
+	"""A resource file reader operating on a byte stream."""
+	
+	# noinspection PyProtectedMember
+	class _LazyResourceMap(collections.abc.Mapping):
+		"""Internal class: Lazy mapping of resource IDs to resource objects, returned when subscripting a ResourceFile."""
+		
+		def __init__(self, resfile: "ResourceFile", restype: bytes):
+			"""Create a new _LazyResourceMap "containing" all resources in resfile that have the type code restype."""
+			
+			super().__init__()
+			
+			self._resfile: "ResourceFile" = resfile
+			self._restype: bytes = restype
+			self._submap: typing.Mapping[int, typing.Tuple[int, ResourceAttrs, int]] = self._resfile._references[self._restype]
+		
+		def __len__(self):
+			"""Get the number of resources with this type code."""
+			
+			return len(self._submap)
+		
+		def __iter__(self):
+			"""Iterate over the IDs of all resources with this type code."""
+			
+			return iter(self._submap)
+		
+		def __contains__(self, key: int):
+			"""Check if a resource with the given ID exists for this type code."""
+			
+			return key in self._submap
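
On either side of this change, decompression stays transparent to callers: data always yields decompressed bytes, while data_raw exposes whatever is stored in the file. A usage sketch (the file path is hypothetical; close() is called explicitly rather than via a with block, for a reason noted further down at the __enter__ change):

import rsrcfork

rf = rsrcfork.open("Example.rsrc")
try:
	res = rf[b"TEXT"][256]
	print(res.data_raw[:16])  # stored bytes, possibly still compressed
	print(res.data[:16])      # decompressed on first access, then cached
finally:
	rf.close()
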
@@ -264,38 +178,29 @@ class _LazyResourceMap(typing.Mapping[int, Resource]):
-	def __getitem__(self, key: int) -> Resource:
-		"""Get a resource with the given ID for this type code."""
-		
-		return self._submap[key]
-	
-	def __repr__(self) -> str:
-		if len(self) == 1:
-			contents = f"one resource: {next(iter(self.values()))}"
-		else:
-			contents = f"{len(self)} resources with IDs {list(self)}"
-		
-		return f"<Resource map for type {self.type}, containing {contents}>"
-
-
-class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.ContextManager["ResourceFile"]):
-	"""A resource file reader operating on a byte stream."""
-	
-	_close_stream: bool
-	_stream: typing.BinaryIO
-	
-	data_offset: int
-	map_offset: int
-	data_length: int
-	map_length: int
-	header_system_data: bytes
-	header_application_data: bytes
-	
-	map_type_list_offset: int
-	map_name_list_offset: int
-	file_attributes: ResourceFileAttrs
-	
-	_reference_counts: typing.MutableMapping[bytes, int]
-	_references: typing.MutableMapping[bytes, typing.MutableMapping[int, Resource]]
+		def __getitem__(self, key: int) -> Resource:
+			"""Get a resource with the given ID for this type code."""
+			
+			name_offset, attributes, data_offset = self._submap[key]
+			
+			if name_offset == 0xffff:
+				name = None
+			else:
+				self._resfile._stream.seek(self._resfile.map_offset + self._resfile.map_name_list_offset + name_offset)
+				(name_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
+				name = self._resfile._read_exact(name_length)
+			
+			self._resfile._stream.seek(self._resfile.data_offset + data_offset)
+			(data_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
+			data = self._resfile._read_exact(data_length)
+			
+			return Resource(self._restype, key, name, attributes, data)
+		
+		def __repr__(self):
+			if len(self) == 1:
+				return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing one resource: {next(iter(self.values()))}>"
+			else:
+				return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing {len(self)} resources with IDs: {list(self)}>"
 	
 	@classmethod
-	def open(cls, filename: typing.Union[str, os.PathLike], *, fork: str="auto", **kwargs: typing.Any) -> "ResourceFile":
+	def open(cls, filename: typing.Union[str, bytes, os.PathLike], *, fork: str="auto", **kwargs) -> "ResourceFile":
 		"""Open the file at the given path as a ResourceFile.
 		
 		The fork parameter controls which fork of the file the resource data will be read from. It accepts the following values:
@@ -354,7 +259,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 		else:
 			raise ValueError(f"Unsupported value for the fork parameter: {fork!r}")
 	
-	def __init__(self, stream: typing.BinaryIO, *, close: bool=False) -> None:
+	def __init__(self, stream: typing.io.BinaryIO, *, close: bool=False):
 		"""Create a ResourceFile wrapping the given byte stream.
 		
 		To read resource file data from a bytes object, wrap it in an io.BytesIO.
@@ -368,7 +273,8 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 		
 		super().__init__()
 		
-		self._close_stream = close
+		self._close_stream: bool = close
+		self._stream: typing.io.BinaryIO
 		if stream.seekable():
 			self._stream = stream
 		else:
@@ -392,7 +298,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 			raise InvalidResourceFileError(f"Attempted to read {byte_count} bytes of data, but only got {len(data)} bytes")
 		return data
 	
-	def _stream_unpack(self, st: struct.Struct) -> tuple:
+	def _stream_unpack(self, st: struct.Struct) -> typing.Tuple:
 		"""Unpack data from the stream according to the struct st. The number of bytes to read is determined using st.size, so variable-sized structs cannot be used with this method."""
 		
 		try:
@@ -400,11 +306,17 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 		except struct.error as e:
 			raise InvalidResourceFileError(str(e))
 	
-	def _read_header(self) -> None:
+	def _read_header(self):
 		"""Read the resource file header, starting at the current stream position."""
 		
 		assert self._stream.tell() == 0
 		
+		self.data_offset: int
+		self.map_offset: int
+		self.data_length: int
+		self.map_length: int
+		self.header_system_data: bytes
+		self.header_application_data: bytes
 		(
 			self.data_offset,
 			self.map_offset,
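
For orientation, _read_header pulls all six of these fields out of a single fixed-size struct read. The sketch below shows a layout consistent with those fields; the exact widths (four big-endian 32-bit offsets/lengths followed by 112 bytes of system-reserved data and 128 bytes of application data) are this editor's assumption about STRUCT_RESOURCE_HEADER, which is defined elsewhere in api.py and not visible in this diff:

import struct

# Assumed field layout (not shown in this diff): data offset, map offset,
# data length, map length, system-reserved data, application data.
STRUCT_RESOURCE_HEADER_SKETCH = struct.Struct(">IIII112s128s")
assert STRUCT_RESOURCE_HEADER_SKETCH.size == 256  # 16 + 112 + 128
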
@@ -417,23 +329,25 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 		if self._stream.tell() != self.data_offset:
 			raise InvalidResourceFileError(f"The data offset ({self.data_offset}) should point exactly to the end of the file header ({self._stream.tell()})")
 	
-	def _read_map_header(self) -> None:
+	def _read_map_header(self):
 		"""Read the map header, starting at the current stream position."""
 		
 		assert self._stream.tell() == self.map_offset
 		
+		self.map_type_list_offset: int
+		self.map_name_list_offset: int
 		(
 			_file_attributes,
 			self.map_type_list_offset,
 			self.map_name_list_offset,
 		) = self._stream_unpack(STRUCT_RESOURCE_MAP_HEADER)
 		
-		self.file_attributes = ResourceFileAttrs(_file_attributes)
+		self.file_attributes: ResourceFileAttrs = ResourceFileAttrs(_file_attributes)
 	
-	def _read_all_resource_types(self) -> None:
+	def _read_all_resource_types(self):
 		"""Read all resource types, starting at the current stream position."""
 		
-		self._reference_counts = collections.OrderedDict()
+		self._reference_counts: typing.MutableMapping[bytes, int] = collections.OrderedDict()
 		
 		(type_list_length_m1,) = self._stream_unpack(STRUCT_RESOURCE_TYPE_LIST_HEADER)
 		type_list_length = (type_list_length_m1 + 1) % 0x10000
@@ -447,13 +361,13 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 		count = (count_m1 + 1) % 0x10000
 		self._reference_counts[resource_type] = count
 	
-	def _read_all_references(self) -> None:
+	def _read_all_references(self):
 		"""Read all resource references, starting at the current stream position."""
 		
-		self._references = collections.OrderedDict()
+		self._references: typing.MutableMapping[bytes, typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]]] = collections.OrderedDict()
 		
 		for resource_type, count in self._reference_counts.items():
-			resmap: typing.MutableMapping[int, Resource] = collections.OrderedDict()
+			resmap: typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]] = collections.OrderedDict()
 			self._references[resource_type] = resmap
 			for _ in range(count):
 				(
@@ -465,9 +379,9 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 				attributes = attributes_and_data_offset >> 24
 				data_offset = attributes_and_data_offset & ((1 << 24) - 1)
 				
-				resmap[resource_id] = Resource(self, resource_type, resource_id, name_offset, ResourceAttrs(attributes), data_offset)
+				resmap[resource_id] = (name_offset, ResourceAttrs(attributes), data_offset)
 	
-	def close(self) -> None:
+	def close(self):
 		"""Close this ResourceFile.
 		
 		If close=True was passed when this ResourceFile was created, the underlying stream's close method is called as well.
@@ -476,37 +390,31 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 		if self._close_stream:
 			self._stream.close()
 	
-	def __enter__(self) -> "ResourceFile":
-		return self
+	def __enter__(self):
+		pass
 	
-	def __exit__(
-		self,
-		exc_type: typing.Optional[typing.Type[BaseException]],
-		exc_val: typing.Optional[BaseException],
-		exc_tb: typing.Optional[types.TracebackType]
-	) -> typing.Optional[bool]:
+	def __exit__(self, exc_type, exc_val, exc_tb):
 		self.close()
-		return None
 	
-	def __len__(self) -> int:
+	def __len__(self):
 		"""Get the number of resource types in this ResourceFile."""
 		
 		return len(self._references)
 	
-	def __iter__(self) -> typing.Iterator[bytes]:
+	def __iter__(self):
 		"""Iterate over all resource types in this ResourceFile."""
 		
 		return iter(self._references)
 	
-	def __contains__(self, key: object) -> bool:
+	def __contains__(self, key: bytes):
 		"""Check whether this ResourceFile contains any resources of the given type."""
 		
 		return key in self._references
 	
-	def __getitem__(self, key: bytes) -> "_LazyResourceMap":
+	def __getitem__(self, key: bytes) -> "ResourceFile._LazyResourceMap":
 		"""Get a lazy mapping of all resources with the given type in this ResourceFile."""
 		
-		return _LazyResourceMap(key, self._references[key])
+		return ResourceFile._LazyResourceMap(self, key)
 	
-	def __repr__(self) -> str:
+	def __repr__(self):
 		return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>"
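
One behavioral detail worth noting in the incoming __enter__: its body is just pass, so it returns None, and `with rsrcfork.open(...) as rf:` would bind rf to None in this version (the outgoing __enter__ returned self). A sketch that works on either side of this diff therefore keeps an explicit reference and treats the ResourceFile as a plain mapping of type codes (the path is hypothetical):

import rsrcfork

rf = rsrcfork.open("Example.rsrc")
try:
	for restype in rf:
		print(restype, len(rf[restype]), "resources")
finally:
	rf.close()
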
rsrcfork/compress/__init__.py

@@ -1,63 +1,97 @@
-import io
-import typing
+import struct
 
 from . import dcmp0
 from . import dcmp1
 from . import dcmp2
 
-from .common import DecompressError, CompressedHeaderInfo
+from .common import DecompressError
 
 __all__ = [
-	"CompressedHeaderInfo",
 	"DecompressError",
 	"decompress",
-	"decompress_parsed",
-	"decompress_stream",
-	"decompress_stream_parsed",
 ]
 
-
-# Maps 'dcmp' IDs to their corresponding Python implementations.
-# Each decompressor has the signature (header_info: CompressedHeaderInfo, stream: typing.BinaryIO, *, debug: bool=False) -> typing.Iterator[bytes].
-DECOMPRESSORS = {
-	0: dcmp0.decompress_stream,
-	1: dcmp1.decompress_stream,
-	2: dcmp2.decompress_stream,
-}
-
-
-def decompress_stream_parsed(header_info: CompressedHeaderInfo, stream: typing.BinaryIO, *, debug: bool=False) -> typing.Iterator[bytes]:
-	"""Decompress compressed resource data from a stream, whose header has already been read and parsed into a CompressedHeaderInfo object."""
-	
-	try:
-		decompress_func = DECOMPRESSORS[header_info.dcmp_id]
-	except KeyError:
-		raise DecompressError(f"Unsupported 'dcmp' ID: {header_info.dcmp_id}")
-	
-	decompressed_length = 0
-	for chunk in decompress_func(header_info, stream, debug=debug):
-		decompressed_length += len(chunk)
-		yield chunk
-	
-	if decompressed_length != header_info.decompressed_length:
-		raise DecompressError(f"Actual length of decompressed data ({decompressed_length}) does not match length stored in resource ({header_info.decompressed_length})")
-
-
-def decompress_parsed(header_info: CompressedHeaderInfo, data: bytes, *, debug: bool=False) -> bytes:
-	"""Decompress the given compressed resource data, whose header has already been removed and parsed into a CompressedHeaderInfo object."""
-	
-	return b"".join(decompress_stream_parsed(header_info, io.BytesIO(data), debug=debug))
-
-
-def decompress_stream(stream: typing.BinaryIO, *, debug: bool=False) -> typing.Iterator[bytes]:
-	"""Decompress compressed resource data from a stream."""
-	
-	header_info = CompressedHeaderInfo.parse_stream(stream)
-	
-	if debug:
-		print(f"Compressed resource data header: {header_info}")
-	
-	yield from decompress_stream_parsed(header_info, stream, debug=debug)
+# The signature of all compressed resource data, 0xa89f6572 in hex, or "®üer" in MacRoman.
+COMPRESSED_SIGNATURE = b"\xa8\x9fer"
+# The compression type commonly used for application resources.
+COMPRESSED_TYPE_APPLICATION = 0x0801
+# The compression type commonly used for System file resources.
+COMPRESSED_TYPE_SYSTEM = 0x0901
+
+# Common header for compressed resources of all types.
+# 4 bytes: Signature (see above).
+# 2 bytes: Length of the complete header (this common part and the type-specific part that follows it). (This meaning is just a guess - the field's value is always 0x0012, so there's no way to know for certain what it means.)
+# 2 bytes: Compression type. Known so far: 0x0901 is used in the System file's resources. 0x0801 is used in other files' resources.
+# 4 bytes: Length of the data after decompression.
+STRUCT_COMPRESSED_HEADER = struct.Struct(">4sHHI")
+
+# Header continuation part for an "application" compressed resource.
+# 1 byte: "Working buffer fractional size" - the ratio of the compressed data size to the uncompressed data size, times 256.
+# 1 byte: "Expansion buffer size" - the maximum number of bytes that the data might grow during decompression.
+# 2 bytes: The ID of the 'dcmp' resource that can decompress this resource. Currently only ID 0 is supported.
+# 2 bytes: Reserved (always zero).
+STRUCT_COMPRESSED_APPLICATION_HEADER = struct.Struct(">BBhH")
+
+# Header continuation part for a "system" compressed resource.
+# 2 bytes: The ID of the 'dcmp' resource that can decompress this resource. Currently only ID 2 is supported.
+# 4 bytes: Decompressor-specific parameters.
+STRUCT_COMPRESSED_SYSTEM_HEADER = struct.Struct(">h4s")
+
+
+def _decompress_application(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes:
+	working_buffer_fractional_size, expansion_buffer_size, dcmp_id, reserved = STRUCT_COMPRESSED_APPLICATION_HEADER.unpack_from(data)
+	
+	if debug:
+		print(f"Working buffer fractional size: {working_buffer_fractional_size} (=> {len(data) * 256 / working_buffer_fractional_size})")
+		print(f"Expansion buffer size: {expansion_buffer_size}")
+	
+	if dcmp_id == 0:
+		decompress_func = dcmp0.decompress
+	elif dcmp_id == 1:
+		decompress_func = dcmp1.decompress
+	else:
+		raise DecompressError(f"Unsupported 'dcmp' ID: {dcmp_id}, expected 0 or 1")
+	
+	if reserved != 0:
+		raise DecompressError(f"Reserved field should be 0, not 0x{reserved:>04x}")
+	
+	return decompress_func(data[STRUCT_COMPRESSED_APPLICATION_HEADER.size:], decompressed_length, debug=debug)
+
+
+def _decompress_system(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes:
+	dcmp_id, params = STRUCT_COMPRESSED_SYSTEM_HEADER.unpack_from(data)
+	
+	if dcmp_id == 2:
+		decompress_func = dcmp2.decompress
+	else:
+		raise DecompressError(f"Unsupported 'dcmp' ID: {dcmp_id}, expected 2")
+	
+	return decompress_func(data[STRUCT_COMPRESSED_SYSTEM_HEADER.size:], decompressed_length, params, debug=debug)
 
 
 def decompress(data: bytes, *, debug: bool=False) -> bytes:
 	"""Decompress the given compressed resource data."""
 	
-	return b"".join(decompress_stream(io.BytesIO(data), debug=debug))
+	try:
+		signature, header_length, compression_type, decompressed_length = STRUCT_COMPRESSED_HEADER.unpack_from(data)
+	except struct.error:
+		raise DecompressError(f"Invalid header")
+	if signature != COMPRESSED_SIGNATURE:
+		raise DecompressError(f"Invalid signature: {signature!r}, expected {COMPRESSED_SIGNATURE}")
+	if header_length != 0x12:
+		raise DecompressError(f"Unsupported header length: 0x{header_length:>04x}, expected 0x12")
+	
+	if compression_type == COMPRESSED_TYPE_APPLICATION:
+		decompress_func = _decompress_application
+	elif compression_type == COMPRESSED_TYPE_SYSTEM:
+		decompress_func = _decompress_system
+	else:
+		raise DecompressError(f"Unsupported compression type: 0x{compression_type:>04x}")
+	
+	if debug:
+		print(f"Decompressed length: {decompressed_length}")
+	
+	decompressed = decompress_func(data[STRUCT_COMPRESSED_HEADER.size:], decompressed_length, debug=debug)
+	if len(decompressed) != decompressed_length:
+		raise DecompressError(f"Actual length of decompressed data ({len(decompressed)}) does not match length stored in resource ({decompressed_length})")
+	return decompressed
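
The incoming module hard-codes the 12-byte common header described in the comments above. A round-trip sketch of that header using only the struct layout and constants shown in this diff; the field values are made up purely for illustration:

import struct

STRUCT_COMPRESSED_HEADER = struct.Struct(">4sHHI")
# Build a header: signature, header length 0x12, "application" type, 1024 bytes decompressed.
header = STRUCT_COMPRESSED_HEADER.pack(b"\xa8\x9fer", 0x12, 0x0801, 1024)
signature, header_length, compression_type, decompressed_length = STRUCT_COMPRESSED_HEADER.unpack_from(header)
assert signature == b"\xa8\x9fer"
assert compression_type == 0x0801  # COMPRESSED_TYPE_APPLICATION
assert decompressed_length == 1024
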
rsrcfork/compress/common.py

@@ -1,5 +1,3 @@
-import io
-import struct
 import typing
 
 
@@ -7,192 +5,19 @@ class DecompressError(Exception):
 	"""Raised when resource data decompression fails, because the data is invalid or the compression type is not supported."""
 
 
-# The signature of all compressed resource data, 0xa89f6572 in hex, or "®üer" in MacRoman.
-COMPRESSED_SIGNATURE = b"\xa8\x9fer"
-# The number of the "type 8" compression type. This type is used in the Finder, ResEdit, and some other system files.
-COMPRESSED_TYPE_8 = 0x0801
-# The number of the "type 9" compression type. This type is used in the System file and System 7.5's Installer.
-COMPRESSED_TYPE_9 = 0x0901
-
-# Common header for compressed resources of all types.
-# 4 bytes: Signature (see above).
-# 2 bytes: Length of the complete header (this common part and the type-specific part that follows it). (This meaning is just a guess - the field's value is always 0x0012, so there's no way to know for certain what it means.)
-# 2 bytes: Compression type. Known so far: 0x0801 ("type 8") and 0x0901 ("type 9").
-# 4 bytes: Length of the data after decompression.
-# 6 bytes: Remainder of the header. The exact format varies depending on the compression type.
-STRUCT_COMPRESSED_HEADER = struct.Struct(">4sHHI6s")
-
-# Remainder of header for a "type 8" compressed resource.
-# 1 byte: "Working buffer fractional size" - the ratio of the compressed data size to the uncompressed data size, times 256.
-# 1 byte: "Expansion buffer size" - the maximum number of bytes that the data might grow during decompression.
-# 2 bytes: The ID of the 'dcmp' resource that can decompress this resource. Currently only ID 0 is supported.
-# 2 bytes: Reserved (always zero).
-STRUCT_COMPRESSED_TYPE_8_HEADER = struct.Struct(">BBhH")
-
-# Remainder of header for a "type 9" compressed resource.
-# 2 bytes: The ID of the 'dcmp' resource that can decompress this resource. Currently only ID 2 is supported.
-# 4 bytes: Decompressor-specific parameters.
-STRUCT_COMPRESSED_TYPE_9_HEADER = struct.Struct(">h4s")
-
-
-class CompressedHeaderInfo(object):
-	@classmethod
-	def parse_stream(cls, stream: typing.BinaryIO) -> "CompressedHeaderInfo":
-		try:
-			signature, header_length, compression_type, decompressed_length, remainder = STRUCT_COMPRESSED_HEADER.unpack(stream.read(STRUCT_COMPRESSED_HEADER.size))
-		except struct.error:
-			raise DecompressError(f"Invalid header")
-		if signature != COMPRESSED_SIGNATURE:
-			raise DecompressError(f"Invalid signature: {signature!r}, expected {COMPRESSED_SIGNATURE}")
-		if header_length != 0x12:
-			raise DecompressError(f"Unsupported header length: 0x{header_length:>04x}, expected 0x12")
-		
-		if compression_type == COMPRESSED_TYPE_8:
-			working_buffer_fractional_size, expansion_buffer_size, dcmp_id, reserved = STRUCT_COMPRESSED_TYPE_8_HEADER.unpack(remainder)
-			
-			if reserved != 0:
-				raise DecompressError(f"Reserved field should be 0, not 0x{reserved:>04x}")
-			
-			return CompressedType8HeaderInfo(header_length, compression_type, decompressed_length, dcmp_id, working_buffer_fractional_size, expansion_buffer_size)
-		elif compression_type == COMPRESSED_TYPE_9:
-			dcmp_id, parameters = STRUCT_COMPRESSED_TYPE_9_HEADER.unpack(remainder)
-			
-			return CompressedType9HeaderInfo(header_length, compression_type, decompressed_length, dcmp_id, parameters)
-		else:
-			raise DecompressError(f"Unsupported compression type: 0x{compression_type:>04x}")
-	
-	@classmethod
-	def parse(cls, data: bytes) -> "CompressedHeaderInfo":
-		return cls.parse_stream(io.BytesIO(data))
-	
-	header_length: int
-	compression_type: int
-	decompressed_length: int
-	dcmp_id: int
-	
-	def __init__(self, header_length: int, compression_type: int, decompressed_length: int, dcmp_id: int) -> None:
-		super().__init__()
-		
-		self.header_length = header_length
-		self.compression_type = compression_type
-		self.decompressed_length = decompressed_length
-		self.dcmp_id = dcmp_id
-
-
-class CompressedType8HeaderInfo(CompressedHeaderInfo):
-	working_buffer_fractional_size: int
-	expansion_buffer_size: int
-	
-	def __init__(self, header_length: int, compression_type: int, decompressed_length: int, dcmp_id: int, working_buffer_fractional_size: int, expansion_buffer_size: int) -> None:
-		super().__init__(header_length, compression_type, decompressed_length, dcmp_id)
-		
-		self.working_buffer_fractional_size = working_buffer_fractional_size
-		self.expansion_buffer_size = expansion_buffer_size
-	
-	def __repr__(self) -> str:
-		return f"{type(self).__qualname__}(header_length={self.header_length}, compression_type=0x{self.compression_type:>04x}, decompressed_length={self.decompressed_length}, dcmp_id={self.dcmp_id}, working_buffer_fractional_size={self.working_buffer_fractional_size}, expansion_buffer_size={self.expansion_buffer_size})"
-
-
-class CompressedType9HeaderInfo(CompressedHeaderInfo):
-	parameters: bytes
-	
-	def __init__(self, header_length: int, compression_type: int, decompressed_length: int, dcmp_id: int, parameters: bytes) -> None:
-		super().__init__(header_length, compression_type, decompressed_length, dcmp_id)
-		
-		self.parameters = parameters
-	
-	def __repr__(self) -> str:
-		return f"{type(self).__qualname__}(header_length={self.header_length}, compression_type=0x{self.compression_type:>04x}, decompressed_length={self.decompressed_length}, dcmp_id={self.dcmp_id}, parameters={self.parameters!r})"
-
-
-if typing.TYPE_CHECKING:
-	class PeekableIO(typing.Protocol):
-		"""Minimal protocol for binary IO streams that support the peek method.
-		
-		The peek method is supported by various standard Python binary IO streams, such as io.BufferedReader. If a stream does not natively support the peek method, it may be wrapped using the custom helper function make_peekable.
-		"""
-		
-		def readable(self) -> bool: ...
-		def read(self, size: typing.Optional[int] = ...) -> bytes: ...
-		def peek(self, size: int = ...) -> bytes: ...
-
-
-class _PeekableIOWrapper(object):
-	"""Wrapper class to add peek support to an existing stream. Do not instantiate this class directly, use the make_peekable function instead.
-	
-	Python provides a standard io.BufferedReader class, which supports the peek method. However, according to its documentation, it only supports wrapping io.RawIOBase subclasses, and not streams which are already otherwise buffered.
-	
-	Warning: this class does not perform any buffering of its own, outside of what is required to make peek work. It is strongly recommended to only wrap streams that are already buffered or otherwise fast to read from. In particular, raw streams (io.RawIOBase subclasses) should be wrapped using io.BufferedReader instead.
-	"""
-	
-	_wrapped: typing.BinaryIO
-	_readahead: bytes
-	
-	def __init__(self, wrapped: typing.BinaryIO) -> None:
-		super().__init__()
-		
-		self._wrapped = wrapped
-		self._readahead = b""
-	
-	def readable(self) -> bool:
-		return self._wrapped.readable()
-	
-	def read(self, size: typing.Optional[int] = None) -> bytes:
-		if size is None or size < 0:
-			ret = self._readahead + self._wrapped.read()
-			self._readahead = b""
-		elif size <= len(self._readahead):
-			ret = self._readahead[:size]
-			self._readahead = self._readahead[size:]
-		else:
-			ret = self._readahead + self._wrapped.read(size - len(self._readahead))
-			self._readahead = b""
-		
-		return ret
-	
-	def peek(self, size: int = -1) -> bytes:
-		if not self._readahead:
-			self._readahead = self._wrapped.read(io.DEFAULT_BUFFER_SIZE if size < 0 else size)
-		return self._readahead
-
-
-def make_peekable(stream: typing.BinaryIO) -> "PeekableIO":
-	"""Wrap an arbitrary binary IO stream so that it supports the peek method.
-	
-	The stream is wrapped as efficiently as possible (or not at all if it already supports the peek method). However, in the worst case a custom wrapper class needs to be used, which may not be particularly efficient and only supports a very minimal interface. The only methods that are guaranteed to exist on the returned stream are readable, read, and peek.
-	"""
-	
-	if hasattr(stream, "peek"):
-		# Stream is already peekable, nothing to be done.
-		return typing.cast("PeekableIO", stream)
-	elif isinstance(stream, io.RawIOBase):
-		# Raw IO streams can be wrapped efficiently using BufferedReader.
-		return io.BufferedReader(stream)
-	else:
-		# Other streams need to be wrapped using our custom wrapper class.
-		return _PeekableIOWrapper(stream)
-
-
-def read_exact(stream: typing.BinaryIO, byte_count: int) -> bytes:
-	"""Read byte_count bytes from the stream and raise an exception if too few bytes are read (i. e. if EOF was hit prematurely)."""
-	
-	data = stream.read(byte_count)
-	if len(data) != byte_count:
-		raise DecompressError(f"Attempted to read {byte_count} bytes of data, but only got {len(data)} bytes")
-	return data
-
-
-def read_variable_length_integer(stream: typing.BinaryIO) -> int:
-	"""Read a variable-length integer from the stream.
+def _read_variable_length_integer(data: bytes, position: int) -> typing.Tuple[int, int]:
+	"""Read a variable-length integer starting at the given position in the data, and return the integer as well as the number of bytes consumed.
 	
 	This variable-length integer format is used by the 0xfe codes in the compression formats used by 'dcmp' (0) and 'dcmp' (1).
 	"""
 	
-	head = read_exact(stream, 1)
-	
-	if head[0] == 0xff:
-		return int.from_bytes(read_exact(stream, 4), "big", signed=True)
-	elif head[0] >= 0x80:
-		data_modified = bytes([(head[0] - 0xc0) & 0xff]) + read_exact(stream, 1)
-		return int.from_bytes(data_modified, "big", signed=True)
+	assert len(data) > position
+	if data[position] == 0xff:
+		assert len(data) > position + 4
+		return int.from_bytes(data[position+1:position+5], "big", signed=True), 5
+	elif data[position] >= 0x80:
+		assert len(data) > position + 1
+		data_modified = bytes([(data[position] - 0xc0) & 0xff, data[position+1]])
+		return int.from_bytes(data_modified, "big", signed=True), 2
 	else:
-		return int.from_bytes(head, "big", signed=True)
+		return int.from_bytes(data[position:position+1], "big", signed=True), 1
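
The three encodings handled by _read_variable_length_integer are easiest to see with concrete inputs. A quick sanity check against the incoming (data, position) signature; the import path assumes the post-commit module layout:

from rsrcfork.compress.common import _read_variable_length_integer

# One byte below 0x80: the byte itself, as a signed value.
assert _read_variable_length_integer(b"\x12", 0) == (0x12, 1)
# Two-byte form (first byte in 0x80-0xfe): ((0xc1 - 0xc0) & 0xff) followed by
# the next byte, read as a signed 16-bit integer -> b"\x01\x00" -> 256.
assert _read_variable_length_integer(b"\xc1\x00", 0) == (256, 2)
# 0xff marker: the following four bytes as a signed 32-bit integer.
assert _read_variable_length_integer(b"\xff\xff\xff\xff\xfe", 0) == (-2, 5)
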
@ -1,6 +1,3 @@
|
|||||||
import io
|
|
||||||
import typing
|
|
||||||
|
|
||||||
from . import common
|
from . import common
|
||||||
|
|
||||||
# Lookup table for codes in range(0x4b, 0xfe).
|
# Lookup table for codes in range(0x4b, 0xfe).
|
||||||
@ -39,73 +36,94 @@ TABLE = [TABLE_DATA[i:i + 2] for i in range(0, len(TABLE_DATA), 2)]
|
|||||||
assert len(TABLE) == len(range(0x4b, 0xfe))
|
assert len(TABLE) == len(range(0x4b, 0xfe))
|
||||||
|
|
||||||
|
|
||||||
def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: typing.BinaryIO, *, debug: bool=False) -> typing.Iterator[bytes]:
|
def decompress(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes:
|
||||||
"""Internal helper function, implements the main decompression algorithm. Only called from decompress_stream, which performs some extra checks and debug logging."""
|
"""Decompress compressed data in the format used by 'dcmp' (0)."""
|
||||||
|
|
||||||
if not isinstance(header_info, common.CompressedType8HeaderInfo):
|
prev_literals = []
|
||||||
raise common.DecompressError(f"Incorrect header type: {type(header_info).__qualname__}")
|
decompressed = b""
|
||||||
|
|
||||||
prev_literals: typing.List[bytes] = []
|
i = 0
|
||||||
|
|
||||||
while True: # Loop is terminated when the EOF marker (0xff) is encountered
|
while i < len(data):
|
||||||
(byte,) = common.read_exact(stream, 1)
|
byte = data[i]
|
||||||
if debug:
|
if debug:
|
||||||
print(f"Tag byte 0x{byte:>02x}")
|
print(f"Tag byte 0x{byte:>02x}, at 0x{i:x}, decompressing to 0x{len(decompressed):x}")
|
||||||
|
|
||||||
if byte in range(0x00, 0x20):
|
if byte in range(0x00, 0x20):
|
||||||
# Literal byte sequence.
|
# Literal byte sequence.
|
||||||
if byte in (0x00, 0x10):
|
if byte in (0x00, 0x10):
|
||||||
# The length of the literal data is stored in the next byte.
|
# The length of the literal data is stored in the next byte.
|
||||||
(count_div2,) = common.read_exact(stream, 1)
|
count_div2 = data[i+1]
|
||||||
|
begin = i + 2
|
||||||
else:
|
else:
|
||||||
# The length of the literal data is stored in the low nibble of the tag byte.
|
# The length of the literal data is stored in the low nibble of the tag byte.
|
||||||
count_div2 = byte >> 0 & 0xf
|
count_div2 = byte >> 0 & 0xf
|
||||||
count = 2 * count_div2
|
begin = i + 1
|
||||||
|
end = begin + 2*count_div2
|
||||||
# Controls whether or not the literal is stored so that it can be referenced again later.
|
# Controls whether or not the literal is stored so that it can be referenced again later.
|
||||||
do_store = byte >= 0x10
|
do_store = byte >= 0x10
|
||||||
literal = common.read_exact(stream, count)
|
literal = data[begin:end]
|
||||||
if debug:
|
if debug:
|
||||||
print(f"Literal (storing: {do_store})")
|
print(f"Literal (storing: {do_store})")
|
||||||
|
print(f"\t-> {literal}")
|
||||||
|
decompressed += literal
|
||||||
if do_store:
|
if do_store:
|
||||||
if debug:
|
if debug:
|
||||||
print(f"\t-> storing as literal number 0x{len(prev_literals):x}")
|
print(f"\t-> stored as literal number 0x{len(prev_literals):x}")
|
||||||
prev_literals.append(literal)
|
prev_literals.append(literal)
|
||||||
yield literal
|
i = end
|
||||||
elif byte in (0x20, 0x21):
|
elif byte in (0x20, 0x21):
|
||||||
# Backreference to a previous literal, 2-byte form.
|
# Backreference to a previous literal, 2-byte form.
|
||||||
# This can reference literals with index in range(0x28, 0x228).
|
# This can reference literals with index in range(0x28, 0x228).
|
||||||
(next_byte,) = common.read_exact(stream, 1)
|
table_index = 0x28 + ((byte - 0x20) << 8 | data[i+1])
|
||||||
table_index = 0x28 + ((byte - 0x20) << 8 | next_byte)
|
i += 2
|
||||||
if debug:
|
if debug:
|
||||||
print(f"Backreference (2-byte form) to 0x{table_index:>02x}")
|
print(f"Backreference (2-byte form) to 0x{table_index:>02x}")
|
||||||
yield prev_literals[table_index]
|
literal = prev_literals[table_index]
|
||||||
|
if debug:
|
||||||
|
print(f"\t-> {literal}")
|
||||||
|
decompressed += literal
|
||||||
elif byte == 0x22:
|
elif byte == 0x22:
|
||||||
# Backreference to a previous literal, 3-byte form.
|
# Backreference to a previous literal, 3-byte form.
|
||||||
# This can reference any literal with index 0x28 and higher, but is only necessary for literals with index 0x228 and higher.
|
# This can reference any literal with index 0x28 and higher, but is only necessary for literals with index 0x228 and higher.
|
||||||
table_index = 0x28 + int.from_bytes(common.read_exact(stream, 2), "big", signed=False)
|
table_index = 0x28 + int.from_bytes(data[i+1:i+3], "big", signed=False)
|
||||||
|
i += 3
|
||||||
if debug:
|
if debug:
|
||||||
print(f"Backreference (3-byte form) to 0x{table_index:>02x}")
|
print(f"Backreference (3-byte form) to 0x{table_index:>02x}")
|
||||||
yield prev_literals[table_index]
|
literal = prev_literals[table_index]
|
||||||
|
if debug:
|
||||||
|
print(f"\t-> {literal}")
|
||||||
|
decompressed += literal
|
||||||
elif byte in range(0x23, 0x4b):
|
elif byte in range(0x23, 0x4b):
|
||||||
# Backreference to a previous literal, 1-byte form.
|
# Backreference to a previous literal, 1-byte form.
|
||||||
# This can reference literals with indices in range(0x28).
|
# This can reference literals with indices in range(0x28).
|
||||||
table_index = byte - 0x23
|
table_index = byte - 0x23
|
||||||
|
i += 1
|
||||||
if debug:
|
if debug:
|
||||||
print(f"Backreference (1-byte form) to 0x{table_index:>02x}")
|
print(f"Backreference (1-byte form) to 0x{table_index:>02x}")
|
||||||
yield prev_literals[table_index]
|
literal = prev_literals[table_index]
|
||||||
|
if debug:
|
||||||
|
print(f"\t-> {literal}")
|
||||||
|
decompressed += literal
|
||||||
elif byte in range(0x4b, 0xfe):
|
elif byte in range(0x4b, 0xfe):
|
||||||
# Reference into a fixed table of two-byte literals.
|
# Reference into a fixed table of two-byte literals.
|
||||||
# All compressed resources use the same table.
|
# All compressed resources use the same table.
|
||||||
table_index = byte - 0x4b
|
table_index = byte - 0x4b
|
||||||
|
i += 1
|
||||||
if debug:
|
if debug:
|
||||||
print(f"Fixed table reference to 0x{table_index:>02x}")
|
print(f"Fixed table reference to 0x{table_index:>02x}")
|
||||||
yield TABLE[table_index]
|
entry = TABLE[table_index]
|
||||||
|
if debug:
|
||||||
|
print(f"\t-> {entry}")
|
||||||
|
decompressed += entry
|
||||||
elif byte == 0xfe:
|
elif byte == 0xfe:
|
||||||
# Extended code, whose meaning is controlled by the following byte.
|
# Extended code, whose meaning is controlled by the following byte.
|
||||||
|
|
||||||
(kind,) = common.read_exact(stream, 1)
|
i += 1
|
||||||
|
kind = data[i]
|
||||||
if debug:
|
if debug:
|
||||||
print(f"Extended code: 0x{kind:>02x}")
|
print(f"Extended code: 0x{kind:>02x}")
|
||||||
|
i += 1
|
||||||
|
|
||||||
if kind == 0x00:
|
if kind == 0x00:
|
||||||
# Compact representation of (part of) a segment loader jump table, as used in 'CODE' (0) resources.
|
# Compact representation of (part of) a segment loader jump table, as used in 'CODE' (0) resources.
|
||||||
@@ -114,28 +132,37 @@ def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: ty
                     print(f"Segment loader jump table entries")
 
                 # All generated jump table entries have the same segment number.
-                segment_number_int = common.read_variable_length_integer(stream)
+                segment_number_int, length = common._read_variable_length_integer(data, i)
+                i += length
                 if debug:
                     print(f"\t-> segment number: {segment_number_int:#x}")
 
                 # The tail part of all jump table entries (i. e. everything except for the address).
-                entry_tail = b"?<" + segment_number_int.to_bytes(2, "big", signed=False) + b"\xa9\xf0"
+                entry_tail = b"?<" + segment_number_int.to_bytes(2, "big", signed=True) + b"\xa9\xf0"
+                if debug:
+                    print(f"\t-> tail of first entry: {entry_tail}")
                 # The tail is output once *without* an address in front, i. e. the first entry's address must be generated manually by a previous code.
-                yield entry_tail
+                decompressed += entry_tail
 
-                count = common.read_variable_length_integer(stream)
+                count, length = common._read_variable_length_integer(data, i)
+                i += length
                 if count <= 0:
                     raise common.DecompressError(f"Jump table entry count must be greater than 0, not {count}")
 
                 # The second entry's address is stored explicitly.
-                current_int = common.read_variable_length_integer(stream)
+                current_int, length = common._read_variable_length_integer(data, i)
+                i += length
                 if debug:
-                    print(f"\t-> address of second entry: {current_int:#x}")
-                yield current_int.to_bytes(2, "big", signed=False) + entry_tail
+                    print(f"-> address of second entry: {current_int:#x}")
+                entry = current_int.to_bytes(2, "big", signed=False) + entry_tail
+                if debug:
+                    print(f"-> second entry: {entry}")
+                decompressed += entry
 
                 for _ in range(1, count):
                     # All further entries' addresses are stored as differences relative to the previous entry's address.
-                    diff = common.read_variable_length_integer(stream)
+                    diff, length = common._read_variable_length_integer(data, i)
+                    i += length
                     # For some reason, each difference is 6 higher than it should be.
                     diff -= 6
 
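
For orientation: each expanded jump table entry is eight bytes - a two-byte address followed by the six-byte tail built above, where b"?<" is 0x3f3c (MOVE.W #imm, -(SP) on the 68k) and b"\xa9\xf0" is the _LoadSeg trap. A minimal sketch of the expansion, with hypothetical input values::

    def expand_jump_table(segment_number: int, second_address: int, diffs):
        # Tail shared by every entry: push the segment number, then _LoadSeg.
        tail = b"?<" + segment_number.to_bytes(2, "big", signed=True) + b"\xa9\xf0"
        out = tail  # first entry: its address was emitted by an earlier code
        address = second_address
        out += address.to_bytes(2, "big") + tail
        for diff in diffs:
            # Stored differences are 6 too high; addresses wrap at 16 bits.
            address = (address + diff - 6) & 0xffff
            out += address.to_bytes(2, "big") + tail
        return out

    # Hypothetical: segment 1, second entry at 0x0010, one further entry at 0x0018.
    assert len(expand_jump_table(1, 0x0010, [0x000e])) == 6 + 8 + 8
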
@@ -143,7 +170,10 @@ def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: ty
                     current_int = (current_int + diff) & 0xffff
                     if debug:
                         print(f"\t-> difference {diff:#x}: {current_int:#x}")
-                    yield current_int.to_bytes(2, "big", signed=False) + entry_tail
+                    entry = current_int.to_bytes(2, "big", signed=False) + entry_tail
+                    if debug:
+                        print(f"\t-> {entry}")
+                    decompressed += entry
             elif kind in (0x02, 0x03):
                 # Repeat 1 or 2 bytes a certain number of times.
 
@@ -158,19 +188,23 @@ def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: ty
                     print(f"Repeat {byte_count}-byte value")
 
                 # The byte(s) to repeat, stored as a variable-length integer. The value is treated as unsigned, i. e. the integer is never negative.
-                to_repeat_int = common.read_variable_length_integer(stream)
+                to_repeat_int, length = common._read_variable_length_integer(data, i)
+                i += length
                 try:
                     to_repeat = to_repeat_int.to_bytes(byte_count, "big", signed=False)
                 except OverflowError:
                     raise common.DecompressError(f"Value to repeat out of range for {byte_count}-byte repeat: {to_repeat_int:#x}")
 
-                count = common.read_variable_length_integer(stream) + 1
+                count_m1, length = common._read_variable_length_integer(data, i)
+                i += length
+                count = count_m1 + 1
                 if count <= 0:
                     raise common.DecompressError(f"Repeat count must be positive: {count}")
 
+                repeated = to_repeat * count
                 if debug:
-                    print(f"\t-> {to_repeat} * {count}")
+                    print(f"\t-> {to_repeat} * {count}: {repeated}")
-                yield to_repeat * count
+                decompressed += repeated
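
A worked example of the repeat code under the semantics above; note that the stored count is one less than the number of repetitions actually produced (all values hypothetical)::

    to_repeat_int = 0x1234                         # read as a variable-length integer
    to_repeat = to_repeat_int.to_bytes(2, "big")   # kind 0x03 repeats a 2-byte value
    count = 2 + 1                                  # stored count 2 -> 3 repetitions
    assert to_repeat * count == b"\x12\x34\x12\x34\x12\x34"
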
             elif kind == 0x04:
                 # A sequence of 16-bit signed integers, with each integer encoded as a difference relative to the previous integer. The first integer is stored explicitly.
 
@@ -178,16 +212,18 @@ def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: ty
                     print(f"Difference-encoded 16-bit integers")
 
                 # The first integer is stored explicitly, as a signed value.
-                initial_int = common.read_variable_length_integer(stream)
+                initial_int, length = common._read_variable_length_integer(data, i)
+                i += length
                 try:
                     initial = initial_int.to_bytes(2, "big", signed=True)
                 except OverflowError:
                     raise common.DecompressError(f"Initial value out of range for 16-bit integer difference encoding: {initial_int:#x}")
                 if debug:
-                    print(f"\t-> initial: 0x{initial_int:>04x}")
+                    print(f"\t-> initial: {initial}")
-                yield initial
+                decompressed += initial
 
-                count = common.read_variable_length_integer(stream)
+                count, length = common._read_variable_length_integer(data, i)
+                i += length
                 if count < 0:
                     raise common.DecompressError(f"Count cannot be negative: {count}")
 
@@ -196,74 +232,64 @@ def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: ty
                 for _ in range(count):
                     # The difference to the previous integer is stored as an 8-bit signed integer.
                     # The usual variable-length integer format is *not* used here.
-                    diff = int.from_bytes(common.read_exact(stream, 1), "big", signed=True)
+                    diff = int.from_bytes(data[i:i+1], "big", signed=True)
+                    i += 1
 
                     # Simulate 16-bit integer wraparound.
                     current_int = (current_int + diff) & 0xffff
+                    current = current_int.to_bytes(2, "big", signed=False)
                     if debug:
-                        print(f"\t-> difference {diff:#x}: 0x{current_int:>04x}")
+                        print(f"\t-> difference {diff:#x}: {current}")
-                    yield current_int.to_bytes(2, "big", signed=False)
+                    decompressed += current
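
The same delta scheme appears twice: kind 0x04 (above) stores one-byte differences and emits 16-bit values, and kind 0x06 (below) stores variable-length differences and emits 32-bit values. A self-contained sketch of the 16-bit variant::

    def delta_decode_16(initial: int, diffs: bytes) -> bytes:
        # initial is a signed 16-bit value; each difference is a signed byte.
        out = initial.to_bytes(2, "big", signed=True)
        current = initial & 0xffff  # work unsigned so wraparound is a simple mask
        for raw in diffs:
            diff = int.from_bytes(bytes([raw]), "big", signed=True)
            current = (current + diff) & 0xffff
            out += current.to_bytes(2, "big")
        return out

    # -2, -1, 0 encoded as an initial value and two +1 differences:
    assert delta_decode_16(-2, b"\x01\x01") == b"\xff\xfe\xff\xff\x00\x00"
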
             elif kind == 0x06:
                 # A sequence of 32-bit signed integers, with each integer encoded as a difference relative to the previous integer. The first integer is stored explicitly.
 
                 if debug:
                     print(f"Difference-encoded 32-bit integers")
 
                 # The first integer is stored explicitly, as a signed value.
-                initial_int = common.read_variable_length_integer(stream)
+                initial_int, length = common._read_variable_length_integer(data, i)
+                i += length
                 try:
                     initial = initial_int.to_bytes(4, "big", signed=True)
                 except OverflowError:
                     raise common.DecompressError(f"Initial value out of range for 32-bit integer difference encoding: {initial_int:#x}")
                 if debug:
-                    print(f"\t-> initial: 0x{initial_int:>08x}")
+                    print(f"\t-> initial: {initial}")
-                yield initial
+                decompressed += initial
 
-                count = common.read_variable_length_integer(stream)
+                count, length = common._read_variable_length_integer(data, i)
+                i += length
                 assert count >= 0
 
                 # To make the following calculations simpler, the signed initial_int value is converted to unsigned.
                 current_int = initial_int & 0xffffffff
                 for _ in range(count):
                     # The difference to the previous integer is stored as a variable-length integer, whose value may be negative.
-                    diff = common.read_variable_length_integer(stream)
+                    diff, length = common._read_variable_length_integer(data, i)
+                    i += length
 
                     # Simulate 32-bit integer wraparound.
                     current_int = (current_int + diff) & 0xffffffff
+                    current = current_int.to_bytes(4, "big", signed=False)
                     if debug:
-                        print(f"\t-> difference {diff:#x}: 0x{current_int:>08x}")
+                        print(f"\t-> difference {diff:#x}: {current}")
-                    yield current_int.to_bytes(4, "big", signed=False)
+                    decompressed += current
             else:
                 raise common.DecompressError(f"Unknown extended code: 0x{kind:>02x}")
         elif byte == 0xff:
             # End of data marker, always occurs exactly once as the last byte of the compressed data.
             if debug:
                 print("End marker")
-            # Check that there really is no more data left.
-            extra = stream.read(1)
-            if extra:
-                raise common.DecompressError(f"Extra data encountered after end of data marker (first extra byte: {extra})")
-            break
+            if i != len(data) - 1:
+                raise common.DecompressError(f"End marker reached at {i}, before the expected end of data at {len(data) - 1}")
+            i += 1
         else:
-            raise common.DecompressError(f"Unknown tag byte: 0x{byte:>02x}")
+            raise common.DecompressError(f"Unknown tag byte: 0x{data[i]:>02x}")
 
-
-def decompress_stream(header_info: common.CompressedHeaderInfo, stream: typing.BinaryIO, *, debug: bool=False) -> typing.Iterator[bytes]:
-    """Decompress compressed data in the format used by 'dcmp' (0)."""
-
-    decompressed_length = 0
-    for chunk in decompress_stream_inner(header_info, stream, debug=debug):
-        if debug:
-            print(f"\t-> {chunk}")
-
-        if header_info.decompressed_length % 2 != 0 and decompressed_length + len(chunk) == header_info.decompressed_length + 1:
-            # Special case: if the decompressed data length stored in the header is odd and one less than the length of the actual decompressed data, drop the last byte.
-            # This is necessary because nearly all codes generate data in groups of 2 or 4 bytes, so it is basically impossible to represent data with an odd length using this compression format.
-            decompressed_length += len(chunk) - 1
-            yield chunk[:-1]
-        else:
-            decompressed_length += len(chunk)
-            yield chunk
-
-        if debug:
-            print(f"Decompressed {decompressed_length:#x} bytes so far")
+    if decompressed_length % 2 != 0 and len(decompressed) == decompressed_length + 1:
+        # Special case: if the decompressed data length stored in the header is odd and one less than the length of the actual decompressed data, drop the last byte.
+        # This is necessary because nearly all codes generate data in groups of 2 or 4 bytes, so it is basically impossible to represent data with an odd length using this compression format.
+        decompressed = decompressed[:-1]
+
+    return decompressed
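
The rewritten tail above replaces the streaming wrapper: the odd-length fix-up is now applied once, after the main loop. A worked example of that special case::

    decompressed = b"ABCDEF"   # the codes emitted 2-byte groups, so 6 bytes came out
    decompressed_length = 5    # but the length stored in the header is odd
    if decompressed_length % 2 != 0 and len(decompressed) == decompressed_length + 1:
        decompressed = decompressed[:-1]  # drop the padding byte
    assert decompressed == b"ABCDE"
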
rsrcfork/compress/dcmp1.py
@@ -1,6 +1,3 @@
-import io
-import typing
-
 from . import common
 
 # Lookup table for codes in range(0xd5, 0xfe).
@@ -22,75 +19,96 @@ TABLE = [TABLE_DATA[i:i + 2] for i in range(0, len(TABLE_DATA), 2)]
 assert len(TABLE) == len(range(0xd5, 0xfe))
 
 
-def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: typing.BinaryIO, *, debug: bool=False) -> typing.Iterator[bytes]:
-    """Internal helper function, implements the main decompression algorithm. Only called from decompress_stream, which performs some extra checks and debug logging."""
-
-    if not isinstance(header_info, common.CompressedType8HeaderInfo):
-        raise common.DecompressError(f"Incorrect header type: {type(header_info).__qualname__}")
-
-    prev_literals: typing.List[bytes] = []
-
-    while True: # Loop is terminated when the EOF marker (0xff) is encountered
-        (byte,) = common.read_exact(stream, 1)
+def decompress(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes:
+    """Decompress compressed data in the format used by 'dcmp' (1)."""
+
+    prev_literals = []
+    decompressed = b""
+
+    i = 0
+
+    while i < len(data):
+        byte = data[i]
         if debug:
-            print(f"Tag byte 0x{byte:>02x}")
+            print(f"Tag byte 0x{byte:>02x}, at 0x{i:x}, decompressing to 0x{len(decompressed):x}")
 
         if byte in range(0x00, 0x20):
             # Literal byte sequence, 1-byte header.
             # The length of the literal data is stored in the low nibble of the tag byte.
             count = (byte >> 0 & 0xf) + 1
+            begin = i + 1
+            end = begin + count
             # Controls whether or not the literal is stored so that it can be referenced again later.
             do_store = byte >= 0x10
-            literal = common.read_exact(stream, count)
+            literal = data[begin:end]
             if debug:
                 print(f"Literal (1-byte header, storing: {do_store})")
+                print(f"\t-> {literal}")
+            decompressed += literal
             if do_store:
                 if debug:
-                    print(f"\t-> storing as literal number 0x{len(prev_literals):x}")
+                    print(f"\t-> stored as literal number 0x{len(prev_literals):x}")
                 prev_literals.append(literal)
-            yield literal
+            i = end
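
A quick reading of the 1-byte literal header, with a hypothetical tag byte::

    byte = 0x15                      # tag in range(0x00, 0x20)
    count = (byte >> 0 & 0xf) + 1    # low nibble plus one -> 6 literal bytes follow
    do_store = byte >= 0x10          # True: the literal is also saved for reuse
    assert (count, do_store) == (6, True)
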
         elif byte in range(0x20, 0xd0):
             # Backreference to a previous literal, 1-byte form.
             # This can reference literals with indices in range(0xb0).
             table_index = byte - 0x20
+            i += 1
             if debug:
                 print(f"Backreference (1-byte form) to 0x{table_index:>02x}")
-            yield prev_literals[table_index]
+            literal = prev_literals[table_index]
+            if debug:
+                print(f"\t-> {literal}")
+            decompressed += literal
         elif byte in (0xd0, 0xd1):
             # Literal byte sequence, 2-byte header.
             # The length of the literal data is stored in the following byte.
-            (count,) = common.read_exact(stream, 1)
+            count = data[i+1]
+            begin = i + 2
+            end = begin + count
             # Controls whether or not the literal is stored so that it can be referenced again later.
             do_store = byte == 0xd1
-            literal = common.read_exact(stream, count)
+            literal = data[begin:end]
             if debug:
                 print(f"Literal (2-byte header, storing: {do_store})")
+                print(f"\t-> {literal}")
+            decompressed += literal
             if do_store:
                 if debug:
-                    print(f"\t-> storing as literal number 0x{len(prev_literals):x}")
+                    print(f"\t-> stored as literal number 0x{len(prev_literals):x}")
                 prev_literals.append(literal)
-            yield literal
+            i = end
         elif byte == 0xd2:
             # Backreference to a previous literal, 2-byte form.
             # This can reference literals with indices in range(0xb0, 0x1b0).
-            (next_byte,) = common.read_exact(stream, 1)
-            table_index = next_byte + 0xb0
+            table_index = data[i+1] + 0xb0
+            i += 2
             if debug:
                 print(f"Backreference (2-byte form) to 0x{table_index:>02x}")
-            yield prev_literals[table_index]
+            literal = prev_literals[table_index]
+            if debug:
+                print(f"\t-> {literal}")
+            decompressed += literal
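
Taken together, the stored literals form a growing dictionary that later backreference tags index into. A toy trace of the mechanism (hypothetical input)::

    prev_literals = []
    decompressed = b""

    # Tag 0x12: a 3-byte literal that is stored, since 0x10 <= tag < 0x20.
    literal = b"abc"
    prev_literals.append(literal)
    decompressed += literal

    # Tag 0x20: 1-byte backreference to stored literal number 0x20 - 0x20 = 0.
    decompressed += prev_literals[0x20 - 0x20]
    assert decompressed == b"abcabc"
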
         elif byte in range(0xd5, 0xfe):
             # Reference into a fixed table of two-byte literals.
             # All compressed resources use the same table.
             table_index = byte - 0xd5
+            i += 1
             if debug:
                 print(f"Fixed table reference to 0x{table_index:>02x}")
-            yield TABLE[table_index]
+            entry = TABLE[table_index]
+            if debug:
+                print(f"\t-> {entry}")
+            decompressed += entry
         elif byte == 0xfe:
             # Extended code, whose meaning is controlled by the following byte.
 
-            (kind,) = common.read_exact(stream, 1)
+            i += 1
+            kind = data[i]
             if debug:
                 print(f"Extended code: 0x{kind:>02x}")
+            i += 1
 
             if kind == 0x02:
                 # Repeat 1 byte a certain number of times.
@@ -101,44 +119,33 @@ def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: ty
                     print(f"Repeat {byte_count}-byte value")
 
                 # The byte(s) to repeat, stored as a variable-length integer. The value is treated as unsigned, i. e. the integer is never negative.
-                to_repeat_int = common.read_variable_length_integer(stream)
+                to_repeat_int, length = common._read_variable_length_integer(data, i)
+                i += length
                 try:
                     to_repeat = to_repeat_int.to_bytes(byte_count, "big", signed=False)
                 except OverflowError:
                     raise common.DecompressError(f"Value to repeat out of range for {byte_count}-byte repeat: {to_repeat_int:#x}")
 
-                count = common.read_variable_length_integer(stream) + 1
+                count_m1, length = common._read_variable_length_integer(data, i)
+                i += length
+                count = count_m1 + 1
                 if count <= 0:
                     raise common.DecompressError(f"Repeat count must be positive: {count}")
 
+                repeated = to_repeat * count
                 if debug:
-                    print(f"\t-> {to_repeat} * {count}")
+                    print(f"\t-> {to_repeat} * {count}: {repeated}")
-                yield to_repeat * count
+                decompressed += repeated
             else:
                 raise common.DecompressError(f"Unknown extended code: 0x{kind:>02x}")
         elif byte == 0xff:
             # End of data marker, always occurs exactly once as the last byte of the compressed data.
             if debug:
                 print("End marker")
-            # Check that there really is no more data left.
-            extra = stream.read(1)
-            if extra:
-                raise common.DecompressError(f"Extra data encountered after end of data marker (first extra byte: {extra})")
-            break
+            if i != len(data) - 1:
+                raise common.DecompressError(f"End marker reached at {i}, before the expected end of data at {len(data) - 1}")
+            i += 1
         else:
-            raise common.DecompressError(f"Unknown tag byte: 0x{byte:>02x}")
-
-
-def decompress_stream(header_info: common.CompressedHeaderInfo, stream: typing.BinaryIO, *, debug: bool=False) -> typing.Iterator[bytes]:
-    """Decompress compressed data in the format used by 'dcmp' (1)."""
-
-    decompressed_length = 0
-    for chunk in decompress_stream_inner(header_info, stream, debug=debug):
-        if debug:
-            print(f"\t-> {chunk}")
-
-        decompressed_length += len(chunk)
-        yield chunk
-
-        if debug:
-            print(f"Decompressed {decompressed_length:#x} bytes so far")
+            raise common.DecompressError(f"Unknown tag byte: 0x{data[i]:>02x}")
+
+    return decompressed
rsrcfork/compress/dcmp2.py
@@ -1,5 +1,4 @@
 import enum
-import io
 import struct
 import typing
 
@@ -74,72 +73,68 @@ def _split_bits(i: int) -> typing.Tuple[bool, bool, bool, bool, bool, bool, bool
 )
 
 
-def _decompress_untagged(stream: "common.PeekableIO", decompressed_length: int, table: typing.Sequence[bytes], *, debug: bool=False) -> typing.Iterator[bytes]:
-    while True: # Loop is terminated when EOF is reached.
-        table_index_data = stream.read(1)
-        if not table_index_data:
-            # End of compressed data.
-            break
-        elif not stream.peek(1) and decompressed_length % 2 != 0:
+def _decompress_system_untagged(data: bytes, decompressed_length: int, table: typing.Sequence[bytes], *, debug: bool=False) -> bytes:
+    parts = []
+    i = 0
+    while i < len(data):
+        if i == len(data) - 1 and decompressed_length % 2 != 0:
             # Special case: if we are at the last byte of the compressed data, and the decompressed data has an odd length, the last byte is a single literal byte, and not a table reference.
             if debug:
-                print(f"Last byte: {table_index_data}")
+                print(f"Last byte: {data[-1:]}")
-            yield table_index_data
+            parts.append(data[-1:])
             break
 
         # Compressed data is untagged, every byte is a table reference.
-        (table_index,) = table_index_data
         if debug:
-            print(f"Reference: {table_index} -> {table[table_index]}")
+            print(f"Reference: {data[i]} -> {table[data[i]]}")
-        yield table[table_index]
+        parts.append(table[data[i]])
+        i += 1
+
+    return b"".join(parts)
 
 
-def _decompress_tagged(stream: "common.PeekableIO", decompressed_length: int, table: typing.Sequence[bytes], *, debug: bool=False) -> typing.Iterator[bytes]:
-    while True: # Loop is terminated when EOF is reached.
-        tag_data = stream.read(1)
-        if not tag_data:
-            # End of compressed data.
-            break
-        elif not stream.peek(1) and decompressed_length % 2 != 0:
+def _decompress_system_tagged(data: bytes, decompressed_length: int, table: typing.Sequence[bytes], *, debug: bool=False) -> bytes:
+    parts = []
+    i = 0
+    while i < len(data):
+        if i == len(data) - 1 and decompressed_length % 2 != 0:
             # Special case: if we are at the last byte of the compressed data, and the decompressed data has an odd length, the last byte is a single literal byte, and not a tag or a table reference.
             if debug:
-                print(f"Last byte: {tag_data}")
+                print(f"Last byte: {data[-1:]}")
-            yield tag_data
+            parts.append(data[-1:])
             break
 
         # Compressed data is tagged, each tag byte is followed by 8 table references and/or literals.
-        (tag,) = tag_data
+        tag = data[i]
         if debug:
             print(f"Tag: 0b{tag:>08b}")
+        i += 1
         for is_ref in _split_bits(tag):
             if is_ref:
                 # This is a table reference (a single byte that is an index into the table).
-                table_index_data = stream.read(1)
-                if not table_index_data:
-                    # End of compressed data.
-                    break
-                (table_index,) = table_index_data
                 if debug:
-                    print(f"Reference: {table_index} -> {table[table_index]}")
+                    print(f"Reference: {data[i]} -> {table[data[i]]}")
-                yield table[table_index]
+                parts.append(table[data[i]])
+                i += 1
             else:
                 # This is a literal (two uncompressed bytes that are literally copied into the output).
-                literal = stream.read(2)
-                if not literal:
-                    # End of compressed data.
-                    break
-                # Note: the literal may be only a single byte long if it is located exactly at EOF. This is intended and expected - the 1-byte literal is yielded normally, and on the next iteration, decompression is terminated as EOF is detected.
+                # Note: if i == len(data)-1, the literal is actually only a single byte long.
+                # This case is handled automatically - the slice extends one byte past the end of the data, and only one byte is returned.
                 if debug:
-                    print(f"Literal: {literal}")
+                    print(f"Literal: {data[i:i+2]}")
-                yield literal
+                parts.append(data[i:i + 2])
+                i += 2
+
+            # If the end of the compressed data is reached in the middle of a chunk, all further tag bits are ignored (they should be zero) and decompression ends.
+            if i >= len(data):
+                break
+
+    return b"".join(parts)
 
 
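`_split_bits` (unchanged by this diff) turns a tag byte into eight booleans, one per following item; the order is assumed MSB-first here, matching the 0b{tag:>08b} debug output being consumed left to right. A self-contained toy decoder for one tagged chunk::

    def split_bits(tag: int):
        # MSB first: bit 7 controls the first item after the tag byte.
        return tuple(bool(tag & (1 << (7 - n))) for n in range(8))

    table = [bytes([n]) * 2 for n in range(256)]  # hypothetical 2-byte entries
    data = bytes([0b10000000, 0x41]) + b"bcdefghijklmno"
    parts, i = [], 1
    for is_ref in split_bits(data[0]):
        if is_ref:
            parts.append(table[data[i]])  # a 1-byte table reference
            i += 1
        else:
            parts.append(data[i:i + 2])   # a 2-byte literal
            i += 2
    assert b"".join(parts) == b"AAbcdefghijklmno"
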
-def decompress_stream(header_info: common.CompressedHeaderInfo, stream: typing.BinaryIO, *, debug: bool=False) -> typing.Iterator[bytes]:
+def decompress(data: bytes, decompressed_length: int, parameters: bytes, *, debug: bool=False) -> bytes:
     """Decompress compressed data in the format used by 'dcmp' (2)."""
 
-    if not isinstance(header_info, common.CompressedType9HeaderInfo):
-        raise common.DecompressError(f"Incorrect header type: {type(header_info).__qualname__}")
-
-    unknown, table_count_m1, flags_raw = STRUCT_PARAMETERS.unpack(header_info.parameters)
+    unknown, table_count_m1, flags_raw = STRUCT_PARAMETERS.unpack(parameters)
 
     if debug:
         print(f"Value of unknown parameter field: 0x{unknown:>04x}")
@@ -157,21 +152,24 @@ def decompress_stream(header_info: common.CompressedHeaderInfo, stream: typing.B
         print(f"Flags: {flags}")
 
     if ParameterFlags.CUSTOM_TABLE in flags:
+        table_start = 0
+        data_start = table_start + table_count * 2
         table = []
-        for _ in range(table_count):
-            table.append(common.read_exact(stream, 2))
+        for i in range(table_start, data_start, 2):
+            table.append(data[i:i + 2])
         if debug:
             print(f"Using custom table: {table}")
     else:
         if table_count_m1 != 0:
             raise common.DecompressError(f"table_count_m1 field is {table_count_m1}, but must be zero when the default table is used")
         table = DEFAULT_TABLE
+        data_start = 0
         if debug:
             print("Using default table")
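
With the bytes-based API, a custom table (when present) occupies the first table_count * 2 bytes of the data, and the compressed body begins at data_start. A sketch of the layout with hypothetical values::

    table_count_m1 = 1                    # from the parameters; actual count is one more
    table_count = table_count_m1 + 1
    data = b"\x12\x34\xab\xcd" + b"..."   # two 2-byte entries, then the body
    data_start = table_count * 2
    table = [data[i:i + 2] for i in range(0, data_start, 2)]
    assert table == [b"\x12\x34", b"\xab\xcd"]
    assert data[data_start:] == b"..."
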
     if ParameterFlags.TAGGED in flags:
-        decompress_func = _decompress_tagged
+        decompress_func = _decompress_system_tagged
     else:
-        decompress_func = _decompress_untagged
+        decompress_func = _decompress_system_untagged
 
-    yield from decompress_func(common.make_peekable(stream), header_info.decompressed_length, table, debug=debug)
+    return decompress_func(data[data_start:], decompressed_length, table, debug=debug)
setup.cfg
@@ -46,18 +46,3 @@ include =
 [options.entry_points]
 console_scripts =
     rsrcfork = rsrcfork.__main__:main
-
-[mypy]
-files=rsrcfork/**/*.py
-python_version = 3.6
-
-disallow_untyped_calls = True
-disallow_untyped_defs = True
-disallow_untyped_decorators = True
-
-no_implicit_optional = True
-
-warn_unused_ignores = True
-warn_unreachable = True
-
-warn_redundant_casts = True