mirror of
https://github.com/dgelessus/python-rsrcfork.git
synced 2025-07-01 17:23:51 +00:00
Compare commits
42 Commits
Author | SHA1 | Date | |
---|---|---|---|
fb5708e6b4 | |||
5bcc3f02d7 | |||
d082f29238 | |||
fb827e4073 | |||
c373b9fe28 | |||
e6779b021a | |||
c4fe09dbf0 | |||
acdbbc89b2 | |||
d7fb67fac1 | |||
5ede8a351a | |||
7253c53d67 | |||
efd3848146 | |||
f798928270 | |||
ad7f9f5d6d | |||
a8c09f19d1 | |||
af4c465613 | |||
4a759027f4 | |||
3e28fa7fe0 | |||
8904f6e093 | |||
4c32987cc3 | |||
acd056973e | |||
3d444bda10 | |||
5bc2c0cc81 | |||
360833f940 | |||
67a16d34a6 | |||
2fb1d02064 | |||
9adb188624 | |||
e98166d0a6 | |||
ea2fcac692 | |||
0d2a3f886b | |||
43a4073432 | |||
6247013592 | |||
9dfb33f436 | |||
a9a3168345 | |||
6b067bd762 | |||
e8df959894 | |||
2883354ef2 | |||
0b8699c2f1 | |||
2dbf0f7047 | |||
cbc55fcbc2 | |||
bc4bad678a | |||
ee796d0eb1 |
10
.editorconfig
Normal file
10
.editorconfig
Normal file
@ -0,0 +1,10 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
indent_style = tab
|
||||
insert_final_newline = true
|
||||
|
||||
[*.rst]
|
||||
indent_style = space
|
||||
indent_size = 4
|
42
.gitignore
vendored
42
.gitignore
vendored
@ -1,40 +1,8 @@
|
||||
# IntelliJ IDEA, PyCharm, etc.
|
||||
.idea
|
||||
*.iml
|
||||
out
|
||||
gen
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
# Python bytecode
|
||||
*.py[co]
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
# setuptools
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# virtualenv
|
||||
.venv/
|
||||
venv/
|
||||
ENV/
|
||||
build/
|
||||
dist/
|
||||
|
207
README.rst
207
README.rst
@ -1,7 +1,27 @@
|
||||
``rsrcfork``
|
||||
============
|
||||
|
||||
A pure Python library for reading Macintosh Toolbox or Carbon resource manager data, as found in resource forks or ``.rsrc`` files even on current Mac OS X/macOS systems.
|
||||
A pure Python, cross-platform library/tool for reading Macintosh resource data, as stored in resource forks and ``.rsrc`` files.
|
||||
|
||||
Resource forks were an important part of the Classic Mac OS, where they provided a standard way to store structured file data, metadata and application resources. This usage continued into Mac OS X (now called macOS) for backward compatibility, but over time resource forks became less commonly used in favor of simple data fork-only formats, application bundles, and extended attributes.
|
||||
|
||||
As of OS X 10.8 and the deprecation of the Carbon API, macOS no longer provides any officially supported APIs for using and manipulating resource data. Despite this, parts of macOS still support and use resource forks, for example to store custom file and folder icons set by the user.
|
||||
|
||||
Features
|
||||
--------
|
||||
|
||||
* Pure Python, cross-platform - no native Mac APIs are used.
|
||||
* Provides both a Python API and a command-line tool.
|
||||
* Resource data can be read from either the resource fork or the data fork.
|
||||
|
||||
* On Mac systems, the correct fork is selected automatically when reading a file. This allows reading both regular resource forks and resource data stored in data forks (as with ``.rsrc`` and similar files).
|
||||
* On non-Mac systems, resource forks are not available, so the data fork is always used.
|
||||
|
||||
* Compressed resources (supported by System 7 through Mac OS 9) are automatically decompressed.
|
||||
|
||||
* Only the standard System 7.0 resource compression methods are supported. Resources that use non-standard decompressors cannot be decompressed.
|
||||
|
||||
* Object ``repr``\s are REPL-friendly: all relevant information is displayed, and long data is truncated to avoid filling up the screen by accident.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
@ -11,83 +31,70 @@ Python 3.6 or later. No other libraries are required.
|
||||
Installation
|
||||
------------
|
||||
|
||||
``rsrcfork`` is available `on PyPI`__ and can be installed using ``pip``:
|
||||
``rsrcfork`` is available `on PyPI <https://pypi.org/project/rsrcfork/>`_ and can be installed using ``pip``:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
python3 -m pip install rsrcfork
|
||||
python3 -m pip install rsrcfork
|
||||
|
||||
Alternatively you can run the ``setup.py`` script manually:
|
||||
Alternatively you can download the source code manually, and run this command in the source code directory to install it:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
python3 setup.py install
|
||||
|
||||
__ https://pypi.python.org/pypi/rsrcfork
|
||||
|
||||
Features
|
||||
--------
|
||||
|
||||
* Reading resources from data or resource forks (the latter only work on macOS of course)
|
||||
* Reading data lazily with seeking, or sequentially without seeking
|
||||
* Accessing resource data and attributes by their type code and ID, using a mapping-like interface
|
||||
* REPL-friendly ``repr``\s that truncate long resource data so it doesn't fill the entire screen
|
||||
python3 -m pip install .
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Simple example
|
||||
``````````````
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> import rsrcfork
|
||||
>>> rf = rsrcfork.open("/Users/Shared/Test.textClipping")
|
||||
>>> rf
|
||||
<rsrcfork.ResourceFile at 0x1046e6048, attributes ResourceFileAttrs.0, containing 4 resource types: [b'utxt', b'utf8', b'TEXT', b'drag']>
|
||||
>>> rf[b"TEXT"]
|
||||
<rsrcfork.ResourceFile._LazyResourceMap at 0x10470ed30 containing one resource: rsrcfork.Resource(resource_type=b'TEXT', resource_id=256, name=None, attributes=ResourceAttrs.0, data=b'Here is some text')>
|
||||
>>> import rsrcfork
|
||||
>>> rf = rsrcfork.open("/Users/Shared/Test.textClipping")
|
||||
>>> rf
|
||||
<rsrcfork.ResourceFile at 0x1046e6048, attributes ResourceFileAttrs.0, containing 4 resource types: [b'utxt', b'utf8', b'TEXT', b'drag']>
|
||||
>>> rf[b"TEXT"]
|
||||
<rsrcfork.ResourceFile._LazyResourceMap at 0x10470ed30 containing one resource: rsrcfork.Resource(resource_type=b'TEXT', resource_id=256, name=None, attributes=ResourceAttrs.0, data=b'Here is some text')>
|
||||
|
||||
Automatic selection of data/resource fork
|
||||
`````````````````````````````````````````
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> import rsrcfork
|
||||
>>> datarf = rsrcfork.open("/System/Library/Fonts/Monaco.dfont") # Resources in data fork
|
||||
>>> datarf._stream
|
||||
<_io.BufferedReader name='/System/Library/Fonts/Monaco.dfont'>
|
||||
>>> resourcerf = rsrcfork.open("/Users/Shared/Test.textClipping") # Resources in resource fork
|
||||
>>> resourcerf._stream
|
||||
<_io.BufferedReader name='/Users/Shared/Test.textClipping/..namedfork/rsrc'>
|
||||
>>> import rsrcfork
|
||||
>>> datarf = rsrcfork.open("/System/Library/Fonts/Monaco.dfont") # Resources in data fork
|
||||
>>> datarf._stream
|
||||
<_io.BufferedReader name='/System/Library/Fonts/Monaco.dfont'>
|
||||
>>> resourcerf = rsrcfork.open("/Users/Shared/Test.textClipping") # Resources in resource fork
|
||||
>>> resourcerf._stream
|
||||
<_io.BufferedReader name='/Users/Shared/Test.textClipping/..namedfork/rsrc'>
|
||||
|
||||
Command-line interface
|
||||
``````````````````````
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. code-block:: sh
|
||||
$ python3 -m rsrcfork /Users/Shared/Test.textClipping
|
||||
No header system data
|
||||
No header application data
|
||||
No file attributes
|
||||
4 resource types:
|
||||
'utxt': 1 resources:
|
||||
(256), unnamed, no attributes, 34 bytes
|
||||
|
||||
'utf8': 1 resources:
|
||||
(256), unnamed, no attributes, 17 bytes
|
||||
|
||||
'TEXT': 1 resources:
|
||||
(256), unnamed, no attributes, 17 bytes
|
||||
|
||||
'drag': 1 resources:
|
||||
(128), unnamed, no attributes, 64 bytes
|
||||
|
||||
$ python3 -m rsrcfork /Users/Shared/Test.textClipping "'TEXT' (256)"
|
||||
Resource 'TEXT' (256), unnamed, no attributes, 17 bytes:
|
||||
00000000 48 65 72 65 20 69 73 20 73 6f 6d 65 20 74 65 78 |Here is some tex|
|
||||
00000010 74 |t|
|
||||
00000011
|
||||
|
||||
|
||||
$ python3 -m rsrcfork /Users/Shared/Test.textClipping
|
||||
4 resource types:
|
||||
'utxt': 1 resources:
|
||||
(256): 34 bytes
|
||||
|
||||
'utf8': 1 resources:
|
||||
(256): 17 bytes
|
||||
|
||||
'TEXT': 1 resources:
|
||||
(256): 17 bytes
|
||||
|
||||
'drag': 1 resources:
|
||||
(128): 64 bytes
|
||||
|
||||
$ python3 -m rsrcfork /Users/Shared/Test.textClipping "'TEXT' (256)"
|
||||
Resource 'TEXT' (256): 17 bytes:
|
||||
00000000 48 65 72 65 20 69 73 20 73 6f 6d 65 20 74 65 78 |Here is some tex|
|
||||
00000010 74 |t|
|
||||
00000011
|
||||
|
||||
Limitations
|
||||
-----------
|
||||
@ -104,53 +111,81 @@ Further info on resource files
|
||||
Sources of information about the resource fork data format, and the structure of common resource types:
|
||||
|
||||
* Inside Macintosh, Volume I, Chapter 5 "The Resource Manager". This book can probably be obtained in physical form somewhere, but the relevant chapter/book is also available in a few places online:
|
||||
- `Apple's legacy documentation`__
|
||||
- pagetable.com, a site that happened to have a copy of the book: `info blog post`__, `direct download`__
|
||||
* `Wikipedia`__, of course
|
||||
* The `Resource Fork`__ article on "Just Solve the File Format Problem" (despite the title, this is a decent site and not clickbait)
|
||||
* The `KSFL`__ library (and `its wiki`__), written in Java, which supports reading and writing resource files
|
||||
* Apple's macOS SDK, which is distributed with Xcode. The latest version of Xcode is available for free from the Mac App Store. Current and previous versions can be downloaded from `the Apple Developer download page`__. Accessing these downloads requires an Apple ID with (at least) a free developer program membership.
|
||||
|
||||
* `Apple's legacy documentation <https://developer.apple.com/legacy/library/documentation/mac/pdf/MoreMacintoshToolbox.pdf>`_
|
||||
* pagetable.com, a site that happened to have a copy of the book: `info blog post <http://www.pagetable.com/?p=50>`_, `direct download <http://www.weihenstephan.org/~michaste/pagetable/mac/Inside_Macintosh.pdf>`_
|
||||
|
||||
* `Wikipedia <https://en.wikipedia.org/wiki/Resource_fork>`_, of course
|
||||
* The `Resource Fork <http://fileformats.archiveteam.org/wiki/Resource_Fork>`_ article on "Just Solve the File Format Problem" (despite the title, this is a decent site and not clickbait)
|
||||
* The `KSFL <https://github.com/kreativekorp/ksfl>`_ library (and `its wiki <https://github.com/kreativekorp/ksfl/wiki/Macintosh-Resource-File-Format>`_), written in Java, which supports reading and writing resource files
|
||||
* Alysis Software Corporation's article on resource compression (found on `the company's website <http://www.alysis.us/arctechnology.htm>`_ and in `MacTech Magazine's online archive <http://preserve.mactech.com/articles/mactech/Vol.09/09.01/ResCompression/index.html>`_) has some information on the structure of certain kinds of compressed resources.
|
||||
* Apple's macOS SDK, which is distributed with Xcode. The latest version of Xcode is available for free from the Mac App Store. Current and previous versions can be downloaded from `the Apple Developer download page <https://developer.apple.com/download/more/>`_. Accessing these downloads requires an Apple ID with (at least) a free developer program membership.
|
||||
* Apple's MPW (Macintosh Programmer's Workshop) and related developer tools. These were previously available from Apple's FTP server at ftp://ftp.apple.com/, which is no longer functional. Because of this, these downloads are only available on mirror sites, such as http://staticky.com/mirrors/ftp.apple.com/.
|
||||
|
||||
If these links are no longer functional, some are archived in the `Internet Archive Wayback Machine`__ or `archive.is`__ aka `archive.fo`__.
|
||||
|
||||
__ https://developer.apple.com/legacy/library/documentation/mac/pdf/MoreMacintoshToolbox.pdf
|
||||
|
||||
__ http://www.pagetable.com/?p=50
|
||||
|
||||
__ http://www.weihenstephan.org/~michaste/pagetable/mac/Inside_Macintosh.pdf
|
||||
|
||||
__ https://en.wikipedia.org/wiki/Resource_fork
|
||||
|
||||
__ http://fileformats.archiveteam.org/wiki/Resource_Fork
|
||||
|
||||
__ https://github.com/kreativekorp/ksfl
|
||||
|
||||
__ https://github.com/kreativekorp/ksfl/wiki/Macintosh-Resource-File-Format
|
||||
|
||||
__ https://developer.apple.com/download/more/
|
||||
|
||||
__ https://archive.org/web/
|
||||
|
||||
__ http://archive.is/
|
||||
|
||||
__ https://archive.fo/
|
||||
If these links are no longer functional, some are archived in the `Internet Archive Wayback Machine <https://archive.org/web/>`_ or `archive.is <http://archive.is/>`_ aka `archive.fo <https://archive.fo/>`_.
|
||||
|
||||
Changelog
|
||||
---------
|
||||
|
||||
Version 1.2.0
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
* Added support for compressed resources.
|
||||
|
||||
* Compressed resource data is automatically decompressed, both in the Python API and on the command line.
|
||||
* This is technically a breaking change, since in previous versions the compressed resource data was returned directly. However, this change will not affect end users negatively, unless one has already implemented custom handling for compressed resources.
|
||||
* Currently, only the three standard System 7.0 compression formats (``'dcmp'`` IDs 0, 1, 2) are supported. Attempting to access a resource compressed in an unsupported format results in a ``DecompressError``.
|
||||
* To access the raw resource data as stored in the file, without automatic decompression, use the ``res.data_raw`` attribute (for the Python API), or the ``--no-decompress`` option (for the command-line interface). This can be used to read the resource data in its compressed form, even if the compression format is not supported.
|
||||
|
||||
* Improved automatic data/resource fork selection for files whose resource fork contains invalid data.
|
||||
|
||||
* This fixes reading certain system files with resource data in their data fork (such as HIToolbox.rsrc in HIToolbox.framework, or .dfont fonts) on recent macOS versions (at least macOS 10.14, possibly earlier). Although these files have no resource fork, recent macOS versions will successfully open the resource fork and return garbage data for it. This behavior is now detected and handled by using the data fork instead.
|
||||
|
||||
* Replaced the ``rsrcfork`` parameter of ``rsrcfork.open``/``ResourceFork.open`` with a new ``fork`` parameter. ``fork`` accepts string values (like the command line ``--fork`` option) rather than ``rsrcfork``'s hard to understand ``None``/``True``/``False``.
|
||||
|
||||
* The old ``rsrcfork`` parameter has been deprecated and will be removed in the future, but for now it still works as before.
|
||||
|
||||
* Added an explanatory message when a resource filter on the command line doesn't match any resources in the resource file. Previously there would either be no output or a confusing error, depending on the selected ``--format``.
|
||||
* Changed resource type codes and names to be displayed in MacRoman instead of escaping all non-ASCII characters.
|
||||
* Cleaned up the resource descriptions in listings and dumps to improve readability. Previously they included some redundant or unnecessary information - for example, each resource with no attributes set would be explicitly marked as "no attributes".
|
||||
* Unified the formats of resource descriptions in listings and dumps, which were previously slightly different from each other.
|
||||
* Improved error messages when attempting to read multiple resources using ``--format=hex`` or ``--format=raw``.
|
||||
* Fixed reading from non-seekable streams not working for some resource files.
|
||||
* Removed the ``allow_seek`` parameter of ``ResourceFork.__init__`` and the ``--read-mode`` command line option. They are no longer necessary, and were already practically useless before due to non-seekable stream reading being broken.
|
||||
|
||||
Version 1.1.3.post1
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
* Fixed a formatting error in the README.rst to allow upload to PyPI.
|
||||
|
||||
Version 1.1.3
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
**Note: This version is not available on PyPI, see version 1.1.3.post1 changelog for details.**
|
||||
|
||||
* Added a setuptools entry point for the command-line interface. This allows calling it using just ``rsrcfork`` instead of ``python3 -m rsrcfork``.
|
||||
* Changed the default value of ``ResourceFork.__init__``'s ``close`` keyword argument from ``True`` to ``False``. This matches the behavior of classes like ``zipfile.ZipFile`` and ``tarfile.TarFile``.
|
||||
* Fixed ``ResourceFork.open`` and ``ResourceFork.__init__`` not closing their streams in some cases.
|
||||
* Refactored the single ``rsrcfork.py`` file into a package. This is an internal change and should have no effect on how the ``rsrcfork`` module is used.
|
||||
|
||||
Version 1.1.2
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
* Added support for the resource file attributes "Resources Locked" and "Printer Driver MultiFinder Compatible" from ResEdit.
|
||||
* Added more dummy constants for resource attributes with unknown meaning, so that resource files containing such attributes can be loaded without errors.
|
||||
|
||||
Version 1.1.1
|
||||
`````````````
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
* Fixed overflow issue with empty resource files or empty resource type entries
|
||||
* Changed ``_hexdump`` to behave more like ``hexdump -C``
|
||||
|
||||
Version 1.1.0
|
||||
`````````````
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
* Added a command-line interface - run ``python3 -m rsrcfork --help`` for more info
|
||||
|
||||
Version 1.0.0
|
||||
`````````````
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
* Initial version
|
||||
|
779
rsrcfork.py
779
rsrcfork.py
@ -1,779 +0,0 @@
|
||||
"""A library for reading old Macintosh resource manager data, as found in resource forks or .rsrc files even on current Mac OS X/macOS systems.
|
||||
|
||||
This library only understands the resource file's general structure, i. e. the type codes, IDs, attributes, and data of the resources stored in the file. The data of individual resources is provided in raw bytes form and is not processed further - the format of this data is specific to each resource type.
|
||||
|
||||
Writing resource data is not supported at all.
|
||||
"""
|
||||
|
||||
import collections
|
||||
import collections.abc
|
||||
import enum
|
||||
import io
|
||||
import os
|
||||
import struct
|
||||
import sys
|
||||
import typing
|
||||
|
||||
__all__ = [
|
||||
"Resource",
|
||||
"ResourceAttrs",
|
||||
"ResourceFile",
|
||||
"ResourceFileAttrs",
|
||||
"open",
|
||||
]
|
||||
|
||||
__version__ = "1.1.1"
|
||||
|
||||
# Translation table to replace ASCII non-printable characters with periods.
|
||||
_TRANSLATE_NONPRINTABLES = {k: "." for k in [*range(0x20), 0x7f]}
|
||||
|
||||
# The formats of all following structures is as described in the Inside Macintosh book (see module docstring).
|
||||
# Signedness and byte order of the integers is never stated explicitly in IM.
|
||||
# All integers are big-endian, as this is the native byte order of the 68k and PowerPC processors used in old Macs.
|
||||
# Almost all integers are non-negative byte counts or offsets, so it only makes sense for them to be unsigned. Sometimes the number -1 is used as a placeholder value, it should be considered equivalent to its two's complement value interpreted as unsigned (i. e. all bits set). The only exception is the resource ID field, which is signed.
|
||||
|
||||
# Resource file header, found at the start of the resource file.
|
||||
# 4 bytes: Offset from beginning of resource file to resource data. Basically guaranteed to be 0x100.
|
||||
# 4 bytes: Offset from beginning of resource file to resource map.
|
||||
# 4 bytes: Length of resource data.
|
||||
# 4 bytes: Length of resource map.
|
||||
# 112 bytes: System-reserved data. In practice, this is usually all null bytes.
|
||||
# 128 bytes: Application-specific data. In practice, this is usually all null bytes.
|
||||
STRUCT_RESOURCE_HEADER = struct.Struct(">IIII112s128s")
|
||||
|
||||
# Header for a single resource data block, found immediately before the resource data itself.
|
||||
# 4 bytes: Length of following resource data.
|
||||
STRUCT_RESOURCE_DATA_HEADER = struct.Struct(">I")
|
||||
|
||||
# Header for the resource map, found immediately after the last resource data block. This position is also indicated in the header.
|
||||
# 16 bytes: Reserved for copy of resource header (in memory). Should be 0 in the file.
|
||||
# 4 bytes: Reserved for handle to next resource map to be searched (in memory). Should be 0 in file.
|
||||
# 2 bytes: Reserved for file reference number (in memory). Should be 0 in file.
|
||||
# 2 bytes: Resource file attributes. Combination of ResourceFileAttrs flags, see below.
|
||||
# 2 bytes: Offset from beginning of resource map to type list.
|
||||
# 2 bytes: Offset from beginning of resource map to resource name list.
|
||||
STRUCT_RESOURCE_MAP_HEADER = struct.Struct(">16x4x2xHHH")
|
||||
|
||||
# Header for the type list, found immediately after the resource map header.
|
||||
# 2 bytes: Number of resource types in the map minus 1.
|
||||
STRUCT_RESOURCE_TYPE_LIST_HEADER = struct.Struct(">H")
|
||||
|
||||
# A single type in the type list.
|
||||
# 4 bytes: Resource type. This is usually a 4-character ASCII mnemonic, but may be any 4 bytes.
|
||||
# 2 bytes: Number of resources of this type in the map minus 1.
|
||||
# 2 bytes: Offset from beginning of type list to reference list for resources of this type.
|
||||
STRUCT_RESOURCE_TYPE = struct.Struct(">4sHH")
|
||||
|
||||
# A single resource reference in a reference list. (A reference list has no header, and neither does the list of reference lists.)
|
||||
# 2 bytes: Resource ID.
|
||||
# 2 bytes: Offset from beginning of resource name list to length of resource name, or -1 (0xffff) if none.
|
||||
# 1 byte: Resource attributes. Combination of ResourceAttrs flags, see below. (Note: packed into 4 bytes together with the next 3 bytes.)
|
||||
# 3 bytes: Offset from beginning of resource data to length of data for this resource. (Note: packed into 4 bytes together with the previous 1 byte.)
|
||||
# 4 bytes: Reserved for handle to resource (in memory). Should be 0 in file.
|
||||
STRUCT_RESOURCE_REFERENCE = struct.Struct(">hHI4x")
|
||||
|
||||
# Header for a resource name, found immediately before the name itself. (The name list has no header.)
|
||||
# 1 byte: Length of following resource name.
|
||||
STRUCT_RESOURCE_NAME_HEADER = struct.Struct(">B")
|
||||
|
||||
class ResourceFileAttrs(enum.Flag):
|
||||
"""Resource file attribute flags. The descriptions for these flags are taken from comments on the map*Bit and map* enum constants in <CarbonCore/Resources.h>."""
|
||||
|
||||
mapReadOnly = 128 # "is this file read-only?", "Resource file read-only"
|
||||
mapCompact = 64 # "Is a compact necessary?", "Compact resource file"
|
||||
mapChanged = 32 # "Is it necessary to write map?", "Write map out at update"
|
||||
_UNKNOWN_16 = 16
|
||||
_UNKNOWN_8 = 8
|
||||
_UNKNOWN_4 = 4
|
||||
_UNKNOWN_2 = 2
|
||||
_UNKNWON_1 = 1
|
||||
|
||||
class ResourceAttrs(enum.Flag):
|
||||
"""Resource attribute flags. The descriptions for these flags are taken from comments on the res*Bit and res* enum constants in <CarbonCore/Resources.h>."""
|
||||
|
||||
resSysRef = 128 # "reference to system/local reference" (only documented as resSysRefBit = 7 in <CarbonCore/Resources.h>
|
||||
resSysHeap = 64 # "In system/in application heap", "System or application heap?"
|
||||
resPurgeable = 32 # "Purgeable/not purgeable", "Purgeable resource?"
|
||||
resLocked = 16 # "Locked/not locked", "Load it in locked?"
|
||||
resProtected = 8 # "Protected/not protected", "Protected?"
|
||||
resPreload = 4 # "Read in at OpenResource?", "Load in on OpenResFile?"
|
||||
resChanged = 2 # "Existing resource changed since last update", "Resource changed?"
|
||||
resCompressed = 1 # "indicates that the resource data is compressed" (only documented in https://github.com/kreativekorp/ksfl/wiki/Macintosh-Resource-File-Format)
|
||||
|
||||
_REZ_ATTR_NAMES = {
|
||||
ResourceAttrs.resSysRef: None, # "Illegal or reserved attribute"
|
||||
ResourceAttrs.resSysHeap: "sysheap",
|
||||
ResourceAttrs.resPurgeable: "purgeable",
|
||||
ResourceAttrs.resLocked: "locked",
|
||||
ResourceAttrs.resProtected: "protected",
|
||||
ResourceAttrs.resPreload: "preload",
|
||||
ResourceAttrs.resChanged: None, # "Illegal or reserved attribute"
|
||||
ResourceAttrs.resCompressed: None, # "Extended Header resource attribute"
|
||||
}
|
||||
|
||||
F = typing.TypeVar("F", bound=enum.Flag, covariant=True)
|
||||
def _decompose_flags(value: F) -> typing.Sequence[F]:
|
||||
"""Decompose an enum.Flags instance into separate enum constants."""
|
||||
|
||||
return [bit for bit in type(value) if bit in value]
|
||||
|
||||
class Resource(object):
|
||||
"""A single resource from a resource file."""
|
||||
|
||||
__slots__ = ("resource_type", "resource_id", "name", "attributes", "data")
|
||||
|
||||
def __init__(self, resource_type: bytes, resource_id: int, name: typing.Optional[bytes], attributes: ResourceAttrs, data: bytes):
|
||||
"""Create a new resource with the given type code, ID, name, attributes, and data."""
|
||||
|
||||
super().__init__()
|
||||
|
||||
self.resource_type: bytes = resource_type
|
||||
self.resource_id: int = resource_id
|
||||
self.name: typing.Optional[bytes] = name
|
||||
self.attributes: ResourceAttrs = attributes
|
||||
self.data: bytes = data
|
||||
|
||||
def __repr__(self):
|
||||
if len(self.data) > 32:
|
||||
data = f"<{len(self.data)} bytes: {self.data[:32]}...>"
|
||||
else:
|
||||
data = repr(self.data)
|
||||
|
||||
return f"{type(self).__module__}.{type(self).__qualname__}(resource_type={self.resource_type}, resource_id={self.resource_id}, name={self.name}, attributes={self.attributes}, data={data})"
|
||||
|
||||
class ResourceFile(collections.abc.Mapping):
|
||||
"""A resource file reader operating on a byte stream."""
|
||||
|
||||
# noinspection PyProtectedMember
|
||||
class _LazyResourceMap(collections.abc.Mapping):
|
||||
"""Internal class: Lazy mapping of resource IDs to resource objects, returned when subscripting a ResourceFile."""
|
||||
|
||||
def __init__(self, resfile: "ResourceFile", restype: bytes):
|
||||
"""Create a new _LazyResourceMap "containing" all resources in resfile that have the type code restype."""
|
||||
|
||||
super().__init__()
|
||||
|
||||
self._resfile: "ResourceFile" = resfile
|
||||
self._restype: bytes = restype
|
||||
self._submap: typing.Mapping[int, typing.Tuple[int, ResourceAttrs, int]] = self._resfile._references[self._restype]
|
||||
|
||||
def __len__(self):
|
||||
"""Get the number of resources with this type code."""
|
||||
|
||||
return len(self._submap)
|
||||
|
||||
def __iter__(self):
|
||||
"""Iterate over the IDs of all resources with this type code."""
|
||||
|
||||
return iter(self._submap)
|
||||
|
||||
def __contains__(self, key: int):
|
||||
"""Check if a resource with the given ID exists for this type code."""
|
||||
|
||||
return key in self._submap
|
||||
|
||||
def __getitem__(self, key: int) -> Resource:
|
||||
"""Get a resource with the given ID for this type code."""
|
||||
|
||||
name_offset, attributes, data_offset = self._submap[key]
|
||||
|
||||
if name_offset == 0xffff:
|
||||
name = None
|
||||
elif self._resfile._allow_seek:
|
||||
self._resfile._stream.seek(self._resfile.map_offset + self._resfile.map_name_list_offset + name_offset)
|
||||
(name_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
|
||||
name = self._resfile._read(name_length)
|
||||
else:
|
||||
name = self._resfile._resource_names[name_offset]
|
||||
|
||||
if self._resfile._allow_seek:
|
||||
self._resfile._stream.seek(self._resfile.data_offset + data_offset)
|
||||
(data_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
|
||||
data = self._resfile._read(data_length)
|
||||
else:
|
||||
data = self._resfile._resource_data[data_offset]
|
||||
|
||||
return Resource(self._restype, key, name, attributes, data)
|
||||
|
||||
def __repr__(self):
|
||||
if len(self) == 1:
|
||||
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing one resource: {next(iter(self.values()))}>"
|
||||
else:
|
||||
return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing {len(self)} resources with IDs: {list(self)}>"
|
||||
|
||||
@classmethod
|
||||
def open(cls, filename: typing.Union[str, bytes, os.PathLike], *, rsrcfork: typing.Optional[bool]=None, **kwargs) -> "ResourceFile":
|
||||
"""Open the file at the given path as a ResourceFile.
|
||||
|
||||
If rsrcfork is not None, it is treated as boolean and controls whether the data or resource fork of the file should be opened. (On systems other than macOS, opening resource forks will not work of course, since they don't exist.)
|
||||
If rsrcfork is None, guess whether the data or resource fork should be opened. If the resource fork exists and is not empty, it is opened, otherwise the data fork is opened instead.
|
||||
"""
|
||||
|
||||
f: typing.io.BinaryIO
|
||||
if rsrcfork is None:
|
||||
# Determine whether the file has a usable resource fork.
|
||||
try:
|
||||
# Try to open the resource fork.
|
||||
f = io.open(os.path.join(filename, "..namedfork", "rsrc"), "rb")
|
||||
except (FileNotFoundError, NotADirectoryError):
|
||||
# If the resource fork doesn't exist, fall back to the data fork.
|
||||
f = io.open(filename, "rb")
|
||||
else:
|
||||
# Resource fork exists, check if it actually contains anything.
|
||||
if f.read(1):
|
||||
# Resource fork contains data, seek back to start before using it.
|
||||
f.seek(0)
|
||||
else:
|
||||
# Resource fork contains no data, fall back to the data fork.
|
||||
f = io.open(filename, "rb")
|
||||
elif rsrcfork:
|
||||
# Force use of the resource fork.
|
||||
f = io.open(os.path.join(filename, "..namedfork", "rsrc"), "rb")
|
||||
else:
|
||||
# Force use of the data fork.
|
||||
f = io.open(filename, "rb")
|
||||
|
||||
# Use the selected fork to build a ResourceFile.
|
||||
return cls(f, **kwargs)
|
||||
|
||||
def __init__(self, stream: typing.io.BinaryIO, *, allow_seek: typing.Optional[bool]=None, close: bool=True):
	"""Create a ResourceFile that reads resource data from the given byte stream.
	
	To parse resource data held in a bytes object, wrap it in an io.BytesIO first.
	
	allow_seek controls whether the stream may be seeked while reading. When it is None, stream.seekable() decides. With seeking enabled, only the file header, map header, type list and reference lists are read up front; resource data and names are fetched lazily on access. With seeking disabled, the whole stream is consumed sequentially and kept in memory (resource files are rarely larger than a megabyte, so this is usually acceptable).
	
	close controls whether calling this ResourceFile's close method also closes the underlying stream.
	"""
	
	super().__init__()
	
	self._close_stream: bool = close
	self._stream: typing.io.BinaryIO = stream
	
	# Fall back to the stream's own capability when the caller did not decide.
	self._allow_seek: bool = self._stream.seekable() if allow_seek is None else allow_seek
	
	if self._allow_seek:
		# Position is tracked by the stream itself in seeking mode.
		self._pos = None
		self._init_seeking()
	else:
		# Manual byte counter, since tell() may not be available.
		self._pos: int = 0
		self._init_streaming()
|
||||
|
||||
def _tell(self) -> int:
|
||||
"""Get the current position in the stream. This uses the stream's tell method if seeking is enabled, and an internal counter otherwise."""
|
||||
|
||||
if self._allow_seek:
|
||||
return self._stream.tell()
|
||||
else:
|
||||
return self._pos
|
||||
|
||||
def _read(self, count: int) -> bytes:
|
||||
"""Read count bytes from the stream. If seeking is disabled, this also increments the internal seek counter accordingly."""
|
||||
|
||||
ret = self._stream.read(count)
|
||||
if not self._allow_seek:
|
||||
self._pos += len(ret)
|
||||
return ret
|
||||
|
||||
def _stream_unpack(self, st: struct.Struct) -> typing.Tuple:
|
||||
"""Unpack data from the stream according to the struct st. The number of bytes to read is determined using st.size, so variable-sized structs cannot be used with this method."""
|
||||
|
||||
return st.unpack(self._read(st.size))
|
||||
|
||||
def _read_header(self):
	"""Read the resource file header, starting at the current stream position.
	
	Sets data_offset, map_offset, data_length, map_length, header_system_data and header_application_data on self.
	"""
	
	# The header is only valid at the very start of the file.
	assert self._tell() == 0
	
	self.data_offset: int
	self.map_offset: int
	self.data_length: int
	self.map_length: int
	self.header_system_data: bytes
	self.header_application_data: bytes
	(
		self.data_offset,
		self.map_offset,
		self.data_length,
		self.map_length,
		self.header_system_data,
		self.header_application_data,
	) = self._stream_unpack(STRUCT_RESOURCE_HEADER)
	
	# The resource data area is expected to begin directly after the header.
	assert self._tell() == self.data_offset
|
||||
|
||||
def _read_all_resource_data(self):
	"""Read all resource data blocks, starting at the current stream position, until self.map_offset is reached.
	
	The blocks are stored in self._resource_data, keyed by each block's absolute offset in the file.
	"""
	
	assert self._tell() == self.data_offset
	
	# Maps each data block's absolute start offset to its contents (without the length prefix).
	self._resource_data: typing.MutableMapping[int, bytes] = collections.OrderedDict()
	
	while self._tell() < self.map_offset:
		initial_pos = self._tell()
		# Each block starts with its length.
		(length,) = self._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
		# A data block must not extend into the resource map.
		assert self._tell() + length <= self.map_offset
		self._resource_data[initial_pos] = self._read(length)
	
	assert self._tell() == self.map_offset
|
||||
|
||||
def _read_map_header(self):
	"""Read the resource map header, starting at the current stream position.
	
	Sets map_type_list_offset and map_name_list_offset (both relative to the start of the map) as well as file_attributes on self.
	"""
	
	assert self._tell() == self.map_offset
	
	self.map_type_list_offset: int
	self.map_name_list_offset: int
	(
		_file_attributes,
		self.map_type_list_offset,
		self.map_name_list_offset,
	) = self._stream_unpack(STRUCT_RESOURCE_MAP_HEADER)
	
	# Wrap the raw attribute bits in the ResourceFileAttrs flag enum.
	self.file_attributes: ResourceFileAttrs = ResourceFileAttrs(_file_attributes)
|
||||
|
||||
def _read_all_resource_types(self):
	"""Read the resource type list, starting at the current stream position.
	
	Stores the number of references per type in self._reference_counts; the per-type reference list offsets are read but not retained here.
	"""
	
	# Maps each 4-byte resource type to the number of resources of that type.
	self._reference_counts: typing.MutableMapping[bytes, int] = collections.OrderedDict()
	
	(type_list_length_m1,) = self._stream_unpack(STRUCT_RESOURCE_TYPE_LIST_HEADER)
	# On-disk counts are stored minus one, so 0xffff denotes an empty list.
	type_list_length = (type_list_length_m1 + 1) % 0x10000
	
	for _ in range(type_list_length):
		(
			resource_type,
			count_m1,
			reflist_offset,
		) = self._stream_unpack(STRUCT_RESOURCE_TYPE)
		# Count is also stored minus one.
		count = (count_m1 + 1) % 0x10000
		self._reference_counts[resource_type] = count
|
||||
|
||||
def _read_all_references(self):
	"""Read all resource reference lists, starting at the current stream position.
	
	For every type in self._reference_counts, stores a mapping from resource ID to a (name offset, attributes, data offset) tuple in self._references.
	"""
	
	self._references: typing.MutableMapping[bytes, typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]]] = collections.OrderedDict()
	
	for resource_type, count in self._reference_counts.items():
		resmap: typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]] = collections.OrderedDict()
		self._references[resource_type] = resmap
		for _ in range(count):
			(
				resource_id,
				name_offset,
				attributes_and_data_offset,
			) = self._stream_unpack(STRUCT_RESOURCE_REFERENCE)
			
			# The attributes (high byte) and data offset (low 3 bytes) share one 4-byte field.
			attributes = attributes_and_data_offset >> 24
			data_offset = attributes_and_data_offset & ((1 << 24) - 1)
			
			resmap[resource_id] = (name_offset, ResourceAttrs(attributes), data_offset)
|
||||
|
||||
def _read_all_resource_names(self):
	"""Read all resource names, starting at the current stream position, until the end of the map is reached.
	
	The names are stored in self._resource_names, keyed by each name's absolute offset in the file.
	"""
	
	self._resource_names: typing.MutableMapping[int, bytes] = collections.OrderedDict()
	
	while self._tell() < self.map_offset + self.map_length:
		initial_pos = self._tell()
		# Each name is a length-prefixed byte string.
		(length,) = self._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
		self._resource_names[initial_pos] = self._read(length)
|
||||
|
||||
def _init_seeking(self):
	"""Initialize self with seeking enabled, by reading the header, map header, resource types, and references.
	
	Resource data and names are not read here; they are loaded on demand when a resource is accessed.
	"""
	
	self._read_header()
	# Skip over the entire resource data area; individual data blocks are read lazily.
	self._stream.seek(self.map_offset)
	self._read_map_header()
	self._read_all_resource_types()
	self._read_all_references()
|
||||
|
||||
def _init_streaming(self):
	"""Initialize self with seeking disabled, by reading the entire file sequentially into memory."""
	
	self._read_header()
	self._read_all_resource_data()
	
	# The resource map must start directly after the data area.
	assert self._tell() == self.map_offset
	
	self._read_map_header()
	
	# The type list must start where the map header says it does.
	assert self._tell() == self.map_offset + self.map_type_list_offset
	
	self._read_all_resource_types()
	self._read_all_references()
	
	# The name list must start where the map header says it does.
	assert self._tell() == self.map_offset + self.map_name_list_offset
	
	self._read_all_resource_names()
|
||||
|
||||
def close(self):
	"""Close the underlying stream, unless close=False was passed to the constructor.
	
	With seeking enabled, resources can no longer be read after closing; with seeking disabled, everything is already in memory and closing has no effect on this ResourceFile.
	"""
	
	if not self._close_stream:
		return
	self._stream.close()
|
||||
|
||||
def __enter__(self):
|
||||
pass
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
	# Exit the with-statement context: close the stream (subject to the close flag).
	# Returns None, so exceptions are never suppressed.
	self.close()
|
||||
|
||||
def __len__(self):
|
||||
"""Get the number of resource types in this ResourceFile."""
|
||||
|
||||
return len(self._references)
|
||||
|
||||
def __iter__(self):
|
||||
"""Iterate over all resource types in this ResourceFile."""
|
||||
|
||||
return iter(self._references)
|
||||
|
||||
def __contains__(self, key: bytes):
|
||||
"""Check whether this ResourceFile contains any resources of the given type."""
|
||||
|
||||
return key in self._references
|
||||
|
||||
def __getitem__(self, key: bytes) -> "ResourceFile._LazyResourceMap":
	"""Get a lazy mapping of all resources with the given type in this ResourceFile.
	
	NOTE(review): the map is constructed unconditionally here; whether an unknown
	type raises KeyError immediately or only on first access depends on
	_LazyResourceMap - confirm against its implementation.
	"""
	
	return ResourceFile._LazyResourceMap(self, key)
|
||||
|
||||
def __repr__(self):
	# Summarizes the file attributes and the list of contained resource types;
	# individual resources are intentionally not listed.
	return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>"
|
||||
|
||||
# Module-level convenience alias, so callers can write rsrcfork.open(...)
# instead of rsrcfork.ResourceFile.open(...). (Intentionally shadows the builtin.)
open = ResourceFile.open
|
||||
|
||||
# The following internal functions are only used by the main function.
|
||||
|
||||
def _bytes_unescape(string: str) -> bytes:
|
||||
"""Convert a string containing ASCII characters and hex escapes to a bytestring.
|
||||
|
||||
(We implement our own unescaping mechanism here to not depend on any of Python's string/bytes escape syntax.)
|
||||
"""
|
||||
|
||||
out = []
|
||||
it = iter(string)
|
||||
n = 0
|
||||
for char in it:
|
||||
if char == "\\":
|
||||
try:
|
||||
esc = next(it)
|
||||
if esc in "\\\'\"":
|
||||
out.append(esc)
|
||||
elif esc == "x":
|
||||
x1, x2 = next(it), next(it)
|
||||
out.append(int(x1+x2, 16))
|
||||
else:
|
||||
raise ValueError(f"Unknown escape character: {esc}")
|
||||
except StopIteration:
|
||||
raise ValueError("End of string in escape sequence")
|
||||
else:
|
||||
out.append(ord(char))
|
||||
n += 1
|
||||
|
||||
return bytes(out)
|
||||
|
||||
def _bytes_escape(bs: bytes, *, quote: str=None) -> str:
|
||||
"""Convert a bytestring to a string, with non-ASCII bytes hex-escaped.
|
||||
|
||||
(We implement our own escaping mechanism here to not depend on Python's str or bytes repr.)
|
||||
"""
|
||||
|
||||
out = []
|
||||
for byte in bs:
|
||||
c = chr(byte)
|
||||
if c in {quote, "\\"}:
|
||||
out.append(f"\\{c}")
|
||||
elif 0x20 <= byte < 0x7f:
|
||||
out.append(c)
|
||||
else:
|
||||
out.append(f"\\x{byte:02x}")
|
||||
|
||||
return "".join(out)
|
||||
|
||||
def _filter_resources(rf: ResourceFile, filters: typing.Sequence[str]) -> typing.Sequence[Resource]:
	"""Collect all resources from rf that match any of the given filter strings.
	
	Each filter is a bare four-character type, a quoted type, or a quoted type followed by a parenthesized resource ID, ID range or quoted name. Duplicates are returned only once, in first-match order.
	"""
	
	# Keyed by (type, ID) so the same resource matched by several filters appears once.
	matching = collections.OrderedDict()
	
	for filter in filters:
		if len(filter) == 4:
			# Bare four-character type code, e. g. ICON.
			try:
				resources = rf[filter.encode("ascii")]
			except KeyError:
				continue
			
			for res in resources.values():
				matching[res.resource_type, res.resource_id] = res
		elif filter[0] == filter[-1] == "'":
			# Quoted type code (may contain escapes), e. g. 'ICON'.
			try:
				resources = rf[_bytes_unescape(filter[1:-1])]
			except KeyError:
				continue
			
			for res in resources.values():
				matching[res.resource_type, res.resource_id] = res
		else:
			# Quoted type code followed by a parenthesized ID, ID range or name.
			pos = filter.find("'", 1)
			if pos == -1:
				raise ValueError(f"Invalid filter {filter!r}: Resource type must be single-quoted")
			elif filter[pos + 1] != " ":
				raise ValueError(f"Invalid filter {filter!r}: Resource type and ID must be separated by a space")
			
			restype, resid = filter[:pos + 1], filter[pos + 2:]
			
			if not restype[0] == restype[-1] == "'":
				raise ValueError(
					f"Invalid filter {filter!r}: Resource type is not a single-quoted type identifier: {restype!r}")
			restype = _bytes_unescape(restype[1:-1])
			
			if len(restype) != 4:
				raise ValueError(
					f"Invalid filter {filter!r}: Type identifier must be 4 bytes after replacing escapes, got {len(restype)} bytes: {restype!r}")
			
			if resid[0] != "(" or resid[-1] != ")":
				raise ValueError(f"Invalid filter {filter!r}: Resource ID must be parenthesized")
			resid = resid[1:-1]
			
			try:
				resources = rf[restype]
			except KeyError:
				continue
			
			if resid[0] == resid[-1] == '"':
				# Quoted resource name - matches at most one resource.
				name = _bytes_unescape(resid[1:-1])
				
				for res in resources.values():
					if res.name == name:
						matching[res.resource_type, res.resource_id] = res
						break
			elif ":" in resid:
				# Inclusive ID range, e. g. (24:42).
				if resid.count(":") > 1:
					raise ValueError(f"Invalid filter {filter!r}: Too many colons in ID range expression: {resid!r}")
				start, end = resid.split(":")
				start, end = int(start), int(end)
				
				for res in resources.values():
					if start <= res.resource_id <= end:
						matching[res.resource_type, res.resource_id] = res
			else:
				# Single resource ID, e. g. (42).
				resid = int(resid)
				try:
					res = resources[resid]
				except KeyError:
					continue
				matching[res.resource_type, res.resource_id] = res
	
	return list(matching.values())
|
||||
|
||||
def _hexdump(data: bytes):
|
||||
for i in range(0, len(data), 16):
|
||||
line = data[i:i + 16]
|
||||
line_hex = " ".join(f"{byte:02x}" for byte in line)
|
||||
line_char = line.decode("MacRoman").translate(_TRANSLATE_NONPRINTABLES)
|
||||
print(f"{i:08x} {line_hex:<{16*2+15}} |{line_char}|")
|
||||
|
||||
if data:
|
||||
print(f"{len(data):08x}")
|
||||
|
||||
def _raw_hexdump(data: bytes):
|
||||
for i in range(0, len(data), 16):
|
||||
print(" ".join(f"{byte:02x}" for byte in data[i:i + 16]))
|
||||
|
||||
def main(args: typing.Sequence[str]):
	"""Command-line entry point: parse args and display resources from a file's resource or data fork."""
	
	import argparse
	import textwrap
	
	ap = argparse.ArgumentParser(
		add_help=False,
		fromfile_prefix_chars="@",
		formatter_class=argparse.RawDescriptionHelpFormatter, description=textwrap.dedent("""
		Read and display resources from a file's resource or data fork.
		
		When specifying resource filters, each one may be of one of the
		following forms:
		
		An unquoted type name (without escapes): TYPE
		A quoted type name: 'TYPE'
		A quoted type name and an ID: 'TYPE' (42)
		A quoted type name and an ID range: 'TYPE' (24:42)
		A quoted type name and a resource name: 'TYPE' ("foobar")
		
		When multiple filters are specified, all resources matching any of them
		are displayed.
		"""),
	)
	
	ap.add_argument("--help", action="help", help="Display this help message and exit")
	ap.add_argument("--version", action="version", version=__version__, help="Display version information and exit")
	ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview")
	ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)")
	ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
	ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
	ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")
	ap.add_argument("--read-mode", choices=["auto", "stream", "seek"], default="auto", help="Whether to read the data sequentially (stream) or on-demand (seek), or auto to use seeking when possible (default: %(default)s)")
	
	ap.add_argument("file", help="The file to read, or - for stdin")
	ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources")
	
	ns = ap.parse_args(args)
	
	# Map the string choices onto the tri-state (None/False/True) flags understood by ResourceFile.
	ns.fork = {"auto": None, "data": False, "rsrc": True}[ns.fork]
	ns.read_mode = {"auto": None, "stream": False, "seek": True}[ns.read_mode]
	
	if ns.file == "-":
		# stdin has only one "fork", so an explicit fork choice makes no sense.
		if ns.fork is not None:
			print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr)
			sys.exit(1)
		
		rf = ResourceFile(sys.stdin.buffer, allow_seek=ns.read_mode)
	else:
		rf = ResourceFile.open(ns.file, rsrcfork=ns.fork, allow_seek=ns.read_mode)
	
	with rf:
		if ns.header_system or ns.header_application:
			# Header-only output modes.
			if ns.header_system:
				data = rf.header_system_data
			else:
				data = rf.header_application_data
			
			if ns.format == "dump":
				_hexdump(data)
			elif ns.format == "hex":
				_raw_hexdump(data)
			elif ns.format == "raw":
				sys.stdout.buffer.write(data)
			elif ns.format == "derez":
				print("Cannot output file header data in derez format", file=sys.stderr)
				sys.exit(1)
			else:
				raise ValueError(f"Unhandled output format: {ns.format}")
		elif ns.filter or ns.all:
			# Full output of selected (or, with --all, every) resources.
			if ns.filter:
				resources = _filter_resources(rf, ns.filter)
			else:
				resources = []
				for reses in rf.values():
					resources.extend(reses.values())
			
			# hex and raw output cannot separate multiple resources.
			if ns.format in ("hex", "raw") and len(resources) != 1:
				print(f"Format {ns.format} only supports exactly one resource, but found {len(resources)}", file=sys.stderr)
				sys.exit(1)
			
			for res in resources:
				if ns.format == "dump":
					# Human-readable info and hex dump
					
					if res.name is None:
						name = "unnamed"
					else:
						name = _bytes_escape(res.name, quote='"')
						name = f'name "{name}"'
					
					attrs = _decompose_flags(res.attributes)
					if attrs:
						attrdesc = "attributes: " + " | ".join(attr.name for attr in attrs)
					else:
						attrdesc = "no attributes"
					
					restype = _bytes_escape(res.resource_type, quote="'")
					print(f"Resource '{restype}' ({res.resource_id}), {name}, {attrdesc}, {len(res.data)} bytes:")
					_hexdump(res.data)
					print()
				elif ns.format == "hex":
					# Data only as hex
					
					_raw_hexdump(res.data)
				elif ns.format == "raw":
					# Data only as raw bytes
					
					sys.stdout.buffer.write(res.data)
				elif ns.format == "derez":
					# Like DeRez with no resource definitions
					
					attrs = [_REZ_ATTR_NAMES[attr] for attr in _decompose_flags(res.attributes)]
					if None in attrs:
						# Some attribute has no Rez name - fall back to a hex literal for all of them.
						attrs[:] = [f"${res.attributes.value:02X}"]
					
					parts = [str(res.resource_id)]
					
					if res.name is not None:
						name = _bytes_escape(res.name, quote='"')
						parts.append(f'"{name}"')
					
					parts += attrs
					
					restype = _bytes_escape(res.resource_type, quote="'")
					print(f"data '{restype}' ({', '.join(parts)}) {{")
					
					for i in range(0, len(res.data), 16):
						# Two-byte grouping is really annoying to implement.
						groups = []
						for j in range(0, 16, 2):
							if i+j >= len(res.data):
								break
							elif i+j+1 >= len(res.data):
								groups.append(f"{res.data[i+j]:02X}")
							else:
								groups.append(f"{res.data[i+j]:02X}{res.data[i+j+1]:02X}")
						
						s = f'$"{" ".join(groups)}"'
						comment = "/* " + res.data[i:i + 16].decode("MacRoman").translate(_TRANSLATE_NONPRINTABLES) + " */"
						print(f"\t{s:<54s}{comment}")
					
					print("};")
					print()
				else:
					raise ValueError(f"Unhandled output format: {ns.format}")
		else:
			# No filters and no --all: print an overview of the whole file.
			if rf.header_system_data != bytes(len(rf.header_system_data)):
				print("Header system data:")
				_hexdump(rf.header_system_data)
			else:
				print("No header system data")
			
			if rf.header_application_data != bytes(len(rf.header_application_data)):
				print("Header application data:")
				_hexdump(rf.header_application_data)
			else:
				print("No header application data")
			
			attrs = _decompose_flags(rf.file_attributes)
			if attrs:
				print("File attributes: " + " | ".join(attr.name for attr in attrs))
			else:
				print("No file attributes")
			
			if len(rf) > 0:
				print(f"{len(rf)} resource types:")
				for typecode, resources in rf.items():
					restype = _bytes_escape(typecode, quote="'")
					print(f"'{restype}': {len(resources)} resources:")
					for resid, res in rf[typecode].items():
						if res.name is None:
							name = "unnamed"
						else:
							name = _bytes_escape(res.name, quote='"')
							name = f'name "{name}"'
						
						attrs = _decompose_flags(res.attributes)
						if attrs:
							attrdesc = " | ".join(attr.name for attr in attrs)
						else:
							attrdesc = "no attributes"
						
						print(f"({resid}), {name}, {attrdesc}, {len(res.data)} bytes")
					print()
			else:
				print("No resource types (empty resource file)")
	
	sys.exit(0)
|
||||
|
||||
# Allow running this module directly as a script.
if __name__ == "__main__":
	main(sys.argv[1:])
|
18
rsrcfork/__init__.py
Normal file
18
rsrcfork/__init__.py
Normal file
@ -0,0 +1,18 @@
|
||||
"""A pure Python, cross-platform library/tool for reading Macintosh resource data, as stored in resource forks and ``.rsrc`` files."""
|
||||
|
||||
__version__ = "1.2.0"
|
||||
|
||||
__all__ = [
|
||||
"Resource",
|
||||
"ResourceAttrs",
|
||||
"ResourceFile",
|
||||
"ResourceFileAttrs",
|
||||
"compress",
|
||||
"open",
|
||||
]
|
||||
|
||||
from . import api, compress
|
||||
from .api import Resource, ResourceAttrs, ResourceFile, ResourceFileAttrs
|
||||
|
||||
# noinspection PyShadowingBuiltins
|
||||
open = ResourceFile.open
|
382
rsrcfork/__main__.py
Normal file
382
rsrcfork/__main__.py
Normal file
@ -0,0 +1,382 @@
|
||||
import argparse
|
||||
import collections
|
||||
import enum
|
||||
import sys
|
||||
import textwrap
|
||||
import typing
|
||||
|
||||
from . import __version__, api, compress
|
||||
|
||||
# The encoding to use when rendering bytes as text (in four-char codes, strings, hex dumps, etc.) or reading a quoted byte string (from the command line).
_TEXT_ENCODING = "MacRoman"

# Translation table to replace ASCII non-printable characters with periods.
_TRANSLATE_NONPRINTABLES = {k: "." for k in [*range(0x20), 0x7f]}

# Maps resource attribute flags to their Rez source names.
# None marks attributes that Rez has no symbolic name for; when one of those
# is set, the derez output falls back to a hex literal for all attributes.
_REZ_ATTR_NAMES = {
	api.ResourceAttrs.resSysRef: None, # "Illegal or reserved attribute"
	api.ResourceAttrs.resSysHeap: "sysheap",
	api.ResourceAttrs.resPurgeable: "purgeable",
	api.ResourceAttrs.resLocked: "locked",
	api.ResourceAttrs.resProtected: "protected",
	api.ResourceAttrs.resPreload: "preload",
	api.ResourceAttrs.resChanged: None, # "Illegal or reserved attribute"
	api.ResourceAttrs.resCompressed: None, # "Extended Header resource attribute"
}
|
||||
|
||||
F = typing.TypeVar("F", bound=enum.Flag, covariant=True)
|
||||
def _decompose_flags(value: F) -> typing.Sequence[F]:
|
||||
"""Decompose an enum.Flags instance into separate enum constants."""
|
||||
|
||||
return [bit for bit in type(value) if bit in value]
|
||||
|
||||
def _is_printable(char: str) -> bool:
|
||||
"""Determine whether a character is printable for our purposes.
|
||||
|
||||
We mainly use Python's definition of printable (i. e. everything that Unicode does not consider a separator or "other" character). However, we also treat U+F8FF as printable, which is the private use codepoint used for the Apple logo character.
|
||||
"""
|
||||
|
||||
return char.isprintable() or char == "\uf8ff"
|
||||
|
||||
def _bytes_unescape(string: str) -> bytes:
	"""Convert a string containing text (in _TEXT_ENCODING) and hex escapes to a bytestring.
	
	Supported escapes are ``\\\\``, ``\\'``, ``\\"`` and two-digit ``\\xXX`` hex escapes; any other escape, or a truncated escape sequence, raises ValueError.
	
	(We implement our own unescaping mechanism here to not depend on any of Python's string/bytes escape syntax.)
	"""
	
	out = []
	it = iter(string)
	for char in it:
		if char == "\\":
			try:
				esc = next(it)
				if esc in "\\\'\"":
					# Bug fix: encode the escaped character to bytes - appending the
					# str itself would make the final bytes(out) call raise TypeError.
					out.extend(esc.encode(_TEXT_ENCODING))
				elif esc == "x":
					x1, x2 = next(it), next(it)
					out.append(int(x1+x2, 16))
				else:
					raise ValueError(f"Unknown escape character: {esc}")
			except StopIteration:
				raise ValueError("End of string in escape sequence")
		else:
			out.extend(char.encode(_TEXT_ENCODING))
	
	return bytes(out)
|
||||
|
||||
def _bytes_escape(bs: bytes, *, quote: str=None) -> str:
	"""Convert a bytestring to a string (using _TEXT_ENCODING), with non-printable characters hex-escaped.
	
	Backslashes and the given quote character are backslash-escaped; printable characters (per _is_printable) pass through; everything else becomes a ``\\xXX`` escape.
	
	(We implement our own escaping mechanism here to not depend on Python's str or bytes repr.)
	"""
	
	# _TEXT_ENCODING is a single-byte encoding, so bytes and decoded characters pair up 1:1.
	decoded = bs.decode(_TEXT_ENCODING)
	parts = []
	for byte, char in zip(bs, decoded):
		if char == quote or char == "\\":
			parts.append("\\" + char)
		elif _is_printable(char):
			parts.append(char)
		else:
			parts.append(f"\\x{byte:02x}")
	return "".join(parts)
|
||||
|
||||
def _filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> typing.Sequence[api.Resource]:
	"""Collect all resources from rf that match any of the given filter strings.
	
	Each filter is a bare four-character type, a quoted type, or a quoted type followed by a parenthesized resource ID, ID range or quoted name. Duplicates are returned only once, in first-match order.
	"""
	
	# Keyed by (type, ID) so the same resource matched by several filters appears once.
	matching = collections.OrderedDict()
	
	for filter in filters:
		if len(filter) == 4:
			# Bare four-character type code, e. g. ICON.
			try:
				resources = rf[filter.encode("ascii")]
			except KeyError:
				continue
			
			for res in resources.values():
				matching[res.resource_type, res.resource_id] = res
		elif filter[0] == filter[-1] == "'":
			# Quoted type code (may contain escapes), e. g. 'ICON'.
			try:
				resources = rf[_bytes_unescape(filter[1:-1])]
			except KeyError:
				continue
			
			for res in resources.values():
				matching[res.resource_type, res.resource_id] = res
		else:
			# Quoted type code followed by a parenthesized ID, ID range or name.
			pos = filter.find("'", 1)
			if pos == -1:
				raise ValueError(f"Invalid filter {filter!r}: Resource type must be single-quoted")
			elif filter[pos + 1] != " ":
				raise ValueError(f"Invalid filter {filter!r}: Resource type and ID must be separated by a space")
			
			restype, resid = filter[:pos + 1], filter[pos + 2:]
			
			if not restype[0] == restype[-1] == "'":
				raise ValueError(
					f"Invalid filter {filter!r}: Resource type is not a single-quoted type identifier: {restype!r}")
			restype = _bytes_unescape(restype[1:-1])
			
			if len(restype) != 4:
				raise ValueError(
					f"Invalid filter {filter!r}: Type identifier must be 4 bytes after replacing escapes, got {len(restype)} bytes: {restype!r}")
			
			if resid[0] != "(" or resid[-1] != ")":
				raise ValueError(f"Invalid filter {filter!r}: Resource ID must be parenthesized")
			resid = resid[1:-1]
			
			try:
				resources = rf[restype]
			except KeyError:
				continue
			
			if resid[0] == resid[-1] == '"':
				# Quoted resource name - matches at most one resource.
				name = _bytes_unescape(resid[1:-1])
				
				for res in resources.values():
					if res.name == name:
						matching[res.resource_type, res.resource_id] = res
						break
			elif ":" in resid:
				# Inclusive ID range, e. g. (24:42).
				if resid.count(":") > 1:
					raise ValueError(f"Invalid filter {filter!r}: Too many colons in ID range expression: {resid!r}")
				start, end = resid.split(":")
				start, end = int(start), int(end)
				
				for res in resources.values():
					if start <= res.resource_id <= end:
						matching[res.resource_type, res.resource_id] = res
			else:
				# Single resource ID, e. g. (42).
				resid = int(resid)
				try:
					res = resources[resid]
				except KeyError:
					continue
				matching[res.resource_type, res.resource_id] = res
	
	return list(matching.values())
|
||||
|
||||
def _hexdump(data: bytes):
|
||||
for i in range(0, len(data), 16):
|
||||
line = data[i:i + 16]
|
||||
line_hex = " ".join(f"{byte:02x}" for byte in line)
|
||||
line_char = line.decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES)
|
||||
print(f"{i:08x} {line_hex:<{16*2+15}} |{line_char}|")
|
||||
|
||||
if data:
|
||||
print(f"{len(data):08x}")
|
||||
|
||||
def _raw_hexdump(data: bytes):
|
||||
for i in range(0, len(data), 16):
|
||||
print(" ".join(f"{byte:02x}" for byte in data[i:i + 16]))
|
||||
|
||||
def _describe_resource(res: api.Resource, *, include_type: bool, decompress: bool) -> str:
	"""Build a one-line human-readable description of res.
	
	include_type controls whether the (quoted) resource type is prepended. decompress controls whether compressed resources are described by their decompressed length (with the compressed length in parentheses) or by their raw length only.
	"""
	
	id_desc_parts = [f"{res.resource_id}"]
	
	if res.name is not None:
		name = _bytes_escape(res.name, quote='"')
		id_desc_parts.append(f'"{name}"')
	
	id_desc = ", ".join(id_desc_parts)
	
	content_desc_parts = []
	
	if decompress and api.ResourceAttrs.resCompressed in res.attributes:
		# Accessing res.data triggers decompression, which may fail.
		try:
			res.data
		except compress.DecompressError:
			length_desc = f"decompression failed ({len(res.data_raw)} bytes compressed)"
		else:
			length_desc = f"{len(res.data)} bytes ({len(res.data_raw)} bytes compressed)"
	else:
		length_desc = f"{len(res.data_raw)} bytes"
	content_desc_parts.append(length_desc)
	
	attrs = _decompose_flags(res.attributes)
	if attrs:
		content_desc_parts.append(" | ".join(attr.name for attr in attrs))
	
	content_desc = ", ".join(content_desc_parts)
	
	desc = f"({id_desc}): {content_desc}"
	if include_type:
		restype = _bytes_escape(res.resource_type, quote="'")
		desc = f"'{restype}' {desc}"
	return desc
|
||||
|
||||
def _show_header_data(rf: "api.ResourceFile", ns: argparse.Namespace) -> None:
	"""Output the file's system-reserved or application-specific header data and nothing else (--header-system/--header-application)."""
	
	if ns.header_system:
		data = rf.header_system_data
	else:
		data = rf.header_application_data
	
	if ns.format == "dump":
		_hexdump(data)
	elif ns.format == "hex":
		_raw_hexdump(data)
	elif ns.format == "raw":
		sys.stdout.buffer.write(data)
	elif ns.format == "derez":
		print("Cannot output file header data in derez format", file=sys.stderr)
		sys.exit(1)
	else:
		raise ValueError(f"Unhandled output format: {ns.format}")


def _print_derez_resource(res: "api.Resource", data: bytes, *, decompress: bool) -> None:
	"""Print a single resource as a DeRez-style "data" block (no resource definitions)."""
	
	attrs = list(_decompose_flags(res.attributes))
	
	if decompress and api.ResourceAttrs.resCompressed in attrs:
		# The data is shown decompressed, so hide the compression attribute,
		# but leave a comment noting that the resource was compressed.
		attrs.remove(api.ResourceAttrs.resCompressed)
		attrs_comment = " /* was compressed */"
	else:
		attrs_comment = ""
	
	attr_descs = [_REZ_ATTR_NAMES[attr] for attr in attrs]
	if None in attr_descs:
		# At least one attribute has no Rez name - fall back to a single numeric value.
		attr_descs[:] = [f"${res.attributes.value:02X}"]
	
	parts = [str(res.resource_id)]
	
	if res.name is not None:
		name = _bytes_escape(res.name, quote='"')
		parts.append(f'"{name}"')
	
	parts += attr_descs
	
	restype = _bytes_escape(res.resource_type, quote="'")
	print(f"data '{restype}' ({', '.join(parts)}{attrs_comment}) {{")
	
	for i in range(0, len(data), 16):
		# Two-byte grouping is really annoying to implement.
		groups = []
		for j in range(0, 16, 2):
			if i+j >= len(data):
				break
			elif i+j+1 >= len(data):
				groups.append(f"{data[i+j]:02X}")
			else:
				groups.append(f"{data[i+j]:02X}{data[i+j+1]:02X}")
		
		s = f'$"{" ".join(groups)}"'
		comment = "/* " + data[i:i + 16].decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES) + " */"
		print(f"\t{s:<54s}{comment}")
	
	print("};")
	print()


def _show_filtered_resources(rf: "api.ResourceFile", ns: argparse.Namespace) -> None:
	"""Display the resources matched by the filters (or all resources, with --all) in the requested output format."""
	
	if ns.filter:
		resources = _filter_resources(rf, ns.filter)
	else:
		resources = []
		for reses in rf.values():
			resources.extend(reses.values())
	
	if not resources:
		if ns.format == "dump":
			print("No resources matched the filter")
		elif ns.format in ("hex", "raw"):
			print("No resources matched the filter", file=sys.stderr)
			sys.exit(1)
		elif ns.format == "derez":
			print("/* No resources matched the filter */")
		else:
			raise AssertionError(f"Unhandled output format: {ns.format}")
	elif ns.format in ("hex", "raw") and len(resources) != 1:
		# These formats write raw data to stdout, so multiple resources would be ambiguous.
		print(f"Format {ns.format} can only output a single resource, but the filter matched {len(resources)} resources", file=sys.stderr)
		sys.exit(1)
	
	for res in resources:
		if ns.decompress:
			data = res.data
		else:
			data = res.data_raw
		
		if ns.format == "dump":
			# Human-readable info and hex dump
			desc = _describe_resource(res, include_type=True, decompress=ns.decompress)
			print(f"Resource {desc}:")
			_hexdump(data)
			print()
		elif ns.format == "hex":
			# Data only as hex
			_raw_hexdump(data)
		elif ns.format == "raw":
			# Data only as raw bytes
			sys.stdout.buffer.write(data)
		elif ns.format == "derez":
			# Like DeRez with no resource definitions
			_print_derez_resource(res, data, decompress=ns.decompress)
		else:
			raise ValueError(f"Unhandled output format: {ns.format}")


def _show_overview(rf: "api.ResourceFile", ns: argparse.Namespace) -> None:
	"""Display a summary of the file: non-zero header data, file attributes, and a one-line description of every resource."""
	
	# Only show header fields that are not entirely null bytes.
	if rf.header_system_data != bytes(len(rf.header_system_data)):
		print("Header system data:")
		_hexdump(rf.header_system_data)
	
	if rf.header_application_data != bytes(len(rf.header_application_data)):
		print("Header application data:")
		_hexdump(rf.header_application_data)
	
	attrs = _decompose_flags(rf.file_attributes)
	if attrs:
		print("File attributes: " + " | ".join(attr.name for attr in attrs))
	
	if len(rf) > 0:
		print(f"{len(rf)} resource types:")
		for typecode, resources in rf.items():
			restype = _bytes_escape(typecode, quote="'")
			print(f"'{restype}': {len(resources)} resources:")
			for resid, res in rf[typecode].items():
				print(_describe_resource(res, include_type=False, decompress=ns.decompress))
			print()
	else:
		print("No resource types (empty resource file)")


def main():
	"""Command-line entry point: parse arguments, open the resource file, and dispatch to the requested display mode."""
	
	ap = argparse.ArgumentParser(
		add_help=False,
		fromfile_prefix_chars="@",
		formatter_class=argparse.RawDescriptionHelpFormatter,
		description=textwrap.dedent("""
		Read and display resources from a file's resource or data fork.
		
		When specifying resource filters, each one may be of one of the
		following forms:
		
		An unquoted type name (without escapes): TYPE
		A quoted type name: 'TYPE'
		A quoted type name and an ID: 'TYPE' (42)
		A quoted type name and an ID range: 'TYPE' (24:42)
		A quoted type name and a resource name: 'TYPE' ("foobar")
		
		When multiple filters are specified, all resources matching any of them
		are displayed.
		"""),
	)
	
	ap.add_argument("--help", action="help", help="Display this help message and exit")
	ap.add_argument("--version", action="version", version=__version__, help="Display version information and exit")
	ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview")
	ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)")
	ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not decompress compressed resources, output compressed resource data as-is")
	ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
	ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
	ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")
	
	ap.add_argument("file", help="The file to read, or - for stdin")
	ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources")
	
	ns = ap.parse_args()
	
	if ns.file == "-":
		# Bug fix: ns.fork defaults to "auto" and can never be None, so the old
		# check (ns.fork is not None) rejected *every* attempt to read from stdin.
		# Only an explicitly selected fork is an error here - stdin has no forks.
		if ns.fork != "auto":
			print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr)
			sys.exit(1)
		
		rf = api.ResourceFile(sys.stdin.buffer)
	else:
		rf = api.ResourceFile.open(ns.file, fork=ns.fork)
	
	with rf:
		if ns.header_system or ns.header_application:
			_show_header_data(rf, ns)
		elif ns.filter or ns.all:
			_show_filtered_resources(rf, ns)
		else:
			_show_overview(rf, ns)
	
	sys.exit(0)
||||
|
||||
# Allow running the module directly (python -m rsrcfork / python __main__.py).
if __name__ == "__main__":
	sys.exit(main())
420
rsrcfork/api.py
Normal file
420
rsrcfork/api.py
Normal file
@ -0,0 +1,420 @@
|
||||
import collections
|
||||
import collections.abc
|
||||
import enum
|
||||
import io
|
||||
import os
|
||||
import struct
|
||||
import typing
|
||||
import warnings
|
||||
|
||||
from . import compress
|
||||
|
||||
# The formats of all following structures is as described in the Inside Macintosh book (see module docstring).
# Signedness and byte order of the integers is never stated explicitly in IM.
# All integers are big-endian, as this is the native byte order of the 68k and PowerPC processors used in old Macs.
# Almost all integers are non-negative byte counts or offsets, so it only makes sense for them to be unsigned. Sometimes the number -1 is used as a placeholder value, it should be considered equivalent to its two's complement value interpreted as unsigned (i. e. all bits set). The only exception is the resource ID field, which is signed.

# Resource file header, found at the start of the resource file.
# 4 bytes: Offset from beginning of resource file to resource data. Basically guaranteed to be 0x100.
# 4 bytes: Offset from beginning of resource file to resource map.
# 4 bytes: Length of resource data.
# 4 bytes: Length of resource map.
# 112 bytes: System-reserved data. In practice, this is usually all null bytes.
# 128 bytes: Application-specific data. In practice, this is usually all null bytes.
STRUCT_RESOURCE_HEADER = struct.Struct(">IIII112s128s")

# Header for a single resource data block, found immediately before the resource data itself.
# 4 bytes: Length of following resource data.
STRUCT_RESOURCE_DATA_HEADER = struct.Struct(">I")

# Header for the resource map, found immediately after the last resource data block. This position is also indicated in the header.
# 16 bytes: Reserved for copy of resource header (in memory). Should be 0 in the file.
# 4 bytes: Reserved for handle to next resource map to be searched (in memory). Should be 0 in file.
# 2 bytes: Reserved for file reference number (in memory). Should be 0 in file.
# 2 bytes: Resource file attributes. Combination of ResourceFileAttrs flags, see below.
# 2 bytes: Offset from beginning of resource map to type list.
# 2 bytes: Offset from beginning of resource map to resource name list.
STRUCT_RESOURCE_MAP_HEADER = struct.Struct(">16x4x2xHHH")

# Header for the type list, found immediately after the resource map header.
# 2 bytes: Number of resource types in the map minus 1.
STRUCT_RESOURCE_TYPE_LIST_HEADER = struct.Struct(">H")

# A single type in the type list.
# 4 bytes: Resource type. This is usually a 4-character ASCII mnemonic, but may be any 4 bytes.
# 2 bytes: Number of resources of this type in the map minus 1.
# 2 bytes: Offset from beginning of type list to reference list for resources of this type.
STRUCT_RESOURCE_TYPE = struct.Struct(">4sHH")

# A single resource reference in a reference list. (A reference list has no header, and neither does the list of reference lists.)
# 2 bytes: Resource ID.
# 2 bytes: Offset from beginning of resource name list to length of resource name, or -1 (0xffff) if none.
# 1 byte: Resource attributes. Combination of ResourceAttrs flags, see below. (Note: packed into 4 bytes together with the next 3 bytes.)
# 3 bytes: Offset from beginning of resource data to length of data for this resource. (Note: packed into 4 bytes together with the previous 1 byte.)
# 4 bytes: Reserved for handle to resource (in memory). Should be 0 in file.
STRUCT_RESOURCE_REFERENCE = struct.Struct(">hHI4x")

# Header for a resource name, found immediately before the name itself. (The name list has no header.)
# 1 byte: Length of following resource name.
STRUCT_RESOURCE_NAME_HEADER = struct.Struct(">B")
||||
|
||||
class InvalidResourceFileError(Exception):
	"""Raised when a stream does not contain a parseable resource file structure."""
|
||||
class ResourceFileAttrs(enum.Flag):
	"""Resource file attribute flags. The descriptions for these flags are taken from comments on the map*Bit and map* enum constants in <CarbonCore/Resources.h>.
	
	The _BIT_* members name the bits with no known meaning, so that any 16-bit value can be represented by this Flag.
	"""
	
	mapResourcesLocked = 1 << 15 # "Resources Locked" (undocumented, but available as a checkbox in ResEdit)
	_BIT_14 = 1 << 14
	_BIT_13 = 1 << 13
	_BIT_12 = 1 << 12
	_BIT_11 = 1 << 11
	_BIT_10 = 1 << 10
	_BIT_9 = 1 << 9
	mapPrinterDriverMultiFinderCompatible = 1 << 8 # "Printer Driver MultiFinder Compatible" (undocumented, but available as a checkbox in ResEdit)
	mapReadOnly = 1 << 7 # "is this file read-only?", "Resource file read-only"
	mapCompact = 1 << 6 # "Is a compact necessary?", "Compact resource file"
	mapChanged = 1 << 5 # "Is it necessary to write map?", "Write map out at update"
	_BIT_4 = 1 << 4
	_BIT_3 = 1 << 3
	_BIT_2 = 1 << 2
	_BIT_1 = 1 << 1
	_BIT_0 = 1 << 0
|
||||
class ResourceAttrs(enum.Flag):
	"""Resource attribute flags. The descriptions for these flags are taken from comments on the res*Bit and res* enum constants in <CarbonCore/Resources.h>."""
	
	resSysRef = 1 << 7 # "reference to system/local reference" (only documented as resSysRefBit = 7 in <CarbonCore/Resources.h>
	resSysHeap = 1 << 6 # "In system/in application heap", "System or application heap?"
	resPurgeable = 1 << 5 # "Purgeable/not purgeable", "Purgeable resource?"
	resLocked = 1 << 4 # "Locked/not locked", "Load it in locked?"
	resProtected = 1 << 3 # "Protected/not protected", "Protected?"
	resPreload = 1 << 2 # "Read in at OpenResource?", "Load in on OpenResFile?"
	resChanged = 1 << 1 # "Existing resource changed since last update", "Resource changed?"
	resCompressed = 1 << 0 # "indicates that the resource data is compressed" (only documented in https://github.com/kreativekorp/ksfl/wiki/Macintosh-Resource-File-Format)
|
||||
class Resource(object):
	"""A single resource from a resource file."""
	
	# _data_decompressed caches the result of decompression; it is only set
	# after the first successful access to the data property.
	__slots__ = ("resource_type", "resource_id", "name", "attributes", "data_raw", "_data_decompressed")
	
	def __init__(self, resource_type: bytes, resource_id: int, name: typing.Optional[bytes], attributes: ResourceAttrs, data_raw: bytes):
		"""Create a new resource with the given type code, ID, name, attributes, and data."""
		
		super().__init__()
		
		self.resource_type: bytes = resource_type
		self.resource_id: int = resource_id
		self.name: typing.Optional[bytes] = name
		self.attributes: ResourceAttrs = attributes
		self.data_raw: bytes = data_raw
	
	def __repr__(self):
		try:
			data = self.data
		except compress.DecompressError:
			# Fall back to the raw (compressed) data and say so in the repr.
			decompress_ok = False
			data = self.data_raw
		else:
			decompress_ok = True
		
		# Truncate long data so the repr stays readable.
		data_repr = f"<{len(data)} bytes: {data[:32]}...>" if len(data) > 32 else repr(data)
		
		if not decompress_ok:
			data_repr = f"<decompression failed - compressed data: {data_repr}>"
		
		return f"{type(self).__module__}.{type(self).__qualname__}(resource_type={self.resource_type}, resource_id={self.resource_id}, name={self.name}, attributes={self.attributes}, data={data_repr})"
	
	@property
	def data(self) -> bytes:
		"""The resource data, decompressed if necessary.
		
		Accessing this attribute may raise a DecompressError if the resource data is compressed and could not be decompressed. To access the compressed resource data, use the data_raw attribute.
		"""
		
		if ResourceAttrs.resCompressed not in self.attributes:
			return self.data_raw
		
		try:
			# Return the cached decompression result, if there is one.
			return self._data_decompressed
		except AttributeError:
			self._data_decompressed = compress.decompress(self.data_raw)
			return self._data_decompressed
|
||||
class ResourceFile(collections.abc.Mapping):
	"""A resource file reader operating on a byte stream.
	
	Behaves as a mapping of resource type codes (bytes) to lazy mappings of resource IDs to Resource objects.
	"""
	
	# noinspection PyProtectedMember
	class _LazyResourceMap(collections.abc.Mapping):
		"""Internal class: Lazy mapping of resource IDs to resource objects, returned when subscripting a ResourceFile."""
		
		def __init__(self, resfile: "ResourceFile", restype: bytes):
			"""Create a new _LazyResourceMap "containing" all resources in resfile that have the type code restype."""
			
			super().__init__()
			
			self._resfile: "ResourceFile" = resfile
			self._restype: bytes = restype
			self._submap: typing.Mapping[int, typing.Tuple[int, ResourceAttrs, int]] = self._resfile._references[self._restype]
		
		def __len__(self):
			"""Get the number of resources with this type code."""
			
			return len(self._submap)
		
		def __iter__(self):
			"""Iterate over the IDs of all resources with this type code."""
			
			return iter(self._submap)
		
		def __contains__(self, key: int):
			"""Check if a resource with the given ID exists for this type code."""
			
			return key in self._submap
		
		def __getitem__(self, key: int) -> Resource:
			"""Get a resource with the given ID for this type code.
			
			The name and data are read from the stream on each access (only the reference metadata is held in memory).
			"""
			
			name_offset, attributes, data_offset = self._submap[key]
			
			if name_offset == 0xffff:
				# 0xffff is the sentinel for "this resource has no name".
				name = None
			else:
				self._resfile._stream.seek(self._resfile.map_offset + self._resfile.map_name_list_offset + name_offset)
				(name_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
				name = self._resfile._read_exact(name_length)
			
			self._resfile._stream.seek(self._resfile.data_offset + data_offset)
			(data_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
			data = self._resfile._read_exact(data_length)
			
			return Resource(self._restype, key, name, attributes, data)
		
		def __repr__(self):
			if len(self) == 1:
				return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing one resource: {next(iter(self.values()))}>"
			else:
				return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing {len(self)} resources with IDs: {list(self)}>"
	
	@classmethod
	def open(cls, filename: typing.Union[str, bytes, os.PathLike], *, fork: str="auto", **kwargs) -> "ResourceFile":
		"""Open the file at the given path as a ResourceFile.
		
		The fork parameter controls which fork of the file the resource data will be read from. It accepts the following values:
		
		* "auto" (the default): Automatically select the correct fork. The resource fork will be used if the file has one and it contains valid resource data. Otherwise the data fork will be used.
		* "rsrc": Force use of the resource fork and never fall back to the data fork. This will not work on systems other than macOS, because they do not support resource forks natively.
		* "data": Force use of the data fork, even if a resource fork is present.
		
		The rsrcfork parameter is deprecated and will be removed in the future. It has the same purpose as the fork parameter, but accepts different argument values: None stands for "auto", True stands for "rsrc", and False stands for "data". These argument values are less understandable than the string versions and are not easily extensible in the future, which is why the parameter has been deprecated.
		"""
		
		if "close" in kwargs:
			raise TypeError("ResourceFile.open does not support the 'close' keyword argument")
		
		# The streams opened here are owned by the returned ResourceFile.
		kwargs["close"] = True
		
		if "rsrcfork" in kwargs:
			if fork != "auto":
				raise TypeError("The fork and rsrcfork parameters cannot be used together. Please use only the fork parameter; it replaces the deprecated rsrcfork parameter.")
			
			# Translate the deprecated rsrcfork values to fork values.
			if kwargs["rsrcfork"] is None:
				fork = "auto"
			elif kwargs["rsrcfork"]:
				fork = "rsrc"
			else:
				fork = "data"
			warnings.warn(DeprecationWarning(f"The rsrcfork parameter has been deprecated and will be removed in a future version. Please use fork={fork!r} instead of rsrcfork={kwargs['rsrcfork']!r}."))
			del kwargs["rsrcfork"]
		
		if fork == "auto":
			# Determine whether the file has a usable resource fork.
			try:
				# Try to open the resource fork.
				f = open(os.path.join(filename, "..namedfork", "rsrc"), "rb")
			except (FileNotFoundError, NotADirectoryError):
				# If the resource fork doesn't exist, fall back to the data fork.
				return cls(open(filename, "rb"), **kwargs)
			else:
				# Resource fork exists, check if it actually contains valid resource data.
				# This check is necessary because opening ..namedfork/rsrc on files that don't actually have a resource fork can sometimes succeed, but the resulting stream will either be empty, or (as of macOS 10.14, and possibly earlier) contain garbage data.
				try:
					return cls(f, **kwargs)
				except InvalidResourceFileError:
					# Resource fork is empty or invalid, fall back to the data fork.
					f.close()
					return cls(open(filename, "rb"), **kwargs)
				except BaseException:
					f.close()
					raise
		elif fork == "rsrc":
			# Force use of the resource fork.
			return cls(open(os.path.join(filename, "..namedfork", "rsrc"), "rb"), **kwargs)
		elif fork == "data":
			# Force use of the data fork.
			return cls(open(filename, "rb"), **kwargs)
		else:
			raise ValueError(f"Unsupported value for the fork parameter: {fork!r}")
	
	# Fix: the annotation used typing.io.BinaryIO - the typing.io submodule is
	# deprecated and has been removed from modern Python; typing.BinaryIO is the
	# supported equivalent.
	def __init__(self, stream: typing.BinaryIO, *, close: bool=False):
		"""Create a ResourceFile wrapping the given byte stream.
		
		To read resource file data from a bytes object, wrap it in an io.BytesIO.
		
		If the stream is seekable, only the file header and resource map are read initially. Resource data and names are loaded on-demand when the respective resource is accessed. If the stream is not seekable, the entire stream data is read into memory (this is necessary because the resource map is stored at the end of the resource file).
		
		In practice, memory usage is usually not a concern when reading resource files. Even large resource files are only a few megabytes in size, and due to limitations in the format, resource files cannot be much larger than 16 MiB (except for special cases that are unlikely to occur in practice).
		
		close controls whether the stream should be closed when the ResourceFile's close method is called. By default this is False.
		"""
		
		super().__init__()
		
		self._close_stream: bool = close
		self._stream: typing.BinaryIO
		if stream.seekable():
			self._stream = stream
		else:
			# Non-seekable stream - buffer everything, because the map lives at the end of the file.
			self._stream = io.BytesIO(stream.read())
		
		try:
			self._read_header()
			self._stream.seek(self.map_offset)
			self._read_map_header()
			self._read_all_resource_types()
			self._read_all_references()
		except BaseException:
			# Don't leak the stream if parsing fails partway through.
			self.close()
			raise
	
	def _read_exact(self, byte_count: int) -> bytes:
		"""Read byte_count bytes from the stream and raise an exception if too few bytes are read (i. e. if EOF was hit prematurely)."""
		
		data = self._stream.read(byte_count)
		if len(data) != byte_count:
			raise InvalidResourceFileError(f"Attempted to read {byte_count} bytes of data, but only got {len(data)} bytes")
		return data
	
	def _stream_unpack(self, st: struct.Struct) -> typing.Tuple:
		"""Unpack data from the stream according to the struct st. The number of bytes to read is determined using st.size, so variable-sized structs cannot be used with this method."""
		
		try:
			return st.unpack(self._read_exact(st.size))
		except struct.error as e:
			raise InvalidResourceFileError(str(e))
	
	def _read_header(self):
		"""Read the resource file header, starting at the current stream position."""
		
		assert self._stream.tell() == 0
		
		self.data_offset: int
		self.map_offset: int
		self.data_length: int
		self.map_length: int
		self.header_system_data: bytes
		self.header_application_data: bytes
		(
			self.data_offset,
			self.map_offset,
			self.data_length,
			self.map_length,
			self.header_system_data,
			self.header_application_data,
		) = self._stream_unpack(STRUCT_RESOURCE_HEADER)
		
		if self._stream.tell() != self.data_offset:
			raise InvalidResourceFileError(f"The data offset ({self.data_offset}) should point exactly to the end of the file header ({self._stream.tell()})")
	
	def _read_map_header(self):
		"""Read the map header, starting at the current stream position."""
		
		assert self._stream.tell() == self.map_offset
		
		self.map_type_list_offset: int
		self.map_name_list_offset: int
		(
			_file_attributes,
			self.map_type_list_offset,
			self.map_name_list_offset,
		) = self._stream_unpack(STRUCT_RESOURCE_MAP_HEADER)
		
		self.file_attributes: ResourceFileAttrs = ResourceFileAttrs(_file_attributes)
	
	def _read_all_resource_types(self):
		"""Read all resource types, starting at the current stream position."""
		
		self._reference_counts: typing.MutableMapping[bytes, int] = collections.OrderedDict()
		
		(type_list_length_m1,) = self._stream_unpack(STRUCT_RESOURCE_TYPE_LIST_HEADER)
		# The count is stored minus 1; the modulo maps the stored 0xffff back to a count of 0.
		type_list_length = (type_list_length_m1 + 1) % 0x10000
		
		for _ in range(type_list_length):
			(
				resource_type,
				count_m1,
				reflist_offset,
			) = self._stream_unpack(STRUCT_RESOURCE_TYPE)
			count = (count_m1 + 1) % 0x10000
			self._reference_counts[resource_type] = count
	
	def _read_all_references(self):
		"""Read all resource references, starting at the current stream position."""
		
		self._references: typing.MutableMapping[bytes, typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]]] = collections.OrderedDict()
		
		for resource_type, count in self._reference_counts.items():
			resmap: typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]] = collections.OrderedDict()
			self._references[resource_type] = resmap
			for _ in range(count):
				(
					resource_id,
					name_offset,
					attributes_and_data_offset,
				) = self._stream_unpack(STRUCT_RESOURCE_REFERENCE)
				
				# The attributes (1 byte) and data offset (3 bytes) are packed into a single 4-byte field.
				attributes = attributes_and_data_offset >> 24
				data_offset = attributes_and_data_offset & ((1 << 24) - 1)
				
				resmap[resource_id] = (name_offset, ResourceAttrs(attributes), data_offset)
	
	def close(self):
		"""Close this ResourceFile.
		
		If close=True was passed when this ResourceFile was created, the underlying stream's close method is called as well.
		"""
		
		if self._close_stream:
			self._stream.close()
	
	def __enter__(self):
		# Fix: this previously returned None (the body was just "pass"),
		# so "with ResourceFile.open(...) as rf:" bound rf to None.
		return self
	
	def __exit__(self, exc_type, exc_val, exc_tb):
		self.close()
	
	def __len__(self):
		"""Get the number of resource types in this ResourceFile."""
		
		return len(self._references)
	
	def __iter__(self):
		"""Iterate over all resource types in this ResourceFile."""
		
		return iter(self._references)
	
	def __contains__(self, key: bytes):
		"""Check whether this ResourceFile contains any resources of the given type."""
		
		return key in self._references
	
	def __getitem__(self, key: bytes) -> "ResourceFile._LazyResourceMap":
		"""Get a lazy mapping of all resources with the given type in this ResourceFile."""
		
		return ResourceFile._LazyResourceMap(self, key)
	
	def __repr__(self):
		return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>"
97
rsrcfork/compress/__init__.py
Normal file
97
rsrcfork/compress/__init__.py
Normal file
@ -0,0 +1,97 @@
|
||||
import struct
|
||||
|
||||
from . import dcmp0
|
||||
from . import dcmp1
|
||||
from . import dcmp2
|
||||
|
||||
from .common import DecompressError
|
||||
|
||||
__all__ = [
|
||||
"DecompressError",
|
||||
"decompress",
|
||||
]
|
||||
|
||||
# The signature of all compressed resource data, 0xa89f6572 in hex, or "®üer" in MacRoman.
COMPRESSED_SIGNATURE = b"\xa8\x9fer"
# The compression type commonly used for application resources.
COMPRESSED_TYPE_APPLICATION = 0x0801
# The compression type commonly used for System file resources.
COMPRESSED_TYPE_SYSTEM = 0x0901

# Common header for compressed resources of all types.
# 4 bytes: Signature (see above).
# 2 bytes: Length of the complete header (this common part and the type-specific part that follows it). (This meaning is just a guess - the field's value is always 0x0012, so there's no way to know for certain what it means.)
# 2 bytes: Compression type. Known so far: 0x0901 is used in the System file's resources. 0x0801 is used in other files' resources.
# 4 bytes: Length of the data after decompression.
STRUCT_COMPRESSED_HEADER = struct.Struct(">4sHHI")

# Header continuation part for an "application" compressed resource.
# 1 byte: "Working buffer fractional size" - the ratio of the compressed data size to the uncompressed data size, times 256.
# 1 byte: "Expansion buffer size" - the maximum number of bytes that the data might grow during decompression.
# 2 bytes: The ID of the 'dcmp' resource that can decompress this resource. Currently only ID 0 is supported.
# 2 bytes: Reserved (always zero).
STRUCT_COMPRESSED_APPLICATION_HEADER = struct.Struct(">BBhH")

# Header continuation part for a "system" compressed resource.
# 2 bytes: The ID of the 'dcmp' resource that can decompress this resource. Currently only ID 2 is supported.
# 4 bytes: Decompressor-specific parameters.
STRUCT_COMPRESSED_SYSTEM_HEADER = struct.Struct(">h4s")
||||
|
||||
def _decompress_application(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes:
	"""Decompress an "application"-type (0x0801) compressed resource body.
	
	data starts with the application-specific header continuation; the
	remainder is passed to the 'dcmp' (0) or 'dcmp' (1) decompressor.
	"""
	
	header_fields = STRUCT_COMPRESSED_APPLICATION_HEADER.unpack_from(data)
	working_buffer_fractional_size, expansion_buffer_size, dcmp_id, reserved = header_fields
	
	if debug:
		print(f"Working buffer fractional size: {working_buffer_fractional_size} (=> {len(data) * 256 / working_buffer_fractional_size})")
		print(f"Expansion buffer size: {expansion_buffer_size}")
	
	if dcmp_id == 0:
		decompress_func = dcmp0.decompress
	elif dcmp_id == 1:
		decompress_func = dcmp1.decompress
	else:
		raise DecompressError(f"Unsupported 'dcmp' ID: {dcmp_id}, expected 0 or 1")
	
	if reserved != 0:
		raise DecompressError(f"Reserved field should be 0, not 0x{reserved:>04x}")
	
	body = data[STRUCT_COMPRESSED_APPLICATION_HEADER.size:]
	return decompress_func(body, decompressed_length, debug=debug)
||||
|
||||
def _decompress_system(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes:
	"""Decompress a "system"-type (0x0901) compressed resource body.
	
	data starts with the system-specific header continuation; the remainder
	(together with the header's parameter bytes) is passed to the 'dcmp' (2)
	decompressor.
	"""
	
	dcmp_id, params = STRUCT_COMPRESSED_SYSTEM_HEADER.unpack_from(data)
	
	if dcmp_id != 2:
		raise DecompressError(f"Unsupported 'dcmp' ID: {dcmp_id}, expected 2")
	decompress_func = dcmp2.decompress
	
	body = data[STRUCT_COMPRESSED_SYSTEM_HEADER.size:]
	return decompress_func(body, decompressed_length, params, debug=debug)
||||
|
||||
def decompress(data: bytes, *, debug: bool=False) -> bytes:
	"""Decompress the given compressed resource data.
	
	data must include the full compressed-resource header (signature,
	header length, compression type, decompressed length). Raises
	DecompressError if the header is invalid or the compression type is
	not supported, or if the decompressed data does not have the length
	stored in the header.
	"""
	
	try:
		signature, header_length, compression_type, decompressed_length = STRUCT_COMPRESSED_HEADER.unpack_from(data)
	except struct.error:
		# Fix: this message was an f-string with no placeholders (lint F541).
		raise DecompressError("Invalid header")
	if signature != COMPRESSED_SIGNATURE:
		raise DecompressError(f"Invalid signature: {signature!r}, expected {COMPRESSED_SIGNATURE}")
	if header_length != 0x12:
		raise DecompressError(f"Unsupported header length: 0x{header_length:>04x}, expected 0x12")
	
	# Dispatch on the compression type stored in the common header.
	if compression_type == COMPRESSED_TYPE_APPLICATION:
		decompress_func = _decompress_application
	elif compression_type == COMPRESSED_TYPE_SYSTEM:
		decompress_func = _decompress_system
	else:
		raise DecompressError(f"Unsupported compression type: 0x{compression_type:>04x}")
	
	if debug:
		print(f"Decompressed length: {decompressed_length}")
	
	decompressed = decompress_func(data[STRUCT_COMPRESSED_HEADER.size:], decompressed_length, debug=debug)
	if len(decompressed) != decompressed_length:
		raise DecompressError(f"Actual length of decompressed data ({len(decompressed)}) does not match length stored in resource ({decompressed_length})")
	return decompressed
23
rsrcfork/compress/common.py
Normal file
23
rsrcfork/compress/common.py
Normal file
@ -0,0 +1,23 @@
|
||||
import typing
|
||||
|
||||
|
||||
class DecompressError(Exception):
|
||||
"""Raised when resource data decompression fails, because the data is invalid or the compression type is not supported."""
|
||||
|
||||
|
||||
def _read_variable_length_integer(data: bytes, position: int) -> typing.Tuple[int, int]:
|
||||
"""Read a variable-length integer starting at the given position in the data, and return the integer as well as the number of bytes consumed.
|
||||
|
||||
This variable-length integer format is used by the 0xfe codes in the compression formats used by 'dcmp' (0) and 'dcmp' (1).
|
||||
"""
|
||||
|
||||
assert len(data) > position
|
||||
if data[position] == 0xff:
|
||||
assert len(data) > position + 4
|
||||
return int.from_bytes(data[position+1:position+5], "big", signed=True), 5
|
||||
elif data[position] >= 0x80:
|
||||
assert len(data) > position + 1
|
||||
data_modified = bytes([(data[position] - 0xc0) & 0xff, data[position+1]])
|
||||
return int.from_bytes(data_modified, "big", signed=True), 2
|
||||
else:
|
||||
return int.from_bytes(data[position:position+1], "big", signed=True), 1
|
295
rsrcfork/compress/dcmp0.py
Normal file
295
rsrcfork/compress/dcmp0.py
Normal file
@ -0,0 +1,295 @@
|
||||
from . import common
|
||||
|
||||
# Lookup table for codes in range(0x4b, 0xfe).
|
||||
# This table was obtained by decompressing a manually created compressed resource with the following contents:
|
||||
# b'\xa8\x9fer\x00\x12\x08\x01\x00\x00\x01f\x80\x03\x00\x00\x00\x00' + bytes(range(0x4b, 0xfe)) + b'\xff'
|
||||
TABLE_DATA = (
|
||||
# First line corresponds to codes in range(0x4b, 0x50).
|
||||
b"\x00\x00N\xba\x00\x08Nu\x00\x0c"
|
||||
# All following lines correspond to 8 codes each.
|
||||
b"N\xad S/\x0ba\x00\x00\x10p\x00/\x00Hn"
|
||||
b" P n/.\xff\xfcH\xe7?<\x00\x04\xff\xf8"
|
||||
b"/\x0c \x06N\xedNV hN^\x00\x01X\x8f"
|
||||
b"O\xef\x00\x02\x00\x18`\x00\xff\xffP\x8fN\x90\x00\x06"
|
||||
b"&n\x00\x14\xff\xf4L\xee\x00\n\x00\x0eA\xeeL\xdf"
|
||||
b"H\xc0\xff\xf0-@\x00\x120.p\x01/( T"
|
||||
b"g\x00\x00 \x00\x1c _\x18\x00&oHx\x00\x16"
|
||||
b"A\xfa0<(@r\x00(n \x0cf\x00 k"
|
||||
b"/\x07U\x8f\x00(\xff\xfe\xff\xec\"\xd8 \x0b\x00\x0f"
|
||||
b"Y\x8f/<\xff\x00\x01\x18\x81\xe1J\x00N\xb0\xff\xe8"
|
||||
b"H\xc7\x00\x03\x00\"\x00\x07\x00\x1ag\x06g\x08N\xf9"
|
||||
b"\x00$ x\x08\x00f\x04\x00*N\xd00(&_"
|
||||
b"g\x04\x000C\xee?\x00 \x1f\x00\x1e\xff\xf6 ."
|
||||
b"B\xa7 \x07\xff\xfa`\x02=@\x0c@f\x06\x00&"
|
||||
b"-H/\x01p\xff`\x04\x18\x80J@\x00@\x00,"
|
||||
b"/\x08\x00\x11\xff\xe4!@&@\xff\xf2BnN\xb9"
|
||||
b"=|\x008\x00\r`\x06B. <g\x0c-h"
|
||||
b"f\x08J.J\xae\x00.H@\"_\"\x00g\n"
|
||||
b"0\x07Bg\x002 (\x00\tHz\x02\x00/+"
|
||||
b"\x00\x05\"nf\x02\xe5\x80g\x0ef\n\x00P>\x00"
|
||||
b"f\x0c.\x00\xff\xee m @\xff\xe0S@`\x08"
|
||||
# Last line corresponds to codes in range(0xf8, 0xfe).
|
||||
b"\x04\x80\x00h\x0b|D\x00A\xe8HA"
|
||||
)
|
||||
# Note: index 0 in this table corresponds to code 0x4b, index 1 to 0x4c, etc.
|
||||
TABLE = [TABLE_DATA[i:i + 2] for i in range(0, len(TABLE_DATA), 2)]
|
||||
assert len(TABLE) == len(range(0x4b, 0xfe))
|
||||
|
||||
|
||||
def decompress(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes:
|
||||
"""Decompress compressed data in the format used by 'dcmp' (0)."""
|
||||
|
||||
prev_literals = []
|
||||
decompressed = b""
|
||||
|
||||
i = 0
|
||||
|
||||
while i < len(data):
|
||||
byte = data[i]
|
||||
if debug:
|
||||
print(f"Tag byte 0x{byte:>02x}, at 0x{i:x}, decompressing to 0x{len(decompressed):x}")
|
||||
|
||||
if byte in range(0x00, 0x20):
|
||||
# Literal byte sequence.
|
||||
if byte in (0x00, 0x10):
|
||||
# The length of the literal data is stored in the next byte.
|
||||
count_div2 = data[i+1]
|
||||
begin = i + 2
|
||||
else:
|
||||
# The length of the literal data is stored in the low nibble of the tag byte.
|
||||
count_div2 = byte >> 0 & 0xf
|
||||
begin = i + 1
|
||||
end = begin + 2*count_div2
|
||||
# Controls whether or not the literal is stored so that it can be referenced again later.
|
||||
do_store = byte >= 0x10
|
||||
literal = data[begin:end]
|
||||
if debug:
|
||||
print(f"Literal (storing: {do_store})")
|
||||
print(f"\t-> {literal}")
|
||||
decompressed += literal
|
||||
if do_store:
|
||||
if debug:
|
||||
print(f"\t-> stored as literal number 0x{len(prev_literals):x}")
|
||||
prev_literals.append(literal)
|
||||
i = end
|
||||
elif byte in (0x20, 0x21):
|
||||
# Backreference to a previous literal, 2-byte form.
|
||||
# This can reference literals with index in range(0x28, 0x228).
|
||||
table_index = 0x28 + ((byte - 0x20) << 8 | data[i+1])
|
||||
i += 2
|
||||
if debug:
|
||||
print(f"Backreference (2-byte form) to 0x{table_index:>02x}")
|
||||
literal = prev_literals[table_index]
|
||||
if debug:
|
||||
print(f"\t-> {literal}")
|
||||
decompressed += literal
|
||||
elif byte == 0x22:
|
||||
# Backreference to a previous literal, 3-byte form.
|
||||
# This can reference any literal with index 0x28 and higher, but is only necessary for literals with index 0x228 and higher.
|
||||
table_index = 0x28 + int.from_bytes(data[i+1:i+3], "big", signed=False)
|
||||
i += 3
|
||||
if debug:
|
||||
print(f"Backreference (3-byte form) to 0x{table_index:>02x}")
|
||||
literal = prev_literals[table_index]
|
||||
if debug:
|
||||
print(f"\t-> {literal}")
|
||||
decompressed += literal
|
||||
elif byte in range(0x23, 0x4b):
|
||||
# Backreference to a previous literal, 1-byte form.
|
||||
# This can reference literals with indices in range(0x28).
|
||||
table_index = byte - 0x23
|
||||
i += 1
|
||||
if debug:
|
||||
print(f"Backreference (1-byte form) to 0x{table_index:>02x}")
|
||||
literal = prev_literals[table_index]
|
||||
if debug:
|
||||
print(f"\t-> {literal}")
|
||||
decompressed += literal
|
||||
elif byte in range(0x4b, 0xfe):
|
||||
# Reference into a fixed table of two-byte literals.
|
||||
# All compressed resources use the same table.
|
||||
table_index = byte - 0x4b
|
||||
i += 1
|
||||
if debug:
|
||||
print(f"Fixed table reference to 0x{table_index:>02x}")
|
||||
entry = TABLE[table_index]
|
||||
if debug:
|
||||
print(f"\t-> {entry}")
|
||||
decompressed += entry
|
||||
elif byte == 0xfe:
|
||||
# Extended code, whose meaning is controlled by the following byte.
|
||||
|
||||
i += 1
|
||||
kind = data[i]
|
||||
if debug:
|
||||
print(f"Extended code: 0x{kind:>02x}")
|
||||
i += 1
|
||||
|
||||
if kind == 0x00:
|
||||
# Compact representation of (part of) a segment loader jump table, as used in 'CODE' (0) resources.
|
||||
|
||||
if debug:
|
||||
print(f"Segment loader jump table entries")
|
||||
|
||||
# All generated jump table entries have the same segment number.
|
||||
segment_number_int, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
if debug:
|
||||
print(f"\t-> segment number: {segment_number_int:#x}")
|
||||
|
||||
# The tail part of all jump table entries (i. e. everything except for the address).
|
||||
entry_tail = b"?<" + segment_number_int.to_bytes(2, "big", signed=True) + b"\xa9\xf0"
|
||||
if debug:
|
||||
print(f"\t-> tail of first entry: {entry_tail}")
|
||||
# The tail is output once *without* an address in front, i. e. the first entry's address must be generated manually by a previous code.
|
||||
decompressed += entry_tail
|
||||
|
||||
count, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
if count <= 0:
|
||||
raise common.DecompressError(f"Jump table entry count must be greater than 0, not {count}")
|
||||
|
||||
# The second entry's address is stored explicitly.
|
||||
current_int, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
if debug:
|
||||
print(f"-> address of second entry: {current_int:#x}")
|
||||
entry = current_int.to_bytes(2, "big", signed=False) + entry_tail
|
||||
if debug:
|
||||
print(f"-> second entry: {entry}")
|
||||
decompressed += entry
|
||||
|
||||
for _ in range(1, count):
|
||||
# All further entries' addresses are stored as differences relative to the previous entry's address.
|
||||
diff, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
# For some reason, each difference is 6 higher than it should be.
|
||||
diff -= 6
|
||||
|
||||
# Simulate 16-bit integer wraparound.
|
||||
current_int = (current_int + diff) & 0xffff
|
||||
if debug:
|
||||
print(f"\t-> difference {diff:#x}: {current_int:#x}")
|
||||
entry = current_int.to_bytes(2, "big", signed=False) + entry_tail
|
||||
if debug:
|
||||
print(f"\t-> {entry}")
|
||||
decompressed += entry
|
||||
elif kind in (0x02, 0x03):
|
||||
# Repeat 1 or 2 bytes a certain number of times.
|
||||
|
||||
if kind == 0x02:
|
||||
byte_count = 1
|
||||
elif kind == 0x03:
|
||||
byte_count = 2
|
||||
else:
|
||||
raise AssertionError()
|
||||
|
||||
if debug:
|
||||
print(f"Repeat {byte_count}-byte value")
|
||||
|
||||
# The byte(s) to repeat, stored as a variable-length integer. The value is treated as unsigned, i. e. the integer is never negative.
|
||||
to_repeat_int, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
try:
|
||||
to_repeat = to_repeat_int.to_bytes(byte_count, "big", signed=False)
|
||||
except OverflowError:
|
||||
raise common.DecompressError(f"Value to repeat out of range for {byte_count}-byte repeat: {to_repeat_int:#x}")
|
||||
|
||||
count_m1, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
count = count_m1 + 1
|
||||
if count <= 0:
|
||||
raise common.DecompressError(f"Repeat count must be positive: {count}")
|
||||
|
||||
repeated = to_repeat * count
|
||||
if debug:
|
||||
print(f"\t-> {to_repeat} * {count}: {repeated}")
|
||||
decompressed += repeated
|
||||
elif kind == 0x04:
|
||||
# A sequence of 16-bit signed integers, with each integer encoded as a difference relative to the previous integer. The first integer is stored explicitly.
|
||||
|
||||
if debug:
|
||||
print(f"Difference-encoded 16-bit integers")
|
||||
|
||||
# The first integer is stored explicitly, as a signed value.
|
||||
initial_int, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
try:
|
||||
initial = initial_int.to_bytes(2, "big", signed=True)
|
||||
except OverflowError:
|
||||
raise common.DecompressError(f"Initial value out of range for 16-bit integer difference encoding: {initial_int:#x}")
|
||||
if debug:
|
||||
print(f"\t-> initial: {initial}")
|
||||
decompressed += initial
|
||||
|
||||
count, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
if count < 0:
|
||||
raise common.DecompressError(f"Count cannot be negative: {count}")
|
||||
|
||||
# To make the following calculations simpler, the signed initial_int value is converted to unsigned.
|
||||
current_int = initial_int & 0xffff
|
||||
for _ in range(count):
|
||||
# The difference to the previous integer is stored as an 8-bit signed integer.
|
||||
# The usual variable-length integer format is *not* used here.
|
||||
diff = int.from_bytes(data[i:i+1], "big", signed=True)
|
||||
i += 1
|
||||
|
||||
# Simulate 16-bit integer wraparound.
|
||||
current_int = (current_int + diff) & 0xffff
|
||||
current = current_int.to_bytes(2, "big", signed=False)
|
||||
if debug:
|
||||
print(f"\t-> difference {diff:#x}: {current}")
|
||||
decompressed += current
|
||||
elif kind == 0x06:
|
||||
# A sequence of 32-bit signed integers, with each integer encoded as a difference relative to the previous integer. The first integer is stored explicitly.
|
||||
|
||||
if debug:
|
||||
print(f"Difference-encoded 16-bit integers")
|
||||
|
||||
# The first integer is stored explicitly, as a signed value.
|
||||
initial_int, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
try:
|
||||
initial = initial_int.to_bytes(4, "big", signed=True)
|
||||
except OverflowError:
|
||||
raise common.DecompressError(f"Initial value out of range for 32-bit integer difference encoding: {initial_int:#x}")
|
||||
if debug:
|
||||
print(f"\t-> initial: {initial}")
|
||||
decompressed += initial
|
||||
|
||||
count, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
assert count >= 0
|
||||
|
||||
# To make the following calculations simpler, the signed initial_int value is converted to unsigned.
|
||||
current_int = initial_int & 0xffffffff
|
||||
for _ in range(count):
|
||||
# The difference to the previous integer is stored as a variable-length integer, whose value may be negative.
|
||||
diff, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
|
||||
# Simulate 32-bit integer wraparound.
|
||||
current_int = (current_int + diff) & 0xffffffff
|
||||
current = current_int.to_bytes(4, "big", signed=False)
|
||||
if debug:
|
||||
print(f"\t-> difference {diff:#x}: {current}")
|
||||
decompressed += current
|
||||
else:
|
||||
raise common.DecompressError(f"Unknown extended code: 0x{kind:>02x}")
|
||||
elif byte == 0xff:
|
||||
# End of data marker, always occurs exactly once as the last byte of the compressed data.
|
||||
if debug:
|
||||
print("End marker")
|
||||
if i != len(data) - 1:
|
||||
raise common.DecompressError(f"End marker reached at {i}, before the expected end of data at {len(data) - 1}")
|
||||
i += 1
|
||||
else:
|
||||
raise common.DecompressError(f"Unknown tag byte: 0x{data[i]:>02x}")
|
||||
|
||||
if decompressed_length % 2 != 0 and len(decompressed) == decompressed_length + 1:
|
||||
# Special case: if the decompressed data length stored in the header is odd and one less than the length of the actual decompressed data, drop the last byte.
|
||||
# This is necessary because nearly all codes generate data in groups of 2 or 4 bytes, so it is basically impossible to represent data with an odd length using this compression format.
|
||||
decompressed = decompressed[:-1]
|
||||
|
||||
return decompressed
|
151
rsrcfork/compress/dcmp1.py
Normal file
151
rsrcfork/compress/dcmp1.py
Normal file
@ -0,0 +1,151 @@
|
||||
from . import common
|
||||
|
||||
# Lookup table for codes in range(0xd5, 0xfe).
|
||||
# This table was obtained by decompressing a manually created compressed resource with the following contents:
|
||||
# b'\xa8\x9fer\x00\x12\x08\x01\x00\x00\x00R\x80\x03\x00\x01\x00\x00' + bytes(range(0xd5, 0xfe)) + b'\xff'
|
||||
TABLE_DATA = (
|
||||
# First line corresponds to codes in range(0xd5, 0xd8).
|
||||
b"\x00\x00\x00\x01\x00\x02"
|
||||
# All following lines correspond to 8 codes each.
|
||||
b"\x00\x03.\x01>\x01\x01\x01\x1e\x01\xff\xff\x0e\x011\x00"
|
||||
b"\x11\x12\x01\x0732\x129\xed\x10\x01'#\"\x017"
|
||||
b"\x07\x06\x01\x17\x01#\x00\xff\x00/\x07\x0e\xfd<\x015"
|
||||
b"\x01\x15\x01\x02\x00\x07\x00>\x05\xd5\x02\x01\x06\x07\x07\x08"
|
||||
# Last line corresponds to codes in range(0xf8, 0xfe).
|
||||
b"0\x01\x013\x00\x10\x17\x167>67"
|
||||
)
|
||||
# Note: index 0 in this table corresponds to code 0xd5, index 1 to 0xd6, etc.
|
||||
TABLE = [TABLE_DATA[i:i + 2] for i in range(0, len(TABLE_DATA), 2)]
|
||||
assert len(TABLE) == len(range(0xd5, 0xfe))
|
||||
|
||||
|
||||
def decompress(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes:
|
||||
"""Decompress compressed data in the format used by 'dcmp' (1)."""
|
||||
|
||||
prev_literals = []
|
||||
decompressed = b""
|
||||
|
||||
i = 0
|
||||
|
||||
while i < len(data):
|
||||
byte = data[i]
|
||||
if debug:
|
||||
print(f"Tag byte 0x{byte:>02x}, at 0x{i:x}, decompressing to 0x{len(decompressed):x}")
|
||||
|
||||
if byte in range(0x00, 0x20):
|
||||
# Literal byte sequence, 1-byte header.
|
||||
# The length of the literal data is stored in the low nibble of the tag byte.
|
||||
count = (byte >> 0 & 0xf) + 1
|
||||
begin = i + 1
|
||||
end = begin + count
|
||||
# Controls whether or not the literal is stored so that it can be referenced again later.
|
||||
do_store = byte >= 0x10
|
||||
literal = data[begin:end]
|
||||
if debug:
|
||||
print(f"Literal (1-byte header, storing: {do_store})")
|
||||
print(f"\t-> {literal}")
|
||||
decompressed += literal
|
||||
if do_store:
|
||||
if debug:
|
||||
print(f"\t-> stored as literal number 0x{len(prev_literals):x}")
|
||||
prev_literals.append(literal)
|
||||
i = end
|
||||
elif byte in range(0x20, 0xd0):
|
||||
# Backreference to a previous literal, 1-byte form.
|
||||
# This can reference literals with indices in range(0xb0).
|
||||
table_index = byte - 0x20
|
||||
i += 1
|
||||
if debug:
|
||||
print(f"Backreference (1-byte form) to 0x{table_index:>02x}")
|
||||
literal = prev_literals[table_index]
|
||||
if debug:
|
||||
print(f"\t-> {literal}")
|
||||
decompressed += literal
|
||||
elif byte in (0xd0, 0xd1):
|
||||
# Literal byte sequence, 2-byte header.
|
||||
# The length of the literal data is stored in the following byte.
|
||||
count = data[i+1]
|
||||
begin = i + 2
|
||||
end = begin + count
|
||||
# Controls whether or not the literal is stored so that it can be referenced again later.
|
||||
do_store = byte == 0xd1
|
||||
literal = data[begin:end]
|
||||
if debug:
|
||||
print(f"Literal (2-byte header, storing: {do_store})")
|
||||
print(f"\t-> {literal}")
|
||||
decompressed += literal
|
||||
if do_store:
|
||||
if debug:
|
||||
print(f"\t-> stored as literal number 0x{len(prev_literals):x}")
|
||||
prev_literals.append(literal)
|
||||
i = end
|
||||
elif byte == 0xd2:
|
||||
# Backreference to a previous literal, 2-byte form.
|
||||
# This can reference literals with indices in range(0xb0, 0x1b0).
|
||||
table_index = data[i+1] + 0xb0
|
||||
i += 2
|
||||
if debug:
|
||||
print(f"Backreference (2-byte form) to 0x{table_index:>02x}")
|
||||
literal = prev_literals[table_index]
|
||||
if debug:
|
||||
print(f"\t-> {literal}")
|
||||
decompressed += literal
|
||||
elif byte in range(0xd5, 0xfe):
|
||||
# Reference into a fixed table of two-byte literals.
|
||||
# All compressed resources use the same table.
|
||||
table_index = byte - 0xd5
|
||||
i += 1
|
||||
if debug:
|
||||
print(f"Fixed table reference to 0x{table_index:>02x}")
|
||||
entry = TABLE[table_index]
|
||||
if debug:
|
||||
print(f"\t-> {entry}")
|
||||
decompressed += entry
|
||||
elif byte == 0xfe:
|
||||
# Extended code, whose meaning is controlled by the following byte.
|
||||
|
||||
i += 1
|
||||
kind = data[i]
|
||||
if debug:
|
||||
print(f"Extended code: 0x{kind:>02x}")
|
||||
i += 1
|
||||
|
||||
if kind == 0x02:
|
||||
# Repeat 1 byte a certain number of times.
|
||||
|
||||
byte_count = 1 # Unlike with 'dcmp' (0) compression, there doesn't appear to be a 2-byte repeat (or if there is, it's never used in practice).
|
||||
|
||||
if debug:
|
||||
print(f"Repeat {byte_count}-byte value")
|
||||
|
||||
# The byte(s) to repeat, stored as a variable-length integer. The value is treated as unsigned, i. e. the integer is never negative.
|
||||
to_repeat_int, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
try:
|
||||
to_repeat = to_repeat_int.to_bytes(byte_count, "big", signed=False)
|
||||
except OverflowError:
|
||||
raise common.DecompressError(f"Value to repeat out of range for {byte_count}-byte repeat: {to_repeat_int:#x}")
|
||||
|
||||
count_m1, length = common._read_variable_length_integer(data, i)
|
||||
i += length
|
||||
count = count_m1 + 1
|
||||
if count <= 0:
|
||||
raise common.DecompressError(f"Repeat count must be positive: {count}")
|
||||
|
||||
repeated = to_repeat * count
|
||||
if debug:
|
||||
print(f"\t-> {to_repeat} * {count}: {repeated}")
|
||||
decompressed += repeated
|
||||
else:
|
||||
raise common.DecompressError(f"Unknown extended code: 0x{kind:>02x}")
|
||||
elif byte == 0xff:
|
||||
# End of data marker, always occurs exactly once as the last byte of the compressed data.
|
||||
if debug:
|
||||
print("End marker")
|
||||
if i != len(data) - 1:
|
||||
raise common.DecompressError(f"End marker reached at {i}, before the expected end of data at {len(data) - 1}")
|
||||
i += 1
|
||||
else:
|
||||
raise common.DecompressError(f"Unknown tag byte: 0x{data[i]:>02x}")
|
||||
|
||||
return decompressed
|
175
rsrcfork/compress/dcmp2.py
Normal file
175
rsrcfork/compress/dcmp2.py
Normal file
@ -0,0 +1,175 @@
|
||||
import enum
|
||||
import struct
|
||||
import typing
|
||||
|
||||
from . import common
|
||||
|
||||
|
||||
# Parameters for a 'dcmp' (2)-compressed resource.
|
||||
# 2 bytes: Unknown meaning, doesn't appear to have any effect on the decompression algorithm. Usually zero, sometimes set to a small integer (< 10). On 'lpch' resources, the value is always nonzero, and sometimes larger than usual.
|
||||
# 1 byte: Number of entries in the custom lookup table minus one. Set to zero if the default lookup table is used.
|
||||
# 1 byte: Flags. See the ParameterFlags enum below for details.
|
||||
STRUCT_PARAMETERS = struct.Struct(">HBB")
|
||||
|
||||
# Default lookup table.
|
||||
# If the custom table flag is set, a custom table (usually with fewer than 256 entries) is used instead of this one.
|
||||
# This table was obtained by decompressing a manually created compressed resource with the following contents:
|
||||
# b'\xa8\x9fer\x00\x12\t\x01\x00\x00\x02\x00\x00\x02\x00\x00\x00\x00' + bytes(range(256))
|
||||
DEFAULT_TABLE_DATA = (
|
||||
b"\x00\x00\x00\x08N\xba nNu\x00\x0c\x00\x04p\x00"
|
||||
b"\x00\x10\x00\x02Hn\xff\xfc`\x00\x00\x01H\xe7/."
|
||||
b"NV\x00\x06N^/\x00a\x00\xff\xf8/\x0b\xff\xff"
|
||||
b"\x00\x14\x00\n\x00\x18 _\x00\x0e P?<\xff\xf4"
|
||||
b"L\xee0.g\x00L\xdf&n\x00\x12\x00\x1cBg"
|
||||
b"\xff\xf00</\x0c\x00\x03N\xd0\x00 p\x01\x00\x16"
|
||||
b"-@H\xc0 xr\x00X\x8ff\x00O\xefB\xa7"
|
||||
b"g\x06\xff\xfaU\x8f(n?\x00\xff\xfe/<g\x04"
|
||||
b"Y\x8f k\x00$ \x1fA\xfa\x81\xe1f\x04g\x08"
|
||||
b"\x00\x1aN\xb9P\x8f .\x00\x07N\xb0\xff\xf2=@"
|
||||
b"\x00\x1e hf\x06\xff\xf6N\xf9\x08\x00\x0c@=|"
|
||||
b"\xff\xec\x00\x05 <\xff\xe8\xde\xfcJ.\x000\x00("
|
||||
b"/\x08 \x0b`\x02Bn-H S @\x18\x00"
|
||||
b"`\x04A\xee/(/\x01g\nH@ \x07f\x08"
|
||||
b"\x01\x18/\x070(?.0+\"n/+\x00,"
|
||||
b"g\x0c\"_`\x06\x00\xff0\x07\xff\xeeS@\x00@"
|
||||
b"\xff\xe4J@f\n\x00\x0fN\xadp\xff\"\xd8Hk"
|
||||
b"\x00\" Kg\x0eJ\xaeN\x90\xff\xe0\xff\xc0\x00*"
|
||||
b"'@g\x02Q\xc8\x02\xb6Hz\"x\xb0n\xff\xe6"
|
||||
b"\x00\t2.>\x00HA\xff\xeaC\xeeNqt\x00"
|
||||
b"/, l\x00<\x00&\x00P\x18\x800\x1f\"\x00"
|
||||
b"f\x0c\xff\xda\x008f\x020, \x0c-nB@"
|
||||
b"\xff\xe2\xa9\xf0\xff\x007|\xe5\x80\xff\xdcHhYO"
|
||||
b"\x004>\x1f`\x08/\x06\xff\xde`\np\x02\x002"
|
||||
b"\xff\xcc\x00\x80\"Q\x10\x1f1|\xa0)\xff\xd8R@"
|
||||
b"\x01\x00g\x10\xa0#\xff\xce\xff\xd4 \x06Hx\x00."
|
||||
b"POC\xfag\x12v\x00A\xe8Jn \xd9\x00Z"
|
||||
b"\x7f\xffQ\xca\x00\\.\x00\x02@H\xc7g\x14\x0c\x80"
|
||||
b".\x9f\xff\xd6\x80\x00\x10\x00HBJk\xff\xd2\x00H"
|
||||
b"JGN\xd1 o\x00A`\x0c*xB.2\x00"
|
||||
b"etg\x16\x00DHm \x08Hl\x0b|&@"
|
||||
b"\x04\x00\x00h m\x00\r*@\x00\x0b\x00>\x02 "
|
||||
)
|
||||
DEFAULT_TABLE = [DEFAULT_TABLE_DATA[i:i + 2] for i in range(0, len(DEFAULT_TABLE_DATA), 2)]
|
||||
|
||||
|
||||
class ParameterFlags(enum.Flag):
|
||||
TAGGED = 1 << 1 # The compressed data is tagged, meaning that it consists of "blocks" of a tag byte followed by 8 table references and/or literals. See comments in the decompress function for details.
|
||||
CUSTOM_TABLE = 1 << 0 # A custom lookup table is included before the compressed data, which is used instead of the default table.
|
||||
|
||||
|
||||
def _split_bits(i: int) -> typing.Tuple[bool, bool, bool, bool, bool, bool, bool, bool]:
|
||||
"""Split a byte (an int) into its 8 bits (a tuple of 8 bools)."""
|
||||
|
||||
assert i in range(256)
|
||||
return (
|
||||
bool(i & (1 << 7)),
|
||||
bool(i & (1 << 6)),
|
||||
bool(i & (1 << 5)),
|
||||
bool(i & (1 << 4)),
|
||||
bool(i & (1 << 3)),
|
||||
bool(i & (1 << 2)),
|
||||
bool(i & (1 << 1)),
|
||||
bool(i & (1 << 0)),
|
||||
)
|
||||
|
||||
|
||||
def _decompress_system_untagged(data: bytes, decompressed_length: int, table: typing.Sequence[bytes], *, debug: bool=False) -> bytes:
|
||||
parts = []
|
||||
i = 0
|
||||
while i < len(data):
|
||||
if i == len(data) - 1 and decompressed_length % 2 != 0:
|
||||
# Special case: if we are at the last byte of the compressed data, and the decompressed data has an odd length, the last byte is a single literal byte, and not a table reference.
|
||||
if debug:
|
||||
print(f"Last byte: {data[-1:]}")
|
||||
parts.append(data[-1:])
|
||||
break
|
||||
|
||||
# Compressed data is untagged, every byte is a table reference.
|
||||
if debug:
|
||||
print(f"Reference: {data[i]} -> {table[data[i]]}")
|
||||
parts.append(table[data[i]])
|
||||
i += 1
|
||||
|
||||
return b"".join(parts)
|
||||
|
||||
def _decompress_system_tagged(data: bytes, decompressed_length: int, table: typing.Sequence[bytes], *, debug: bool=False) -> bytes:
|
||||
parts = []
|
||||
i = 0
|
||||
while i < len(data):
|
||||
if i == len(data) - 1 and decompressed_length % 2 != 0:
|
||||
# Special case: if we are at the last byte of the compressed data, and the decompressed data has an odd length, the last byte is a single literal byte, and not a tag or a table reference.
|
||||
if debug:
|
||||
print(f"Last byte: {data[-1:]}")
|
||||
parts.append(data[-1:])
|
||||
break
|
||||
|
||||
# Compressed data is tagged, each tag byte is followed by 8 table references and/or literals.
|
||||
tag = data[i]
|
||||
if debug:
|
||||
print(f"Tag: 0b{tag:>08b}")
|
||||
i += 1
|
||||
for is_ref in _split_bits(tag):
|
||||
if is_ref:
|
||||
# This is a table reference (a single byte that is an index into the table).
|
||||
if debug:
|
||||
print(f"Reference: {data[i]} -> {table[data[i]]}")
|
||||
parts.append(table[data[i]])
|
||||
i += 1
|
||||
else:
|
||||
# This is a literal (two uncompressed bytes that are literally copied into the output).
|
||||
# Note: if i == len(data)-1, the literal is actually only a single byte long.
|
||||
# This case is handled automatically - the slice extends one byte past the end of the data, and only one byte is returned.
|
||||
if debug:
|
||||
print(f"Literal: {data[i:i+2]}")
|
||||
parts.append(data[i:i + 2])
|
||||
i += 2
|
||||
|
||||
# If the end of the compressed data is reached in the middle of a chunk, all further tag bits are ignored (they should be zero) and decompression ends.
|
||||
if i >= len(data):
|
||||
break
|
||||
|
||||
return b"".join(parts)
|
||||
|
||||
|
||||
def decompress(data: bytes, decompressed_length: int, parameters: bytes, *, debug: bool=False) -> bytes:
|
||||
"""Decompress compressed data in the format used by 'dcmp' (2)."""
|
||||
|
||||
unknown, table_count_m1, flags_raw = STRUCT_PARAMETERS.unpack(parameters)
|
||||
|
||||
if debug:
|
||||
print(f"Value of unknown parameter field: 0x{unknown:>04x}")
|
||||
|
||||
table_count = table_count_m1 + 1
|
||||
if debug:
|
||||
print(f"Table has {table_count} entries")
|
||||
|
||||
try:
|
||||
flags = ParameterFlags(flags_raw)
|
||||
except ValueError:
|
||||
raise common.DecompressError(f"Unsupported flags set: 0b{flags_raw:>08b}, currently only bits 0 and 1 are supported")
|
||||
|
||||
if debug:
|
||||
print(f"Flags: {flags}")
|
||||
|
||||
if ParameterFlags.CUSTOM_TABLE in flags:
|
||||
table_start = 0
|
||||
data_start = table_start + table_count * 2
|
||||
table = []
|
||||
for i in range(table_start, data_start, 2):
|
||||
table.append(data[i:i + 2])
|
||||
if debug:
|
||||
print(f"Using custom table: {table}")
|
||||
else:
|
||||
if table_count_m1 != 0:
|
||||
raise common.DecompressError(f"table_count_m1 field is {table_count_m1}, but must be zero when the default table is used")
|
||||
table = DEFAULT_TABLE
|
||||
data_start = 0
|
||||
if debug:
|
||||
print("Using default table")
|
||||
|
||||
if ParameterFlags.TAGGED in flags:
|
||||
decompress_func = _decompress_system_tagged
|
||||
else:
|
||||
decompress_func = _decompress_system_untagged
|
||||
|
||||
return decompress_func(data[data_start:], decompressed_length, table, debug=debug)
|
44
setup.cfg
Normal file
44
setup.cfg
Normal file
@ -0,0 +1,44 @@
|
||||
[metadata]
|
||||
name = rsrcfork
|
||||
version = attr: rsrcfork.__version__
|
||||
url = https://github.com/dgelessus/python-rsrcfork
|
||||
author = dgelessus
|
||||
classifiers =
|
||||
Development Status :: 4 - Beta
|
||||
Intended Audience :: Developers
|
||||
Topic :: Software Development :: Disassemblers
|
||||
Topic :: System
|
||||
Topic :: Utilities
|
||||
License :: OSI Approved :: MIT License
|
||||
Operating System :: MacOS :: MacOS 9
|
||||
Operating System :: MacOS :: MacOS X
|
||||
Operating System :: OS Independent
|
||||
Programming Language :: Python
|
||||
Programming Language :: Python :: 3
|
||||
Programming Language :: Python :: 3 :: Only
|
||||
Programming Language :: Python :: 3.6
|
||||
Programming Language :: Python :: 3.7
|
||||
license = MIT
|
||||
license_file = LICENSE
|
||||
description = A pure Python, cross-platform library/tool for reading Macintosh resource data, as stored in resource forks and ``.rsrc`` files
|
||||
long_description = file: README.rst
|
||||
long_description_content_type = text/x-rst
|
||||
keywords =
|
||||
rsrc
|
||||
fork
|
||||
resource
|
||||
manager
|
||||
macintosh
|
||||
mac
|
||||
macos
|
||||
|
||||
[options]
|
||||
setup_requires =
|
||||
setuptools>=39.2.0
|
||||
python_requires = >=3.6
|
||||
packages =
|
||||
rsrcfork
|
||||
|
||||
[options.entry_points]
|
||||
console_scripts =
|
||||
rsrcfork = rsrcfork.__main__:main
|
28
setup.py
28
setup.py
@ -2,30 +2,4 @@
|
||||
|
||||
import setuptools
|
||||
|
||||
with open("README.rst", "r", encoding="utf-8") as f:
|
||||
long_description = f.read()
|
||||
|
||||
setuptools.setup(
|
||||
name="rsrcfork",
|
||||
version="1.1.1",
|
||||
description="A pure Python library for reading old Macintosh resource manager data",
|
||||
long_description=long_description,
|
||||
url="https://github.com/dgelessus/python-rsrcfork",
|
||||
author="dgelessus",
|
||||
license="MIT",
|
||||
classifiers=[
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Developers",
|
||||
"Topic :: Software Development :: Libraries :: Python Modules",
|
||||
"Topic :: Utilities",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.6",
|
||||
],
|
||||
keywords="rsrc fork resource manager macintosh mac macos",
|
||||
python_requires=">=3.6",
|
||||
py_modules=["rsrcfork"],
|
||||
)
|
||||
setuptools.setup()
|
||||
|
Reference in New Issue
Block a user