Add initial support for compressed resources

Not all compression formats are supported yet.
This commit is contained in:
dgelessus 2019-07-14 02:16:49 +02:00
parent 9adb188624
commit 2fb1d02064
5 changed files with 298 additions and 26 deletions

View File

@ -121,7 +121,11 @@ Changelog
Version 1.2.0 (next version)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* (no changes yet)
* Added support for compressed resources.
* Compressed resource data is automatically decompressed, both in the Python API and on the command like.
* This is technically a breaking change, since in previous versions the compressed resource data was returned directly. However, this change will not affect end users negatively, unless one has already implemented custom handling for compressed resources.
* Currently, only one of the two standard Mac OS resource compression format is supported. Attempting to access a resource compressed in an unsupported format results in a ``DecompressError``.
* To access the raw resource data as stored in the file, without automatic decompression, use the ``res.data_raw`` attribute (for the Python API), or the ``--no-decompress`` option (for the command-line interface). This can be used to read the resource data in its compressed form, even if the compression format is not supported.
Version 1.1.3.post1
^^^^^^^^^^^^^^^^^^^

View File

@ -12,10 +12,11 @@ __all__ = [
"ResourceAttrs",
"ResourceFile",
"ResourceFileAttrs",
"compress",
"open",
]
from . import api
from . import api, compress
from .api import Resource, ResourceAttrs, ResourceFile, ResourceFileAttrs
# noinspection PyShadowingBuiltins

View File

@ -5,7 +5,7 @@ import sys
import textwrap
import typing
from . import __version__, api
from . import __version__, api, compress
# Translation table to replace ASCII non-printable characters with periods.
_TRANSLATE_NONPRINTABLES = {k: "." for k in [*range(0x20), 0x7f]}
@ -186,6 +186,7 @@ def main():
ap.add_argument("--version", action="version", version=__version__, help="Display version information and exit")
ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview")
ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)")
ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not decompress compressed resources, output compressed resource data as-is")
ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")
@ -239,6 +240,11 @@ def main():
sys.exit(1)
for res in resources:
if ns.decompress:
data = res.data
else:
data = res.data_raw
if ns.format == "dump":
# Human-readable info and hex dump
@ -254,24 +260,37 @@ def main():
else:
attrdesc = "no attributes"
if ns.decompress:
length_desc = f"{len(res.data)} bytes (stored in {len(res.data_raw)} bytes)"
else:
length_desc = f"{len(data)} bytes"
restype = _bytes_escape(res.resource_type, quote="'")
print(f"Resource '{restype}' ({res.resource_id}), {name}, {attrdesc}, {len(res.data)} bytes:")
_hexdump(res.data)
print(f"Resource '{restype}' ({res.resource_id}), {name}, {attrdesc}, {length_desc}:")
_hexdump(data)
print()
elif ns.format == "hex":
# Data only as hex
_raw_hexdump(res.data)
_raw_hexdump(data)
elif ns.format == "raw":
# Data only as raw bytes
sys.stdout.buffer.write(res.data)
sys.stdout.buffer.write(data)
elif ns.format == "derez":
# Like DeRez with no resource definitions
attrs = [_REZ_ATTR_NAMES[attr] for attr in _decompose_flags(res.attributes)]
if None in attrs:
attrs[:] = [f"${res.attributes.value:02X}"]
attrs = list(_decompose_flags(res.attributes))
if ns.decompress and api.ResourceAttrs.resCompressed in attrs:
attrs.remove(api.ResourceAttrs.resCompressed)
attrs_comment = " /* was compressed */"
else:
attrs_comment = ""
attr_descs = [_REZ_ATTR_NAMES[attr] for attr in attrs]
if None in attr_descs:
attr_descs[:] = [f"${res.attributes.value:02X}"]
parts = [str(res.resource_id)]
@ -279,24 +298,24 @@ def main():
name = _bytes_escape(res.name, quote='"')
parts.append(f'"{name}"')
parts += attrs
parts += attr_descs
restype = _bytes_escape(res.resource_type, quote="'")
print(f"data '{restype}' ({', '.join(parts)}) {{")
print(f"data '{restype}' ({', '.join(parts)}{attrs_comment}) {{")
for i in range(0, len(res.data), 16):
for i in range(0, len(data), 16):
# Two-byte grouping is really annoying to implement.
groups = []
for j in range(0, 16, 2):
if i+j >= len(res.data):
if i+j >= len(data):
break
elif i+j+1 >= len(res.data):
groups.append(f"{res.data[i+j]:02X}")
elif i+j+1 >= len(data):
groups.append(f"{data[i+j]:02X}")
else:
groups.append(f"{res.data[i+j]:02X}{res.data[i+j+1]:02X}")
groups.append(f"{data[i+j]:02X}{data[i+j+1]:02X}")
s = f'$"{" ".join(groups)}"'
comment = "/* " + res.data[i:i + 16].decode("MacRoman").translate(_TRANSLATE_NONPRINTABLES) + " */"
comment = "/* " + data[i:i + 16].decode("MacRoman").translate(_TRANSLATE_NONPRINTABLES) + " */"
print(f"\t{s:<54s}{comment}")
print("};")
@ -340,7 +359,17 @@ def main():
else:
attrdesc = "no attributes"
print(f"({resid}), {name}, {attrdesc}, {len(res.data)} bytes")
if ns.decompress and api.ResourceAttrs.resCompressed in attrs:
try:
res.data
except compress.DecompressError:
length_desc = f"decompression failed ({len(res.data_raw)} bytes compressed)"
else:
length_desc = f"{len(res.data)} bytes ({len(res.data_raw)} bytes compressed)"
else:
length_desc = f"{len(res.data_raw)} bytes"
print(f"({resid}), {name}, {attrdesc}, {length_desc}")
print()
else:
print("No resource types (empty resource file)")

View File

@ -5,6 +5,8 @@ import os
import struct
import typing
from . import compress
# The formats of all following structures is as described in the Inside Macintosh book (see module docstring).
# Signedness and byte order of the integers is never stated explicitly in IM.
# All integers are big-endian, as this is the native byte order of the 68k and PowerPC processors used in old Macs.
@ -89,9 +91,9 @@ class ResourceAttrs(enum.Flag):
class Resource(object):
"""A single resource from a resource file."""
__slots__ = ("resource_type", "resource_id", "name", "attributes", "data")
__slots__ = ("resource_type", "resource_id", "name", "attributes", "data_raw", "_data_decompressed")
def __init__(self, resource_type: bytes, resource_id: int, name: typing.Optional[bytes], attributes: ResourceAttrs, data: bytes):
def __init__(self, resource_type: bytes, resource_id: int, name: typing.Optional[bytes], attributes: ResourceAttrs, data_raw: bytes):
"""Create a new resource with the given type code, ID, name, attributes, and data."""
super().__init__()
@ -100,15 +102,42 @@ class Resource(object):
self.resource_id: int = resource_id
self.name: typing.Optional[bytes] = name
self.attributes: ResourceAttrs = attributes
self.data: bytes = data
self.data_raw: bytes = data_raw
def __repr__(self):
if len(self.data) > 32:
data = f"<{len(self.data)} bytes: {self.data[:32]}...>"
try:
data = self.data
except compress.DecompressError:
decompress_ok = False
data = self.data_raw
else:
data = repr(self.data)
decompress_ok = True
return f"{type(self).__module__}.{type(self).__qualname__}(resource_type={self.resource_type}, resource_id={self.resource_id}, name={self.name}, attributes={self.attributes}, data={data})"
if len(data) > 32:
data_repr = f"<{len(data)} bytes: {data[:32]}...>"
else:
data_repr = repr(data)
if not decompress_ok:
data_repr = f"<decompression failed - compressed data: {data_repr}>"
return f"{type(self).__module__}.{type(self).__qualname__}(resource_type={self.resource_type}, resource_id={self.resource_id}, name={self.name}, attributes={self.attributes}, data={data_repr})"
@property
def data(self) -> bytes:
"""The resource data, decompressed if necessary.
Accessing this attribute may raise a DecompressError if the resource data is compressed and could not be decompressed. To access the compressed resource data, use the data_raw attribute.
"""
if ResourceAttrs.resCompressed in self.attributes:
try:
return self._data_decompressed
except AttributeError:
self._data_decompressed = compress.decompress(self.data_raw)
return self._data_decompressed
else:
return self.data_raw
class ResourceFile(collections.abc.Mapping):
"""A resource file reader operating on a byte stream."""

209
rsrcfork/compress.py Normal file
View File

@ -0,0 +1,209 @@
import enum
import struct
import typing
__all__ = [
"DecompressError",
"decompress",
]
# The signature of all compressed resource data, 0xa89f6572 in hex, or "®üer" in MacRoman.
COMPRESSED_SIGNATURE = b"\xa8\x9fer"
# The compression type commonly used for System file resources.
COMPRESSED_TYPE_SYSTEM = 0x0901
# Header for a compressed resource.
# 4 bytes: Signature (see above).
# 2 bytes: Length of the header. (This meaning is just a guess - the field's value is always 0x0012, so there's no way to know for certain what it means.)
# 2 bytes: Compression type. Known so far: 0x0901 is used in the System file's resources. 0x0801 is used in other files' resources. Currently only the first type is supported.
# 4 bytes: Length of the data after decompression.
# 2 bytes: The ID of the 'dcmp' resource that can decompress this resource. Currently only ID 2 is supported.
# 2 bytes: Unknown meaning, doesn't appear to have any effect on the decompression algorithm. Usually zero, sometimes set to a small integer (< 10). On 'lpch' resources, the value is always nonzero, and sometimes larger than usual.
# 1 byte: Number of entries in the custom lookup table minus one. Set to zero if the default lookup table is used.
# 1 byte: Flags. See the CompressedFlags enum below for details.
STRUCT_COMPRESSED_HEADER = struct.Struct(">4sHHIhHBB")
# Default lookup table for compressed resources.
# If the custom table flag is set, a custom table (usually with fewer than 256 entries) is used instead of this one.
# This table was obtained by decompressing a manually created compressed resource that refers to every possible table entry. Detailed steps:
# 1. Create a file with a resource fork
# 2. Add a resource with the following contents: b'\xa8\x9fer\x00\x12\t\x01\x00\x00\x02\x00\x00\x02\x00\x00\x00\x00' + bytes(range(256))
# 3. Set the "compressed" flag (0x01) on the resource
# 4. Open the file in ResEdit
# 5. Duplicate the resource - this will decompress the original resource and write its contents uncompressed into the duplicate
# 6. Read the data from the duplicated resource
COMPRESSED_DEFAULT_TABLE_DATA = (
b"\x00\x00\x00\x08N\xba nNu\x00\x0c\x00\x04p\x00"
b"\x00\x10\x00\x02Hn\xff\xfc`\x00\x00\x01H\xe7/."
b"NV\x00\x06N^/\x00a\x00\xff\xf8/\x0b\xff\xff"
b"\x00\x14\x00\n\x00\x18 _\x00\x0e P?<\xff\xf4"
b"L\xee0.g\x00L\xdf&n\x00\x12\x00\x1cBg"
b"\xff\xf00</\x0c\x00\x03N\xd0\x00 p\x01\x00\x16"
b"-@H\xc0 xr\x00X\x8ff\x00O\xefB\xa7"
b"g\x06\xff\xfaU\x8f(n?\x00\xff\xfe/<g\x04"
b"Y\x8f k\x00$ \x1fA\xfa\x81\xe1f\x04g\x08"
b"\x00\x1aN\xb9P\x8f .\x00\x07N\xb0\xff\xf2=@"
b"\x00\x1e hf\x06\xff\xf6N\xf9\x08\x00\x0c@=|"
b"\xff\xec\x00\x05 <\xff\xe8\xde\xfcJ.\x000\x00("
b"/\x08 \x0b`\x02Bn-H S @\x18\x00"
b"`\x04A\xee/(/\x01g\nH@ \x07f\x08"
b"\x01\x18/\x070(?.0+\"n/+\x00,"
b"g\x0c\"_`\x06\x00\xff0\x07\xff\xeeS@\x00@"
b"\xff\xe4J@f\n\x00\x0fN\xadp\xff\"\xd8Hk"
b"\x00\" Kg\x0eJ\xaeN\x90\xff\xe0\xff\xc0\x00*"
b"'@g\x02Q\xc8\x02\xb6Hz\"x\xb0n\xff\xe6"
b"\x00\t2.>\x00HA\xff\xeaC\xeeNqt\x00"
b"/, l\x00<\x00&\x00P\x18\x800\x1f\"\x00"
b"f\x0c\xff\xda\x008f\x020, \x0c-nB@"
b"\xff\xe2\xa9\xf0\xff\x007|\xe5\x80\xff\xdcHhYO"
b"\x004>\x1f`\x08/\x06\xff\xde`\np\x02\x002"
b"\xff\xcc\x00\x80\"Q\x10\x1f1|\xa0)\xff\xd8R@"
b"\x01\x00g\x10\xa0#\xff\xce\xff\xd4 \x06Hx\x00."
b"POC\xfag\x12v\x00A\xe8Jn \xd9\x00Z"
b"\x7f\xffQ\xca\x00\\.\x00\x02@H\xc7g\x14\x0c\x80"
b".\x9f\xff\xd6\x80\x00\x10\x00HBJk\xff\xd2\x00H"
b"JGN\xd1 o\x00A`\x0c*xB.2\x00"
b"etg\x16\x00DHm \x08Hl\x0b|&@"
b"\x04\x00\x00h m\x00\r*@\x00\x0b\x00>\x02 "
)
COMPRESSED_DEFAULT_TABLE = [COMPRESSED_DEFAULT_TABLE_DATA[i:i + 2] for i in range(0, len(COMPRESSED_DEFAULT_TABLE_DATA), 2)]
class CompressedFlags(enum.Flag):
TAGGED = 1 << 1 # The compressed data is tagged, meaning that it consists of "blocks" of a tag byte followed by 8 table references and/or literals. See comments in the decompress function for details.
CUSTOM_TABLE = 1 << 0 # A custom lookup table is included before the compressed data, which is used instead of the default table.
class DecompressError(Exception):
"""Raised when resource data decompression fails, because the data is invalid or the compression type is not supported."""
def _split_bits(i: int) -> typing.Tuple[bool, bool, bool, bool, bool, bool, bool, bool]:
"""Split a byte (an int) into its 8 bits (a tuple of 8 bools)."""
assert i in range(256)
return (
bool(i & (1 << 7)),
bool(i & (1 << 6)),
bool(i & (1 << 5)),
bool(i & (1 << 4)),
bool(i & (1 << 3)),
bool(i & (1 << 2)),
bool(i & (1 << 1)),
bool(i & (1 << 0)),
)
def _decompress_untagged(data: bytes, decompressed_length: int, table: typing.Sequence[bytes], *, debug: bool=False) -> bytes:
parts = []
i = 0
while i < len(data):
if i == len(data) - 1 and decompressed_length % 2 != 0:
# Special case: if we are at the last byte of the compressed data, and the decompressed data has an odd length, the last byte is a single literal byte, and not a table reference.
if debug:
print(f"Last byte: {data[-1:]}")
parts.append(data[-1:])
break
# Compressed data is untagged, every byte is a table reference.
if debug:
print(f"Reference: {data[i]} -> {table[data[i]]}")
parts.append(table[data[i]])
i += 1
return b"".join(parts)
def _decompress_tagged(data: bytes, decompressed_length: int, table: typing.Sequence[bytes], *, debug: bool=False) -> bytes:
parts = []
i = 0
while i < len(data):
if i == len(data) - 1 and decompressed_length % 2 != 0:
# Special case: if we are at the last byte of the compressed data, and the decompressed data has an odd length, the last byte is a single literal byte, and not a tag or a table reference.
if debug:
print(f"Last byte: {data[-1:]}")
parts.append(data[-1:])
break
# Compressed data is tagged, each tag byte is followed by 8 table references and/or literals.
tag = data[i]
if debug:
print(f"Tag: 0b{tag:>08b}")
i += 1
for is_ref in _split_bits(tag):
if is_ref:
# This is a table reference (a single byte that is an index into the table).
if debug:
print(f"Reference: {data[i]} -> {table[data[i]]}")
parts.append(table[data[i]])
i += 1
else:
# This is a literal (two uncompressed bytes that are literally copied into the output).
# Note: if i == len(data)-1, the literal is actually only a single byte long.
# This case is handled automatically - the slice extends one byte past the end of the data, and only one byte is returned.
if debug:
print(f"Literal: {data[i:i+2]}")
parts.append(data[i:i + 2])
i += 2
# If the end of the compressed data is reached in the middle of a chunk, all further tag bits are ignored (they should be zero) and decompression ends.
if i >= len(data):
break
return b"".join(parts)
def decompress(data: bytes, *, debug: bool=False) -> bytes:
"""Decompress the given compressed resource data."""
try:
signature, header_length, compression_type, decompressed_length, dcmp_id, unknown, table_count_m1, flags_raw = STRUCT_COMPRESSED_HEADER.unpack_from(data, offset=0)
except struct.error:
raise DecompressError(f"Invalid header")
if signature != COMPRESSED_SIGNATURE:
raise DecompressError(f"Invalid signature: {signature!r}, expected {COMPRESSED_SIGNATURE}")
if header_length != STRUCT_COMPRESSED_HEADER.size:
raise DecompressError(f"Unsupported header length: 0x{header_length:>04x}, expected 0x{STRUCT_COMPRESSED_HEADER.size:>04x}")
if compression_type != COMPRESSED_TYPE_SYSTEM:
raise DecompressError(f"Unsupported compression type: 0x{compression_type:>04x}, expected 0x{COMPRESSED_TYPE_SYSTEM:>04x}")
if dcmp_id != 2:
raise DecompressError(f"Unsupported 'dcmp' ID: {dcmp_id}, expected 2")
if debug:
print(f"Value of unknown field at bytes 0xc-0xe: 0x{unknown:>04x}")
table_count = table_count_m1 + 1
if debug:
print(f"Table has {table_count} entries")
try:
flags = CompressedFlags(flags_raw)
except ValueError:
raise DecompressError(f"Unsupported flags set: 0b{flags_raw:>08b}, currently only bits 0 and 1 are supported")
if debug:
print(f"Flags: {flags}")
if CompressedFlags.CUSTOM_TABLE in flags:
table_start = STRUCT_COMPRESSED_HEADER.size
data_start = table_start + table_count * 2
table = []
for i in range(table_start, data_start, 2):
table.append(data[i:i + 2])
if debug:
print(f"Using custom table: {table}")
else:
if table_count_m1 != 0:
raise DecompressError(f"table_count_m1 field is {table_count_m1}, but must be zero when the default table is used")
table = COMPRESSED_DEFAULT_TABLE
data_start = STRUCT_COMPRESSED_HEADER.size
if debug:
print("Using default table")
if CompressedFlags.TAGGED in flags:
decompress_func = _decompress_tagged
else:
decompress_func = _decompress_untagged
decompressed = decompress_func(data[data_start:], decompressed_length, table, debug=debug)
if len(decompressed) != decompressed_length:
raise DecompressError(f"Actual length of decompressed data ({len(decompressed)}) does not match length stored in resource ({decompressed_length})")
return decompressed