The leading underscore is meant to distinguish private (for internal use only) APIs from public (for external use) APIs. One can argue about where the line between public and private should be, but if something is used from other modules (as with read_variable_length_integer) it's not really private IMHO. In scripts (like __main__) it also doesn't make much sense to use leading underscores, because the entire file is never meant to be used by external code.
import argparse
import collections
import enum
import itertools
import sys
import textwrap
import typing

from . import __version__, api, compress

# The encoding to use when rendering bytes as text (in four-char codes, strings, hex dumps, etc.) or reading a quoted byte string (from the command line).
_TEXT_ENCODING = "MacRoman"

# Translation table to replace ASCII non-printable characters with periods.
_TRANSLATE_NONPRINTABLES = {k: "." for k in [*range(0x20), 0x7f]}

_REZ_ATTR_NAMES = {
	api.ResourceAttrs.resSysRef: None, # "Illegal or reserved attribute"
	api.ResourceAttrs.resSysHeap: "sysheap",
	api.ResourceAttrs.resPurgeable: "purgeable",
	api.ResourceAttrs.resLocked: "locked",
	api.ResourceAttrs.resProtected: "protected",
	api.ResourceAttrs.resPreload: "preload",
	api.ResourceAttrs.resChanged: None, # "Illegal or reserved attribute"
	api.ResourceAttrs.resCompressed: None, # "Extended Header resource attribute"
}

F = typing.TypeVar("F", bound=enum.Flag)
def decompose_flags(value: F) -> typing.Sequence[F]:
	"""Decompose an enum.Flags instance into separate enum constants."""
	
	return [bit for bit in type(value) if bit in value]
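
# Illustrative example: for a combined value such as
# api.ResourceAttrs.resProtected | api.ResourceAttrs.resPreload,
# decompose_flags returns a list containing exactly those two members,
# in the order in which they are defined on api.ResourceAttrs.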

def is_printable(char: str) -> bool:
	"""Determine whether a character is printable for our purposes.
	
	We mainly use Python's definition of printable (i. e. everything that Unicode does not consider a separator or "other" character). However, we also treat U+F8FF as printable, which is the private use codepoint used for the Apple logo character.
	"""
	
	return char.isprintable() or char == "\uf8ff"
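
# For example, is_printable("A") and is_printable("\uf8ff") are true, while
# is_printable("\x00") is false, because NUL is a control ("other") character.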

def bytes_unescape(string: str) -> bytes:
	"""Convert a string containing text (in _TEXT_ENCODING) and hex escapes to a bytestring.
	
	(We implement our own unescaping mechanism here to not depend on any of Python's string/bytes escape syntax.)
	"""
	
	out: typing.List[int] = []
	it = iter(string)
	for char in it:
		if char == "\\":
			try:
				esc = next(it)
				if esc in "\\\'\"":
					out.extend(esc.encode(_TEXT_ENCODING))
				elif esc == "x":
					x1, x2 = next(it), next(it)
					out.append(int(x1+x2, 16))
				else:
					raise ValueError(f"Unknown escape character: {esc}")
			except StopIteration:
				raise ValueError("End of string in escape sequence")
		else:
			out.extend(char.encode(_TEXT_ENCODING))
	
	return bytes(out)
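
# Illustrative examples (assuming _TEXT_ENCODING stays "MacRoman"):
#   bytes_unescape("ICN\\x23") == b"ICN#" (hex escape for "#")
#   bytes_unescape("STR ") == b"STR "
# Unknown escapes such as "\\q", or an escape cut off by the end of the string,
# raise ValueError.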

def bytes_escape(bs: bytes, *, quote: typing.Optional[str]=None) -> str:
	"""Convert a bytestring to a string (using _TEXT_ENCODING), with non-printable characters hex-escaped.
	
	(We implement our own escaping mechanism here to not depend on Python's str or bytes repr.)
	"""
	
	out = []
	for byte, char in zip(bs, bs.decode(_TEXT_ENCODING)):
		if char in {quote, "\\"}:
			out.append(f"\\{char}")
		elif is_printable(char):
			out.append(char)
		else:
			out.append(f"\\x{byte:02x}")
	
	return "".join(out)

def filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> typing.List[api.Resource]:
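	"""Collect all resources from rf that match any of the given filters.
	
	A filter is a bare four-character type code (e. g. ICN#), a single-quoted
	type code that may contain escapes (e. g. 'ICN#'), or a single-quoted type
	code followed by a space and a parenthesized resource ID, inclusive ID range,
	or double-quoted resource name (e. g. 'ICN#' (128), 'ICN#' (128:255),
	'ICN#' ("foobar")). Types that are not present in the file are skipped
	silently, malformed filters raise ValueError, and each matching resource is
	returned only once, in the order in which it was first matched.
	"""
	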
	matching: typing.MutableMapping[typing.Tuple[bytes, int], api.Resource] = collections.OrderedDict()
	
	for filter in filters:
		if len(filter) == 4:
			try:
				resources = rf[filter.encode("ascii")]
			except KeyError:
				continue
			
			for res in resources.values():
				matching[res.type, res.id] = res
		elif filter[0] == filter[-1] == "'":
			try:
				resources = rf[bytes_unescape(filter[1:-1])]
			except KeyError:
				continue
			
			for res in resources.values():
				matching[res.type, res.id] = res
		else:
			pos = filter.find("'", 1)
			if pos == -1:
				raise ValueError(f"Invalid filter {filter!r}: Resource type must be single-quoted")
			elif filter[pos + 1] != " ":
				raise ValueError(f"Invalid filter {filter!r}: Resource type and ID must be separated by a space")
			
			restype_str, resid_str = filter[:pos + 1], filter[pos + 2:]
			
			if not restype_str[0] == restype_str[-1] == "'":
				raise ValueError(
					f"Invalid filter {filter!r}: Resource type is not a single-quoted type identifier: {restype_str!r}")
			restype = bytes_unescape(restype_str[1:-1])
			
			if len(restype) != 4:
				raise ValueError(
					f"Invalid filter {filter!r}: Type identifier must be 4 bytes after replacing escapes, got {len(restype)} bytes: {restype!r}")
			
			if resid_str[0] != "(" or resid_str[-1] != ")":
				raise ValueError(f"Invalid filter {filter!r}: Resource ID must be parenthesized")
			resid_str = resid_str[1:-1]
			
			try:
				resources = rf[restype]
			except KeyError:
				continue
			
			if resid_str[0] == resid_str[-1] == '"':
				name = bytes_unescape(resid_str[1:-1])
				
				for res in resources.values():
					if res.name == name:
						matching[res.type, res.id] = res
						break
			elif ":" in resid_str:
				if resid_str.count(":") > 1:
					raise ValueError(f"Invalid filter {filter!r}: Too many colons in ID range expression: {resid_str!r}")
				start_str, end_str = resid_str.split(":")
				start, end = int(start_str), int(end_str)
				
				for res in resources.values():
					if start <= res.id <= end:
						matching[res.type, res.id] = res
			else:
				resid = int(resid_str)
				try:
					res = resources[resid]
				except KeyError:
					continue
				matching[res.type, res.id] = res
	
	return list(matching.values())

def hexdump(data: bytes) -> None:
	last_line = None
	asterisk_shown = False
	for i in range(0, len(data), 16):
		line = data[i:i + 16]
		# If the same 16-byte lines appear multiple times, print only the first one, and replace all further lines with a single line with an asterisk.
		# This is unambiguous - to find out how many lines were collapsed this way, the user can compare the addresses of the lines before and after the asterisk.
		if line == last_line:
			if not asterisk_shown:
				print("*")
				asterisk_shown = True
		else:
			line_hex_left = " ".join(f"{byte:02x}" for byte in line[:8])
			line_hex_right = " ".join(f"{byte:02x}" for byte in line[8:])
			line_char = line.decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES)
			print(f"{i:08x} {line_hex_left:<{8*2+7}} {line_hex_right:<{8*2+7}} |{line_char}|")
			asterisk_shown = False
		last_line = line
	
	if data:
		print(f"{len(data):08x}")

def raw_hexdump(data: bytes) -> None:
	for i in range(0, len(data), 16):
		print(" ".join(f"{byte:02x}" for byte in data[i:i + 16]))

def translate_text(data: bytes) -> str:
	return data.decode(_TEXT_ENCODING).replace("\r", "\n")

def describe_resource(res: api.Resource, *, include_type: bool, decompress: bool) -> str:
	id_desc_parts = [f"{res.id}"]
	
	if res.name is not None:
		name = bytes_escape(res.name, quote='"')
		id_desc_parts.append(f'"{name}"')
	
	id_desc = ", ".join(id_desc_parts)
	
	content_desc_parts = []
	
	if decompress and api.ResourceAttrs.resCompressed in res.attributes:
		try:
			res.compressed_info
		except compress.DecompressError:
			length_desc = f"unparseable compressed data header ({res.length_raw} bytes compressed)"
		else:
			assert res.compressed_info is not None
			length_desc = f"{res.length} bytes ({res.length_raw} bytes compressed, 'dcmp' ({res.compressed_info.dcmp_id}) format)"
	else:
		assert res.compressed_info is None
		length_desc = f"{res.length_raw} bytes"
	content_desc_parts.append(length_desc)
	
	attrs = decompose_flags(res.attributes)
	if attrs:
		content_desc_parts.append(" | ".join(attr.name for attr in attrs))
	
	content_desc = ", ".join(content_desc_parts)
	
	desc = f"({id_desc}): {content_desc}"
	if include_type:
		restype = bytes_escape(res.type, quote="'")
		desc = f"'{restype}' {desc}"
	return desc

def parse_args() -> argparse.Namespace:
	ap = argparse.ArgumentParser(
		add_help=False,
		fromfile_prefix_chars="@",
		formatter_class=argparse.RawDescriptionHelpFormatter,
		description=textwrap.dedent("""
		Read and display resources from a file's resource or data fork.
		
		When specifying resource filters, each one may be of one of the
		following forms:
		
		An unquoted type name (without escapes): TYPE
		A quoted type name: 'TYPE'
		A quoted type name and an ID: 'TYPE' (42)
		A quoted type name and an ID range: 'TYPE' (24:42)
		A quoted type name and a resource name: 'TYPE' ("foobar")
		
		When multiple filters are specified, all resources matching any of them
		are displayed.
		"""),
	)
	
	ap.add_argument("--help", action="help", help="Display this help message and exit")
	ap.add_argument("--version", action="version", version=__version__, help="Display version information and exit")
	ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview")
	ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)")
	ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not decompress compressed resources, output compressed resource data as-is")
	ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump) (default), human-readable info with newline-translated data (dump-text), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
	ap.add_argument("--group", action="store", choices=["none", "type", "id"], default="type", help="Group resources in list view by type or ID, or disable grouping (default: type)")
	ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID")
	ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
	ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")
	
	ap.add_argument("file", help="The file to read, or - for stdin")
	ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources")
	
	ns = ap.parse_args()
	return ns
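
# Illustrative invocations (assuming the package is run via python3 -m rsrcfork;
# "Example.rsrc" is a placeholder file name):
#   python3 -m rsrcfork Example.rsrc
#   python3 -m rsrcfork Example.rsrc "'ICN#' (128)"
#   python3 -m rsrcfork --format=derez Example.rsrc ICON
# The quoting in the second example keeps the filter's quotes, space and
# parentheses together as a single shell argument.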

def show_header_data(data: bytes, *, format: str) -> None:
	if format == "dump":
		hexdump(data)
	elif format == "dump-text":
		print(translate_text(data))
	elif format == "hex":
		raw_hexdump(data)
	elif format == "raw":
		sys.stdout.buffer.write(data)
	elif format == "derez":
		print("Cannot output file header data in derez format", file=sys.stderr)
		sys.exit(1)
	else:
		raise ValueError(f"Unhandled output format: {format}")

def show_filtered_resources(resources: typing.Sequence[api.Resource], format: str, decompress: bool) -> None:
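	"""Output the given resources in the format selected with --format.
	
	"dump" and "dump-text" print a description of each resource followed by a
	hex dump or newline-translated text, "hex" and "raw" output the data of
	exactly one resource, and "derez" prints the data like DeRez would, but
	without resource definitions. If decompress is true, compressed resources
	are shown in decompressed form, otherwise their raw stored data is used.
	"""
	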
	if not resources:
		if format in ("dump", "dump-text"):
			print("No resources matched the filter")
		elif format in ("hex", "raw"):
			print("No resources matched the filter", file=sys.stderr)
			sys.exit(1)
		elif format == "derez":
			print("/* No resources matched the filter */")
		else:
			raise AssertionError(f"Unhandled output format: {format}")
	elif format in ("hex", "raw") and len(resources) != 1:
		print(f"Format {format} can only output a single resource, but the filter matched {len(resources)} resources", file=sys.stderr)
		sys.exit(1)
	
	for res in resources:
		if decompress:
			data = res.data
		else:
			data = res.data_raw
		
		if format in ("dump", "dump-text"):
			# Human-readable info and hex or text dump
			desc = describe_resource(res, include_type=True, decompress=decompress)
			print(f"Resource {desc}:")
			if format == "dump":
				hexdump(data)
			elif format == "dump-text":
				print(translate_text(data))
			else:
				raise AssertionError(f"Unhandled format: {format!r}")
			print()
		elif format == "hex":
			# Data only as hex
			
			raw_hexdump(data)
		elif format == "raw":
			# Data only as raw bytes
			
			sys.stdout.buffer.write(data)
		elif format == "derez":
			# Like DeRez with no resource definitions
			
			attrs = list(decompose_flags(res.attributes))
			
			if decompress and api.ResourceAttrs.resCompressed in attrs:
				attrs.remove(api.ResourceAttrs.resCompressed)
				attrs_comment = " /* was compressed */"
			else:
				attrs_comment = ""
			
			attr_descs_with_none = [_REZ_ATTR_NAMES[attr] for attr in attrs]
			if None in attr_descs_with_none:
				attr_descs = [f"${res.attributes.value:02X}"]
			else:
				attr_descs = typing.cast(typing.List[str], attr_descs_with_none)
			
			parts = [str(res.id)]
			
			if res.name is not None:
				name = bytes_escape(res.name, quote='"')
				parts.append(f'"{name}"')
			
			parts += attr_descs
			
			restype = bytes_escape(res.type, quote="'")
			print(f"data '{restype}' ({', '.join(parts)}{attrs_comment}) {{")
			
			for i in range(0, len(data), 16):
				# Two-byte grouping is really annoying to implement.
				groups = []
				for j in range(0, 16, 2):
					if i+j >= len(data):
						break
					elif i+j+1 >= len(data):
						groups.append(f"{data[i+j]:02X}")
					else:
						groups.append(f"{data[i+j]:02X}{data[i+j+1]:02X}")
				
				s = f'$"{" ".join(groups)}"'
				comment = "/* " + data[i:i + 16].decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES) + " */"
				print(f"\t{s:<54s}{comment}")
			
			print("};")
			print()
		else:
			raise ValueError(f"Unhandled output format: {format}")

def list_resource_file(rf: api.ResourceFile, *, sort: bool, group: str, decompress: bool) -> None:
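	"""Print an overview of all resources in the file.
	
	Any non-zero header data and file attributes are shown first. The resources
	are then listed with one line of description each, grouped by type, by ID,
	or not at all, and optionally sorted by type and ID.
	"""
	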
	if rf.header_system_data != bytes(len(rf.header_system_data)):
		print("Header system data:")
		hexdump(rf.header_system_data)
	
	if rf.header_application_data != bytes(len(rf.header_application_data)):
		print("Header application data:")
		hexdump(rf.header_application_data)
	
	attrs = decompose_flags(rf.file_attributes)
	if attrs:
		print("File attributes: " + " | ".join(attr.name for attr in attrs))
	
	if len(rf) == 0:
		print("No resources (empty resource file)")
		return
	
	if group == "none":
		all_resources: typing.List[api.Resource] = []
		for reses in rf.values():
			all_resources.extend(reses.values())
		if sort:
			all_resources.sort(key=lambda res: (res.type, res.id))
		print(f"{len(all_resources)} resources:")
		for res in all_resources:
			print(describe_resource(res, include_type=True, decompress=decompress))
	elif group == "type":
		print(f"{len(rf)} resource types:")
		restype_items: typing.Collection[typing.Tuple[bytes, typing.Mapping[int, api.Resource]]] = rf.items()
		if sort:
			restype_items = sorted(restype_items, key=lambda item: item[0])
		for typecode, resources_map in restype_items:
			restype = bytes_escape(typecode, quote="'")
			print(f"'{restype}': {len(resources_map)} resources:")
			resources_items: typing.Collection[typing.Tuple[int, api.Resource]] = resources_map.items()
			if sort:
				resources_items = sorted(resources_items, key=lambda item: item[0])
			for resid, res in resources_items:
				print(describe_resource(res, include_type=False, decompress=decompress))
			print()
	elif group == "id":
		all_resources = []
		for reses in rf.values():
			all_resources.extend(reses.values())
		all_resources.sort(key=lambda res: res.id)
		resources_by_id = {resid: list(reses) for resid, reses in itertools.groupby(all_resources, key=lambda res: res.id)}
		print(f"{len(resources_by_id)} resource IDs:")
		for resid, resources in resources_by_id.items():
			print(f"({resid}): {len(resources)} resources:")
			if sort:
				resources.sort(key=lambda res: res.type)
			for res in resources:
				print(describe_resource(res, include_type=True, decompress=decompress))
			print()
	else:
		raise AssertionError(f"Unhandled group mode: {group!r}")

def main() -> typing.NoReturn:
	ns = parse_args()
	
	if ns.file == "-":
		# --fork defaults to "auto", so any other value means the user explicitly requested a fork.
		if ns.fork != "auto":
			print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr)
			sys.exit(1)
		
		rf = api.ResourceFile(sys.stdin.buffer)
	else:
		rf = api.ResourceFile.open(ns.file, fork=ns.fork)
	
	with rf:
		if ns.header_system or ns.header_application:
			if ns.header_system:
				data = rf.header_system_data
			else:
				data = rf.header_application_data
			
			show_header_data(data, format=ns.format)
		elif ns.filter or ns.all:
			if ns.filter:
				resources = filter_resources(rf, ns.filter)
			else:
				resources = []
				for reses in rf.values():
					resources.extend(reses.values())
			
			if ns.sort:
				resources.sort(key=lambda res: (res.type, res.id))
			
			show_filtered_resources(resources, format=ns.format, decompress=ns.decompress)
		else:
			list_resource_file(rf, sort=ns.sort, group=ns.group, decompress=ns.decompress)
	
	sys.exit(0)

if __name__ == "__main__":
	sys.exit(main())