python-rsrcfork/rsrcfork/__main__.py
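
"""Command-line interface of the rsrcfork package (run as ``python3 -m rsrcfork``): read and display resources from a file's resource or data fork."""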

import argparse
import collections
import enum
import itertools
import sys
import textwrap
import typing
from . import __version__, api, compress

# The encoding to use when rendering bytes as text (in four-char codes, strings, hex dumps, etc.) or reading a quoted byte string (from the command line).
_TEXT_ENCODING = "MacRoman"

# Translation table to replace ASCII non-printable characters with periods.
_TRANSLATE_NONPRINTABLES = {k: "." for k in [*range(0x20), 0x7f]}
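
# Rez/DeRez symbolic names for the resource attribute flags.
# Attributes mapped to None have no symbolic name; if any of them is set, the derez output
# falls back to a numeric attribute value (see show_filtered_resources).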
_REZ_ATTR_NAMES = {
    api.ResourceAttrs.resSysRef: None, # "Illegal or reserved attribute"
    api.ResourceAttrs.resSysHeap: "sysheap",
    api.ResourceAttrs.resPurgeable: "purgeable",
    api.ResourceAttrs.resLocked: "locked",
    api.ResourceAttrs.resProtected: "protected",
    api.ResourceAttrs.resPreload: "preload",
    api.ResourceAttrs.resChanged: None, # "Illegal or reserved attribute"
    api.ResourceAttrs.resCompressed: None, # "Extended Header resource attribute"
}

F = typing.TypeVar("F", bound=enum.Flag)

def decompose_flags(value: F) -> typing.Sequence[F]:
    """Decompose an enum.Flag instance into separate enum constants."""
    
    return [bit for bit in type(value) if bit in value]

def is_printable(char: str) -> bool:
    """Determine whether a character is printable for our purposes.
    
    We mainly use Python's definition of printable (i. e. everything that Unicode does not consider a separator or "other" character). However, we also treat U+F8FF as printable, which is the private use codepoint used for the Apple logo character.
    """
    
    return char.isprintable() or char == "\uf8ff"

def bytes_unescape(string: str) -> bytes:
    """Convert a string containing text (in _TEXT_ENCODING) and hex escapes to a bytestring.
    
    (We implement our own unescaping mechanism here to not depend on any of Python's string/bytes escape syntax.)
    """
    
    out: typing.List[int] = []
    it = iter(string)
    for char in it:
        if char == "\\":
            try:
                esc = next(it)
                if esc in "\\\'\"":
                    out.extend(esc.encode(_TEXT_ENCODING))
                elif esc == "x":
                    x1, x2 = next(it), next(it)
                    out.append(int(x1+x2, 16))
                else:
                    raise ValueError(f"Unknown escape character: {esc}")
            except StopIteration:
                raise ValueError("End of string in escape sequence")
        else:
            out.extend(char.encode(_TEXT_ENCODING))
    
    return bytes(out)

def bytes_escape(bs: bytes, *, quote: typing.Optional[str]=None) -> str:
    """Convert a bytestring to a string (using _TEXT_ENCODING), with non-printable characters hex-escaped.
    
    (We implement our own escaping mechanism here to not depend on Python's str or bytes repr.)
    """
    
    out = []
    for byte, char in zip(bs, bs.decode(_TEXT_ENCODING)):
        if char in {quote, "\\"}:
            out.append(f"\\{char}")
        elif is_printable(char):
            out.append(char)
        else:
            out.append(f"\\x{byte:02x}")
    
    return "".join(out)
def filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> typing.List[api.Resource]:
    matching: typing.MutableMapping[typing.Tuple[bytes, int], api.Resource] = collections.OrderedDict()
    
    for filter in filters:
        if len(filter) == 4:
            try:
                resources = rf[filter.encode("ascii")]
            except KeyError:
                continue
            
            for res in resources.values():
                matching[res.type, res.id] = res
        elif filter[0] == filter[-1] == "'":
            try:
                resources = rf[bytes_unescape(filter[1:-1])]
            except KeyError:
                continue
            
            for res in resources.values():
                matching[res.type, res.id] = res
        else:
            pos = filter.find("'", 1)
            if pos == -1:
                raise ValueError(f"Invalid filter {filter!r}: Resource type must be single-quoted")
            elif filter[pos + 1] != " ":
                raise ValueError(f"Invalid filter {filter!r}: Resource type and ID must be separated by a space")
            
            restype_str, resid_str = filter[:pos + 1], filter[pos + 2:]
            
            if not restype_str[0] == restype_str[-1] == "'":
                raise ValueError(
                    f"Invalid filter {filter!r}: Resource type is not a single-quoted type identifier: {restype_str!r}")
            restype = bytes_unescape(restype_str[1:-1])
            
            if len(restype) != 4:
                raise ValueError(
                    f"Invalid filter {filter!r}: Type identifier must be 4 bytes after replacing escapes, got {len(restype)} bytes: {restype!r}")
            
            if resid_str[0] != "(" or resid_str[-1] != ")":
                raise ValueError(f"Invalid filter {filter!r}: Resource ID must be parenthesized")
            resid_str = resid_str[1:-1]
            
            try:
                resources = rf[restype]
            except KeyError:
                continue
            
            if resid_str[0] == resid_str[-1] == '"':
                name = bytes_unescape(resid_str[1:-1])
                
                for res in resources.values():
                    if res.name == name:
                        matching[res.type, res.id] = res
                        break
            elif ":" in resid_str:
                if resid_str.count(":") > 1:
                    raise ValueError(f"Invalid filter {filter!r}: Too many colons in ID range expression: {resid_str!r}")
                start_str, end_str = resid_str.split(":")
                start, end = int(start_str), int(end_str)
                
                for res in resources.values():
                    if start <= res.id <= end:
                        matching[res.type, res.id] = res
            else:
                resid = int(resid_str)
                try:
                    res = resources[resid]
                except KeyError:
                    continue
                matching[res.type, res.id] = res
    
    return list(matching.values())
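
# hexdump prints an offset column, two groups of eight hex bytes, and a character column
# (decoded using _TEXT_ENCODING, with ASCII control characters shown as periods),
# followed by a final line giving the total length as an offset.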
def hexdump(data: bytes) -> None:
    last_line = None
    asterisk_shown = False
    for i in range(0, len(data), 16):
        line = data[i:i + 16]
        # If the same 16-byte lines appear multiple times, print only the first one, and replace all further lines with a single line with an asterisk.
        # This is unambiguous - to find out how many lines were collapsed this way, the user can compare the addresses of the lines before and after the asterisk.
        if line == last_line:
            if not asterisk_shown:
                print("*")
                asterisk_shown = True
        else:
            line_hex_left = " ".join(f"{byte:02x}" for byte in line[:8])
            line_hex_right = " ".join(f"{byte:02x}" for byte in line[8:])
            line_char = line.decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES)
            print(f"{i:08x} {line_hex_left:<{8*2+7}} {line_hex_right:<{8*2+7}} |{line_char}|")
            asterisk_shown = False
        last_line = line
    
    if data:
        print(f"{len(data):08x}")
def raw_hexdump(data: bytes) -> None:
    for i in range(0, len(data), 16):
        print(" ".join(f"{byte:02x}" for byte in data[i:i + 16]))
def translate_text(data: bytes) -> str:
    return data.decode(_TEXT_ENCODING).replace("\r", "\n")
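
# describe_resource builds a one-line summary of a resource: its ID, quoted name (if any),
# length (including compression details when decompress is true), and attribute flags,
# optionally prefixed with the quoted resource type.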
def describe_resource(res: api.Resource, *, include_type: bool, decompress: bool) -> str:
    id_desc_parts = [f"{res.id}"]
    
    if res.name is not None:
        name = bytes_escape(res.name, quote='"')
        id_desc_parts.append(f'"{name}"')
    
    id_desc = ", ".join(id_desc_parts)
    
    content_desc_parts = []
    
    if decompress and api.ResourceAttrs.resCompressed in res.attributes:
        try:
            res.compressed_info
        except compress.DecompressError:
            length_desc = f"unparseable compressed data header ({res.length_raw} bytes compressed)"
        else:
            assert res.compressed_info is not None
            length_desc = f"{res.length} bytes ({res.length_raw} bytes compressed, 'dcmp' ({res.compressed_info.dcmp_id}) format)"
    else:
        assert not decompress or res.compressed_info is None
        length_desc = f"{res.length_raw} bytes"
    
    content_desc_parts.append(length_desc)
    
    attrs = decompose_flags(res.attributes)
    if attrs:
        content_desc_parts.append(" | ".join(attr.name for attr in attrs))
    
    content_desc = ", ".join(content_desc_parts)
    
    desc = f"({id_desc}): {content_desc}"
    if include_type:
        restype = bytes_escape(res.type, quote="'")
        desc = f"'{restype}' {desc}"
    return desc

def parse_args() -> argparse.Namespace:
    ap = argparse.ArgumentParser(
        add_help=False,
        fromfile_prefix_chars="@",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""
        Read and display resources from a file's resource or data fork.
        When specifying resource filters, each one may be of one of the
        following forms:
        An unquoted type name (without escapes): TYPE
        A quoted type name: 'TYPE'
        A quoted type name and an ID: 'TYPE' (42)
        A quoted type name and an ID range: 'TYPE' (24:42)
        A quoted type name and a resource name: 'TYPE' ("foobar")
        When multiple filters are specified, all resources matching any of them
        are displayed.
        """),
    )
    
    ap.add_argument("--help", action="help", help="Display this help message and exit")
    ap.add_argument("--version", action="version", version=__version__, help="Display version information and exit")
    ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview")
    ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)")
    ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not decompress compressed resources, output compressed resource data as-is")
    ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump) (default), human-readable info with newline-translated data (dump-text), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
    ap.add_argument("--group", action="store", choices=["none", "type", "id"], default="type", help="Group resources in list view by type or ID, or disable grouping (default: type)")
    ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID")
    ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
    ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")
    
    ap.add_argument("file", help="The file to read, or - for stdin")
    ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources")
    
    ns = ap.parse_args()
    return ns
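
# show_header_data writes the reserved header data in the requested output format; the
# derez format cannot represent header data, so it is rejected with an error.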
def show_header_data(data: bytes, *, format: str) -> None:
    if format == "dump":
        hexdump(data)
    elif format == "dump-text":
        print(translate_text(data))
    elif format == "hex":
        raw_hexdump(data)
    elif format == "raw":
        sys.stdout.buffer.write(data)
    elif format == "derez":
        print("Cannot output file header data in derez format", file=sys.stderr)
        sys.exit(1)
    else:
        raise ValueError(f"Unhandled output format: {format}")
def show_filtered_resources(resources: typing.Sequence[api.Resource], format: str, decompress: bool) -> None:
    if not resources:
        if format in ("dump", "dump-text"):
            print("No resources matched the filter")
        elif format in ("hex", "raw"):
            print("No resources matched the filter", file=sys.stderr)
            sys.exit(1)
        elif format == "derez":
            print("/* No resources matched the filter */")
        else:
            raise AssertionError(f"Unhandled output format: {format}")
    elif format in ("hex", "raw") and len(resources) != 1:
        print(f"Format {format} can only output a single resource, but the filter matched {len(resources)} resources", file=sys.stderr)
        sys.exit(1)
    
    for res in resources:
        if decompress:
            data = res.data
        else:
            data = res.data_raw
        
        if format in ("dump", "dump-text"):
            # Human-readable info and hex or text dump
            desc = describe_resource(res, include_type=True, decompress=decompress)
            print(f"Resource {desc}:")
            if format == "dump":
                hexdump(data)
            elif format == "dump-text":
                print(translate_text(data))
            else:
                raise AssertionError(f"Unhandled format: {format!r}")
            print()
        elif format == "hex":
            # Data only as hex
            raw_hexdump(data)
        elif format == "raw":
            # Data only as raw bytes
            sys.stdout.buffer.write(data)
        elif format == "derez":
            # Like DeRez with no resource definitions
            attrs = list(decompose_flags(res.attributes))
            if decompress and api.ResourceAttrs.resCompressed in attrs:
                attrs.remove(api.ResourceAttrs.resCompressed)
                attrs_comment = " /* was compressed */"
            else:
                attrs_comment = ""
            
            attr_descs_with_none = [_REZ_ATTR_NAMES[attr] for attr in attrs]
            if None in attr_descs_with_none:
                attr_descs = [f"${res.attributes.value:02X}"]
            else:
                attr_descs = typing.cast(typing.List[str], attr_descs_with_none)
            
            parts = [str(res.id)]
            
            if res.name is not None:
                name = bytes_escape(res.name, quote='"')
                parts.append(f'"{name}"')
            
            parts += attr_descs
            
            restype = bytes_escape(res.type, quote="'")
            print(f"data '{restype}' ({', '.join(parts)}{attrs_comment}) {{")
            
            for i in range(0, len(data), 16):
                # Two-byte grouping is really annoying to implement.
                groups = []
                for j in range(0, 16, 2):
                    if i+j >= len(data):
                        break
                    elif i+j+1 >= len(data):
                        groups.append(f"{data[i+j]:02X}")
                    else:
                        groups.append(f"{data[i+j]:02X}{data[i+j+1]:02X}")
                
                s = f'$"{" ".join(groups)}"'
                comment = "/* " + data[i:i + 16].decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES) + " */"
                print(f"\t{s:<54s}{comment}")
            
            print("};")
            print()
        else:
            raise ValueError(f"Unhandled output format: {format}")
def list_resource_file(rf: api.ResourceFile, *, sort: bool, group: str, decompress: bool) -> None:
    if rf.header_system_data != bytes(len(rf.header_system_data)):
        print("Header system data:")
        hexdump(rf.header_system_data)
    
    if rf.header_application_data != bytes(len(rf.header_application_data)):
        print("Header application data:")
        hexdump(rf.header_application_data)
    
    attrs = decompose_flags(rf.file_attributes)
    if attrs:
        print("File attributes: " + " | ".join(attr.name for attr in attrs))
    
    if len(rf) == 0:
        print("No resources (empty resource file)")
        return
    
    if group == "none":
        all_resources: typing.List[api.Resource] = []
        for reses in rf.values():
            all_resources.extend(reses.values())
        if sort:
            all_resources.sort(key=lambda res: (res.type, res.id))
        print(f"{len(all_resources)} resources:")
        for res in all_resources:
            print(describe_resource(res, include_type=True, decompress=decompress))
    elif group == "type":
        print(f"{len(rf)} resource types:")
        restype_items: typing.Collection[typing.Tuple[bytes, typing.Mapping[int, api.Resource]]] = rf.items()
        if sort:
            restype_items = sorted(restype_items, key=lambda item: item[0])
        for typecode, resources_map in restype_items:
            restype = bytes_escape(typecode, quote="'")
            print(f"'{restype}': {len(resources_map)} resources:")
            resources_items: typing.Collection[typing.Tuple[int, api.Resource]] = resources_map.items()
            if sort:
                resources_items = sorted(resources_items, key=lambda item: item[0])
            for resid, res in resources_items:
                print(describe_resource(res, include_type=False, decompress=decompress))
            print()
    elif group == "id":
        all_resources = []
        for reses in rf.values():
            all_resources.extend(reses.values())
        all_resources.sort(key=lambda res: res.id)
        resources_by_id = {resid: list(reses) for resid, reses in itertools.groupby(all_resources, key=lambda res: res.id)}
        print(f"{len(resources_by_id)} resource IDs:")
        for resid, resources in resources_by_id.items():
            print(f"({resid}): {len(resources)} resources:")
            if sort:
                resources.sort(key=lambda res: res.type)
            for res in resources:
                print(describe_resource(res, include_type=True, decompress=decompress))
            print()
    else:
        raise AssertionError(f"Unhandled group mode: {group!r}")
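
# Example invocations (file names are illustrative):
#   python3 -m rsrcfork Cool.rsrc                      # overview of all resources
#   python3 -m rsrcfork Cool.rsrc "'ICN#' (128)"       # dump one resource by type and ID
#   python3 -m rsrcfork --format derez Cool.rsrc STR#  # DeRez-style data blocks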
def main() -> typing.NoReturn:
    ns = parse_args()
    
    if ns.file == "-":
        if ns.fork != "auto":
            print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr)
            sys.exit(1)
        
        rf = api.ResourceFile(sys.stdin.buffer)
    else:
        rf = api.ResourceFile.open(ns.file, fork=ns.fork)
    
    with rf:
        if ns.header_system or ns.header_application:
            if ns.header_system:
                data = rf.header_system_data
            else:
                data = rf.header_application_data
            
            show_header_data(data, format=ns.format)
        elif ns.filter or ns.all:
            if ns.filter:
                resources = filter_resources(rf, ns.filter)
            else:
                resources = []
                for reses in rf.values():
                    resources.extend(reses.values())
            
            if ns.sort:
                resources.sort(key=lambda res: (res.type, res.id))
            show_filtered_resources(resources, format=ns.format, decompress=ns.decompress)
        else:
            list_resource_file(rf, sort=ns.sort, group=ns.group, decompress=ns.decompress)
    
    sys.exit(0)

if __name__ == "__main__":
    sys.exit(main())