11 Commits
1.2 ... v1.3.0

Author SHA1 Message Date
7207b1d32b Release version 1.3.0 2019-09-16 16:34:40 +02:00
1de940d597 Enable --sort by default and add --no-sort to disable sorting
In most cases the file order is not important and the unsorted output
hurts readability. The performance impact of sorting is relatively
small and barely noticeable even with large resource files.
2019-09-16 15:25:41 +02:00
d7255bc977 Adjust --group=id output format slightly 2019-09-16 14:58:21 +02:00
c6337bdfbd Rename resource_type and resource_id attributes to type and id
The old names were chosen to avoid conflicts with Python's type and id
builtins, but for attribute names this is not necessary.
2019-09-15 15:56:03 +02:00
f4c2717720 Add command-line --group option 2019-09-15 15:38:01 +02:00
8ad0234633 Add command-line --sort option 2019-09-13 15:00:56 +02:00
7612322c43 Add dump-text output format on command line 2019-09-13 14:51:16 +02:00
51ae7c6a09 Refactor __main__.main into smaller functions 2019-09-13 14:17:21 +02:00
194c886472 Change hex dump output format to match hexdump -C 2019-09-13 10:51:27 +02:00
b2fa5f8b0f Collapse multiple subsequent identical lines in hex dumps 2019-09-13 10:40:03 +02:00
752ec9e828 Bump version to 1.2.1.dev 2019-09-13 10:22:43 +02:00
4 changed files with 223 additions and 130 deletions

View File

@ -56,7 +56,7 @@ Simple example
>>> rf
<rsrcfork.ResourceFile at 0x1046e6048, attributes ResourceFileAttrs.0, containing 4 resource types: [b'utxt', b'utf8', b'TEXT', b'drag']>
>>> rf[b"TEXT"]
<rsrcfork.ResourceFile._LazyResourceMap at 0x10470ed30 containing one resource: rsrcfork.Resource(resource_type=b'TEXT', resource_id=256, name=None, attributes=ResourceAttrs.0, data=b'Here is some text')>
<rsrcfork.ResourceFile._LazyResourceMap at 0x10470ed30 containing one resource: rsrcfork.Resource(type=b'TEXT', id=256, name=None, attributes=ResourceAttrs.0, data=b'Here is some text')>
Automatic selection of data/resource fork
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -127,6 +127,15 @@ If these links are no longer functional, some are archived in the `Internet Arch
Changelog
---------
Version 1.3.0
^^^^^^^^^^^^^
* Added a ``--group`` command line option to group resources in list format by type (the default), ID, or with no grouping.
* Added a ``dump-text`` output format to the command line tool. This format is identical to ``dump``, but instead of a hex dump, it outputs the resource data as text. The data is decoded as MacRoman and classic Mac newlines (``\r``) are translated. This is useful for examining resources that contain mostly plain text.
* Changed the command line tool to sort resources by type and ID, and added a ``--no-sort`` option to disable sorting and output resources in file order (which was the previous behavior).
* Renamed the ``rsrcfork.Resource`` attributes ``resource_type`` and ``resource_id`` to ``type`` and ``id``, respectively. The old names have been deprecated and will be removed in the future, but are still supported for now.
* Changed ``--format=dump`` output to match ``hexdump -C``'s format - spacing has been adjusted, and multiple subsequent identical lines are collapsed into a single ``*``.
Version 1.2.0
^^^^^^^^^^^^^

View File

@ -1,6 +1,6 @@
"""A pure Python, cross-platform library/tool for reading Macintosh resource data, as stored in resource forks and ``.rsrc`` files."""
__version__ = "1.2.0"
__version__ = "1.3.0"
__all__ = [
"Resource",

View File

@ -1,6 +1,7 @@
import argparse
import collections
import enum
import itertools
import sys
import textwrap
import typing
@ -92,7 +93,7 @@ def _filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> ty
continue
for res in resources.values():
matching[res.resource_type, res.resource_id] = res
matching[res.type, res.id] = res
elif filter[0] == filter[-1] == "'":
try:
resources = rf[_bytes_unescape(filter[1:-1])]
@ -100,7 +101,7 @@ def _filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> ty
continue
for res in resources.values():
matching[res.resource_type, res.resource_id] = res
matching[res.type, res.id] = res
else:
pos = filter.find("'", 1)
if pos == -1:
@ -133,7 +134,7 @@ def _filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> ty
for res in resources.values():
if res.name == name:
matching[res.resource_type, res.resource_id] = res
matching[res.type, res.id] = res
break
elif ":" in resid:
if resid.count(":") > 1:
@ -142,24 +143,36 @@ def _filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> ty
start, end = int(start), int(end)
for res in resources.values():
if start <= res.resource_id <= end:
matching[res.resource_type, res.resource_id] = res
if start <= res.id <= end:
matching[res.type, res.id] = res
else:
resid = int(resid)
try:
res = resources[resid]
except KeyError:
continue
matching[res.resource_type, res.resource_id] = res
matching[res.type, res.id] = res
return list(matching.values())
def _hexdump(data: bytes):
last_line = None
asterisk_shown = False
for i in range(0, len(data), 16):
line = data[i:i + 16]
line_hex = " ".join(f"{byte:02x}" for byte in line)
line_char = line.decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES)
print(f"{i:08x} {line_hex:<{16*2+15}} |{line_char}|")
# If the same 16-byte lines appear multiple times, print only the first one, and replace all further lines with a single line with an asterisk.
# This is unambiguous - to find out how many lines were collapsed this way, the user can compare the addresses of the lines before and after the asterisk.
if line == last_line:
if not asterisk_shown:
print("*")
asterisk_shown = True
else:
line_hex_left = " ".join(f"{byte:02x}" for byte in line[:8])
line_hex_right = " ".join(f"{byte:02x}" for byte in line[8:])
line_char = line.decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES)
print(f"{i:08x} {line_hex_left:<{8*2+7}} {line_hex_right:<{8*2+7}} |{line_char}|")
asterisk_shown = False
last_line = line
if data:
print(f"{len(data):08x}")
@ -168,8 +181,11 @@ def _raw_hexdump(data: bytes):
for i in range(0, len(data), 16):
print(" ".join(f"{byte:02x}" for byte in data[i:i + 16]))
def _translate_text(data: bytes) -> str:
return data.decode(_TEXT_ENCODING).replace("\r", "\n")
def _describe_resource(res: api.Resource, *, include_type: bool, decompress: bool) -> str:
id_desc_parts = [f"{res.resource_id}"]
id_desc_parts = [f"{res.id}"]
if res.name is not None:
name = _bytes_escape(res.name, quote='"')
@ -198,11 +214,11 @@ def _describe_resource(res: api.Resource, *, include_type: bool, decompress: boo
desc = f"({id_desc}): {content_desc}"
if include_type:
restype = _bytes_escape(res.resource_type, quote="'")
restype = _bytes_escape(res.type, quote="'")
desc = f"'{restype}' {desc}"
return desc
def main():
def _parse_args() -> argparse.Namespace:
ap = argparse.ArgumentParser(
add_help=False,
fromfile_prefix_chars="@",
@ -229,7 +245,9 @@ def main():
ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview")
ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)")
ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not decompress compressed resources, output compressed resource data as-is")
ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump) (default), human-readable info with newline-translated data (dump-text), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
ap.add_argument("--group", action="store", choices=["none", "type", "id"], default="type", help="Group resources in list view by type or ID, or disable grouping (default: type)")
ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID")
ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")
@ -237,6 +255,168 @@ def main():
ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources")
ns = ap.parse_args()
return ns
def _show_header_data(data: bytes, *, format: str) -> None:
if format == "dump":
_hexdump(data)
elif format == "dump-text":
print(_translate_text(data))
elif format == "hex":
_raw_hexdump(data)
elif format == "raw":
sys.stdout.buffer.write(data)
elif format == "derez":
print("Cannot output file header data in derez format", file=sys.stderr)
sys.exit(1)
else:
raise ValueError(f"Unhandled output format: {format}")
def _show_filtered_resources(resources: typing.Sequence[api.Resource], format: str, decompress: bool) -> None:
if not resources:
if format in ("dump", "dump-text"):
print("No resources matched the filter")
elif format in ("hex", "raw"):
print("No resources matched the filter", file=sys.stderr)
sys.exit(1)
elif format == "derez":
print("/* No resources matched the filter */")
else:
raise AssertionError(f"Unhandled output format: {format}")
elif format in ("hex", "raw") and len(resources) != 1:
print(f"Format {format} can only output a single resource, but the filter matched {len(resources)} resources", file=sys.stderr)
sys.exit(1)
for res in resources:
if decompress:
data = res.data
else:
data = res.data_raw
if format in ("dump", "dump-text"):
# Human-readable info and hex or text dump
desc = _describe_resource(res, include_type=True, decompress=decompress)
print(f"Resource {desc}:")
if format == "dump":
_hexdump(data)
elif format == "dump-text":
print(_translate_text(data))
else:
raise AssertionError(f"Unhandled format: {format!r}")
print()
elif format == "hex":
# Data only as hex
_raw_hexdump(data)
elif format == "raw":
# Data only as raw bytes
sys.stdout.buffer.write(data)
elif format == "derez":
# Like DeRez with no resource definitions
attrs = list(_decompose_flags(res.attributes))
if decompress and api.ResourceAttrs.resCompressed in attrs:
attrs.remove(api.ResourceAttrs.resCompressed)
attrs_comment = " /* was compressed */"
else:
attrs_comment = ""
attr_descs = [_REZ_ATTR_NAMES[attr] for attr in attrs]
if None in attr_descs:
attr_descs[:] = [f"${res.attributes.value:02X}"]
parts = [str(res.id)]
if res.name is not None:
name = _bytes_escape(res.name, quote='"')
parts.append(f'"{name}"')
parts += attr_descs
restype = _bytes_escape(res.type, quote="'")
print(f"data '{restype}' ({', '.join(parts)}{attrs_comment}) {{")
for i in range(0, len(data), 16):
# Two-byte grouping is really annoying to implement.
groups = []
for j in range(0, 16, 2):
if i+j >= len(data):
break
elif i+j+1 >= len(data):
groups.append(f"{data[i+j]:02X}")
else:
groups.append(f"{data[i+j]:02X}{data[i+j+1]:02X}")
s = f'$"{" ".join(groups)}"'
comment = "/* " + data[i:i + 16].decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES) + " */"
print(f"\t{s:<54s}{comment}")
print("};")
print()
else:
raise ValueError(f"Unhandled output format: {format}")
def _list_resource_file(rf: api.ResourceFile, *, sort: bool, group: str, decompress: bool) -> None:
if rf.header_system_data != bytes(len(rf.header_system_data)):
print("Header system data:")
_hexdump(rf.header_system_data)
if rf.header_application_data != bytes(len(rf.header_application_data)):
print("Header application data:")
_hexdump(rf.header_application_data)
attrs = _decompose_flags(rf.file_attributes)
if attrs:
print("File attributes: " + " | ".join(attr.name for attr in attrs))
if len(rf) == 0:
print("No resources (empty resource file)")
return
if group == "none":
all_resources = []
for reses in rf.values():
all_resources.extend(reses.values())
if sort:
all_resources.sort(key=lambda res: (res.type, res.id))
print(f"{len(all_resources)} resources:")
for res in all_resources:
print(_describe_resource(res, include_type=True, decompress=decompress))
elif group == "type":
print(f"{len(rf)} resource types:")
restype_items = rf.items()
if sort:
restype_items = sorted(restype_items, key=lambda item: item[0])
for typecode, resources in restype_items:
restype = _bytes_escape(typecode, quote="'")
print(f"'{restype}': {len(resources)} resources:")
resources_items = resources.items()
if sort:
resources_items = sorted(resources_items, key=lambda item: item[0])
for resid, res in resources_items:
print(_describe_resource(res, include_type=False, decompress=decompress))
print()
elif group == "id":
all_resources = []
for reses in rf.values():
all_resources.extend(reses.values())
all_resources.sort(key=lambda res: res.id)
resources_by_id = {resid: list(reses) for resid, reses in itertools.groupby(all_resources, key=lambda res: res.id)}
print(f"{len(resources_by_id)} resource IDs:")
for resid, resources in resources_by_id.items():
print(f"({resid}): {len(resources)} resources:")
if sort:
resources.sort(key=lambda res: res.type)
for res in resources:
print(_describe_resource(res, include_type=True, decompress=decompress))
print()
else:
raise AssertionError(f"Unhandled group mode: {group!r}")
def main():
ns = _parse_args()
if ns.file == "-":
if ns.fork is not None:
@ -254,17 +434,7 @@ def main():
else:
data = rf.header_application_data
if ns.format == "dump":
_hexdump(data)
elif ns.format == "hex":
_raw_hexdump(data)
elif ns.format == "raw":
sys.stdout.buffer.write(data)
elif ns.format == "derez":
print("Cannot output file header data in derez format", file=sys.stderr)
sys.exit(1)
else:
raise ValueError(f"Unhandled output format: {ns.format}")
_show_header_data(data, format=ns.format)
elif ns.filter or ns.all:
if ns.filter:
resources = _filter_resources(rf, ns.filter)
@ -273,108 +443,12 @@ def main():
for reses in rf.values():
resources.extend(reses.values())
if not resources:
if ns.format == "dump":
print("No resources matched the filter")
elif ns.format in ("hex", "raw"):
print("No resources matched the filter", file=sys.stderr)
sys.exit(1)
elif ns.format == "derez":
print("/* No resources matched the filter */")
else:
raise AssertionError(f"Unhandled output format: {ns.format}")
elif ns.format in ("hex", "raw") and len(resources) != 1:
print(f"Format {ns.format} can only output a single resource, but the filter matched {len(resources)} resources", file=sys.stderr)
sys.exit(1)
if ns.sort:
resources.sort(key=lambda res: (res.type, res.id))
for res in resources:
if ns.decompress:
data = res.data
else:
data = res.data_raw
if ns.format == "dump":
# Human-readable info and hex dump
desc = _describe_resource(res, include_type=True, decompress=ns.decompress)
print(f"Resource {desc}:")
_hexdump(data)
print()
elif ns.format == "hex":
# Data only as hex
_raw_hexdump(data)
elif ns.format == "raw":
# Data only as raw bytes
sys.stdout.buffer.write(data)
elif ns.format == "derez":
# Like DeRez with no resource definitions
attrs = list(_decompose_flags(res.attributes))
if ns.decompress and api.ResourceAttrs.resCompressed in attrs:
attrs.remove(api.ResourceAttrs.resCompressed)
attrs_comment = " /* was compressed */"
else:
attrs_comment = ""
attr_descs = [_REZ_ATTR_NAMES[attr] for attr in attrs]
if None in attr_descs:
attr_descs[:] = [f"${res.attributes.value:02X}"]
parts = [str(res.resource_id)]
if res.name is not None:
name = _bytes_escape(res.name, quote='"')
parts.append(f'"{name}"')
parts += attr_descs
restype = _bytes_escape(res.resource_type, quote="'")
print(f"data '{restype}' ({', '.join(parts)}{attrs_comment}) {{")
for i in range(0, len(data), 16):
# Two-byte grouping is really annoying to implement.
groups = []
for j in range(0, 16, 2):
if i+j >= len(data):
break
elif i+j+1 >= len(data):
groups.append(f"{data[i+j]:02X}")
else:
groups.append(f"{data[i+j]:02X}{data[i+j+1]:02X}")
s = f'$"{" ".join(groups)}"'
comment = "/* " + data[i:i + 16].decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES) + " */"
print(f"\t{s:<54s}{comment}")
print("};")
print()
else:
raise ValueError(f"Unhandled output format: {ns.format}")
_show_filtered_resources(resources, format=ns.format, decompress=ns.decompress)
else:
if rf.header_system_data != bytes(len(rf.header_system_data)):
print("Header system data:")
_hexdump(rf.header_system_data)
if rf.header_application_data != bytes(len(rf.header_application_data)):
print("Header application data:")
_hexdump(rf.header_application_data)
attrs = _decompose_flags(rf.file_attributes)
if attrs:
print("File attributes: " + " | ".join(attr.name for attr in attrs))
if len(rf) > 0:
print(f"{len(rf)} resource types:")
for typecode, resources in rf.items():
restype = _bytes_escape(typecode, quote="'")
print(f"'{restype}': {len(resources)} resources:")
for resid, res in rf[typecode].items():
print(_describe_resource(res, include_type=False, decompress=ns.decompress))
print()
else:
print("No resource types (empty resource file)")
_list_resource_file(rf, sort=ns.sort, group=ns.group, decompress=ns.decompress)
sys.exit(0)

View File

@ -96,15 +96,15 @@ class ResourceAttrs(enum.Flag):
class Resource(object):
"""A single resource from a resource file."""
__slots__ = ("resource_type", "resource_id", "name", "attributes", "data_raw", "_data_decompressed")
__slots__ = ("type", "id", "name", "attributes", "data_raw", "_data_decompressed")
def __init__(self, resource_type: bytes, resource_id: int, name: typing.Optional[bytes], attributes: ResourceAttrs, data_raw: bytes):
"""Create a new resource with the given type code, ID, name, attributes, and data."""
super().__init__()
self.resource_type: bytes = resource_type
self.resource_id: int = resource_id
self.type: bytes = resource_type
self.id: int = resource_id
self.name: typing.Optional[bytes] = name
self.attributes: ResourceAttrs = attributes
self.data_raw: bytes = data_raw
@ -126,7 +126,17 @@ class Resource(object):
if not decompress_ok:
data_repr = f"<decompression failed - compressed data: {data_repr}>"
return f"{type(self).__module__}.{type(self).__qualname__}(resource_type={self.resource_type}, resource_id={self.resource_id}, name={self.name}, attributes={self.attributes}, data={data_repr})"
return f"{type(self).__module__}.{type(self).__qualname__}(type={self.type}, id={self.id}, name={self.name}, attributes={self.attributes}, data={data_repr})"
@property
def resource_type(self) -> bytes:
warnings.warn(DeprecationWarning("The resource_type attribute has been deprecated and will be removed in a future version. Please use the type attribute instead."))
return self.type
@property
def resource_id(self) -> int:
warnings.warn(DeprecationWarning("The resource_id attribute has been deprecated and will be removed in a future version. Please use the id attribute instead."))
return self.id
@property
def data(self) -> bytes: