Release version 1.3.0

Enable --sort by default and add --no-sort to disable sorting
In most cases the file order is not important and the unsorted output hurts readability. The performance impact of sorting is relatively small and barely noticeable even with large resource files.
2025-07-01 17:23:51 +00:00 · 2019-09-16 16:34:40 +02:00 · 2019-09-16 15:25:41 +02:00 · 2019-09-16 14:58:21 +02:00 · 2019-09-15 15:56:03 +02:00 · 2019-09-15 15:38:01 +02:00
4 changed files with 223 additions and 130 deletions
--- a/README.rst
+++ b/README.rst
@@ -56,7 +56,7 @@ Simple example
    >>> rf
    <rsrcfork.ResourceFile at 0x1046e6048, attributes ResourceFileAttrs.0, containing 4 resource types: [b'utxt', b'utf8', b'TEXT', b'drag']>
    >>> rf[b"TEXT"]
-    <rsrcfork.ResourceFile._LazyResourceMap at 0x10470ed30 containing one resource: rsrcfork.Resource(resource_type=b'TEXT', resource_id=256, name=None, attributes=ResourceAttrs.0, data=b'Here is some text')>
+    <rsrcfork.ResourceFile._LazyResourceMap at 0x10470ed30 containing one resource: rsrcfork.Resource(type=b'TEXT', id=256, name=None, attributes=ResourceAttrs.0, data=b'Here is some text')>

 Automatic selection of data/resource fork
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -127,6 +127,15 @@ If these links are no longer functional, some are archived in the `Internet Arch
 Changelog
 ---------

+Version 1.3.0
+^^^^^^^^^^^^^
+
+* Added a ``--group`` command line option to group resources in list format by type (the default), ID, or with no grouping.
+* Added a ``dump-text`` output format to the command line tool. This format is identical to ``dump``, but instead of a hex dump, it outputs the resource data as text. The data is decoded as MacRoman and classic Mac newlines (``\r``) are translated. This is useful for examining resources that contain mostly plain text.
+* Changed the command line tool to sort resources by type and ID, and added a ``--no-sort`` option to disable sorting and output resources in file order (which was the previous behavior).
+* Renamed the ``rsrcfork.Resource`` attributes ``resource_type`` and ``resource_id`` to ``type`` and ``id``, respectively. The old names have been deprecated and will be removed in the future, but are still supported for now.
+* Changed ``--format=dump`` output to match ``hexdump -C``'s format - spacing has been adjusted, and multiple subsequent identical lines are collapsed into a single ``*``.
+
 Version 1.2.0
 ^^^^^^^^^^^^^

--- a/rsrcfork/__init__.py
+++ b/rsrcfork/__init__.py
@@ -1,6 +1,6 @@
 """A pure Python, cross-platform library/tool for reading Macintosh resource data, as stored in resource forks and ``.rsrc`` files."""

-__version__ = "1.2.0"
+__version__ = "1.3.0"

 __all__ = [
 	"Resource",
--- a/rsrcfork/__main__.py
+++ b/rsrcfork/__main__.py
@@ -1,6 +1,7 @@
 import argparse
 import collections
 import enum
+import itertools
 import sys
 import textwrap
 import typing
@@ -92,7 +93,7 @@ def _filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> ty
 				continue
 			
 			for res in resources.values():
-				matching[res.resource_type, res.resource_id] = res
+				matching[res.type, res.id] = res
 		elif filter[0] == filter[-1] == "'":
 			try:
 				resources = rf[_bytes_unescape(filter[1:-1])]
@@ -100,7 +101,7 @@ def _filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> ty
 				continue
 			
 			for res in resources.values():
-				matching[res.resource_type, res.resource_id] = res
+				matching[res.type, res.id] = res
 		else:
 			pos = filter.find("'", 1)
 			if pos == -1:
@@ -133,7 +134,7 @@ def _filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> ty
 				
 				for res in resources.values():
 					if res.name == name:
-						matching[res.resource_type, res.resource_id] = res
+						matching[res.type, res.id] = res
 						break
 			elif ":" in resid:
 				if resid.count(":") > 1:
@@ -142,24 +143,36 @@ def _filter_resources(rf: api.ResourceFile, filters: typing.Sequence[str]) -> ty
 				start, end = int(start), int(end)
 				
 				for res in resources.values():
-					if start <= res.resource_id <= end:
-						matching[res.resource_type, res.resource_id] = res
+					if start <= res.id <= end:
+						matching[res.type, res.id] = res
 			else:
 				resid = int(resid)
 				try:
 					res = resources[resid]
 				except KeyError:
 					continue
-				matching[res.resource_type, res.resource_id] = res
+				matching[res.type, res.id] = res
 	
 	return list(matching.values())

 def _hexdump(data: bytes):
+	last_line = None
+	asterisk_shown = False
 	for i in range(0, len(data), 16):
 		line = data[i:i + 16]
-		line_hex = " ".join(f"{byte:02x}" for byte in line)
-		line_char = line.decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES)
-		print(f"{i:08x} {line_hex:<{16*2+15}} |{line_char}|")
+		# If the same 16-byte lines appear multiple times, print only the first one, and replace all further lines with a single line with an asterisk.
+		# This is unambiguous - to find out how many lines were collapsed this way, the user can compare the addresses of the lines before and after the asterisk.
+		if line == last_line:
+			if not asterisk_shown:
+				print("*")
+				asterisk_shown = True
+		else:
+			line_hex_left = " ".join(f"{byte:02x}" for byte in line[:8])
+			line_hex_right = " ".join(f"{byte:02x}" for byte in line[8:])
+			line_char = line.decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES)
+			print(f"{i:08x}  {line_hex_left:<{8*2+7}}  {line_hex_right:<{8*2+7}}  |{line_char}|")
+			asterisk_shown = False
+		last_line = line
 	
 	if data:
 		print(f"{len(data):08x}")
@@ -168,8 +181,11 @@ def _raw_hexdump(data: bytes):
 	for i in range(0, len(data), 16):
 		print(" ".join(f"{byte:02x}" for byte in data[i:i + 16]))

+def _translate_text(data: bytes) -> str:
+	return data.decode(_TEXT_ENCODING).replace("\r", "\n")
+
 def _describe_resource(res: api.Resource, *, include_type: bool, decompress: bool) -> str:
-	id_desc_parts = [f"{res.resource_id}"]
+	id_desc_parts = [f"{res.id}"]
 	
 	if res.name is not None:
 		name = _bytes_escape(res.name, quote='"')
@@ -198,11 +214,11 @@ def _describe_resource(res: api.Resource, *, include_type: bool, decompress: boo
 	
 	desc = f"({id_desc}): {content_desc}"
 	if include_type:
-		restype = _bytes_escape(res.resource_type, quote="'")
+		restype = _bytes_escape(res.type, quote="'")
 		desc = f"'{restype}' {desc}"
 	return desc

-def main():
+def _parse_args() -> argparse.Namespace:
 	ap = argparse.ArgumentParser(
 		add_help=False,
 		fromfile_prefix_chars="@",
@@ -229,7 +245,9 @@ def main():
 	ap.add_argument("-a", "--all", action="store_true", help="When no filters are given, show all resources in full, instead of an overview")
 	ap.add_argument("-f", "--fork", choices=["auto", "data", "rsrc"], default="auto", help="The fork from which to read the resource data, or auto to guess (default: %(default)s)")
 	ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not decompress compressed resources, output compressed resource data as-is")
-	ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
+	ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump) (default), human-readable info with newline-translated data (dump-text), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
+	ap.add_argument("--group", action="store", choices=["none", "type", "id"], default="type", help="Group resources in list view by type or ID, or disable grouping (default: type)")
+	ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID")
 	ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
 	ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")
 	
@@ -237,6 +255,168 @@ def main():
 	ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources")
 	
 	ns = ap.parse_args()
+	return ns
+
+def _show_header_data(data: bytes, *, format: str) -> None:
+	if format == "dump":
+		_hexdump(data)
+	elif format == "dump-text":
+		print(_translate_text(data))
+	elif format == "hex":
+		_raw_hexdump(data)
+	elif format == "raw":
+		sys.stdout.buffer.write(data)
+	elif format == "derez":
+		print("Cannot output file header data in derez format", file=sys.stderr)
+		sys.exit(1)
+	else:
+		raise ValueError(f"Unhandled output format: {format}")
+
+def _show_filtered_resources(resources: typing.Sequence[api.Resource], format: str, decompress: bool) -> None:
+	if not resources:
+		if format in ("dump", "dump-text"):
+			print("No resources matched the filter")
+		elif format in ("hex", "raw"):
+			print("No resources matched the filter", file=sys.stderr)
+			sys.exit(1)
+		elif format == "derez":
+			print("/* No resources matched the filter */")
+		else:
+			raise AssertionError(f"Unhandled output format: {format}")
+	elif format in ("hex", "raw") and len(resources) != 1:
+		print(f"Format {format} can only output a single resource, but the filter matched {len(resources)} resources", file=sys.stderr)
+		sys.exit(1)
+	
+	for res in resources:
+		if decompress:
+			data = res.data
+		else:
+			data = res.data_raw
+		
+		if format in ("dump", "dump-text"):
+			# Human-readable info and hex or text dump
+			desc = _describe_resource(res, include_type=True, decompress=decompress)
+			print(f"Resource {desc}:")
+			if format == "dump":
+				_hexdump(data)
+			elif format == "dump-text":
+				print(_translate_text(data))
+			else:
+				raise AssertionError(f"Unhandled format: {format!r}")
+			print()
+		elif format == "hex":
+			# Data only as hex
+			
+			_raw_hexdump(data)
+		elif format == "raw":
+			# Data only as raw bytes
+			
+			sys.stdout.buffer.write(data)
+		elif format == "derez":
+			# Like DeRez with no resource definitions
+			
+			attrs = list(_decompose_flags(res.attributes))
+			
+			if decompress and api.ResourceAttrs.resCompressed in attrs:
+				attrs.remove(api.ResourceAttrs.resCompressed)
+				attrs_comment = " /* was compressed */"
+			else:
+				attrs_comment = ""
+			
+			attr_descs = [_REZ_ATTR_NAMES[attr] for attr in attrs]
+			if None in attr_descs:
+				attr_descs[:] = [f"${res.attributes.value:02X}"]
+			
+			parts = [str(res.id)]
+			
+			if res.name is not None:
+				name = _bytes_escape(res.name, quote='"')
+				parts.append(f'"{name}"')
+			
+			parts += attr_descs
+			
+			restype = _bytes_escape(res.type, quote="'")
+			print(f"data '{restype}' ({', '.join(parts)}{attrs_comment}) {{")
+			
+			for i in range(0, len(data), 16):
+				# Two-byte grouping is really annoying to implement.
+				groups = []
+				for j in range(0, 16, 2):
+					if i+j >= len(data):
+						break
+					elif i+j+1 >= len(data):
+						groups.append(f"{data[i+j]:02X}")
+					else:
+						groups.append(f"{data[i+j]:02X}{data[i+j+1]:02X}")
+				
+				s = f'$"{" ".join(groups)}"'
+				comment = "/* " + data[i:i + 16].decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES) + " */"
+				print(f"\t{s:<54s}{comment}")
+			
+			print("};")
+			print()
+		else:
+			raise ValueError(f"Unhandled output format: {format}")
+
+def _list_resource_file(rf: api.ResourceFile, *, sort: bool, group: str, decompress: bool) -> None:
+	if rf.header_system_data != bytes(len(rf.header_system_data)):
+		print("Header system data:")
+		_hexdump(rf.header_system_data)
+	
+	if rf.header_application_data != bytes(len(rf.header_application_data)):
+		print("Header application data:")
+		_hexdump(rf.header_application_data)
+	
+	attrs = _decompose_flags(rf.file_attributes)
+	if attrs:
+		print("File attributes: " + " | ".join(attr.name for attr in attrs))
+	
+	if len(rf) == 0:
+		print("No resources (empty resource file)")
+		return
+	
+	if group == "none":
+		all_resources = []
+		for reses in rf.values():
+			all_resources.extend(reses.values())
+		if sort:
+			all_resources.sort(key=lambda res: (res.type, res.id))
+		print(f"{len(all_resources)} resources:")
+		for res in all_resources:
+			print(_describe_resource(res, include_type=True, decompress=decompress))
+	elif group == "type":
+		print(f"{len(rf)} resource types:")
+		restype_items = rf.items()
+		if sort:
+			restype_items = sorted(restype_items, key=lambda item: item[0])
+		for typecode, resources in restype_items:
+			restype = _bytes_escape(typecode, quote="'")
+			print(f"'{restype}': {len(resources)} resources:")
+			resources_items = resources.items()
+			if sort:
+				resources_items = sorted(resources_items, key=lambda item: item[0])
+			for resid, res in resources_items:
+				print(_describe_resource(res, include_type=False, decompress=decompress))
+			print()
+	elif group == "id":
+		all_resources = []
+		for reses in rf.values():
+			all_resources.extend(reses.values())
+		all_resources.sort(key=lambda res: res.id)
+		resources_by_id = {resid: list(reses) for resid, reses in itertools.groupby(all_resources, key=lambda res: res.id)}
+		print(f"{len(resources_by_id)} resource IDs:")
+		for resid, resources in resources_by_id.items():
+			print(f"({resid}): {len(resources)} resources:")
+			if sort:
+				resources.sort(key=lambda res: res.type)
+			for res in resources:
+				print(_describe_resource(res, include_type=True, decompress=decompress))
+			print()
+	else:
+		raise AssertionError(f"Unhandled group mode: {group!r}")
+
+def main():
+	ns = _parse_args()
 	
 	if ns.file == "-":
 		if ns.fork is not None:
@@ -254,17 +434,7 @@ def main():
 			else:
 				data = rf.header_application_data
 			
-			if ns.format == "dump":
-				_hexdump(data)
-			elif ns.format == "hex":
-				_raw_hexdump(data)
-			elif ns.format == "raw":
-				sys.stdout.buffer.write(data)
-			elif ns.format == "derez":
-				print("Cannot output file header data in derez format", file=sys.stderr)
-				sys.exit(1)
-			else:
-				raise ValueError(f"Unhandled output format: {ns.format}")
+			_show_header_data(data, format=ns.format)
 		elif ns.filter or ns.all:
 			if ns.filter:
 				resources = _filter_resources(rf, ns.filter)
@@ -273,108 +443,12 @@ def main():
 				for reses in rf.values():
 					resources.extend(reses.values())
 			
-			if not resources:
-				if ns.format == "dump":
-					print("No resources matched the filter")
-				elif ns.format in ("hex", "raw"):
-					print("No resources matched the filter", file=sys.stderr)
-					sys.exit(1)
-				elif ns.format == "derez":
-					print("/* No resources matched the filter */")
-				else:
-					raise AssertionError(f"Unhandled output format: {ns.format}")
-			elif ns.format in ("hex", "raw") and len(resources) != 1:
-				print(f"Format {ns.format} can only output a single resource, but the filter matched {len(resources)} resources", file=sys.stderr)
-				sys.exit(1)
+			if ns.sort:
+				resources.sort(key=lambda res: (res.type, res.id))
 			
-			for res in resources:
-				if ns.decompress:
-					data = res.data
-				else:
-					data = res.data_raw
-				
-				if ns.format == "dump":
-					# Human-readable info and hex dump
-					desc = _describe_resource(res, include_type=True, decompress=ns.decompress)
-					print(f"Resource {desc}:")
-					_hexdump(data)
-					print()
-				elif ns.format == "hex":
-					# Data only as hex
-					
-					_raw_hexdump(data)
-				elif ns.format == "raw":
-					# Data only as raw bytes
-					
-					sys.stdout.buffer.write(data)
-				elif ns.format == "derez":
-					# Like DeRez with no resource definitions
-					
-					attrs = list(_decompose_flags(res.attributes))
-					
-					if ns.decompress and api.ResourceAttrs.resCompressed in attrs:
-						attrs.remove(api.ResourceAttrs.resCompressed)
-						attrs_comment = " /* was compressed */"
-					else:
-						attrs_comment = ""
-					
-					attr_descs = [_REZ_ATTR_NAMES[attr] for attr in attrs]
-					if None in attr_descs:
-						attr_descs[:] = [f"${res.attributes.value:02X}"]
-					
-					parts = [str(res.resource_id)]
-					
-					if res.name is not None:
-						name = _bytes_escape(res.name, quote='"')
-						parts.append(f'"{name}"')
-					
-					parts += attr_descs
-					
-					restype = _bytes_escape(res.resource_type, quote="'")
-					print(f"data '{restype}' ({', '.join(parts)}{attrs_comment}) {{")
-					
-					for i in range(0, len(data), 16):
-						# Two-byte grouping is really annoying to implement.
-						groups = []
-						for j in range(0, 16, 2):
-							if i+j >= len(data):
-								break
-							elif i+j+1 >= len(data):
-								groups.append(f"{data[i+j]:02X}")
-							else:
-								groups.append(f"{data[i+j]:02X}{data[i+j+1]:02X}")
-						
-						s = f'$"{" ".join(groups)}"'
-						comment = "/* " + data[i:i + 16].decode(_TEXT_ENCODING).translate(_TRANSLATE_NONPRINTABLES) + " */"
-						print(f"\t{s:<54s}{comment}")
-					
-					print("};")
-					print()
-				else:
-					raise ValueError(f"Unhandled output format: {ns.format}")
+			_show_filtered_resources(resources, format=ns.format, decompress=ns.decompress)
 		else:
-			if rf.header_system_data != bytes(len(rf.header_system_data)):
-				print("Header system data:")
-				_hexdump(rf.header_system_data)
-			
-			if rf.header_application_data != bytes(len(rf.header_application_data)):
-				print("Header application data:")
-				_hexdump(rf.header_application_data)
-			
-			attrs = _decompose_flags(rf.file_attributes)
-			if attrs:
-				print("File attributes: " + " | ".join(attr.name for attr in attrs))
-			
-			if len(rf) > 0:
-				print(f"{len(rf)} resource types:")
-				for typecode, resources in rf.items():
-					restype = _bytes_escape(typecode, quote="'")
-					print(f"'{restype}': {len(resources)} resources:")
-					for resid, res in rf[typecode].items():
-						print(_describe_resource(res, include_type=False, decompress=ns.decompress))
-					print()
-			else:
-				print("No resource types (empty resource file)")
+			_list_resource_file(rf, sort=ns.sort, group=ns.group, decompress=ns.decompress)
 	
 	sys.exit(0)

--- a/rsrcfork/api.py
+++ b/rsrcfork/api.py
@@ -96,15 +96,15 @@ class ResourceAttrs(enum.Flag):
 class Resource(object):
 	"""A single resource from a resource file."""
 	
-	__slots__ = ("resource_type", "resource_id", "name", "attributes", "data_raw", "_data_decompressed")
+	__slots__ = ("type", "id", "name", "attributes", "data_raw", "_data_decompressed")
 	
 	def __init__(self, resource_type: bytes, resource_id: int, name: typing.Optional[bytes], attributes: ResourceAttrs, data_raw: bytes):
 		"""Create a new resource with the given type code, ID, name, attributes, and data."""
 		
 		super().__init__()
 		
-		self.resource_type: bytes = resource_type
-		self.resource_id: int = resource_id
+		self.type: bytes = resource_type
+		self.id: int = resource_id
 		self.name: typing.Optional[bytes] = name
 		self.attributes: ResourceAttrs = attributes
 		self.data_raw: bytes = data_raw
@@ -126,7 +126,17 @@ class Resource(object):
 		if not decompress_ok:
 			data_repr = f"<decompression failed - compressed data: {data_repr}>"
 		
-		return f"{type(self).__module__}.{type(self).__qualname__}(resource_type={self.resource_type}, resource_id={self.resource_id}, name={self.name}, attributes={self.attributes}, data={data_repr})"
+		return f"{type(self).__module__}.{type(self).__qualname__}(type={self.type}, id={self.id}, name={self.name}, attributes={self.attributes}, data={data_repr})"
+	
+	@property
+	def resource_type(self) -> bytes:
+		warnings.warn(DeprecationWarning("The resource_type attribute has been deprecated and will be removed in a future version. Please use the type attribute instead."))
+		return self.type
+	
+	@property
+	def resource_id(self) -> int:
+		warnings.warn(DeprecationWarning("The resource_id attribute has been deprecated and will be removed in a future version. Please use the id attribute instead."))
+		return self.id
 	
 	@property
 	def data(self) -> bytes:
Author	SHA1	Message	Date
dgelessus	7207b1d32b	Release version 1.3.0	2019-09-16 16:34:40 +02:00
dgelessus	1de940d597	Enable --sort by default and add --no-sort to disable sorting. In most cases the file order is not important and the unsorted output hurts readability. The performance impact of sorting is relatively small and barely noticeable even with large resource files.	2019-09-16 15:25:41 +02:00
dgelessus	d7255bc977	Adjust --group=id output format slightly	2019-09-16 14:58:21 +02:00
dgelessus	c6337bdfbd	Rename resource_type and resource_id attributes to type and id. The old names were chosen to avoid conflicts with Python's type and id builtins, but for attribute names this is not necessary.	2019-09-15 15:56:03 +02:00
dgelessus	f4c2717720	Add command-line --group option	2019-09-15 15:38:01 +02:00
dgelessus	8ad0234633	Add command-line --sort option	2019-09-13 15:00:56 +02:00
dgelessus	7612322c43	Add dump-text output format on command line	2019-09-13 14:51:16 +02:00
dgelessus	51ae7c6a09	Refactor __main__.main into smaller functions	2019-09-13 14:17:21 +02:00
dgelessus	194c886472	Change hex dump output format to match hexdump -C	2019-09-13 10:51:27 +02:00
dgelessus	b2fa5f8b0f	Collapse multiple subsequent identical lines in hex dumps	2019-09-13 10:40:03 +02:00
dgelessus	752ec9e828	Bump version to 1.2.1.dev	2019-09-13 10:22:43 +02:00