Release version 1.7.0

Remove incorrect outdated paragraph from list subcommand help
Change formatting of command help strings in source code
2025-07-03 00:23:56 +00:00 · 2019-12-17 11:28:26 +01:00 · 2019-12-10 16:15:18 +01:00 · 2019-12-10 15:58:20 +01:00 · 2019-12-04 23:36:57 +01:00 · 2019-12-04 23:36:56 +01:00
5 changed files with 213 additions and 125 deletions
--- a/README.rst
+++ b/README.rst
@@ -113,6 +113,16 @@ For technical info and documentation about resource files and resources, see the
 Changelog
 ---------

+Version 1.7
+^^^^^^^^^^^
+
+* Added a ``raw-decompress`` subcommand to decompress compressed resource data stored in a standalone file rather than as a resource.
+* Optimized lazy loading of ``Resource`` objects. Previously, resource data would be read from disk whenever a ``Resource`` object was looked up, even if the data itself was never used. Now the resource data is only loaded once the ``data`` (or ``data_raw``) attribute is accessed.
+
+  * The same optimization applies to the ``name`` attribute, although this is unlikely to make a difference in practice.
+  * As a result, it is no longer possible to construct ``Resource`` objects without a resource file. This was previously possible, but had no practical use.
+* Fixed a small error in the ``'dcmp' (0)`` decompression implementation.
+
 Version 1.6.0
 ^^^^^^^^^^^^^

--- a/rsrcfork/__init__.py
+++ b/rsrcfork/__init__.py
@@ -20,7 +20,7 @@
 # * Add a new empty section for the next version to the README.rst changelog.
 # * Commit and push the changes to master.

-__version__ = "1.6.0"
+__version__ = "1.7.0"

 __all__ = [
 	"Resource",
--- a/rsrcfork/__main__.py
+++ b/rsrcfork/__main__.py
@@ -483,7 +483,7 @@ def make_argument_parser(*, description: str, **kwargs: typing.Any) -> argparse.
 	
 	ap = argparse.ArgumentParser(
 		formatter_class=argparse.RawDescriptionHelpFormatter,
-		description=textwrap.dedent(description),
+		description=description,
 		allow_abbrev=False,
 		add_help=False,
 		**kwargs,
@@ -521,21 +521,21 @@ def do_read_header(prog: str, args: typing.List[str]) -> typing.NoReturn:
 	ap = make_argument_parser(
 		prog=prog,
 		description="""
-		Read and output a resource file's header data.
-		
-		The header data consists of two parts:
-		
-		The system-reserved data is 112 bytes long and used by the Classic Mac OS
-		Finder as temporary storage space. It usually contains parts of the
-		file metadata (name, type/creator code, etc.).
-		
-		The application-specific data is 128 bytes long and is available for use by
-		applications. In practice it usually contains junk data that happened to be in
-		memory when the resource file was written.
-		
-		Mac OS X does not use the header data fields anymore. Resource files written
-		on Mac OS X normally have both parts of the header data set to all zero bytes.
-		""",
+Read and output a resource file's header data.
+
+The header data consists of two parts:
+
+The system-reserved data is 112 bytes long and used by the Classic Mac OS
+Finder as temporary storage space. It usually contains parts of the
+file metadata (name, type/creator code, etc.).
+
+The application-specific data is 128 bytes long and is available for use by
+applications. In practice it usually contains junk data that happened to be in
+memory when the resource file was written.
+
+Mac OS X does not use the header data fields anymore. Resource files written
+on Mac OS X normally have both parts of the header data set to all zero bytes.
+""",
 	)
 	
 	ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw"], default="dump", help="How to output the header data: human-readable info with hex dump (dump) (default), human-readable info with newline-translated data (dump-text), data only as hex (hex), or data only as raw bytes (raw). Default: %(default)s")
@@ -586,8 +586,8 @@ def do_info(prog: str, args: typing.List[str]) -> typing.NoReturn:
 	ap = make_argument_parser(
 		prog=prog,
 		description="""
-		Display technical information and stats about the resource file.
-		""",
+Display technical information and stats about the resource file.
+""",
 	)
 	add_resource_file_args(ap)
 	
@@ -618,16 +618,13 @@ def do_list(prog: str, args: typing.List[str]) -> typing.NoReturn:
 	ap = make_argument_parser(
 		prog=prog,
 		description="""
-		List the resources stored in a resource file.
-		
-		Each resource's type, ID, name (if any), attributes (if any), and data length
-		are displayed. For compressed resources, the compressed and decompressed data
-		length are displayed, as well as the ID of the 'dcmp' resource used to
-		decompress the resource data.
-		
-		If the resource file has any global (resource map) attributes or non-zero
-		header data, they are displayed before the list of resources.
-		""",
+List the resources stored in a resource file.
+
+Each resource's type, ID, name (if any), attributes (if any), and data length
+are displayed. For compressed resources, the compressed and decompressed data
+length are displayed, as well as the ID of the 'dcmp' resource used to
+decompress the resource data.
+""",
 	)
 	
 	ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not parse the data header of compressed resources and only output their compressed length.")
@@ -646,29 +643,29 @@ def do_read(prog: str, args: typing.List[str]) -> typing.NoReturn:
 	ap = make_argument_parser(
 		prog=prog,
 		description="""
-		Read the data of one or more resources.
-		
-		The resource filters use syntax similar to Rez (resource definition) files.
-		Each filter can have one of the following forms:
-		
-		An unquoted type name (without escapes): TYPE
-		A quoted type name: 'TYPE'
-		A quoted type name and an ID: 'TYPE' (42)
-		A quoted type name and an ID range: 'TYPE' (24:42)
-		A quoted type name and a resource name: 'TYPE' ("foobar")
-		
-		Note that the resource filter syntax uses quotes, parentheses and spaces,
-		which have special meanings in most shells. It is recommended to quote each
-		resource filter (using double quotes) to ensure that it is not interpreted
-		or rewritten by the shell.
-		""",
+Read the data of one or more resources.
+
+The resource filters use syntax similar to Rez (resource definition) files.
+Each filter can have one of the following forms:
+
+An unquoted type name (without escapes): TYPE
+A quoted type name: 'TYPE'
+A quoted type name and an ID: 'TYPE' (42)
+A quoted type name and an ID range: 'TYPE' (24:42)
+A quoted type name and a resource name: 'TYPE' ("foobar")
+
+Note that the resource filter syntax uses quotes, parentheses and spaces,
+which have special meanings in most shells. It is recommended to quote each
+resource filter (using double quotes) to ensure that it is not interpreted
+or rewritten by the shell.
+""",
 	)
 	
 	ap.add_argument("--no-decompress", action="store_false", dest="decompress", help="Do not decompress compressed resources, output the raw compressed resource data.")
 	ap.add_argument("--format", choices=["dump", "dump-text", "hex", "raw", "derez"], default="dump", help="How to output the resources: human-readable info with hex dump (dump), human-readable info with newline-translated data (dump-text), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez). Default: %(default)s")
 	ap.add_argument("--no-sort", action="store_false", dest="sort", help="Output resources in the order in which they are stored in the file, instead of sorting them by type and ID.")
 	add_resource_file_args(ap)
-	ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to read. If no filters ae specified, all resources are read.")
+	ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to read. If no filters are specified, all resources are read.")
 	
 	ns = ap.parse_args(args)
 	
@@ -685,12 +682,73 @@ def do_read(prog: str, args: typing.List[str]) -> typing.NoReturn:
 		
 		show_filtered_resources(resources, format=ns.format, decompress=ns.decompress)

+def do_raw_decompress(prog: str, args: typing.List[str]) -> typing.NoReturn:
+	"""Decompress raw compressed resource data."""
+	
+	ap = make_argument_parser(
+		prog=prog,
+		description="""
+Decompress raw compressed resource data that is stored in a standalone file
+and not as a resource in a resource file.
+
+This subcommand can be used in a shell pipeline by passing - as the input and
+output file name, i. e. "%(prog)s - -".
+
+Note: All other rsrcfork subcommands natively support compressed resources and
+will automatically decompress them as needed. This subcommand is only needed
+to decompress resource data that has been read from a resource file in
+compressed form (e. g. using --no-decompress or another tool that does not
+handle resource compression).
+""",
+	)
+	
+	ap.add_argument("--debug", action="store_true", help="Display debugging output from the decompressor on stdout. Cannot be used if the output file is - (stdout).")
+	
+	ap.add_argument("input_file", help="The file from which to read the compressed resource data, or - for stdin.")
+	ap.add_argument("output_file", help="The file to which to write the decompressed resource data, or - for stdout.")
+	
+	ns = ap.parse_args(args)
+	
+	if ns.input_file == "-":
+		in_stream = sys.stdin.buffer
+		close_in_stream = False
+	else:
+		in_stream = open(ns.input_file, "rb")
+		close_in_stream = True
+	
+	try:
+		header_info = compress.CompressedHeaderInfo.parse_stream(in_stream)
+		
+		# Open the output file only after parsing the header, so that the file is only created (or its existing contents deleted) if the input file is valid.
+		if ns.output_file == "-":
+			if ns.debug:
+				print("Cannot use --debug if the decompression output file is - (stdout).", file=sys.stderr)
+				print("The debug output goes to stdout and would conflict with the decompressed data.", file=sys.stderr)
+				sys.exit(2)
+			
+			out_stream = sys.stdout.buffer
+			close_out_stream = False
+		else:
+			out_stream = open(ns.output_file, "wb")
+			close_out_stream = True
+		
+		try:
+			for chunk in compress.decompress_stream_parsed(header_info, in_stream, debug=ns.debug):
+				out_stream.write(chunk)
+		finally:
+			if close_out_stream:
+				out_stream.close()
+	finally:
+		if close_in_stream:
+			in_stream.close()
+

 SUBCOMMANDS = {
 	"read-header": do_read_header,
 	"info": do_info,
 	"list": do_list,
 	"read": do_read,
+	"raw-decompress": do_raw_decompress,
 }


@@ -743,16 +801,16 @@ def main() -> typing.NoReturn:
 		# Custom usage string to make "subcommand ..." show up in the usage, but not as "positional arguments" in the main help text.
 		usage=f"{prog} (--help | --version | subcommand ...)",
 		description="""
-		%(prog)s is a tool for working with Classic Mac OS resource files.
-		Currently this tool can only read resource files; modifying/writing resource
-		files is not supported yet.
-		
-		Note: This tool is intended for human users. The output format is not
-		machine-readable and may change at any time. The command-line syntax usually
-		does not change much across versions, but this should not be relied on.
-		Automated scripts and programs should use the Python API provided by the
-		rsrcfork library, which this tool is a part of.
-		""",
+%(prog)s is a tool for working with Classic Mac OS resource files.
+Currently this tool can only read resource files; modifying/writing resource
+files is not supported yet.
+
+Note: This tool is intended for human users. The output format is not
+machine-readable and may change at any time. The command-line syntax usually
+does not change much across versions, but this should not be relied on.
+Automated scripts and programs should use the Python API provided by the
+rsrcfork library, which this tool is a part of.
+""",
 		# The list of subcommands is shown in the epilog so that it appears under the list of optional arguments.
 		epilog=format_subcommands_help(),
 	)
--- a/rsrcfork/api.py
+++ b/rsrcfork/api.py
@@ -97,24 +97,31 @@ class ResourceAttrs(enum.Flag):
 class Resource(object):
 	"""A single resource from a resource file."""
 	
+	_resfile: "ResourceFile"
 	type: bytes
 	id: int
-	name: typing.Optional[bytes]
+	name_offset: int
+	_name: typing.Optional[bytes]
 	attributes: ResourceAttrs
-	data_raw: bytes
+	data_raw_offset: int
+	_data_raw: bytes
 	_compressed_info: compress.common.CompressedHeaderInfo
 	_data_decompressed: bytes
 	
-	def __init__(self, resource_type: bytes, resource_id: int, name: typing.Optional[bytes], attributes: ResourceAttrs, data_raw: bytes) -> None:
-		"""Create a new resource with the given type code, ID, name, attributes, and data."""
+	def __init__(self, resfile: "ResourceFile", resource_type: bytes, resource_id: int, name_offset: int, attributes: ResourceAttrs, data_raw_offset: int) -> None:
+		"""Create a resource object representing a resource stored in a resource file.
+		
+		External code should not call this constructor manually. Resources should be looked up through a ResourceFile object instead.
+		"""
 		
 		super().__init__()
 		
+		self._resfile = resfile
 		self.type = resource_type
 		self.id = resource_id
-		self.name = name
+		self.name_offset = name_offset
 		self.attributes = attributes
-		self.data_raw = data_raw
+		self.data_raw_offset = data_raw_offset
 	
 	def __repr__(self) -> str:
 		try:
@@ -133,7 +140,7 @@ class Resource(object):
 		if not decompress_ok:
 			data_repr = f"<decompression failed - compressed data: {data_repr}>"
 		
-		return f"{type(self).__module__}.{type(self).__qualname__}(type={self.type}, id={self.id}, name={self.name}, attributes={self.attributes}, data={data_repr})"
+		return f"<{type(self).__qualname__} type {self.type}, id {self.id}, name {self.name}, attributes {self.attributes}, data {data_repr}>"
 	
 	@property
 	def resource_type(self) -> bytes:
@@ -145,6 +152,30 @@ class Resource(object):
 		warnings.warn(DeprecationWarning("The resource_id attribute has been deprecated and will be removed in a future version. Please use the id attribute instead."))
 		return self.id
 	
+	@property
+	def name(self) -> typing.Optional[bytes]:
+		try:
+			return self._name
+		except AttributeError:
+			if self.name_offset == 0xffff:
+				self._name = None
+			else:
+				self._resfile._stream.seek(self._resfile.map_offset + self._resfile.map_name_list_offset + self.name_offset)
+				(name_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
+				self._name = self._resfile._read_exact(name_length)
+			
+			return self._name
+	
+	@property
+	def data_raw(self) -> bytes:
+		try:
+			return self._data_raw
+		except AttributeError:
+			self._resfile._stream.seek(self._resfile.data_offset + self.data_raw_offset)
+			(data_raw_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
+			self._data_raw = self._resfile._read_exact(data_raw_length)
+			return self._data_raw
+	
 	@property
 	def compressed_info(self) -> typing.Optional[compress.common.CompressedHeaderInfo]:
 		"""The compressed resource header information, or None if this resource is not compressed.
@@ -198,65 +229,54 @@ class Resource(object):
 		else:
 			return self.data_raw

+class _LazyResourceMap(typing.Mapping[int, Resource]):
+	"""Internal class: Read-only wrapper for a mapping of resource IDs to resource objects.
+	
+	This class behaves like a normal read-only mapping. The main difference to a plain dict (or similar mapping) is that this mapping has a specialized repr to avoid excessive output when working in the REPL.
+	"""
+	
+	type: bytes
+	_submap: typing.Mapping[int, Resource]
+	
+	def __init__(self, resource_type: bytes, submap: typing.Mapping[int, Resource]) -> None:
+		"""Create a new _LazyResourceMap that wraps the given mapping."""
+		
+		super().__init__()
+		
+		self.type = resource_type
+		self._submap = submap
+	
+	def __len__(self) -> int:
+		"""Get the number of resources with this type code."""
+		
+		return len(self._submap)
+	
+	def __iter__(self) -> typing.Iterator[int]:
+		"""Iterate over the IDs of all resources with this type code."""
+		
+		return iter(self._submap)
+	
+	def __contains__(self, key: object) -> bool:
+		"""Check if a resource with the given ID exists for this type code."""
+		
+		return key in self._submap
+	
+	def __getitem__(self, key: int) -> Resource:
+		"""Get a resource with the given ID for this type code."""
+		
+		return self._submap[key]
+	
+	def __repr__(self) -> str:
+		if len(self) == 1:
+			contents = f"one resource: {next(iter(self.values()))}"
+		else:
+			contents = f"{len(self)} resources with IDs {list(self)}"
+		
+		return f"<Resource map for type {self.type}, containing {contents}>"
+
 class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.ContextManager["ResourceFile"]):
 	"""A resource file reader operating on a byte stream."""
 	
-	# noinspection PyProtectedMember
-	class _LazyResourceMap(typing.Mapping[int, Resource]):
-		"""Internal class: Lazy mapping of resource IDs to resource objects, returned when subscripting a ResourceFile."""
-		
-		_resfile: "ResourceFile"
-		_restype: bytes
-		_submap: typing.Mapping[int, typing.Tuple[int, ResourceAttrs, int]]
-		
-		def __init__(self, resfile: "ResourceFile", restype: bytes) -> None:
-			"""Create a new _LazyResourceMap "containing" all resources in resfile that have the type code restype."""
-			
-			super().__init__()
-			
-			self._resfile = resfile
-			self._restype = restype
-			self._submap = self._resfile._references[self._restype]
-		
-		def __len__(self) -> int:
-			"""Get the number of resources with this type code."""
-			
-			return len(self._submap)
-		
-		def __iter__(self) -> typing.Iterator[int]:
-			"""Iterate over the IDs of all resources with this type code."""
-			
-			return iter(self._submap)
-		
-		def __contains__(self, key: object) -> bool:
-			"""Check if a resource with the given ID exists for this type code."""
-			
-			return key in self._submap
-		
-		def __getitem__(self, key: int) -> Resource:
-			"""Get a resource with the given ID for this type code."""
-			
-			name_offset, attributes, data_offset = self._submap[key]
-			
-			if name_offset == 0xffff:
-				name = None
-			else:
-				self._resfile._stream.seek(self._resfile.map_offset + self._resfile.map_name_list_offset + name_offset)
-				(name_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
-				name = self._resfile._read_exact(name_length)
-			
-			self._resfile._stream.seek(self._resfile.data_offset + data_offset)
-			(data_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
-			data = self._resfile._read_exact(data_length)
-			
-			return Resource(self._restype, key, name, attributes, data)
-		
-		def __repr__(self) -> str:
-			if len(self) == 1:
-				return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing one resource: {next(iter(self.values()))}>"
-			else:
-				return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x} containing {len(self)} resources with IDs: {list(self)}>"
-	
 	_close_stream: bool
 	_stream: typing.BinaryIO
 	
@@ -272,7 +292,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 	file_attributes: ResourceFileAttrs
 	
 	_reference_counts: typing.MutableMapping[bytes, int]
-	_references: typing.MutableMapping[bytes, typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]]]
+	_references: typing.MutableMapping[bytes, typing.MutableMapping[int, Resource]]
 	
 	@classmethod
 	def open(cls, filename: typing.Union[str, os.PathLike], *, fork: str="auto", **kwargs: typing.Any) -> "ResourceFile":
@@ -433,7 +453,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 		self._references = collections.OrderedDict()
 		
 		for resource_type, count in self._reference_counts.items():
-			resmap: typing.MutableMapping[int, typing.Tuple[int, ResourceAttrs, int]] = collections.OrderedDict()
+			resmap: typing.MutableMapping[int, Resource] = collections.OrderedDict()
 			self._references[resource_type] = resmap
 			for _ in range(count):
 				(
@@ -445,7 +465,7 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 				attributes = attributes_and_data_offset >> 24
 				data_offset = attributes_and_data_offset & ((1 << 24) - 1)
 				
-				resmap[resource_id] = (name_offset, ResourceAttrs(attributes), data_offset)
+				resmap[resource_id] = Resource(self, resource_type, resource_id, name_offset, ResourceAttrs(attributes), data_offset)
 	
 	def close(self) -> None:
 		"""Close this ResourceFile.
@@ -483,10 +503,10 @@ class ResourceFile(typing.Mapping[bytes, typing.Mapping[int, Resource]], typing.
 		
 		return key in self._references
 	
-	def __getitem__(self, key: bytes) -> "ResourceFile._LazyResourceMap":
+	def __getitem__(self, key: bytes) -> "_LazyResourceMap":
 		"""Get a lazy mapping of all resources with the given type in this ResourceFile."""
 		
-		return ResourceFile._LazyResourceMap(self, key)
+		return _LazyResourceMap(key, self._references[key])
 	
 	def __repr__(self) -> str:
 		return f"<{type(self).__module__}.{type(self).__qualname__} at {id(self):#x}, attributes {self.file_attributes}, containing {len(self)} resource types: {list(self)}>"
--- a/rsrcfork/compress/dcmp0.py
+++ b/rsrcfork/compress/dcmp0.py
@@ -119,7 +119,7 @@ def decompress_stream_inner(header_info: common.CompressedHeaderInfo, stream: ty
 					print(f"\t-> segment number: {segment_number_int:#x}")
 				
 				# The tail part of all jump table entries (i. e. everything except for the address).
-				entry_tail = b"?<" + segment_number_int.to_bytes(2, "big", signed=True) + b"\xa9\xf0"
+				entry_tail = b"?<" + segment_number_int.to_bytes(2, "big", signed=False) + b"\xa9\xf0"
 				# The tail is output once *without* an address in front, i. e. the first entry's address must be generated manually by a previous code.
 				yield entry_tail
Author	SHA1	Message	Date
dgelessus	158ca4884b	Release version 1.7.0	2019-12-17 11:28:26 +01:00
dgelessus	8568f355c4	Remove incorrect outdated paragraph from list subcommand help	2019-12-10 16:15:18 +01:00
dgelessus	97d2dbe1b3	Change formatting of command help strings in source code. The automatic textwrap.dedent makes it impossible to cleanly extract parts of the help strings into separate constants.	2019-12-10 15:58:20 +01:00
dgelessus	a4b6328782	Fix 'dcmp' (0) jump table decompression for large segment numbers	2019-12-04 23:36:57 +01:00
dgelessus	393160b5da	Add raw-decompress subcommand (#6)	2019-12-04 23:36:56 +01:00
dgelessus	476eaecd17	Fix typo in the help text for rsrcfork read	2019-12-04 21:16:29 +01:00
dgelessus	546edbc31a	Update and improve resource and resource map reprs	2019-12-04 02:01:40 +01:00
dgelessus	cf6ce3c2a6	Move _LazyResourceMap out of ResourceFile	2019-12-04 02:01:40 +01:00
dgelessus	af2ac70676	Simplify ResourceFile._references and ._LazyResourceMap. The _references map now stores Resource objects directly, instead of constructing them only when they are looked up. Resource objects are now lazy themselves, so the previous lazy resource creation mechanism is redundant. _LazyResourceMap is now a simple read-only wrapper around an existing map. The custom class is now only used to provide a specialized repr.	2019-12-04 02:01:40 +01:00
dgelessus	5af455992b	Refactor resource reading internals. The reading of resource name and data is now performed in the Resource class (lazily, when the respective attributes are accessed) instead of in ResourceFile._LazyResourceMap.	2019-12-04 02:01:40 +01:00
dgelessus	2193c81518	Bump version to 1.6.1.dev	2019-12-04 01:45:15 +01:00