diff --git a/README.rst b/README.rst index 7c0741b..9d88b00 100644 --- a/README.rst +++ b/README.rst @@ -140,6 +140,9 @@ Version 1.2.0 (next version) * Currently, only the three standard System 7.0 compression formats (``'dcmp'`` IDs 0, 1, 2) are supported. Attempting to access a resource compressed in an unsupported format results in a ``DecompressError``. * To access the raw resource data as stored in the file, without automatic decompression, use the ``res.data_raw`` attribute (for the Python API), or the ``--no-decompress`` option (for the command-line interface). This can be used to read the resource data in its compressed form, even if the compression format is not supported. +* Fixed reading from non-seekable streams not working for some resource files. +* Removed the ``allow_seek`` parameter of ``ResourceFork.__init__`` and the ``--read-mode`` command line option. They are no longer necessary, and were already practically useless before due to non-seekable stream reading being broken. + Version 1.1.3.post1 ^^^^^^^^^^^^^^^^^^^ diff --git a/rsrcfork/__main__.py b/rsrcfork/__main__.py index b383a1d..773bc65 100644 --- a/rsrcfork/__main__.py +++ b/rsrcfork/__main__.py @@ -190,7 +190,6 @@ def main(): ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)") ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else") ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else") - ap.add_argument("--read-mode", choices=["auto", "stream", "seek"], default="auto", help="Whether to read the data sequentially (stream) or on-demand (seek), or auto to use seeking when possible (default: %(default)s)") ap.add_argument("file", help="The file to read, or - for stdin") ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources") @@ -198,16 +197,15 @@ def main(): ns = ap.parse_args() ns.fork = {"auto": None, "data": False, "rsrc": True}[ns.fork] - ns.read_mode = {"auto": None, "stream": False, "seek": True}[ns.read_mode] if ns.file == "-": if ns.fork is not None: print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr) sys.exit(1) - rf = api.ResourceFile(sys.stdin.buffer, allow_seek=ns.read_mode) + rf = api.ResourceFile(sys.stdin.buffer) else: - rf = api.ResourceFile.open(ns.file, rsrcfork=ns.fork, allow_seek=ns.read_mode) + rf = api.ResourceFile.open(ns.file, rsrcfork=ns.fork) with rf: if ns.header_system or ns.header_application: diff --git a/rsrcfork/api.py b/rsrcfork/api.py index ac2dd16..d3aebf6 100644 --- a/rsrcfork/api.py +++ b/rsrcfork/api.py @@ -1,6 +1,7 @@ import collections import collections.abc import enum +import io import os import struct import typing @@ -177,19 +178,14 @@ class ResourceFile(collections.abc.Mapping): if name_offset == 0xffff: name = None - elif self._resfile._allow_seek: + else: self._resfile._stream.seek(self._resfile.map_offset + self._resfile.map_name_list_offset + name_offset) (name_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER) - name = self._resfile._read(name_length) - else: - name = self._resfile._resource_names[name_offset] + name = self._resfile._stream.read(name_length) - if self._resfile._allow_seek: - self._resfile._stream.seek(self._resfile.data_offset + data_offset) - (data_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER) - data = self._resfile._read(data_length) - else: - data = self._resfile._resource_data[data_offset] + self._resfile._stream.seek(self._resfile.data_offset + data_offset) + (data_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER) + data = self._resfile._stream.read(data_length) return Resource(self._restype, key, name, attributes, data) @@ -239,14 +235,14 @@ class ResourceFile(collections.abc.Mapping): # Use the selected fork to build a ResourceFile. return cls(f, close=True, **kwargs) - def __init__(self, stream: typing.io.BinaryIO, *, allow_seek: typing.Optional[bool]=None, close: bool=False): + def __init__(self, stream: typing.io.BinaryIO, *, close: bool=False): """Create a ResourceFile wrapping the given byte stream. To read resource file data from a bytes object, wrap it in an io.BytesIO. - allow_seek controls whether seeking should be used when reading the file. If allow_seek is None, stream.seekable() is called to determine whether seeking should be used. - If seeking is used, only the file header, map header, resource types, and resource references are read into memory. Resource data and names are loaded on-demand when the respective resource is accessed. - If seeking is not used, the entire stream is processed sequentially and read into memory, including all resource data and names. This may be necessary when the stream does not support seeking at all. Memory is usually not a concern, most resource files are not even a megabyte in size. + If the stream is seekable, only the file header and resource map are read initially. Resource data and names are loaded on-demand when the respective resource is accessed. If the stream is not seekable, the entire stream data is read into memory (this is necessary because the resource map is stored at the end of the resource file). + + In practice, memory usage is usually not a concern when reading resource files. Even large resource files are only a few megabytes in size, and due to limitations in the format, resource files cannot be much larger than 16 MiB (except for special cases that are unlikely to occur in practice). close controls whether the stream should be closed when the ResourceFile's close method is called. By default this is False. """ @@ -254,50 +250,31 @@ class ResourceFile(collections.abc.Mapping): super().__init__() self._close_stream: bool = close - self._stream: typing.io.BinaryIO = stream + self._stream: typing.io.BinaryIO + if stream.seekable(): + self._stream = stream + else: + self._stream = io.BytesIO(stream.read()) try: - self._allow_seek: bool - if allow_seek is None: - self._allow_seek = self._stream.seekable() - else: - self._allow_seek = allow_seek - - if self._allow_seek: - self._pos = None - self._init_seeking() - else: - self._pos: int = 0 - self._init_streaming() + self._read_header() + self._stream.seek(self.map_offset) + self._read_map_header() + self._read_all_resource_types() + self._read_all_references() except BaseException: self.close() raise - def _tell(self) -> int: - """Get the current position in the stream. This uses the stream's tell method if seeking is enabled, and an internal counter otherwise.""" - - if self._allow_seek: - return self._stream.tell() - else: - return self._pos - - def _read(self, count: int) -> bytes: - """Read count bytes from the stream. If seeking is disabled, this also increments the internal seek counter accordingly.""" - - ret = self._stream.read(count) - if not self._allow_seek: - self._pos += len(ret) - return ret - def _stream_unpack(self, st: struct.Struct) -> typing.Tuple: """Unpack data from the stream according to the struct st. The number of bytes to read is determined using st.size, so variable-sized structs cannot be used with this method.""" - return st.unpack(self._read(st.size)) + return st.unpack(self._stream.read(st.size)) def _read_header(self): """Read the resource file header, starting at the current stream position.""" - assert self._tell() == 0 + assert self._stream.tell() == 0 self.data_offset: int self.map_offset: int @@ -314,27 +291,12 @@ class ResourceFile(collections.abc.Mapping): self.header_application_data, ) = self._stream_unpack(STRUCT_RESOURCE_HEADER) - assert self._tell() == self.data_offset - - def _read_all_resource_data(self): - """Read all resource data blocks, starting at the current stream position, until self.map_offset is reached.""" - - assert self._tell() == self.data_offset - - self._resource_data: typing.MutableMapping[int, bytes] = collections.OrderedDict() - - while self._tell() < self.map_offset: - initial_pos = self._tell() - (length,) = self._stream_unpack(STRUCT_RESOURCE_DATA_HEADER) - assert self._tell() + length <= self.map_offset - self._resource_data[initial_pos] = self._read(length) - - assert self._tell() == self.map_offset + assert self._stream.tell() == self.data_offset def _read_map_header(self): """Read the map header, starting at the current stream position.""" - assert self._tell() == self.map_offset + assert self._stream.tell() == self.map_offset self.map_type_list_offset: int self.map_name_list_offset: int @@ -383,44 +345,6 @@ class ResourceFile(collections.abc.Mapping): resmap[resource_id] = (name_offset, ResourceAttrs(attributes), data_offset) - def _read_all_resource_names(self): - """Read all resource names, starting at the current stream position, until the end of the map is reached.""" - - self._resource_names: typing.MutableMapping[int, bytes] = collections.OrderedDict() - - while self._tell() < self.map_offset + self.map_length: - initial_pos = self._tell() - (length,) = self._stream_unpack(STRUCT_RESOURCE_NAME_HEADER) - self._resource_names[initial_pos] = self._read(length) - - def _init_seeking(self): - """Initialize self with seeking enabled, by reading the header, map header, resource types, and references.""" - - self._read_header() - self._stream.seek(self.map_offset) - self._read_map_header() - self._read_all_resource_types() - self._read_all_references() - - def _init_streaming(self): - """Initialize self with seeking disabled, by reading the entire file sequentially.""" - - self._read_header() - self._read_all_resource_data() - - assert self._tell() == self.map_offset - - self._read_map_header() - - assert self._tell() == self.map_offset + self.map_type_list_offset - - self._read_all_resource_types() - self._read_all_references() - - assert self._tell() == self.map_offset + self.map_name_list_offset - - self._read_all_resource_names() - def close(self): """Close this ResourceFile.