mirror of
https://github.com/dgelessus/python-rsrcfork.git
synced 2025-01-01 07:29:40 +00:00
Rework how non-seekable streams are handled by ResourceFile
The broken non-seeking read implementation of ResourceFile is removed, and non-seekable streams are now handled by reading the entire stream data first and wrapping it in a BytesIO to make it seekable. The manual selection of seeking/non-seeking reading has been removed as well, since it is no longer needed and was already nearly useless.
This commit is contained in:
parent
7253c53d67
commit
5ede8a351a
@ -140,6 +140,9 @@ Version 1.2.0 (next version)
|
||||
* Currently, only the three standard System 7.0 compression formats (``'dcmp'`` IDs 0, 1, 2) are supported. Attempting to access a resource compressed in an unsupported format results in a ``DecompressError``.
|
||||
* To access the raw resource data as stored in the file, without automatic decompression, use the ``res.data_raw`` attribute (for the Python API), or the ``--no-decompress`` option (for the command-line interface). This can be used to read the resource data in its compressed form, even if the compression format is not supported.
|
||||
|
||||
* Fixed reading from non-seekable streams not working for some resource files.
|
||||
* Removed the ``allow_seek`` parameter of ``ResourceFork.__init__`` and the ``--read-mode`` command line option. They are no longer necessary, and were already practically useless before due to non-seekable stream reading being broken.
|
||||
|
||||
Version 1.1.3.post1
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -190,7 +190,6 @@ def main():
|
||||
ap.add_argument("--format", choices=["dump", "hex", "raw", "derez"], default="dump", help="How to output the resources - human-readable info with hex dump (dump), data only as hex (hex), data only as raw bytes (raw), or like DeRez with no resource definitions (derez)")
|
||||
ap.add_argument("--header-system", action="store_true", help="Output system-reserved header data and nothing else")
|
||||
ap.add_argument("--header-application", action="store_true", help="Output application-specific header data and nothing else")
|
||||
ap.add_argument("--read-mode", choices=["auto", "stream", "seek"], default="auto", help="Whether to read the data sequentially (stream) or on-demand (seek), or auto to use seeking when possible (default: %(default)s)")
|
||||
|
||||
ap.add_argument("file", help="The file to read, or - for stdin")
|
||||
ap.add_argument("filter", nargs="*", help="One or more filters to select which resources to display, or omit to show an overview of all resources")
|
||||
@ -198,16 +197,15 @@ def main():
|
||||
ns = ap.parse_args()
|
||||
|
||||
ns.fork = {"auto": None, "data": False, "rsrc": True}[ns.fork]
|
||||
ns.read_mode = {"auto": None, "stream": False, "seek": True}[ns.read_mode]
|
||||
|
||||
if ns.file == "-":
|
||||
if ns.fork is not None:
|
||||
print("Cannot specify an explicit fork when reading from stdin", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
rf = api.ResourceFile(sys.stdin.buffer, allow_seek=ns.read_mode)
|
||||
rf = api.ResourceFile(sys.stdin.buffer)
|
||||
else:
|
||||
rf = api.ResourceFile.open(ns.file, rsrcfork=ns.fork, allow_seek=ns.read_mode)
|
||||
rf = api.ResourceFile.open(ns.file, rsrcfork=ns.fork)
|
||||
|
||||
with rf:
|
||||
if ns.header_system or ns.header_application:
|
||||
|
124
rsrcfork/api.py
124
rsrcfork/api.py
@ -1,6 +1,7 @@
|
||||
import collections
|
||||
import collections.abc
|
||||
import enum
|
||||
import io
|
||||
import os
|
||||
import struct
|
||||
import typing
|
||||
@ -177,19 +178,14 @@ class ResourceFile(collections.abc.Mapping):
|
||||
|
||||
if name_offset == 0xffff:
|
||||
name = None
|
||||
elif self._resfile._allow_seek:
|
||||
else:
|
||||
self._resfile._stream.seek(self._resfile.map_offset + self._resfile.map_name_list_offset + name_offset)
|
||||
(name_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
|
||||
name = self._resfile._read(name_length)
|
||||
else:
|
||||
name = self._resfile._resource_names[name_offset]
|
||||
name = self._resfile._stream.read(name_length)
|
||||
|
||||
if self._resfile._allow_seek:
|
||||
self._resfile._stream.seek(self._resfile.data_offset + data_offset)
|
||||
(data_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
|
||||
data = self._resfile._read(data_length)
|
||||
else:
|
||||
data = self._resfile._resource_data[data_offset]
|
||||
self._resfile._stream.seek(self._resfile.data_offset + data_offset)
|
||||
(data_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
|
||||
data = self._resfile._stream.read(data_length)
|
||||
|
||||
return Resource(self._restype, key, name, attributes, data)
|
||||
|
||||
@ -239,14 +235,14 @@ class ResourceFile(collections.abc.Mapping):
|
||||
# Use the selected fork to build a ResourceFile.
|
||||
return cls(f, close=True, **kwargs)
|
||||
|
||||
def __init__(self, stream: typing.io.BinaryIO, *, allow_seek: typing.Optional[bool]=None, close: bool=False):
|
||||
def __init__(self, stream: typing.io.BinaryIO, *, close: bool=False):
|
||||
"""Create a ResourceFile wrapping the given byte stream.
|
||||
|
||||
To read resource file data from a bytes object, wrap it in an io.BytesIO.
|
||||
|
||||
allow_seek controls whether seeking should be used when reading the file. If allow_seek is None, stream.seekable() is called to determine whether seeking should be used.
|
||||
If seeking is used, only the file header, map header, resource types, and resource references are read into memory. Resource data and names are loaded on-demand when the respective resource is accessed.
|
||||
If seeking is not used, the entire stream is processed sequentially and read into memory, including all resource data and names. This may be necessary when the stream does not support seeking at all. Memory is usually not a concern, most resource files are not even a megabyte in size.
|
||||
If the stream is seekable, only the file header and resource map are read initially. Resource data and names are loaded on-demand when the respective resource is accessed. If the stream is not seekable, the entire stream data is read into memory (this is necessary because the resource map is stored at the end of the resource file).
|
||||
|
||||
In practice, memory usage is usually not a concern when reading resource files. Even large resource files are only a few megabytes in size, and due to limitations in the format, resource files cannot be much larger than 16 MiB (except for special cases that are unlikely to occur in practice).
|
||||
|
||||
close controls whether the stream should be closed when the ResourceFile's close method is called. By default this is False.
|
||||
"""
|
||||
@ -254,50 +250,31 @@ class ResourceFile(collections.abc.Mapping):
|
||||
super().__init__()
|
||||
|
||||
self._close_stream: bool = close
|
||||
self._stream: typing.io.BinaryIO = stream
|
||||
self._stream: typing.io.BinaryIO
|
||||
if stream.seekable():
|
||||
self._stream = stream
|
||||
else:
|
||||
self._stream = io.BytesIO(stream.read())
|
||||
|
||||
try:
|
||||
self._allow_seek: bool
|
||||
if allow_seek is None:
|
||||
self._allow_seek = self._stream.seekable()
|
||||
else:
|
||||
self._allow_seek = allow_seek
|
||||
|
||||
if self._allow_seek:
|
||||
self._pos = None
|
||||
self._init_seeking()
|
||||
else:
|
||||
self._pos: int = 0
|
||||
self._init_streaming()
|
||||
self._read_header()
|
||||
self._stream.seek(self.map_offset)
|
||||
self._read_map_header()
|
||||
self._read_all_resource_types()
|
||||
self._read_all_references()
|
||||
except BaseException:
|
||||
self.close()
|
||||
raise
|
||||
|
||||
def _tell(self) -> int:
|
||||
"""Get the current position in the stream. This uses the stream's tell method if seeking is enabled, and an internal counter otherwise."""
|
||||
|
||||
if self._allow_seek:
|
||||
return self._stream.tell()
|
||||
else:
|
||||
return self._pos
|
||||
|
||||
def _read(self, count: int) -> bytes:
|
||||
"""Read count bytes from the stream. If seeking is disabled, this also increments the internal seek counter accordingly."""
|
||||
|
||||
ret = self._stream.read(count)
|
||||
if not self._allow_seek:
|
||||
self._pos += len(ret)
|
||||
return ret
|
||||
|
||||
def _stream_unpack(self, st: struct.Struct) -> typing.Tuple:
|
||||
"""Unpack data from the stream according to the struct st. The number of bytes to read is determined using st.size, so variable-sized structs cannot be used with this method."""
|
||||
|
||||
return st.unpack(self._read(st.size))
|
||||
return st.unpack(self._stream.read(st.size))
|
||||
|
||||
def _read_header(self):
|
||||
"""Read the resource file header, starting at the current stream position."""
|
||||
|
||||
assert self._tell() == 0
|
||||
assert self._stream.tell() == 0
|
||||
|
||||
self.data_offset: int
|
||||
self.map_offset: int
|
||||
@ -314,27 +291,12 @@ class ResourceFile(collections.abc.Mapping):
|
||||
self.header_application_data,
|
||||
) = self._stream_unpack(STRUCT_RESOURCE_HEADER)
|
||||
|
||||
assert self._tell() == self.data_offset
|
||||
|
||||
def _read_all_resource_data(self):
|
||||
"""Read all resource data blocks, starting at the current stream position, until self.map_offset is reached."""
|
||||
|
||||
assert self._tell() == self.data_offset
|
||||
|
||||
self._resource_data: typing.MutableMapping[int, bytes] = collections.OrderedDict()
|
||||
|
||||
while self._tell() < self.map_offset:
|
||||
initial_pos = self._tell()
|
||||
(length,) = self._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
|
||||
assert self._tell() + length <= self.map_offset
|
||||
self._resource_data[initial_pos] = self._read(length)
|
||||
|
||||
assert self._tell() == self.map_offset
|
||||
assert self._stream.tell() == self.data_offset
|
||||
|
||||
def _read_map_header(self):
|
||||
"""Read the map header, starting at the current stream position."""
|
||||
|
||||
assert self._tell() == self.map_offset
|
||||
assert self._stream.tell() == self.map_offset
|
||||
|
||||
self.map_type_list_offset: int
|
||||
self.map_name_list_offset: int
|
||||
@ -383,44 +345,6 @@ class ResourceFile(collections.abc.Mapping):
|
||||
|
||||
resmap[resource_id] = (name_offset, ResourceAttrs(attributes), data_offset)
|
||||
|
||||
def _read_all_resource_names(self):
|
||||
"""Read all resource names, starting at the current stream position, until the end of the map is reached."""
|
||||
|
||||
self._resource_names: typing.MutableMapping[int, bytes] = collections.OrderedDict()
|
||||
|
||||
while self._tell() < self.map_offset + self.map_length:
|
||||
initial_pos = self._tell()
|
||||
(length,) = self._stream_unpack(STRUCT_RESOURCE_NAME_HEADER)
|
||||
self._resource_names[initial_pos] = self._read(length)
|
||||
|
||||
def _init_seeking(self):
|
||||
"""Initialize self with seeking enabled, by reading the header, map header, resource types, and references."""
|
||||
|
||||
self._read_header()
|
||||
self._stream.seek(self.map_offset)
|
||||
self._read_map_header()
|
||||
self._read_all_resource_types()
|
||||
self._read_all_references()
|
||||
|
||||
def _init_streaming(self):
|
||||
"""Initialize self with seeking disabled, by reading the entire file sequentially."""
|
||||
|
||||
self._read_header()
|
||||
self._read_all_resource_data()
|
||||
|
||||
assert self._tell() == self.map_offset
|
||||
|
||||
self._read_map_header()
|
||||
|
||||
assert self._tell() == self.map_offset + self.map_type_list_offset
|
||||
|
||||
self._read_all_resource_types()
|
||||
self._read_all_references()
|
||||
|
||||
assert self._tell() == self.map_offset + self.map_name_list_offset
|
||||
|
||||
self._read_all_resource_names()
|
||||
|
||||
def close(self):
|
||||
"""Close this ResourceFile.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user