Reimplement Resource.data_raw using a custom stream type (SubStream)

This way all reads performed on a resource data stream are forwarded
to the underlying resource file stream, with the read offsets and
lengths adjusted appropriately.
dgelessus 2020-07-23 02:42:32 +02:00
parent 2907d9f9e8
commit 126795239c
3 changed files with 76 additions and 12 deletions
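
As a rough illustration of what this change enables (a hedged sketch, not part of the commit: it assumes this is the ``rsrcfork`` package with its usual public API of ``rsrcfork.open``, mapping-style resource lookup and the ``Resource.open_raw`` method added in 1.8.1; the file name, resource type and ID are made up):

    import rsrcfork

    # Hypothetical resource file, type and ID, purely for illustration.
    with rsrcfork.open("Example.rsrc") as rf:
        res = rf[b"snd "][128]
        # open_raw() now returns a view over the resource file's stream,
        # so reading the first 16 bytes does not load the entire resource data.
        with res.open_raw() as f:
            header = f.read(16)
        print(len(header))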


@@ -119,13 +119,6 @@ Version 1.8.1 (next version)
 * Added ``open`` and ``open_raw`` methods to ``Resource`` objects,
   for stream-based access to resource data.
-  * These methods are currently implemented using simple ``io.BytesIO`` wrappers around the resource data,
-    so there is currently no performance difference between ``open``/``open_raw`` and ``data``/``data_raw``.
-    In the future,
-    the stream-based API implementations will be optimized
-    to allow efficient access to parts of the resource data
-    without having to read the entire data in advance.
 
 Version 1.8.0
 ^^^^^^^^^^^^^


@@ -19,6 +19,72 @@ def read_exact(stream: typing.BinaryIO, byte_count: int) -> bytes:
 	return data
 
 
+class SubStream(io.BufferedIOBase, typing.BinaryIO):
+	"""A read-only stream that provides a view over a range of data from another stream."""
+	
+	_outer_stream: typing.BinaryIO
+	_start_offset: int
+	_length: int
+	_seek_position: int
+	
+	def __init__(self, stream: typing.BinaryIO, start_offset: int, length: int) -> None:
+		"""Create a new stream that exposes the specified range of data from ``stream``.
+		
+		:param stream: The underlying binary stream from which to read the data.
+			The stream must be readable and seekable and contain at least ``start_offset + length`` bytes of data.
+		:param start_offset: The absolute offset in the parent stream at which the data to expose starts.
+			This offset will correspond to offset 0 in the new :class:`SubStream`.
+		:param length: The length of the data to expose.
+			This is the highest valid offset in the new :class:`SubStream`.
+		"""
+		
+		super().__init__()
+		
+		self._outer_stream = stream
+		self._start_offset = start_offset
+		self._length = length
+		self._seek_position = 0
+		
+		outer_stream_length = self._outer_stream.seek(0, io.SEEK_END)
+		if self._start_offset + self._length > outer_stream_length:
+			raise ValueError(f"start_offset ({self._start_offset}) or length ({self._length}) too high: outer stream must be at least {self._start_offset + self._length} bytes long, but is only {outer_stream_length} bytes")
+	
+	def seekable(self) -> bool:
+		return True
+	
+	def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
+		if whence == io.SEEK_SET:
+			if offset < 0:
+				raise ValueError(f"Negative seek offset not allowed with SEEK_SET: {offset}")
+			self._seek_position = offset
+		elif whence == io.SEEK_CUR:
+			self._seek_position += offset
+		elif whence == io.SEEK_END:
+			# With SEEK_END, the offset is relative to the end of the stream and is normally negative or zero.
+			self._seek_position = self._length + offset
+		else:
+			raise ValueError(f"Invalid whence value: {whence}")
+		
+		self._seek_position = max(0, min(self._length, self._seek_position))
+		return self._seek_position
+	
+	def tell(self) -> int:
+		return self._seek_position
+	
+	def readable(self) -> bool:
+		return True
+	
+	def read(self, size: typing.Optional[int] = -1) -> bytes:
+		if size is None or size < 0 or size > self._length - self._seek_position:
+			size = self._length - self._seek_position
+		
+		self._outer_stream.seek(self._start_offset + self._seek_position)
+		res = self._outer_stream.read(size)
+		self._seek_position += len(res)
+		return res
+
+
 if typing.TYPE_CHECKING:
 	class PeekableIO(typing.Protocol):
 		"""Minimal protocol for binary IO streams that support the peek method.


@@ -109,6 +109,7 @@ class Resource(object):
 	_name: typing.Optional[bytes]
 	attributes: ResourceAttrs
 	data_raw_offset: int
+	_length_raw: int
 	_data_raw: bytes
 	_compressed_info: compress.common.CompressedHeaderInfo
 	_data_decompressed: bytes
@@ -178,9 +179,8 @@ class Resource(object):
 		try:
 			return self._data_raw
 		except AttributeError:
-			self._resfile._stream.seek(self._resfile.data_offset + self.data_raw_offset)
-			(data_raw_length,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
-			self._data_raw = self._resfile._read_exact(data_raw_length)
+			with self.open_raw() as f:
+				self._data_raw = f.read()
 			return self._data_raw
 	
 	def open_raw(self) -> typing.BinaryIO:
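
The new ``data_raw`` body above relies on the lazy caching idiom already used in this class: the attribute is only annotated at class level, never assigned, so the first access raises ``AttributeError`` and triggers the actual read, after which the value is cached on the instance. A generic, hypothetical sketch of the pattern:

    class Example:
        _expensive: bytes  # annotated only; not assigned until first access

        @property
        def expensive(self) -> bytes:
            try:
                return self._expensive
            except AttributeError:
                # Computed (or read from disk) exactly once, then cached on the instance.
                self._expensive = self._compute()
                return self._expensive

        def _compute(self) -> bytes:
            return b"..."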
@@ -196,7 +196,7 @@ class Resource(object):
 		because the stream API does not require the entire resource data to be read in advance.
 		"""
 		
-		return io.BytesIO(self.data_raw)
+		return _io_utils.SubStream(self._resfile._stream, self._resfile.data_offset + self.data_raw_offset + STRUCT_RESOURCE_DATA_HEADER.size, self.length_raw)
 	
 	@property
 	def compressed_info(self) -> typing.Optional[compress.common.CompressedHeaderInfo]:
@@ -222,7 +222,12 @@ class Resource(object):
 		Accessing this attribute may be faster than computing len(self.data_raw) manually.
 		"""
 		
-		return len(self.data_raw)
+		try:
+			return self._length_raw
+		except AttributeError:
+			self._resfile._stream.seek(self._resfile.data_offset + self.data_raw_offset)
+			(self._length_raw,) = self._resfile._stream_unpack(STRUCT_RESOURCE_DATA_HEADER)
+			return self._length_raw
 	
 	@property
 	def length(self) -> int:
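
For comparison, a hedged sketch of the practical effect of the ``length_raw`` hunk above (again assuming the ``rsrcfork`` package and a made-up file name, type and ID): ``length_raw`` now only seeks to the resource's data header and unpacks its length field, while ``len(res.data_raw)`` still has to read the full data.

    import rsrcfork

    with rsrcfork.open("Example.rsrc") as rf:
        res = rf[b"PICT"][1000]
        size = res.length_raw  # reads just the length field from the data header
        data = res.data_raw    # reads the entire (possibly large) resource data
        assert size == len(data)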