Remove custom stream types and read all resource data upfront again

The custom stream types were almost always slower than just reading the
entire data into memory, and there's no reason not to do that -
resources are small enough that memory usage and disk IO speed aren't a
concern (at least not for any machine that's modern enough to run
Python 3...).

Perhaps the only performance advantage was when reading a small amount
of data from the start of a compressed resource. In that case the
custom stream could incrementally decompress only the part of the data
that's actually needed, which was a bit faster than decompressing the
entire resource and then throwing away most of the data. But this
situation is rare enough that it's not worth handling in the rsrcfork
library. If this is a real performance issue for someone, they can
manually call the incremental decompression functions from
rsrcfork.compress where needed.
This commit is contained in:
dgelessus 2020-11-01 19:28:25 +01:00
parent d74dbc41ba
commit 5c3bc5d7e5
3 changed files with 7 additions and 170 deletions

View File

@ -19,80 +19,6 @@ def read_exact(stream: typing.BinaryIO, byte_count: int) -> bytes:
return data
class _SubStream(io.RawIOBase):
	"""A read-only stream that provides a view over a range of data from another stream.
	
	The view covers ``length`` bytes of the outer stream starting at
	``start_offset``; offset 0 in this stream corresponds to
	``start_offset`` in the outer stream. The outer stream must be
	readable and seekable. Reads re-seek the outer stream on every call,
	so interleaved use of the outer stream by other code is tolerated.
	"""
	
	# Underlying stream from which the viewed data is read.
	_outer_stream: typing.BinaryIO
	# Absolute offset in the outer stream where the view begins.
	_start_offset: int
	# Number of bytes exposed by the view.
	_length: int
	# Current position within the view (0 .. _length).
	_seek_position: int
	
	def __init__(self, stream: typing.BinaryIO, start_offset: int, length: int) -> None:
		"""Create a view over ``length`` bytes of ``stream`` starting at ``start_offset``.
		
		:raises ValueError: If the outer stream is too short to contain the requested range.
		"""
		
		super().__init__()
		
		self._outer_stream = stream
		self._start_offset = start_offset
		self._length = length
		self._seek_position = 0
		
		# Validate the range up front so that out-of-bounds views fail
		# immediately rather than on a later read.
		outer_stream_length = self._outer_stream.seek(0, io.SEEK_END)
		if self._start_offset + self._length > outer_stream_length:
			raise ValueError(f"start_offset ({self._start_offset}) or length ({self._length}) too high: outer stream must be at least {self._start_offset + self._length} bytes long, but is only {outer_stream_length} bytes")
	
	def seekable(self) -> bool:
		return True
	
	def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
		if whence == io.SEEK_SET:
			if offset < 0:
				raise ValueError(f"Negative seek offset not allowed with SEEK_SET: {offset}")
			
			self._seek_position = offset
		elif whence == io.SEEK_CUR:
			self._seek_position += offset
		elif whence == io.SEEK_END:
			# Standard io semantics: the offset is *added* to the end of
			# the stream (and is usually zero or negative). The previous
			# code subtracted it, which sent negative offsets past EOF.
			self._seek_position = self._length + offset
		else:
			raise ValueError(f"Invalid whence value: {whence}")
		
		# Clamp the position into the valid range [0, _length] instead of
		# raising, matching the forgiving behavior of io.BytesIO.
		self._seek_position = max(0, min(self._length, self._seek_position))
		
		return self._seek_position
	
	def tell(self) -> int:
		return self._seek_position
	
	def readable(self) -> bool:
		return True
	
	def readall(self) -> bytes:
		# Read everything from the current position up to the end of the view.
		self._outer_stream.seek(self._start_offset + self._seek_position)
		return self._outer_stream.read(self._length - self._seek_position)
	
	def readinto(self, buffer: bytearray) -> typing.Optional[int]:
		# Never read past the end of the view, even if the buffer is larger.
		size = min(len(buffer), self._length - self._seek_position)
		self._outer_stream.seek(self._start_offset + self._seek_position)
		data = self._outer_stream.read(size)
		self._seek_position += len(data)
		buffer[:len(data)] = data
		return len(data)
def make_substream(stream: typing.BinaryIO, start_offset: int, length: int) -> typing.BinaryIO:
	"""Create a read-only stream that exposes the specified range of data from ``stream``.
	
	:param stream: The underlying binary stream from which to read the data.
		The stream must be readable and seekable and contain at least ``start_offset + length`` bytes of data.
	:param start_offset: The absolute offset in the parent stream at which the data to expose starts.
		This offset will correspond to offset 0 in the returned stream.
	:param length: The length of the data to expose.
		This is the highest valid offset in the returned stream.
	"""
	
	raw_view = _SubStream(stream, start_offset, length)
	buffered_view = io.BufferedReader(raw_view)
	# mypy doesn't consider io.BufferedReader to be a typing.BinaryIO,
	# so an explicit cast is needed to keep the annotated return type.
	return typing.cast(typing.BinaryIO, buffered_view)
if typing.TYPE_CHECKING:
class PeekableIO(typing.Protocol):
"""Minimal protocol for binary IO streams that support the peek method.

View File

@ -179,8 +179,8 @@ class Resource(object):
try:
return self._data_raw
except AttributeError:
with self.open_raw() as f:
self._data_raw = f.read()
self._resfile._stream.seek(self._resfile.data_offset + self.data_raw_offset + STRUCT_RESOURCE_DATA_HEADER.size)
self._data_raw = _io_utils.read_exact(self._resfile._stream, self.length_raw)
return self._data_raw
def open_raw(self) -> typing.BinaryIO:
@ -196,7 +196,7 @@ class Resource(object):
because the stream API does not require the entire resource data to be read in advance.
"""
return _io_utils.make_substream(self._resfile._stream, self._resfile.data_offset + self.data_raw_offset + STRUCT_RESOURCE_DATA_HEADER.size, self.length_raw)
return io.BytesIO(self.data_raw)
@property
def compressed_info(self) -> typing.Optional[compress.common.CompressedHeaderInfo]:
@ -252,8 +252,9 @@ class Resource(object):
try:
return self._data_decompressed
except AttributeError:
with self.open() as f:
self._data_decompressed = f.read()
with self.open_raw() as compressed_f:
compressed_f.seek(self.compressed_info.header_length)
self._data_decompressed = b"".join(compress.decompress_stream_parsed(self.compressed_info, compressed_f))
return self._data_decompressed
else:
return self.data_raw
@ -271,12 +272,7 @@ class Resource(object):
because the stream API does not require the entire resource data to be read (and possibly decompressed) in advance.
"""
if self.compressed_info is None:
return self.open_raw()
else:
f = self.open_raw()
f.seek(self.compressed_info.header_length)
return compress.DecompressingStream(f, self.compressed_info, close_stream=True)
return io.BytesIO(self.data)
class _LazyResourceMap(typing.Mapping[int, Resource]):

View File

@ -66,88 +66,3 @@ def decompress(data: bytes, *, debug: bool = False) -> bytes:
"""Decompress the given compressed resource data."""
return b"".join(decompress_stream(io.BytesIO(data), debug=debug))
class DecompressingStream(io.BufferedIOBase, typing.BinaryIO):
	"""A read-only, seekable stream that incrementally decompresses resource data.
	
	Data from ``compressed_stream`` is decompressed lazily as it is read.
	All data decompressed so far is kept in an in-memory buffer so that
	seeking backwards never requires re-decompressing.
	"""
	
	# Stream containing the compressed resource data (positioned past the header).
	_compressed_stream: typing.BinaryIO
	# Whether to close _compressed_stream when this stream is closed.
	_close_stream: bool
	# Parsed compression header (provides decompressed_length).
	_header_info: CompressedHeaderInfo
	# Iterator yielding successive chunks of decompressed data.
	_decompress_iter: typing.Iterator[bytes]
	# Buffer holding all data decompressed so far.
	_decompressed_stream: typing.BinaryIO
	# Current logical position in the decompressed data.
	_seek_position: int
	
	def __init__(self, compressed_stream: typing.BinaryIO, header_info: typing.Optional[CompressedHeaderInfo], *, close_stream: bool = False) -> None:
		"""Wrap ``compressed_stream`` in a lazily decompressing stream.
		
		:param compressed_stream: Stream containing the compressed data.
		:param header_info: Pre-parsed header info, or ``None`` to parse the
			header from ``compressed_stream`` at the current position.
		:param close_stream: Whether to close ``compressed_stream`` when this
			stream is closed.
		"""
		
		super().__init__()
		
		self._compressed_stream = compressed_stream
		self._close_stream = close_stream
		
		if header_info is not None:
			self._header_info = header_info
		else:
			self._header_info = CompressedHeaderInfo.parse_stream(self._compressed_stream)
		
		self._decompress_iter = decompress_stream_parsed(self._header_info, self._compressed_stream)
		self._decompressed_stream = io.BytesIO()
		self._seek_position = 0
	
	# This override does nothing,
	# but is needed to make mypy happy,
	# otherwise it complains (apparently incorrectly) about the __enter__ definitions from IOBase and BinaryIO being incompatible with each other.
	def __enter__(self: "DecompressingStream") -> "DecompressingStream":
		return super().__enter__()
	
	def close(self) -> None:
		super().close()
		if self._close_stream:
			self._compressed_stream.close()
		# Drop the iterator to release its reference to the compressed stream.
		del self._decompress_iter
		self._decompressed_stream.close()
	
	def seekable(self) -> bool:
		return True
	
	def tell(self) -> int:
		return self._seek_position
	
	def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
		if whence == io.SEEK_SET:
			if offset < 0:
				raise ValueError(f"Negative seek offset not allowed with SEEK_SET: {offset}")
			
			self._seek_position = offset
		elif whence == io.SEEK_CUR:
			self._seek_position += offset
		elif whence == io.SEEK_END:
			# Standard io semantics: the offset is *added* to the end of
			# the stream (and is usually zero or negative). The previous
			# code subtracted it, which sent negative offsets past EOF.
			self._seek_position = self._header_info.decompressed_length + offset
		else:
			raise ValueError(f"Invalid whence value: {whence}")
		
		# Clamp into [0, decompressed_length] instead of raising,
		# matching the forgiving behavior of io.BytesIO.
		self._seek_position = max(0, min(self._header_info.decompressed_length, self._seek_position))
		
		# Seeking never decompresses anything eagerly - decompression only
		# happens once data is actually read.
		
		return self._seek_position
	
	def readable(self) -> bool:
		return True
	
	def read(self, size: typing.Optional[int] = -1) -> bytes:
		if size is None:
			size = -1
		
		# Append newly decompressed chunks at the end of the buffer.
		self._decompressed_stream.seek(0, io.SEEK_END)
		if size < 0:
			# Unbounded read: decompress everything that remains.
			for chunk in self._decompress_iter:
				self._decompressed_stream.write(chunk)
		else:
			# Bounded read: decompress only until enough data is buffered
			# past the current seek position to satisfy the request.
			if self._decompressed_stream.tell() - self._seek_position < size:
				for chunk in self._decompress_iter:
					self._decompressed_stream.write(chunk)
					if self._decompressed_stream.tell() - self._seek_position >= size:
						break
		
		self._decompressed_stream.seek(self._seek_position)
		ret = self._decompressed_stream.read(size)
		self._seek_position += len(ret)
		return ret