2019-10-02 14:28:40 +00:00
import io
import typing
2019-08-22 19:19:10 +00:00
from . import dcmp0
from . import dcmp1
from . import dcmp2
2019-12-25 23:34:27 +00:00
from . common import DecompressError , CompressedHeaderInfo , CompressedType8HeaderInfo , CompressedType9HeaderInfo
2019-08-22 19:19:10 +00:00
# Public API of this package.
# Classes/exceptions come first, then functions, each group sorted alphabetically.
__all__ = [
    "CompressedHeaderInfo",
    "CompressedType8HeaderInfo",
    "CompressedType9HeaderInfo",
    "DecompressError",
    "DecompressingStream",
    "decompress",
    "decompress_parsed",
    "decompress_stream",
    "decompress_stream_parsed",
]
2019-09-23 21:10:55 +00:00
2019-09-23 21:32:38 +00:00
# Lookup table from 'dcmp' ID to the Python function implementing that decompressor.
# Every function in this table is called with the signature
# (header_info: CompressedHeaderInfo, stream: typing.BinaryIO, *, debug: bool = False) -> typing.Iterator[bytes].
DECOMPRESSORS = {
    dcmp_id: dcmp_module.decompress_stream
    for dcmp_id, dcmp_module in (
        (0, dcmp0),
        (1, dcmp1),
        (2, dcmp2),
    )
}
2019-08-22 19:19:10 +00:00
2019-12-30 02:00:12 +00:00
def decompress_stream_parsed(
    header_info: CompressedHeaderInfo,
    stream: typing.BinaryIO,
    *,
    debug: bool = False,
) -> typing.Iterator[bytes]:
    """Decompress compressed resource data from a stream, whose header has already been read and parsed into a CompressedHeaderInfo object.

    This is a generator function: nothing is read from the stream until the returned iterator is consumed,
    and all errors described below are raised during iteration, not when this function is called.

    :param header_info: The already parsed header. Its ``dcmp_id`` selects the decompressor from DECOMPRESSORS,
        and its ``decompressed_length`` is checked against the amount of data actually produced.
    :param stream: The stream to read the compressed data from, passed through to the selected decompressor.
    :param debug: Passed through to the selected decompressor.
    :return: An iterator over the chunks of decompressed data, in order.
    :raises DecompressError: If there is no decompressor for ``header_info.dcmp_id``,
        or - after the last chunk has been yielded - if the total length of the yielded data
        differs from ``header_info.decompressed_length``.
    """

    try:
        decompress_func = DECOMPRESSORS[header_info.dcmp_id]
    except KeyError:
        # The KeyError is only an artifact of how the lookup is implemented,
        # so it is suppressed instead of being shown as the context of the DecompressError.
        raise DecompressError(f"Unsupported 'dcmp' ID: {header_info.dcmp_id}") from None

    decompressed_length = 0
    for chunk in decompress_func(header_info, stream, debug=debug):
        decompressed_length += len(chunk)
        yield chunk

    # The length can only be verified once the decompressor is exhausted,
    # so by this point the caller has already received all of the data.
    if decompressed_length != header_info.decompressed_length:
        raise DecompressError(f"Actual length of decompressed data ({decompressed_length}) does not match length stored in resource ({header_info.decompressed_length})")
2019-09-23 21:50:29 +00:00
2019-12-30 02:00:12 +00:00
def decompress_parsed(header_info: CompressedHeaderInfo, data: bytes, *, debug: bool = False) -> bytes:
    """Decompress compressed resource data held in memory, given its already parsed header.

    ``data`` must not include the header anymore - it has to be stripped off and parsed into ``header_info`` beforehand.
    The data is wrapped in an in-memory stream, run through decompress_stream_parsed, and the resulting chunks are returned as a single bytes object.
    """

    data_stream = io.BytesIO(data)
    chunks = decompress_stream_parsed(header_info, data_stream, debug=debug)
    return b"".join(chunks)
2019-09-23 21:50:29 +00:00
2019-12-30 02:00:12 +00:00
def decompress_stream(stream: typing.BinaryIO, *, debug: bool = False) -> typing.Iterator[bytes]:
    """Decompress compressed resource data from a stream, including its header.

    The header is parsed from the stream first (and printed if ``debug`` is true),
    then the remaining data is decompressed by decompress_stream_parsed and its chunks are yielded unchanged.
    """

    parsed_header = CompressedHeaderInfo.parse_stream(stream)

    if debug:
        print(f"Compressed resource data header: {parsed_header}")

    yield from decompress_stream_parsed(parsed_header, stream, debug=debug)
2019-12-30 02:00:12 +00:00
def decompress(data: bytes, *, debug: bool = False) -> bytes:
    """Decompress compressed resource data held in memory, including its header.

    The data is wrapped in an in-memory stream, run through decompress_stream, and the resulting chunks are returned as a single bytes object.
    """

    compressed_stream = io.BytesIO(data)
    chunks = decompress_stream(compressed_stream, debug=debug)
    return b"".join(chunks)
2020-08-01 12:00:13 +00:00
class DecompressingStream(io.BufferedIOBase, typing.BinaryIO):
    """Readable, seekable binary stream that decompresses compressed resource data on demand.

    Chunks are pulled from the decompressor only as far as a read requires.
    Everything that has been decompressed so far is kept in an in-memory buffer,
    so that already decompressed data can be read again after seeking backwards.
    """

    # The stream from which the compressed data is read.
    _compressed_stream: typing.BinaryIO
    # Whether close() should also close _compressed_stream.
    _close_stream: bool
    # The parsed header of the compressed data.
    _header_info: CompressedHeaderInfo
    # Iterator that produces the decompressed data chunk by chunk. Deleted by close().
    _decompress_iter: typing.Iterator[bytes]
    # In-memory buffer holding all data that has been decompressed so far.
    _decompressed_stream: typing.BinaryIO
    # Current read position of this stream, relative to the start of the decompressed data.
    _seek_position: int

    def __init__(
        self,
        compressed_stream: typing.BinaryIO,
        header_info: typing.Optional[CompressedHeaderInfo],
        *,
        close_stream: bool = False,
    ) -> None:
        """Create a stream that decompresses the data read from compressed_stream.

        :param compressed_stream: The stream to read the compressed data from.
        :param header_info: The already parsed header of the compressed data,
            or None to read and parse the header from compressed_stream.
        :param close_stream: Whether closing this stream should also close compressed_stream.
        """

        super().__init__()

        self._compressed_stream = compressed_stream
        self._close_stream = close_stream

        if header_info is not None:
            self._header_info = header_info
        else:
            self._header_info = CompressedHeaderInfo.parse_stream(self._compressed_stream)

        # This only creates the iterator - no data is decompressed until it is consumed by read().
        self._decompress_iter = decompress_stream_parsed(self._header_info, self._compressed_stream)
        self._decompressed_stream = io.BytesIO()
        self._seek_position = 0

    # This override does nothing,
    # but is needed to make mypy happy,
    # otherwise it complains (apparently incorrectly) about the __enter__ definitions from IOBase and BinaryIO being incompatible with each other.
    def __enter__(self: "DecompressingStream") -> "DecompressingStream":
        return super().__enter__()

    def close(self) -> None:
        """Close this stream, and also the compressed stream if close_stream was set.

        Calling this method on an already closed stream does nothing.
        """

        # Streams must tolerate close() being called more than once.
        # Without this guard, the del statement below would raise an AttributeError on the second call.
        if self.closed:
            return

        super().close()
        if self._close_stream:
            self._compressed_stream.close()
        del self._decompress_iter
        self._decompressed_stream.close()

    def seekable(self) -> bool:
        """Return True - this stream supports seek() and tell()."""

        return True

    def tell(self) -> int:
        """Return the current position in the decompressed data."""

        return self._seek_position

    def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
        """Change the current position in the decompressed data.

        :param offset: The offset to seek to, interpreted according to whence.
        :param whence: io.SEEK_SET (offset is absolute and must not be negative),
            io.SEEK_CUR (offset is relative to the current position),
            or io.SEEK_END (offset is relative to the decompressed length stored in the header, so it is usually zero or negative).
        :return: The new absolute position, clamped to the range from 0 to the decompressed length.
        :raises ValueError: If whence is not one of the supported values, or if offset is negative with io.SEEK_SET.
        """

        if whence == io.SEEK_SET:
            if offset < 0:
                raise ValueError(f"Negative seek offset not allowed with SEEK_SET: {offset}")

            self._seek_position = offset
        elif whence == io.SEEK_CUR:
            self._seek_position += offset
        elif whence == io.SEEK_END:
            # As with all io streams, the offset is added to the end position,
            # so seek(-n, io.SEEK_END) moves to n bytes before the end.
            self._seek_position = self._header_info.decompressed_length + offset
        else:
            raise ValueError(f"Invalid whence value: {whence}")

        # Never leave the position outside of the decompressed data.
        self._seek_position = max(0, min(self._header_info.decompressed_length, self._seek_position))

        return self._seek_position

    def readable(self) -> bool:
        """Return True - this stream supports read()."""

        return True

    def read(self, size: typing.Optional[int] = -1) -> bytes:
        """Read decompressed data starting at the current position, and advance the position past it.

        :param size: The maximum number of bytes to read. If None or negative, everything up to the end of the data is read.
        :return: The data that was read. This is shorter than size (possibly empty) if the end of the data is reached.
        """

        if size is None:
            size = -1

        # Newly decompressed chunks always have to be appended at the end of the buffer,
        # regardless of where the previous read left the buffer's own position.
        self._decompressed_stream.seek(0, io.SEEK_END)

        if size < 0:
            # Read until the end: decompress everything that is left.
            for chunk in self._decompress_iter:
                self._decompressed_stream.write(chunk)
        elif self._decompressed_stream.tell() - self._seek_position < size:
            # Not enough data buffered yet: decompress more chunks,
            # but only until the request can be satisfied (or the data runs out).
            for chunk in self._decompress_iter:
                self._decompressed_stream.write(chunk)

                if self._decompressed_stream.tell() - self._seek_position >= size:
                    break

        self._decompressed_stream.seek(self._seek_position)
        ret = self._decompressed_stream.read(size)
        self._seek_position += len(ret)
        return ret