mirror of
https://github.com/dgelessus/python-rsrcfork.git
synced 2025-07-22 16:24:08 +00:00
The leading underscore is meant to distinguish private (for internal use only) APIs from public (for external use) APIs. One can argue about where the line between public and private should be, but if something is used from other modules (as with read_variable_length_integer) it's not really private IMHO. In scripts (like __main__) it also doesn't make much sense to use leading underscores, because the entire file is never meant to be used by external code.
157 lines
5.5 KiB
Python
157 lines
5.5 KiB
Python
import typing
|
|
|
|
from . import common
|
|
|
|
# Lookup table for codes in range(0xd5, 0xfe).
|
|
# This table was obtained by decompressing a manually created compressed resource with the following contents:
|
|
# b'\xa8\x9fer\x00\x12\x08\x01\x00\x00\x00R\x80\x03\x00\x01\x00\x00' + bytes(range(0xd5, 0xfe)) + b'\xff'
|
|
TABLE_DATA = (
|
|
# First line corresponds to codes in range(0xd5, 0xd8).
|
|
b"\x00\x00\x00\x01\x00\x02"
|
|
# All following lines correspond to 8 codes each.
|
|
b"\x00\x03.\x01>\x01\x01\x01\x1e\x01\xff\xff\x0e\x011\x00"
|
|
b"\x11\x12\x01\x0732\x129\xed\x10\x01'#\"\x017"
|
|
b"\x07\x06\x01\x17\x01#\x00\xff\x00/\x07\x0e\xfd<\x015"
|
|
b"\x01\x15\x01\x02\x00\x07\x00>\x05\xd5\x02\x01\x06\x07\x07\x08"
|
|
# Last line corresponds to codes in range(0xf8, 0xfe).
|
|
b"0\x01\x013\x00\x10\x17\x167>67"
|
|
)
|
|
# Note: index 0 in this table corresponds to code 0xd5, index 1 to 0xd6, etc.
|
|
TABLE = [TABLE_DATA[i:i + 2] for i in range(0, len(TABLE_DATA), 2)]
|
|
assert len(TABLE) == len(range(0xd5, 0xfe))
|
|
|
|
|
|
def decompress(header_info: common.CompressedHeaderInfo, data: bytes, *, debug: bool=False) -> bytes:
|
|
"""Decompress compressed data in the format used by 'dcmp' (1)."""
|
|
|
|
if not isinstance(header_info, common.CompressedApplicationHeaderInfo):
|
|
raise common.DecompressError(f"Incorrect header type: {type(header_info).__qualname__}")
|
|
|
|
prev_literals: typing.List[bytes] = []
|
|
decompressed = b""
|
|
|
|
i = 0
|
|
|
|
while i < len(data):
|
|
byte = data[i]
|
|
if debug:
|
|
print(f"Tag byte 0x{byte:>02x}, at 0x{i:x}, decompressing to 0x{len(decompressed):x}")
|
|
|
|
if byte in range(0x00, 0x20):
|
|
# Literal byte sequence, 1-byte header.
|
|
# The length of the literal data is stored in the low nibble of the tag byte.
|
|
count = (byte >> 0 & 0xf) + 1
|
|
begin = i + 1
|
|
end = begin + count
|
|
# Controls whether or not the literal is stored so that it can be referenced again later.
|
|
do_store = byte >= 0x10
|
|
literal = data[begin:end]
|
|
if debug:
|
|
print(f"Literal (1-byte header, storing: {do_store})")
|
|
print(f"\t-> {literal}")
|
|
decompressed += literal
|
|
if do_store:
|
|
if debug:
|
|
print(f"\t-> stored as literal number 0x{len(prev_literals):x}")
|
|
prev_literals.append(literal)
|
|
i = end
|
|
elif byte in range(0x20, 0xd0):
|
|
# Backreference to a previous literal, 1-byte form.
|
|
# This can reference literals with indices in range(0xb0).
|
|
table_index = byte - 0x20
|
|
i += 1
|
|
if debug:
|
|
print(f"Backreference (1-byte form) to 0x{table_index:>02x}")
|
|
literal = prev_literals[table_index]
|
|
if debug:
|
|
print(f"\t-> {literal}")
|
|
decompressed += literal
|
|
elif byte in (0xd0, 0xd1):
|
|
# Literal byte sequence, 2-byte header.
|
|
# The length of the literal data is stored in the following byte.
|
|
count = data[i+1]
|
|
begin = i + 2
|
|
end = begin + count
|
|
# Controls whether or not the literal is stored so that it can be referenced again later.
|
|
do_store = byte == 0xd1
|
|
literal = data[begin:end]
|
|
if debug:
|
|
print(f"Literal (2-byte header, storing: {do_store})")
|
|
print(f"\t-> {literal}")
|
|
decompressed += literal
|
|
if do_store:
|
|
if debug:
|
|
print(f"\t-> stored as literal number 0x{len(prev_literals):x}")
|
|
prev_literals.append(literal)
|
|
i = end
|
|
elif byte == 0xd2:
|
|
# Backreference to a previous literal, 2-byte form.
|
|
# This can reference literals with indices in range(0xb0, 0x1b0).
|
|
table_index = data[i+1] + 0xb0
|
|
i += 2
|
|
if debug:
|
|
print(f"Backreference (2-byte form) to 0x{table_index:>02x}")
|
|
literal = prev_literals[table_index]
|
|
if debug:
|
|
print(f"\t-> {literal}")
|
|
decompressed += literal
|
|
elif byte in range(0xd5, 0xfe):
|
|
# Reference into a fixed table of two-byte literals.
|
|
# All compressed resources use the same table.
|
|
table_index = byte - 0xd5
|
|
i += 1
|
|
if debug:
|
|
print(f"Fixed table reference to 0x{table_index:>02x}")
|
|
entry = TABLE[table_index]
|
|
if debug:
|
|
print(f"\t-> {entry}")
|
|
decompressed += entry
|
|
elif byte == 0xfe:
|
|
# Extended code, whose meaning is controlled by the following byte.
|
|
|
|
i += 1
|
|
kind = data[i]
|
|
if debug:
|
|
print(f"Extended code: 0x{kind:>02x}")
|
|
i += 1
|
|
|
|
if kind == 0x02:
|
|
# Repeat 1 byte a certain number of times.
|
|
|
|
byte_count = 1 # Unlike with 'dcmp' (0) compression, there doesn't appear to be a 2-byte repeat (or if there is, it's never used in practice).
|
|
|
|
if debug:
|
|
print(f"Repeat {byte_count}-byte value")
|
|
|
|
# The byte(s) to repeat, stored as a variable-length integer. The value is treated as unsigned, i. e. the integer is never negative.
|
|
to_repeat_int, length = common.read_variable_length_integer(data, i)
|
|
i += length
|
|
try:
|
|
to_repeat = to_repeat_int.to_bytes(byte_count, "big", signed=False)
|
|
except OverflowError:
|
|
raise common.DecompressError(f"Value to repeat out of range for {byte_count}-byte repeat: {to_repeat_int:#x}")
|
|
|
|
count_m1, length = common.read_variable_length_integer(data, i)
|
|
i += length
|
|
count = count_m1 + 1
|
|
if count <= 0:
|
|
raise common.DecompressError(f"Repeat count must be positive: {count}")
|
|
|
|
repeated = to_repeat * count
|
|
if debug:
|
|
print(f"\t-> {to_repeat} * {count}: {repeated}")
|
|
decompressed += repeated
|
|
else:
|
|
raise common.DecompressError(f"Unknown extended code: 0x{kind:>02x}")
|
|
elif byte == 0xff:
|
|
# End of data marker, always occurs exactly once as the last byte of the compressed data.
|
|
if debug:
|
|
print("End marker")
|
|
if i != len(data) - 1:
|
|
raise common.DecompressError(f"End marker reached at {i}, before the expected end of data at {len(data) - 1}")
|
|
i += 1
|
|
else:
|
|
raise common.DecompressError(f"Unknown tag byte: 0x{data[i]:>02x}")
|
|
|
|
return decompressed
|