From 4c32987cc3505c0f8c9281964b9e5ab21720e259 Mon Sep 17 00:00:00 2001 From: dgelessus Date: Thu, 22 Aug 2019 18:36:33 +0200 Subject: [PATCH] Add decompression support for 'dcmp' (1)-compressed resources --- rsrcfork/compress.py | 161 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 155 insertions(+), 6 deletions(-) diff --git a/rsrcfork/compress.py b/rsrcfork/compress.py index 3bdd7e6..f98f1aa 100644 --- a/rsrcfork/compress.py +++ b/rsrcfork/compress.py @@ -28,7 +28,7 @@ STRUCT_COMPRESSED_HEADER = struct.Struct(">4sHHI") # 2 bytes: Reserved (always zero). STRUCT_COMPRESSED_APPLICATION_HEADER = struct.Struct(">BBhH") -# Lookup table for codes in range(0x4b, 0xfe) in "application" compressed resources. +# Lookup table for codes in range(0x4b, 0xfe) in 'dcmp' (0)-compressed resources. # This table was obtained by decompressing a manually created compressed resource that refers to every possible table entry. Detailed steps: # 1. Create a file with a resource fork # 2. Add a resource with the following contents: b'\xa8\x9fer\x00\x12\x08\x01\x00\x00\x01f\x80\x03\x00\x00\x00\x00' + bytes(range(0x4b, 0xfe)) + b'\xff' @@ -36,7 +36,7 @@ STRUCT_COMPRESSED_APPLICATION_HEADER = struct.Struct(">BBhH") # 4. Open the file in ResEdit # 5. Duplicate the resource - this will decompress the original resource and write its contents uncompressed into the duplicate # 6. Read the data from the duplicated resource -COMPRESSED_APPLICATION_TABLE_DATA = ( +COMPRESSED_APPLICATION_0_TABLE_DATA = ( # First line corresponds to codes in range(0x4b, 0x50). b"\x00\x00N\xba\x00\x08Nu\x00\x0c" # All following lines correspond to 8 codes each. @@ -65,8 +65,31 @@ COMPRESSED_APPLICATION_TABLE_DATA = ( b"\x04\x80\x00h\x0b|D\x00A\xe8HA" ) # Note: index 0 in this table corresponds to code 0x4b, index 1 to 0x4c, etc. -COMPRESSED_APPLICATION_TABLE = [COMPRESSED_APPLICATION_TABLE_DATA[i:i + 2] for i in range(0, len(COMPRESSED_APPLICATION_TABLE_DATA), 2)] -assert len(COMPRESSED_APPLICATION_TABLE) == len(range(0x4b, 0xfe)) +COMPRESSED_APPLICATION_0_TABLE = [COMPRESSED_APPLICATION_0_TABLE_DATA[i:i + 2] for i in range(0, len(COMPRESSED_APPLICATION_0_TABLE_DATA), 2)] +assert len(COMPRESSED_APPLICATION_0_TABLE) == len(range(0x4b, 0xfe)) + +# Lookup table for codes in range(0xd5, 0xfe) in 'dcmp' (1)-compressed resources. +# This table was obtained by decompressing a manually created compressed resource that refers to every possible table entry. Detailed steps: +# 1. Create a file with a resource fork +# 2. Add a resource with the following contents: b'\xa8\x9fer\x00\x12\x08\x01\x00\x00\x00R\x80\x03\x00\x01\x00\x00' + bytes(range(0xd5, 0xfe)) + b'\xff' +# 3. Set the "compressed" flag (0x01) on the resource +# 4. Open the file in ResEdit +# 5. Duplicate the resource - this will decompress the original resource and write its contents uncompressed into the duplicate +# 6. Read the data from the duplicated resource +COMPRESSED_APPLICATION_1_TABLE_DATA = ( + # First line corresponds to codes in range(0xd5, 0xd8). + b"\x00\x00\x00\x01\x00\x02" + # All following lines correspond to 8 codes each. + b"\x00\x03.\x01>\x01\x01\x01\x1e\x01\xff\xff\x0e\x011\x00" + b"\x11\x12\x01\x0732\x129\xed\x10\x01'#\"\x017" + b"\x07\x06\x01\x17\x01#\x00\xff\x00/\x07\x0e\xfd<\x015" + b"\x01\x15\x01\x02\x00\x07\x00>\x05\xd5\x02\x01\x06\x07\x07\x08" + # Last line corresponds to codes in range(0xf8, 0xfe). + b"0\x01\x013\x00\x10\x17\x167>67" +) +# Note: index 0 in this table corresponds to code 0xd5, index 1 to 0xd6, etc. +COMPRESSED_APPLICATION_1_TABLE = [COMPRESSED_APPLICATION_1_TABLE_DATA[i:i + 2] for i in range(0, len(COMPRESSED_APPLICATION_1_TABLE_DATA), 2)] +assert len(COMPRESSED_APPLICATION_1_TABLE) == len(range(0xd5, 0xfe)) # Header continuation part for a "system" compressed resource. # 2 bytes: The ID of the 'dcmp' resource that can decompress this resource. Currently only ID 2 is supported. @@ -237,7 +260,7 @@ def _decompress_application_0(data: bytes, decompressed_length: int, *, debug: b i += 1 if debug: print(f"Fixed table reference to 0x{table_index:>02x}") - entry = COMPRESSED_APPLICATION_TABLE[table_index] + entry = COMPRESSED_APPLICATION_0_TABLE[table_index] if debug: print(f"\t-> {entry}") decompressed += entry @@ -421,7 +444,133 @@ def _decompress_application_0(data: bytes, decompressed_length: int, *, debug: b def _decompress_application_1(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes: - raise NotImplementedError("'dcmp' (1) decompression not supported yet") + prev_literals = [] + decompressed = b"" + + i = 0 + + while i < len(data): + byte = data[i] + if debug: + print(f"Tag byte 0x{byte:>02x}, at 0x{i:x}, decompressing to 0x{len(decompressed):x}") + + if byte in range(0x00, 0x20): + # Literal byte sequence, 1-byte header. + # The length of the literal data is stored in the low nibble of the tag byte. + count = (byte >> 0 & 0xf) + 1 + begin = i + 1 + end = begin + count + # Controls whether or not the literal is stored so that it can be referenced again later. + do_store = byte >= 0x10 + literal = data[begin:end] + if debug: + print(f"Literal (1-byte header, storing: {do_store})") + print(f"\t-> {literal}") + decompressed += literal + if do_store: + if debug: + print(f"\t-> stored as literal number 0x{len(prev_literals):x}") + prev_literals.append(literal) + i = end + elif byte in range(0x20, 0xd0): + # Backreference to a previous literal, 1-byte form. + # This can reference literals with indices in range(0xb0). + table_index = byte - 0x20 + i += 1 + if debug: + print(f"Backreference (1-byte form) to 0x{table_index:>02x}") + literal = prev_literals[table_index] + if debug: + print(f"\t-> {literal}") + decompressed += literal + elif byte in (0xd0, 0xd1): + # Literal byte sequence, 2-byte header. + # The length of the literal data is stored in the following byte. + count = data[i+1] + begin = i + 2 + end = begin + count + # Controls whether or not the literal is stored so that it can be referenced again later. + do_store = byte == 0xd1 + literal = data[begin:end] + if debug: + print(f"Literal (2-byte header, storing: {do_store})") + print(f"\t-> {literal}") + decompressed += literal + if do_store: + if debug: + print(f"\t-> stored as literal number 0x{len(prev_literals):x}") + prev_literals.append(literal) + i = end + elif byte == 0xd2: + # Backreference to a previous literal, 2-byte form. + # This can reference literals with indices in range(0xb0, 0x1b0). + table_index = data[i+1] + 0xb0 + i += 2 + if debug: + print(f"Backreference (2-byte form) to 0x{table_index:>02x}") + literal = prev_literals[table_index] + if debug: + print(f"\t-> {literal}") + decompressed += literal + elif byte in range(0xd5, 0xfe): + # Reference into a fixed table of two-byte literals. + # All compressed resources use the same table. + table_index = byte - 0xd5 + i += 1 + if debug: + print(f"Fixed table reference to 0x{table_index:>02x}") + entry = COMPRESSED_APPLICATION_1_TABLE[table_index] + if debug: + print(f"\t-> {entry}") + decompressed += entry + elif byte == 0xfe: + # Extended code, whose meaning is controlled by the following byte. + + i += 1 + kind = data[i] + if debug: + print(f"Extended code: 0x{kind:>02x}") + i += 1 + + if kind == 0x02: + # Repeat 1 byte a certain number of times. + + byte_count = 1 # Unlike with 'dcmp' (0) compression, there doesn't appear to be a 2-byte repeat (or if there is, it's never used in practice). + + if debug: + print(f"Repeat {byte_count}-byte value") + + # The byte(s) to repeat, stored as a variable-length integer. The value is treated as unsigned, i. e. the integer is never negative. + to_repeat_int, length = _read_variable_length_integer(data, i) + i += length + try: + to_repeat = to_repeat_int.to_bytes(byte_count, "big", signed=False) + except OverflowError: + raise DecompressError(f"Value to repeat out of range for {byte_count}-byte repeat: {to_repeat_int:#x}") + + count_m1, length = _read_variable_length_integer(data, i) + i += length + count = count_m1 + 1 + if count <= 0: + raise DecompressError(f"Repeat count must be positive: {count}") + + repeated = to_repeat * count + if debug: + print(f"\t-> {to_repeat} * {count}: {repeated}") + decompressed += repeated + else: + raise DecompressError(f"Unknown extended code: 0x{kind:>02x}") + elif byte == 0xff: + # End of data marker, always occurs exactly once as the last byte of the compressed data. + if debug: + print("End marker") + if i != len(data) - 1: + raise DecompressError(f"End marker reached at {i}, before the expected end of data at {len(data) - 1}") + i += 1 + else: + raise DecompressError(f"Unknown tag byte: 0x{data[i]:>02x}") + + return decompressed def _decompress_application(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes: