Add decompression support for 'dcmp' (1)-compressed resources

2024-11-23 05:33:22 +00:00 · 2019-08-22 18:36:33 +02:00 · 2019-08-22 18:36:33 +02:00 · 4c32987cc3
commit 4c32987cc3
parent acd056973e
1 changed file with 155 additions and 6 deletions
--- a/rsrcfork/compress.py
+++ b/rsrcfork/compress.py
@@ -28,7 +28,7 @@ STRUCT_COMPRESSED_HEADER = struct.Struct(">4sHHI")
 # 2 bytes: Reserved (always zero).
 STRUCT_COMPRESSED_APPLICATION_HEADER = struct.Struct(">BBhH")
-# Lookup table for codes in range(0x4b, 0xfe) in "application" compressed resources.
+# Lookup table for codes in range(0x4b, 0xfe) in 'dcmp' (0)-compressed resources.
 # This table was obtained by decompressing a manually created compressed resource that refers to every possible table entry. Detailed steps:
 # 1. Create a file with a resource fork
 # 2. Add a resource with the following contents: b'\xa8\x9fer\x00\x12\x08\x01\x00\x00\x01f\x80\x03\x00\x00\x00\x00' + bytes(range(0x4b, 0xfe)) + b'\xff'
@@ -36,7 +36,7 @@ STRUCT_COMPRESSED_APPLICATION_HEADER = struct.Struct(">BBhH")
 # 4. Open the file in ResEdit
 # 5. Duplicate the resource - this will decompress the original resource and write its contents uncompressed into the duplicate
 # 6. Read the data from the duplicated resource
-COMPRESSED_APPLICATION_TABLE_DATA = (
+COMPRESSED_APPLICATION_0_TABLE_DATA = (
 	# First line corresponds to codes in range(0x4b, 0x50).
 	b"\x00\x00N\xba\x00\x08Nu\x00\x0c"
 	# All following lines correspond to 8 codes each.
@@ -65,8 +65,31 @@ COMPRESSED_APPLICATION_TABLE_DATA = (
 	b"\x04\x80\x00h\x0b|D\x00A\xe8HA"
 )
 # Note: index 0 in this table corresponds to code 0x4b, index 1 to 0x4c, etc.
-COMPRESSED_APPLICATION_TABLE = [COMPRESSED_APPLICATION_TABLE_DATA[i:i + 2] for i in range(0, len(COMPRESSED_APPLICATION_TABLE_DATA), 2)]
+COMPRESSED_APPLICATION_0_TABLE = [COMPRESSED_APPLICATION_0_TABLE_DATA[i:i + 2] for i in range(0, len(COMPRESSED_APPLICATION_0_TABLE_DATA), 2)]
-assert len(COMPRESSED_APPLICATION_TABLE) == len(range(0x4b, 0xfe))
+assert len(COMPRESSED_APPLICATION_0_TABLE) == len(range(0x4b, 0xfe))
 # Lookup table for codes in range(0xd5, 0xfe) in 'dcmp' (1)-compressed resources.
 # This table was obtained by decompressing a manually created compressed resource that refers to every possible table entry. Detailed steps:
 # 1. Create a file with a resource fork
 # 2. Add a resource with the following contents: b'\xa8\x9fer\x00\x12\x08\x01\x00\x00\x00R\x80\x03\x00\x01\x00\x00' + bytes(range(0xd5, 0xfe)) + b'\xff'
 # 3. Set the "compressed" flag (0x01) on the resource
 # 4. Open the file in ResEdit
 # 5. Duplicate the resource - this will decompress the original resource and write its contents uncompressed into the duplicate
 # 6. Read the data from the duplicated resource
 # Raw bytes of the fixed table. The adjacent bytes literals below are concatenated into a single bytes object.
 COMPRESSED_APPLICATION_1_TABLE_DATA = (
 	# First line corresponds to codes in range(0xd5, 0xd8).
 	b"\x00\x00\x00\x01\x00\x02"
 	# All following lines correspond to 8 codes each.
 	b"\x00\x03.\x01>\x01\x01\x01\x1e\x01\xff\xff\x0e\x011\x00"
 	b"\x11\x12\x01\x0732\x129\xed\x10\x01'#\"\x017"
 	b"\x07\x06\x01\x17\x01#\x00\xff\x00/\x07\x0e\xfd<\x015"
 	b"\x01\x15\x01\x02\x00\x07\x00>\x05\xd5\x02\x01\x06\x07\x07\x08"
 	# Last line corresponds to codes in range(0xf8, 0xfe).
 	b"0\x01\x013\x00\x10\x17\x167>67"
 )
 # Note: index 0 in this table corresponds to code 0xd5, index 1 to 0xd6, etc.
 # The raw data is split into consecutive 2-byte slices, so every table entry is a bytes object of length 2.
 COMPRESSED_APPLICATION_1_TABLE = [COMPRESSED_APPLICATION_1_TABLE_DATA[i:i + 2] for i in range(0, len(COMPRESSED_APPLICATION_1_TABLE_DATA), 2)]
 # Import-time sanity check that the table has exactly one entry per code (0xfe - 0xd5 = 41 entries, i. e. 82 bytes of data).
 # Note that assert statements are skipped when Python runs with -O.
 assert len(COMPRESSED_APPLICATION_1_TABLE) == len(range(0xd5, 0xfe))
 # Header continuation part for a "system" compressed resource.
 # 2 bytes: The ID of the 'dcmp' resource that can decompress this resource. Currently only ID 2 is supported.
@@ -237,7 +260,7 @@ def _decompress_application_0(data: bytes, decompressed_length: int, *, debug: b
 			i += 1
 			if debug:
 				print(f"Fixed table reference to 0x{table_index:>02x}")
-			entry = COMPRESSED_APPLICATION_TABLE[table_index]
+			entry = COMPRESSED_APPLICATION_0_TABLE[table_index]
 			if debug:
 				print(f"\t-> {entry}")
 			decompressed += entry
@@ -421,7 +444,133 @@ def _decompress_application_0(data: bytes, decompressed_length: int, *, debug: b
 # Decompress the body of a 'dcmp' (1)-compressed resource.
 #
 # The compressed data is processed as a sequence of tag bytes, each optionally followed by arguments.
 # The tag byte selects one of: an inline literal (optionally stored for later reuse), a backreference to a stored literal,
 # a reference into the fixed table COMPRESSED_APPLICATION_1_TABLE, an extended code (only the 1-byte repeat, 0x02, is handled), or the end marker.
 #
 # Parameters:
 # data: The compressed data to decode.
 # decompressed_length: Accepted as part of the signature, but not read anywhere in this function - the output length is not checked here.
 # debug: If true, a trace of every decoded tag is printed to standard output.
 #
 # Returns the concatenation of everything that was decoded.
 #
 # Raises DecompressError on an unknown tag byte, an unknown extended code, a repeat value that does not fit into one byte,
 # a non-positive repeat count, or an end marker that is not the last byte of data.
 #
 # NOTE(review): Truncated or malformed input is not fully guarded against:
 # * data[i+1] (tags 0xd0, 0xd1, 0xd2) and data[i] (tag 0xfe) raise IndexError if the tag is the last byte of data.
 # * prev_literals[table_index] raises IndexError if the referenced literal has not been stored yet.
 # * A literal whose declared length runs past the end of data is silently cut short, because slicing never raises.
 # * The loop ends without error if data runs out before an end marker (0xff) is seen.
 def _decompress_application_1(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes:
-	raise NotImplementedError("'dcmp' (1) decompression not supported yet")
+	prev_literals = []
 	# prev_literals (above) holds every stored literal in the order it was stored. The backreference tags index into this list.
 	decompressed = b""
 	# i is the offset of the next tag byte in data. Every branch below must advance it past the tag and its arguments.
 	i = 0
 	while i < len(data):
 		byte = data[i]
 		if debug:
 			print(f"Tag byte 0x{byte:>02x}, at 0x{i:x}, decompressing to 0x{len(decompressed):x}")
 		if byte in range(0x00, 0x20):
 			# Literal byte sequence, 1-byte header.
 			# The length of the literal data is stored in the low nibble of the tag byte.
 			# (The shift by 0 is a no-op, and >> binds tighter than &, so this is (byte & 0xf) + 1, i. e. a length in range(1, 17).)
 			count = (byte >> 0 & 0xf) + 1
 			begin = i + 1
 			end = begin + count
 			# Controls whether or not the literal is stored so that it can be referenced again later.
 			# Tags in range(0x10, 0x20) store the literal, tags in range(0x00, 0x10) do not.
 			do_store = byte >= 0x10
 			literal = data[begin:end]
 			if debug:
 				print(f"Literal (1-byte header, storing: {do_store})")
 				print(f"\t-> {literal}")
 			decompressed += literal
 			if do_store:
 				if debug:
 					print(f"\t-> stored as literal number 0x{len(prev_literals):x}")
 				prev_literals.append(literal)
 			# Continue with the first byte after the literal data.
 			i = end
 		elif byte in range(0x20, 0xd0):
 			# Backreference to a previous literal, 1-byte form.
 			# This can reference literals with indices in range(0xb0).
 			table_index = byte - 0x20
 			i += 1
 			if debug:
 				print(f"Backreference (1-byte form) to 0x{table_index:>02x}")
 			# NOTE(review): raises IndexError (not DecompressError) if fewer than table_index + 1 literals have been stored so far.
 			literal = prev_literals[table_index]
 			if debug:
 				print(f"\t-> {literal}")
 			decompressed += literal
 		elif byte in (0xd0, 0xd1):
 			# Literal byte sequence, 2-byte header.
 			# The length of the literal data is stored in the following byte.
 			# Unlike in the 1-byte header form, nothing is added to the length, so a length byte of 0 produces an empty literal.
 			count = data[i+1]
 			begin = i + 2
 			end = begin + count
 			# Controls whether or not the literal is stored so that it can be referenced again later.
 			do_store = byte == 0xd1
 			literal = data[begin:end]
 			if debug:
 				print(f"Literal (2-byte header, storing: {do_store})")
 				print(f"\t-> {literal}")
 			decompressed += literal
 			if do_store:
 				if debug:
 					print(f"\t-> stored as literal number 0x{len(prev_literals):x}")
 				prev_literals.append(literal)
 			i = end
 		elif byte == 0xd2:
 			# Backreference to a previous literal, 2-byte form.
 			# This can reference literals with indices in range(0xb0, 0x1b0).
 			# The offset of 0xb0 continues where the 1-byte form (indices in range(0xb0)) ends.
 			table_index = data[i+1] + 0xb0
 			i += 2
 			if debug:
 				print(f"Backreference (2-byte form) to 0x{table_index:>02x}")
 			literal = prev_literals[table_index]
 			if debug:
 				print(f"\t-> {literal}")
 			decompressed += literal
 		elif byte in range(0xd5, 0xfe):
 			# Reference into a fixed table of two-byte literals.
 			# All compressed resources use the same table.
 			table_index = byte - 0xd5
 			i += 1
 			if debug:
 				print(f"Fixed table reference to 0x{table_index:>02x}")
 			entry = COMPRESSED_APPLICATION_1_TABLE[table_index]
 			if debug:
 				print(f"\t-> {entry}")
 			decompressed += entry
 		elif byte == 0xfe:
 			# Extended code, whose meaning is controlled by the following byte.
 			i += 1
 			kind = data[i]
 			if debug:
 				print(f"Extended code: 0x{kind:>02x}")
 			i += 1
 			if kind == 0x02:
 				# Repeat 1 byte a certain number of times.
 				byte_count = 1 # Unlike with 'dcmp' (0) compression, there doesn't appear to be a 2-byte repeat (or if there is, it's never used in practice).
 				if debug:
 					print(f"Repeat {byte_count}-byte value")
 				# The byte(s) to repeat, stored as a variable-length integer. The value is treated as unsigned, i. e. the integer is never negative.
 				# _read_variable_length_integer is defined elsewhere in this file. It is used here as returning the decoded value and the number of bytes it occupied in data.
 				to_repeat_int, length = _read_variable_length_integer(data, i)
 				i += length
 				try:
 					# int.to_bytes raises OverflowError if the value is negative or does not fit into byte_count bytes.
 					to_repeat = to_repeat_int.to_bytes(byte_count, "big", signed=False)
 				except OverflowError:
 					raise DecompressError(f"Value to repeat out of range for {byte_count}-byte repeat: {to_repeat_int:#x}")
 				# The repeat count is stored minus one.
 				count_m1, length = _read_variable_length_integer(data, i)
 				i += length
 				count = count_m1 + 1
 				# This can only trigger if the stored value is negative - presumably the helper decodes signed integers (TODO confirm against its definition).
 				if count <= 0:
 					raise DecompressError(f"Repeat count must be positive: {count}")
 				repeated = to_repeat * count
 				if debug:
 					print(f"\t-> {to_repeat} * {count}: {repeated}")
 				decompressed += repeated
 			else:
 				raise DecompressError(f"Unknown extended code: 0x{kind:>02x}")
 		elif byte == 0xff:
 			# End of data marker, always occurs exactly once as the last byte of the compressed data.
 			if debug:
 				print("End marker")
 			if i != len(data) - 1:
 				raise DecompressError(f"End marker reached at {i}, before the expected end of data at {len(data) - 1}")
 			# Advancing past the marker makes the loop condition false, which ends decompression.
 			i += 1
 		else:
 			# Only the tag bytes 0xd3 and 0xd4 are not matched by any branch above.
 			# i has not been advanced in this branch, so data[i] is the same value as byte.
 			raise DecompressError(f"Unknown tag byte: 0x{data[i]:>02x}")
 	return decompressed
 def _decompress_application(data: bytes, decompressed_length: int, *, debug: bool=False) -> bytes: