mirror of
https://github.com/elliotnunn/macresources.git
synced 2024-11-17 08:04:37 +00:00
2f94e88cec
That's because both compression and decompression are supported for now.
214 lines
7.9 KiB
Python
214 lines
7.9 KiB
Python
'''
|
|
This script implements the GreggyBits decompression algorithm
|
|
found in the 'dcmp' 2 resource.
|
|
'''
|
|
|
|
import struct
|
|
|
|
# predefined lookup table of the most frequent words
|
|
GreggDefLUT = (
|
|
0x0000, 0x0008, 0x4EBA, 0x206E, 0x4E75, 0x000C, 0x0004, 0x7000,
|
|
0x0010, 0x0002, 0x486E, 0xFFFC, 0x6000, 0x0001, 0x48E7, 0x2F2E,
|
|
0x4E56, 0x0006, 0x4E5E, 0x2F00, 0x6100, 0xFFF8, 0x2F0B, 0xFFFF,
|
|
0x0014, 0x000A, 0x0018, 0x205F, 0x000E, 0x2050, 0x3F3C, 0xFFF4,
|
|
0x4CEE, 0x302E, 0x6700, 0x4CDF, 0x266E, 0x0012, 0x001C, 0x4267,
|
|
0xFFF0, 0x303C, 0x2F0C, 0x0003, 0x4ED0, 0x0020, 0x7001, 0x0016,
|
|
0x2D40, 0x48C0, 0x2078, 0x7200, 0x588F, 0x6600, 0x4FEF, 0x42A7,
|
|
0x6706, 0xFFFA, 0x558F, 0x286E, 0x3F00, 0xFFFE, 0x2F3C, 0x6704,
|
|
0x598F, 0x206B, 0x0024, 0x201F, 0x41FA, 0x81E1, 0x6604, 0x6708,
|
|
0x001A, 0x4EB9, 0x508F, 0x202E, 0x0007, 0x4EB0, 0xFFF2, 0x3D40,
|
|
0x001E, 0x2068, 0x6606, 0xFFF6, 0x4EF9, 0x0800, 0x0C40, 0x3D7C,
|
|
0xFFEC, 0x0005, 0x203C, 0xFFE8, 0xDEFC, 0x4A2E, 0x0030, 0x0028,
|
|
0x2F08, 0x200B, 0x6002, 0x426E, 0x2D48, 0x2053, 0x2040, 0x1800,
|
|
0x6004, 0x41EE, 0x2F28, 0x2F01, 0x670A, 0x4840, 0x2007, 0x6608,
|
|
0x0118, 0x2F07, 0x3028, 0x3F2E, 0x302B, 0x226E, 0x2F2B, 0x002C,
|
|
0x670C, 0x225F, 0x6006, 0x00FF, 0x3007, 0xFFEE, 0x5340, 0x0040,
|
|
0xFFE4, 0x4A40, 0x660A, 0x000F, 0x4EAD, 0x70FF, 0x22D8, 0x486B,
|
|
0x0022, 0x204B, 0x670E, 0x4AAE, 0x4E90, 0xFFE0, 0xFFC0, 0x002A,
|
|
0x2740, 0x6702, 0x51C8, 0x02B6, 0x487A, 0x2278, 0xB06E, 0xFFE6,
|
|
0x0009, 0x322E, 0x3E00, 0x4841, 0xFFEA, 0x43EE, 0x4E71, 0x7400,
|
|
0x2F2C, 0x206C, 0x003C, 0x0026, 0x0050, 0x1880, 0x301F, 0x2200,
|
|
0x660C, 0xFFDA, 0x0038, 0x6602, 0x302C, 0x200C, 0x2D6E, 0x4240,
|
|
0xFFE2, 0xA9F0, 0xFF00, 0x377C, 0xE580, 0xFFDC, 0x4868, 0x594F,
|
|
0x0034, 0x3E1F, 0x6008, 0x2F06, 0xFFDE, 0x600A, 0x7002, 0x0032,
|
|
0xFFCC, 0x0080, 0x2251, 0x101F, 0x317C, 0xA029, 0xFFD8, 0x5240,
|
|
0x0100, 0x6710, 0xA023, 0xFFCE, 0xFFD4, 0x2006, 0x4878, 0x002E,
|
|
0x504F, 0x43FA, 0x6712, 0x7600, 0x41E8, 0x4A6E, 0x20D9, 0x005A,
|
|
0x7FFF, 0x51CA, 0x005C, 0x2E00, 0x0240, 0x48C7, 0x6714, 0x0C80,
|
|
0x2E9F, 0xFFD6, 0x8000, 0x1000, 0x4842, 0x4A6B, 0xFFD2, 0x0048,
|
|
0x4A47, 0x4ED1, 0x206F, 0x0041, 0x600C, 0x2A78, 0x422E, 0x3200,
|
|
0x6574, 0x6716, 0x0044, 0x486D, 0x2008, 0x486C, 0x0B7C, 0x2640,
|
|
0x0400, 0x0068, 0x206D, 0x000D, 0x2A40, 0x000B, 0x003E, 0x0220
|
|
)
|
|
|
|
|
|
def EncodeMaskedWords(src, dst, pos, n, tab):
|
|
mask = 0
|
|
encoded = bytearray()
|
|
|
|
for rPos in range(n):
|
|
mask <<= 1
|
|
word = src[pos+rPos]
|
|
compressed = 1 if word in tab else 0
|
|
mask |= compressed
|
|
if compressed: # replace word with table index
|
|
encoded.append(tab.index(word))
|
|
else: # otherwise, just copy unencoded data over
|
|
encoded.extend(word.to_bytes(2, 'big'))
|
|
|
|
if n < 8: # left-justify the mask for n < 8
|
|
mask <<= 8 - n
|
|
|
|
dst.append(mask)
|
|
dst.extend(encoded)
|
|
|
|
return pos+n
|
|
|
|
|
|
def DecodeMaskedWords(src, dst, pos, n, tab, mask):
|
|
'''Decode n words using the specified lookup table under control of mask.
|
|
Return new bitstream position.
|
|
'''
|
|
if mask == 0: # all mask bits are zero?
|
|
nBytes = n * 2
|
|
dst.extend(src[pos:pos+nBytes]) # copy over n*2 bytes
|
|
pos += nBytes
|
|
else:
|
|
for bn in range(7, 7 - n, -1):
|
|
if mask & (1 << bn): # mask bit set?
|
|
word = tab[src[pos]] # decode next word with LUT
|
|
dst.extend(word.to_bytes(2, 'big')) # write it to dst
|
|
pos += 1
|
|
else: # otherwise, copy over that word
|
|
dst.extend(src[pos:pos+2])
|
|
pos += 2
|
|
return pos
|
|
|
|
|
|
def GreggDecompress(src, dst, unpackSize, tabSize, comprFlags):
|
|
'''Decompress resource data from src to dst.
|
|
|
|
Parameters:
|
|
src source buffer containing compressed data
|
|
dst destination buffer, must be bytearray to work properly
|
|
unpackSize size in bytes of the unpacked resource data
|
|
tabSize size of the embedded lookup table
|
|
comprFlags compression flags from the extended resource header
|
|
'''
|
|
hasDynamicTab = comprFlags & 1
|
|
isBitmapped = comprFlags & 2
|
|
print("tabSize: %d" % tabSize)
|
|
print("comprFlags: 0x%X, dynamic table: %s, bitmap data: %s" % (comprFlags,
|
|
"yes" if hasDynamicTab else "no", "yes" if isBitmapped else "no"))
|
|
|
|
if hasDynamicTab:
|
|
nEntries = tabSize + 1
|
|
pos = nEntries * 2
|
|
dynamicLUT = struct.unpack(">" + str(nEntries) + "H", src[:pos])
|
|
# dump dynamic LUT
|
|
if 0:
|
|
for idx, elem in enumerate(dynamicLUT):
|
|
if idx and not idx & 3:
|
|
print(",")
|
|
else:
|
|
print(", ", end="")
|
|
print("0x%04X" % elem, end="")
|
|
print("")
|
|
else:
|
|
pos = 0
|
|
|
|
LUT = dynamicLUT if hasDynamicTab else GreggDefLUT
|
|
nWords = unpackSize >> 1
|
|
hasExtraByte = unpackSize & 1
|
|
|
|
if isBitmapped:
|
|
nRuns = nWords >> 3
|
|
for idx in range(nRuns):
|
|
mask = src[pos]
|
|
pos += 1
|
|
#print("Mask for run #%d: 0x%X" % (idx, bitmap))
|
|
|
|
pos = DecodeMaskedWords(src, dst, pos, 8, LUT, mask)
|
|
|
|
if nWords & 7:
|
|
trailingWords = nWords & 7
|
|
lastMask = src[pos]
|
|
pos += 1
|
|
#print("Last mask: 0x%X, trailing words: %d" % (lastMask, trailingWords))
|
|
pos = DecodeMaskedWords(src, dst, pos, trailingWords, LUT, lastMask)
|
|
else:
|
|
for i in range(nWords):
|
|
word = LUT[src[pos]] # decode next word with LUT
|
|
dst.extend(word.to_bytes(2, 'big')) # write it to dst
|
|
pos += 1
|
|
|
|
if hasExtraByte: # have a got an extra byte at the end?
|
|
dst.expand(src[pos]) # copy it over
|
|
pos += 1
|
|
|
|
#print("Last input position: %d" % pos)
|
|
|
|
|
|
def GreggCompress(src, dst, unpackSize, customTab=False, isBitmapped=False):
|
|
if customTab:
|
|
# convert input bytes into an array of words
|
|
nWords = unpackSize >> 1
|
|
inWords = struct.unpack(">" + str(nWords) + "H", src[:nWords*2])
|
|
|
|
# count occurence of each word
|
|
from collections import Counter
|
|
wordsCounts = Counter(inWords)
|
|
|
|
# grab word counts in descending order (most frequent comes first)
|
|
sortedCounts = list(set(wordsCounts.values()))
|
|
sortedCounts.sort(reverse=True)
|
|
|
|
# now we're ready to construct a table of 256 most common words
|
|
embeddedTab = list()
|
|
nElems = 0
|
|
|
|
for cnt in sortedCounts:
|
|
# pick all words for a specific count and sort them in descending order
|
|
words = [key for (key, value) in wordsCounts.items() if value == cnt]
|
|
words.sort(reverse=True)
|
|
|
|
# append them to the table, stop when tableMax is reached
|
|
if nElems + len(words) < 256:
|
|
embeddedTab.extend(words)
|
|
nElems += len(words)
|
|
else:
|
|
remainedElems = 256 - nElems
|
|
embeddedTab.extend(words[:remainedElems])
|
|
nElems += remainedElems
|
|
|
|
if nElems >= 256:
|
|
break
|
|
|
|
if 0: # dump the resulting table
|
|
for idx, elem in enumerate(embeddedTab):
|
|
if idx and not idx & 3:
|
|
print(",")
|
|
else:
|
|
print(", ", end="")
|
|
print("0x%04X" % elem, end="")
|
|
print("")
|
|
|
|
# write the constructed table into output
|
|
for word in embeddedTab:
|
|
dst.extend(word.to_bytes(2, 'big'))
|
|
|
|
if isBitmapped:
|
|
pos = 0
|
|
nRuns = nWords >> 3
|
|
|
|
for idx in range(nRuns):
|
|
pos = EncodeMaskedWords(inWords, dst, pos, 8, embeddedTab)
|
|
|
|
if nWords & 7:
|
|
pos = EncodeMaskedWords(inWords, dst, pos, nWords & 7, embeddedTab)
|
|
|
|
if unpackSize & 1: # copy over last byte in the case of odd length
|
|
dst.append(src[-1])
|
|
else:
|
|
print("Non-bitmapped compression not yet implemented")
|