Basic InstaCompOne support (thanks to Max Poliakovski)

This commit is contained in:
Elliot Nunn 2020-06-23 13:47:38 +08:00
parent 60f289efb8
commit 1a16ebc3d4
3 changed files with 365 additions and 1 deletions

71
bin/instacomp Normal file
View File

@ -0,0 +1,71 @@
#!/usr/bin/env python3
# Copyright (c) 2019-2020 Elliot Nunn
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
HELP = '''InstaComp: unpack resources in the Macintosh System file (7.5)
Algorithm from MacOS and the Installer SDK (Apple)
Decompression reimplemented by Maxim Poliakovski
Use the 'rfx' wrapper command to access resources inside a file'''
if __name__ == '__main__':
import struct
import sys
import argparse
parser = argparse.ArgumentParser(
description=HELP, formatter_class=argparse.RawTextHelpFormatter,
)
parser.prog = '[rfx] ' + parser.prog
parser.add_argument('path', nargs='+', metavar='file', action='store', help='Resource data')
parser.add_argument('-x', dest='do_compress', action='store_false', required=True, help='extract (currently mandatory)')
args = parser.parse_args()
for el in args.path:
from macresources.instacomp import unpack, WrongFormatError
with open(el, 'r+b') as f:
already_compressed = (f.read(4) == b'\xA8\x9Fer')
if not already_compressed: continue
f.seek(0)
data = f.read()
print(el) # elliot delete!
try:
try:
data = unpack(data)
except WrongFormatError:
continue
f.seek(0)
f.write(data)
f.truncate()
except Exception as e:
print('failed', e)
# print(el)
# raise

293
macresources/instacomp.py Normal file
View File

@ -0,0 +1,293 @@
# Copyright (c) 2018-2020 Maxim Poliakovski, Elliot Nunn
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
'''
InstaCompOne is an older compression scheme used in the installer SDK
of the classic Mac OS. Like the famous Deflate algorithm, InstaCompOne
combines LZ77 with Huffman coding but uses a different bitstream format.
Author: Max Poliakovski 2018
'''
import struct
from math import ceil, log2
LIT_MAX_LEN = 63 # maximal length of the literal block
''' Huffman codes for copy length.
'''
lenHuffTab = {
0b00 : 0,
0b01 : 1,
0b100 : 2,
0b1010 : 3,
0b1011 : 4,
0b11000 : 5,
0b11001 : 6,
0b110100 : 7,
0b110101 : 8,
0b110110 : 9,
0b110111 : 10,
# for large trees, use compact Huffman table representation in the form
# prefix_bits : (num_of_val_bits, offset)
0b1110 : (3, 11),
0b11110 : (3, 19),
0b111110 : (5, 27),
0b1111110 : (6, 59),
0b11111110 : (7, 123),
0b111111110 : (8, 251),
0b1111111110 : (9, 507),
0b11111111110 : (10, 1019)
}
''' Huffman codes for literal length.
'''
litHuffTab = {
0b0 : 1,
0b100 : 2,
0b101 : 3,
0b11000 : 4,
0b11001 : 5,
0b11010 : 6,
0b11011 : 7,
0b1110000 : 8,
0b1110001 : 9,
0b1110010 : 10,
0b1110011 : 11,
0b1110100 : 12,
0b1110101 : 13,
0b1110110 : 14,
0b1110111 : 15,
0b11110 : (4, 16),
0b11111 : (5, 32)
}
# TODO: can that be done more quickly?
next_pow2 = lambda x: 1 if x < 2 else int(ceil(log2(x)))
class BitStreamReader():
''' Convenient methods for bitwise access to the input data.
'''
def __init__(self, input, size, pos):
self.inBuf = input
self.inSize = size
self.inPos = pos
self.bPool = 0
self.bitsInPool = 0
def showbits(self, nb):
''' Return nb bits from the bitstream without advancing the bit position
'''
while nb > self.bitsInPool:
self.bPool = (self.bPool << 8) | self.inBuf[self.inPos]
self.inPos += 1
self.bitsInPool += 8
return (self.bPool >> (self.bitsInPool - nb)) & (0xFFFFFFFF >> (32 - nb))
def flushbits(self, nb):
''' Advance bit position by nb
'''
if nb <= self.bitsInPool:
self.bitsInPool -= nb
else:
self.bitsInPool = 0
def getbits(self, nb):
''' Same as showbits with advancing the bit position
'''
res = self.showbits(nb)
self.flushbits(nb)
return res
def decodehuff(self, tab, minlen, maxlen):
''' Decode Huffman code from bitstream
'''
for w in range(minlen, maxlen+1):
cw = self.showbits(w)
if cw in tab:
val = tab[cw]
if isinstance(val, tuple): # compact format used?
self.flushbits(w) # flush prefix bits
nbits, start = val
val = self.getbits(nbits) + start
return val
self.flushbits(w)
return val
raise ValueError('Error decoding Huffman length')
def DecodeDistance(bs, mag):
''' Decode backward distance for reference copying. Because this values
can be large, the magnitude derived from the output position will be
used to switch between variable-length codes.
Large values will be further divided into sub-ranges; for each sub-range,
an additional bit indicating used/skipped sub-range, will be coded.
Below an example of decoding the bit string 10.0000111 and magnitude of 675:
1 -> skip sub-range 1...32
0 -> use sub-range 33...161
getbits(7) -> 7 + 33 = 40
'''
if mag <= 10:
raise ValueError('Anon9 unimplemented')
elif mag <= 20:
raise ValueError('Anon10 unimplemented')
elif mag <= 40:
if bs.getbits(1):
if bs.getbits(1) == 0:
return bs.getbits(4) + 5
raise ValueError('Unimplemented Anon11 distance encoding')
elif mag <= 80:
if bs.getbits(1):
if bs.getbits(1) == 0:
return bs.getbits(5) + 9
raise ValueError('Unimplemented Anon12 distance encoding')
elif mag <= 160:
if bs.getbits(1):
if bs.getbits(1) == 0:
return bs.getbits(6) + 17
raise ValueError('Unimplemented Anon13 distance encoding')
elif mag <= 672: # 161...672
if bs.getbits(1):
if bs.getbits(1) == 0: # 33...160
return bs.getbits(7) + 33
else:
return bs.getbits(next_pow2(mag - 160)) + 161
else: # 1...32
return bs.getbits(5) + 1
elif mag <= 1000:
if bs.getbits(1):
if bs.getbits(1) == 0:
return bs.getbits(8) + 65
else:
return bs.getbits(next_pow2(mag - 320)) + 321
else:
return bs.getbits(6) + 1
elif mag <= 2688:
if bs.getbits(1):
if bs.getbits(1) == 0:
return bs.getbits(9) + 129
else:
return bs.getbits(next_pow2(mag - 640)) + 641
else:
return bs.getbits(7) + 1
elif mag <= 5376:
if bs.getbits(1):
if bs.getbits(1) == 0:
return bs.getbits(10) + 257
else:
return bs.getbits(next_pow2(mag - 1280)) + 1281
else:
return bs.getbits(8) + 1
elif mag <= 10752:
if bs.getbits(1):
if bs.getbits(1):
return bs.getbits(next_pow2(mag - 2560)) + 2561
else:
return bs.getbits(11) + 513
else:
return bs.getbits(9) + 1
raise ValueError('Unimplemented distance encoding, current dst mag: %d' % mag)
def InstaCompDecompress(src, dst, unpackSize, pos=0):
# skip unused algo specific fields
word, word2 = struct.unpack_from(">HH", src, pos)
pos += 4
bs = BitStreamReader(src, len(src), pos)
dstPos = 0
mode = 1 # 1 - literal decoding, 0 - reference copying
while dstPos < unpackSize:
copyCount = bs.decodehuff(lenHuffTab, 2, 11)
if copyCount > 0 or mode == 0:
copyCount += 2
if mode == 0:
copyCount += 1
distance = DecodeDistance(bs, dstPos)
refPos = dstPos - distance
#print("Distance: %d, ref pos: %d" % (distance, refPos))
for i in range(copyCount):
dst.append(dst[refPos+i])
dstPos += copyCount
mode = 1
else:
litLen = bs.decodehuff(litHuffTab, 1, 7)
for i in range(litLen):
dst.append(bs.getbits(8))
dstPos += litLen
mode = 0 if litLen < LIT_MAX_LEN else 1
#print(hex(bs.getbits(3)))
#print(hex(bs.getbits(1)))
#print(hex(bs.getbits(7)))
#print(hex(bs.getbits(3)))
#print(hex(bs.getbits(1)))
#print(hex(bs.getbits(2)))
#print(hex(bs.getbits(4)))
#print(hex(bs.getbits(2)))
print("current src pos: %d, current dst pos: %d" % (bs.inPos, dstPos))
# End of Max's unchanged code. Here is a simple wrapper...
class WrongFormatError(ValueError):
pass
def unpack(src):
try:
magic, hdrLen, vers, iscmp, unpackSize, dcmp = struct.unpack_from(">LHBBLH", src)
except struct.error:
raise WrongFormatError
if magic != 0xA89F6572 or hdrLen != 18 or vers != 9 or iscmp != 1 or dcmp != 3:
raise WrongFormatError
dst = bytearray()
InstaCompDecompress(src, dst, unpackSize, 14)
return dst

View File

@ -18,5 +18,5 @@ setup(
'Development Status :: 3 - Alpha',
],
packages=['macresources'],
scripts=['bin/SimpleRez', 'bin/SimpleDeRez', 'bin/hexrez', 'bin/rezhex', 'bin/sortrez', 'bin/rfx', 'bin/greggybits'],
scripts=['bin/SimpleRez', 'bin/SimpleDeRez', 'bin/hexrez', 'bin/rezhex', 'bin/sortrez', 'bin/rfx', 'bin/greggybits', 'bin/instacomp'],
)