# Copyright (c) 2018-2020 Maxim Poliakovski, Elliot Nunn # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. ''' InstaCompOne is an older compression scheme used in the installer SDK of the classic Mac OS. Like the famous Deflate algorithm, InstaCompOne combines LZ77 with Huffman coding but uses a different bitstream format. Author: Max Poliakovski 2018 ''' import struct from math import ceil, log2 LIT_MAX_LEN = 63 # maximal length of the literal block ''' Huffman codes for copy length. ''' lenHuffTab = { 0b00 : 0, 0b01 : 1, 0b100 : 2, 0b1010 : 3, 0b1011 : 4, 0b11000 : 5, 0b11001 : 6, 0b110100 : 7, 0b110101 : 8, 0b110110 : 9, 0b110111 : 10, # for large trees, use compact Huffman table representation in the form # prefix_bits : (num_of_val_bits, offset) 0b1110 : (3, 11), 0b11110 : (3, 19), 0b111110 : (5, 27), 0b1111110 : (6, 59), 0b11111110 : (7, 123), 0b111111110 : (8, 251), 0b1111111110 : (9, 507), 0b11111111110 : (10, 1019) } ''' Huffman codes for literal length. ''' litHuffTab = { 0b0 : 1, 0b100 : 2, 0b101 : 3, 0b11000 : 4, 0b11001 : 5, 0b11010 : 6, 0b11011 : 7, 0b1110000 : 8, 0b1110001 : 9, 0b1110010 : 10, 0b1110011 : 11, 0b1110100 : 12, 0b1110101 : 13, 0b1110110 : 14, 0b1110111 : 15, 0b11110 : (4, 16), 0b11111 : (5, 32) } # TODO: can that be done more quickly? next_pow2 = lambda x: 1 if x < 2 else int(ceil(log2(x))) class BitStreamReader(): ''' Convenient methods for bitwise access to the input data. ''' def __init__(self, input, size, pos): self.inBuf = input self.inSize = size self.inPos = pos self.bPool = 0 self.bitsInPool = 0 def showbits(self, nb): ''' Return nb bits from the bitstream without advancing the bit position ''' while nb > self.bitsInPool: self.bPool = (self.bPool << 8) | self.inBuf[self.inPos] self.inPos += 1 self.bitsInPool += 8 return (self.bPool >> (self.bitsInPool - nb)) & (0xFFFFFFFF >> (32 - nb)) def flushbits(self, nb): ''' Advance bit position by nb ''' if nb <= self.bitsInPool: self.bitsInPool -= nb else: self.bitsInPool = 0 def getbits(self, nb): ''' Same as showbits with advancing the bit position ''' res = self.showbits(nb) self.flushbits(nb) return res def decodehuff(self, tab, minlen, maxlen): ''' Decode Huffman code from bitstream ''' for w in range(minlen, maxlen+1): cw = self.showbits(w) if cw in tab: val = tab[cw] if isinstance(val, tuple): # compact format used? self.flushbits(w) # flush prefix bits nbits, start = val val = self.getbits(nbits) + start return val self.flushbits(w) return val raise ValueError('Error decoding Huffman length') def DecodeDistance(bs, mag): ''' Decode backward distance for reference copying. Because this values can be large, the magnitude derived from the output position will be used to switch between variable-length codes. Large values will be further divided into sub-ranges; for each sub-range, an additional bit indicating used/skipped sub-range, will be coded. Below an example of decoding the bit string 10.0000111 and magnitude of 675: 1 -> skip sub-range 1...32 0 -> use sub-range 33...161 getbits(7) -> 7 + 33 = 40 ''' if mag <= 10: raise ValueError('Anon9 unimplemented') elif mag <= 20: raise ValueError('Anon10 unimplemented') elif mag <= 40: if bs.getbits(1): if bs.getbits(1) == 0: return bs.getbits(4) + 5 raise ValueError('Unimplemented Anon11 distance encoding') elif mag <= 80: if bs.getbits(1): if bs.getbits(1) == 0: return bs.getbits(5) + 9 raise ValueError('Unimplemented Anon12 distance encoding') elif mag <= 160: if bs.getbits(1): if bs.getbits(1) == 0: return bs.getbits(6) + 17 raise ValueError('Unimplemented Anon13 distance encoding') elif mag <= 672: # 161...672 if bs.getbits(1): if bs.getbits(1) == 0: # 33...160 return bs.getbits(7) + 33 else: return bs.getbits(next_pow2(mag - 160)) + 161 else: # 1...32 return bs.getbits(5) + 1 elif mag <= 1000: if bs.getbits(1): if bs.getbits(1) == 0: return bs.getbits(8) + 65 else: return bs.getbits(next_pow2(mag - 320)) + 321 else: return bs.getbits(6) + 1 elif mag <= 2688: if bs.getbits(1): if bs.getbits(1) == 0: return bs.getbits(9) + 129 else: return bs.getbits(next_pow2(mag - 640)) + 641 else: return bs.getbits(7) + 1 elif mag <= 5376: if bs.getbits(1): if bs.getbits(1) == 0: return bs.getbits(10) + 257 else: return bs.getbits(next_pow2(mag - 1280)) + 1281 else: return bs.getbits(8) + 1 elif mag <= 10752: if bs.getbits(1): if bs.getbits(1): return bs.getbits(next_pow2(mag - 2560)) + 2561 else: return bs.getbits(11) + 513 else: return bs.getbits(9) + 1 raise ValueError('Unimplemented distance encoding, current dst mag: %d' % mag) def InstaCompDecompress(src, dst, unpackSize, pos=0): # skip unused algo specific fields word, word2 = struct.unpack_from(">HH", src, pos) pos += 4 bs = BitStreamReader(src, len(src), pos) dstPos = 0 mode = 1 # 1 - literal decoding, 0 - reference copying while dstPos < unpackSize: copyCount = bs.decodehuff(lenHuffTab, 2, 11) if copyCount > 0 or mode == 0: copyCount += 2 if mode == 0: copyCount += 1 distance = DecodeDistance(bs, dstPos) refPos = dstPos - distance #print("Distance: %d, ref pos: %d" % (distance, refPos)) for i in range(copyCount): dst.append(dst[refPos+i]) dstPos += copyCount mode = 1 else: litLen = bs.decodehuff(litHuffTab, 1, 7) for i in range(litLen): dst.append(bs.getbits(8)) dstPos += litLen mode = 0 if litLen < LIT_MAX_LEN else 1 #print(hex(bs.getbits(3))) #print(hex(bs.getbits(1))) #print(hex(bs.getbits(7))) #print(hex(bs.getbits(3))) #print(hex(bs.getbits(1))) #print(hex(bs.getbits(2))) #print(hex(bs.getbits(4))) #print(hex(bs.getbits(2))) print("current src pos: %d, current dst pos: %d" % (bs.inPos, dstPos)) # End of Max's unchanged code. Here is a simple wrapper... class WrongFormatError(ValueError): pass def unpack(src): try: magic, hdrLen, vers, iscmp, unpackSize, dcmp = struct.unpack_from(">LHBBLH", src) except struct.error: raise WrongFormatError if magic != 0xA89F6572 or hdrLen != 18 or vers != 9 or iscmp != 1 or dcmp != 3: raise WrongFormatError dst = bytearray() InstaCompDecompress(src, dst, unpackSize, 14) return dst