From 1a16ebc3d4b5f42838c2aa7a5bb1bc10031035cf Mon Sep 17 00:00:00 2001 From: Elliot Nunn Date: Tue, 23 Jun 2020 13:47:38 +0800 Subject: [PATCH] Basic InstaCompOne support (thanks to Max Poliakovski) --- bin/instacomp | 71 +++++++++ macresources/instacomp.py | 293 ++++++++++++++++++++++++++++++++++++++ setup.py | 2 +- 3 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 bin/instacomp create mode 100644 macresources/instacomp.py diff --git a/bin/instacomp b/bin/instacomp new file mode 100644 index 0000000..fa15e38 --- /dev/null +++ b/bin/instacomp @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2019-2020 Elliot Nunn + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
HELP = '''InstaComp: unpack resources in the Macintosh System file (7.5)

Algorithm from MacOS and the Installer SDK (Apple)
Decompression reimplemented by Maxim Poliakovski

Use the 'rfx' wrapper command to access resources inside a file'''

# The first four bytes of every file this tool will try to unpack.
MAGIC = b'\xA8\x9Fer'


def main(argv=None):
    '''Unpack, in place, every named file that starts with MAGIC.

    argv -- argument list without the program name; None means sys.argv[1:].

    Files that do not start with MAGIC, or whose header the decompressor
    rejects with WrongFormatError, are left untouched.  Any other error
    raised while unpacking or rewriting a file is reported on stderr and the
    remaining files are still processed.  Errors opening or reading a file
    are not caught.
    '''
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description=HELP, formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.prog = '[rfx] ' + parser.prog

    parser.add_argument('path', nargs='+', metavar='file', action='store', help='Resource data')
    parser.add_argument('-x', dest='do_compress', action='store_false', required=True, help='extract (currently mandatory)')

    args = parser.parse_args(argv)

    for el in args.path:
        with open(el, 'r+b') as f:
            if f.read(len(MAGIC)) != MAGIC:
                continue

            # Imported only once a candidate file has been found; files
            # without the magic number never need the decompressor.
            from macresources.instacomp import unpack, WrongFormatError

            f.seek(0)
            data = f.read()

            try:
                try:
                    data = unpack(data)
                except WrongFormatError:
                    continue

                # Overwrite from the start and cut off whatever is left of
                # the old contents.
                f.seek(0)
                f.write(data)
                f.truncate()
            except Exception as e:
                # Best effort: name the file that failed, then carry on.
                print('failed', el, e, file=sys.stderr)


if __name__ == '__main__':
    main()


# ---------------------------------------------------------------------------
# macresources/instacomp.py
# ---------------------------------------------------------------------------

# Copyright (c) 2018-2020 Maxim Poliakovski, Elliot Nunn

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


'''
    InstaCompOne is an older compression scheme used in the installer SDK
    of the classic Mac OS. Like the famous Deflate algorithm, InstaCompOne
    combines LZ77 with Huffman coding but uses a different bitstream format.

    Author: Max Poliakovski 2018
'''

import struct
from math import ceil, log2


LIT_MAX_LEN = 63 # maximal length of the literal block


''' Huffman codes for copy length.
'''
lenHuffTab = {
    0b00     : 0,
    0b01     : 1,
    0b100    : 2,
    0b1010   : 3,
    0b1011   : 4,
    0b11000  : 5,
    0b11001  : 6,
    0b110100 : 7,
    0b110101 : 8,
    0b110110 : 9,
    0b110111 : 10,

    # for large trees, use compact Huffman table representation in the form
    # prefix_bits : (num_of_val_bits, offset)
    0b1110        : (3, 11),
    0b11110       : (3, 19),
    0b111110      : (5, 27),
    0b1111110     : (6, 59),
    0b11111110    : (7, 123),
    0b111111110   : (8, 251),
    0b1111111110  : (9, 507),
    0b11111111110 : (10, 1019)
}

''' Huffman codes for literal length.
'''
litHuffTab = {
    0b0       : 1,
    0b100     : 2,
    0b101     : 3,
    0b11000   : 4,
    0b11001   : 5,
    0b11010   : 6,
    0b11011   : 7,
    0b1110000 : 8,
    0b1110001 : 9,
    0b1110010 : 10,
    0b1110011 : 11,
    0b1110100 : 12,
    0b1110101 : 13,
    0b1110110 : 14,
    0b1110111 : 15,

    0b11110 : (4, 16),
    0b11111 : (5, 32)
}


def next_pow2(x):
    ''' Return ceil(log2(x)), but never less than 1.

        Despite the name this is an exponent (a bit count), not a power of
        two; DecodeDistance uses it as the width of its longest code.
        Computed with integer bit_length, so there is no float rounding.
    '''
    return 1 if x < 2 else (ceil(x) - 1).bit_length()


class BitStreamReader:
    ''' Convenient methods for bitwise access to the input data.

        Bits are taken most-significant-first from each input byte.
    '''
    def __init__(self, input, size, pos):
        ''' input -- indexable sequence of byte values (the parameter name
                     shadows the builtin but is kept for existing callers)
            size  -- number of bytes of `input` that may be read
            pos   -- index of the first byte to read
        '''
        self.inBuf = input
        self.inSize = size
        self.inPos = pos
        self.bPool = 0          # unread bits, newest byte in the low end
        self.bitsInPool = 0     # how many low bits of bPool are still unread

    def showbits(self, nb):
        ''' Return nb bits from the bitstream without advancing the bit position

            Raises IndexError when the input runs out.
        '''
        while nb > self.bitsInPool:
            if self.inPos >= self.inSize:
                raise IndexError('InstaComp bitstream exhausted')

            # Drop bits that were already consumed before shifting in a new
            # byte; otherwise the pool grows as long as the whole input and
            # every refill has to shift a bigger and bigger integer.
            unread = self.bPool & ((1 << self.bitsInPool) - 1)
            self.bPool = (unread << 8) | self.inBuf[self.inPos]
            self.inPos += 1
            self.bitsInPool += 8

        return (self.bPool >> (self.bitsInPool - nb)) & ((1 << nb) - 1)

    def flushbits(self, nb):
        ''' Advance bit position by nb

            Asking for more than the pool holds just empties the pool.
        '''
        if nb <= self.bitsInPool:
            self.bitsInPool -= nb
        else:
            self.bitsInPool = 0

    def getbits(self, nb):
        ''' Same as showbits with advancing the bit position
        '''
        res = self.showbits(nb)
        self.flushbits(nb)
        return res

    def decodehuff(self, tab, minlen, maxlen):
        ''' Decode Huffman code from bitstream

            tab            -- code table: code word -> value, or
                              code word -> (num_of_val_bits, offset)
            minlen, maxlen -- shortest and longest code word length to try

            Raises ValueError if no code word of an allowed length matches.
        '''
        for w in range(minlen, maxlen + 1):
            cw = self.showbits(w)
            if cw not in tab:
                continue

            self.flushbits(w)       # consume the matched code word / prefix
            val = tab[cw]
            if isinstance(val, tuple):  # compact format used?
                nbits, start = val
                return self.getbits(nbits) + start
            return val

        raise ValueError('Error decoding Huffman length')


# Magnitude ranges for which only the "10" code is implemented:
#   (largest magnitude, value bits, value offset, name used in the error)
_PARTIAL_DIST_TIERS = (
    (40, 4, 5, 'Anon11'),
    (80, 5, 9, 'Anon12'),
    (160, 6, 17, 'Anon13'),
)

# Fully implemented magnitude ranges:
#   (largest magnitude, near bits, mid bits, mid offset, far offset)
# Code "0" -> near, "10" -> mid, "11" -> far.
# NOTE(review): every other limit equals (far offset - 1) plus a power of two
# (672, 2688, 5376, 10752); 1000 breaks that pattern (1344 would fit).
# Confirm against a reference decoder before changing it.
_FULL_DIST_TIERS = (
    (672, 5, 7, 33, 161),
    (1000, 6, 8, 65, 321),
    (2688, 7, 9, 129, 641),
    (5376, 8, 10, 257, 1281),
    (10752, 9, 11, 513, 2561),
)


def DecodeDistance(bs, mag):
    ''' Decode backward distance for reference copying. Because these values
        can be large, the magnitude derived from the output position will be
        used to switch between variable-length codes.
        Large values will be further divided into sub-ranges; for each sub-range,
        an additional bit indicating used/skipped sub-range, will be coded.

        Below an example of decoding the bit string 10.0000111 and magnitude of 500:
        1 -> skip sub-range 1...32
        0 -> use sub-range 33...160
        getbits(7) -> 7 + 33 = 40

        bs  -- BitStreamReader positioned at the distance code
        mag -- current output position

        Raises ValueError for magnitudes and codes that are not implemented.
    '''
    if mag <= 10:
        raise ValueError('Anon9 unimplemented')
    if mag <= 20:
        raise ValueError('Anon10 unimplemented')

    for limit, nbits, base, label in _PARTIAL_DIST_TIERS:
        if mag <= limit:
            # Only the code "10" is known here; "0" and "11" are rejected.
            if bs.getbits(1) and bs.getbits(1) == 0:
                return bs.getbits(nbits) + base
            raise ValueError('Unimplemented %s distance encoding' % label)

    for limit, near_bits, mid_bits, mid_base, far_base in _FULL_DIST_TIERS:
        if mag <= limit:
            if not bs.getbits(1):
                return bs.getbits(near_bits) + 1
            if not bs.getbits(1):
                return bs.getbits(mid_bits) + mid_base
            # The far sub-range is coded with just enough bits for `mag`.
            return bs.getbits(next_pow2(mag - far_base + 1)) + far_base

    raise ValueError('Unimplemented distance encoding, current dst mag: %d' % mag)


def InstaCompDecompress(src, dst, unpackSize, pos=0):
    ''' Decode an InstaCompOne bitstream from src and append the output to dst.

        src        -- bytes-like input
        dst        -- mutable sequence that receives the output (appended to)
        unpackSize -- decoding stops once at least this many bytes are produced
        pos        -- offset in src of the four algorithm-specific bytes that
                      precede the bitstream

        Returns None.  Raises ValueError for an undecodable stream,
        IndexError when the stream ends early and struct.error when src is
        too short to hold the four bytes at pos.
    '''
    # skip unused algo specific fields (still unpacked, so that a source too
    # short to hold them fails with struct.error as before)
    struct.unpack_from(">HH", src, pos)
    pos += 4

    bs = BitStreamReader(src, len(src), pos)

    base = len(dst)     # references are relative to the output of this call
    dstPos = 0
    mode = 1 # 1 - literal decoding, 0 - reference copying

    while dstPos < unpackSize:
        copyCount = bs.decodehuff(lenHuffTab, 2, 11)
        if copyCount > 0 or mode == 0:
            # Mode 0 forces a copy, and its length code is biased by 3
            # instead of 2.
            copyCount += 3 if mode == 0 else 2

            distance = DecodeDistance(bs, dstPos)
            if distance > dstPos:
                # A negative index would silently wrap round to the end of
                # the output instead of failing.
                raise ValueError(
                    'Reference distance %d reaches before start of output '
                    '(dst pos %d)' % (distance, dstPos))
            refPos = base + dstPos - distance

            # Byte by byte on purpose: source and destination may overlap.
            for i in range(copyCount):
                dst.append(dst[refPos + i])

            dstPos += copyCount
            mode = 1

        else:
            litLen = bs.decodehuff(litHuffTab, 1, 7)

            for _ in range(litLen):
                dst.append(bs.getbits(8))

            dstPos += litLen
            # A literal run shorter than the maximum is always followed by a
            # copy; a full-length run may be followed by another literal run.
            mode = 0 if litLen < LIT_MAX_LEN else 1


# End of Max's decoder. Here is a simple wrapper...
class WrongFormatError(ValueError):
    ''' Raised by unpack() when the data does not carry the expected header.
    '''
    pass


def unpack(src):
    ''' Decompress one InstaCompOne-compressed resource.

        src -- bytes-like object starting with the 14-byte header
               (magic, header length, version, compressed flag, unpacked
               size, decompressor id)

        Returns a bytearray with the unpacked data.

        Raises WrongFormatError if the header is too short or any of its
        fields differs from the values this decoder handles.  Errors raised
        by InstaCompDecompress propagate unchanged.
    '''
    try:
        magic, hdrLen, vers, iscmp, unpackSize, dcmp = struct.unpack_from(">LHBBLH", src)
    except struct.error:
        raise WrongFormatError('data too short for a compressed-resource header') from None

    if (magic, hdrLen, vers, iscmp, dcmp) != (0xA89F6572, 18, 9, 1, 3):
        raise WrongFormatError('not an InstaCompOne compressed resource')

    dst = bytearray()
    # 14 header bytes were parsed above; the decoder skips 4 more itself.
    InstaCompDecompress(src, dst, unpackSize, 14)
    return dst


# Packaging: the accompanying setup.py hunk appends 'bin/instacomp' to the
# `scripts=[...]` list passed to setup().