Basic InstaCompOne support (thanks to Max Poliakovski)

2024-06-04 15:29:38 +00:00 · 2020-06-23 13:47:38 +08:00 · 2020-06-23 13:47:38 +08:00 · 1a16ebc3d4
commit 1a16ebc3d4
parent 60f289efb8
3 changed files with 365 additions and 1 deletions
--- a/bin/instacomp
+++ b/bin/instacomp
@ -0,0 +1,71 @@
 #!/usr/bin/env python3
 # Copyright (c) 2019-2020 Elliot Nunn
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 HELP = '''InstaComp: unpack resources in the Macintosh System file (7.5)
 Algorithm from MacOS and the Installer SDK (Apple)
 Decompression reimplemented by Maxim Poliakovski
 Use the 'rfx' wrapper command to access resources inside a file'''
 # Command-line entry point: every file named on the command line that starts
 # with the InstaCompOne magic number is decompressed in place. Files without
 # the magic number, or with an unsupported header, are silently left alone.
 if __name__ == '__main__':
    import sys
    import argparse
    parser = argparse.ArgumentParser(
        description=HELP, formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.prog = '[rfx] ' + parser.prog
    parser.add_argument('path', nargs='+', metavar='file', action='store', help='Resource data')
    parser.add_argument('-x', dest='do_compress', action='store_false', required=True, help='extract (currently mandatory)')
    args = parser.parse_args()
    # Imported once, after argument parsing, so that usage errors and --help
    # do not depend on the package being importable.
    from macresources.instacomp import unpack, WrongFormatError
    for el in args.path:
        with open(el, 'r+b') as f:
            magic = f.read(4)
            if magic != b'\xA8\x9Fer':
                continue # not compressed: nothing to do
            data = magic + f.read()
            try:
                unpacked = unpack(data)
                # The file is only touched once decompression has succeeded,
                # so a failed unpack leaves the original contents intact.
                f.seek(0)
                f.write(unpacked)
                f.truncate()
            except WrongFormatError:
                continue # right magic number, but not a header we support
            except Exception as e:
                # Best effort: report the file that failed and carry on with
                # the remaining ones.
                print('failed', el, e, file=sys.stderr)
--- a/macresources/instacomp.py
+++ b/macresources/instacomp.py
@ -0,0 +1,293 @@
 # Copyright (c) 2018-2020 Maxim Poliakovski, Elliot Nunn
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 '''
    InstaCompOne is an older compression scheme used in the installer SDK
    of the classic Mac OS. Like the famous Deflate algorithm, InstaCompOne
    combines LZ77 with Huffman coding but uses a different bitstream format.
    Author: Max Poliakovski 2018
 '''
 import struct
 from math import ceil, log2
 # Largest literal run length that litHuffTab can express (31 + 32).
 # InstaCompDecompress compares each decoded literal length against this value
 # to choose the mode used for the next token.
 LIT_MAX_LEN = 63 # maximal length of the literal block
 ''' Huffman codes for copy length.
 '''
 # Maps a code word to the value it decodes to. InstaCompDecompress reads this
 # table with code lengths 2 to 11 and then adds 2 (or 3 directly after a short
 # literal run) to obtain the number of bytes to copy; a decoded 0 outside of
 # that situation announces a literal run instead.
 # Keys are plain integers, so the leading zero bits of a code are not stored
 # (0b00 is simply 0); the length of a code is implied by the bit width that
 # BitStreamReader.decodehuff is currently trying.
 lenHuffTab = {
    0b00 : 0,
    0b01 : 1,
    0b100 : 2,
    0b1010 : 3,
    0b1011 : 4,
    0b11000 : 5,
    0b11001 : 6,
    0b110100 : 7,
    0b110101 : 8,
    0b110110 : 9,
    0b110111 : 10,
    # for large trees, use compact Huffman table representation in the form
    # prefix_bits : (num_of_val_bits, offset)
    # i.e. after the prefix, num_of_val_bits more bits are read and added to
    # offset to give the decoded value.
    0b1110 : (3, 11),
    0b11110 : (3, 19),
    0b111110 : (5, 27),
    0b1111110 : (6, 59),
    0b11111110 : (7, 123),
    0b111111110 : (8, 251),
    0b1111111110 : (9, 507),
    0b11111111110 : (10, 1019)
 }
 ''' Huffman codes for literal length.
 '''
 # Maps a code word to the number of literal bytes that follow in the
 # bitstream. InstaCompDecompress reads this table with code lengths 1 to 7.
 # Keys are plain integers, so leading zero bits are not stored (0b0 is 0);
 # the code length is implied by the bit width BitStreamReader.decodehuff is
 # currently trying.
 litHuffTab = {
    0b0 : 1,
    0b100 : 2,
    0b101 : 3,
    0b11000 : 4,
    0b11001 : 5,
    0b11010 : 6,
    0b11011 : 7,
    0b1110000 : 8,
    0b1110001 : 9,
    0b1110010 : 10,
    0b1110011 : 11,
    0b1110100 : 12,
    0b1110101 : 13,
    0b1110110 : 14,
    0b1110111 : 15,
    # compact entries, prefix_bits : (num_of_val_bits, offset):
    # the value is offset plus the next num_of_val_bits bits,
    # giving 16...31 and 32...63 respectively.
    0b11110 : (4, 16),
    0b11111 : (5, 32)
 }
 def next_pow2(x):
    ''' Return the exponent of the smallest power of two that is >= x,
        i.e. ceil(log2(x)), but never less than 1.
        Despite the name, the result is the exponent (a bit count), not the
        power of two itself; DecodeDistance uses it as the number of bits to
        read for a value in the range 0...x-1.
        The computation uses exact integer arithmetic, so it stays correct for
        integers too large for floating-point log2 to resolve.
    '''
    if x < 2:
        return 1
    # For an integer n >= 2, (n - 1).bit_length() equals ceil(log2(n)).
    # ceil() leaves integers untouched and rounds a fractional x up, which
    # does not change the result.
    return (ceil(x) - 1).bit_length()
 class BitStreamReader():
    ''' Convenient methods for bitwise access to the input data.
        Bits are consumed most significant bit first, starting with the byte
        at index pos of the input.
    '''
    def __init__(self, input, size, pos):
        ''' input -- indexable sequence of byte values (e.g. bytes)
            size  -- length of the input; stored but not consulted here
            pos   -- index of the first byte to read
        '''
        self.inBuf  = input
        self.inSize = size
        self.inPos  = pos       # index of the next byte to load
        self.bPool  = 0         # bits loaded but not yet consumed
        self.bitsInPool = 0     # number of valid bits in bPool
    def showbits(self, nb):
        ''' Return nb bits from the bitstream without advancing the bit position
            Raises IndexError when the request runs past the end of the input.
        '''
        # Load whole bytes only until the request can be satisfied.
        while nb > self.bitsInPool:
            self.bPool = (self.bPool << 8) | self.inBuf[self.inPos]
            self.inPos += 1
            self.bitsInPool += 8
        # The mask is built from nb itself so requests wider than 32 bits work.
        return (self.bPool >> (self.bitsInPool - nb)) & ((1 << nb) - 1)
    def flushbits(self, nb):
        ''' Advance bit position by nb
            Flushing more bits than are currently loaded simply empties the pool.
        '''
        if nb <= self.bitsInPool:
            self.bitsInPool -= nb
        else:
            self.bitsInPool = 0
        # Drop the consumed bits. Without this the pool would grow by eight
        # bits per input byte and every shift would get slower and slower.
        self.bPool &= (1 << self.bitsInPool) - 1
    def getbits(self, nb):
        ''' Same as showbits with advancing the bit position
        '''
        res = self.showbits(nb)
        self.flushbits(nb)
        return res
    def decodehuff(self, tab, minlen, maxlen):
        ''' Decode Huffman code from bitstream
            tab maps code words (as integers) either to a value or to a
            (num_of_val_bits, offset) tuple; in the latter case the value is
            offset plus the next num_of_val_bits bits.
            Code lengths from minlen to maxlen are tried, shortest first.
            Raises ValueError if no code of those lengths matches.
        '''
        for w in range(minlen, maxlen+1):
            cw = self.showbits(w)
            if cw not in tab:
                continue
            self.flushbits(w) # consume the matched code word
            val = tab[cw]
            if isinstance(val, tuple): # compact format used?
                nbits, start = val
                val = self.getbits(nbits) + start
            return val
        raise ValueError('Error decoding Huffman length')
 def DecodeDistance(bs, mag):
    ''' Read the backward distance of a reference copy from the bitstream bs.
        Distances can be large, so the coding depends on the magnitude mag
        (the caller passes the current output position): mag selects one of
        several variable-length layouts. Within a layout, up to two leading
        bits pick a sub-range and the remaining bits give the position inside
        that sub-range.
        Example for the bit string 10.0000111 and a magnitude of 500:
            1 -> not in sub-range 1...32
            0 -> sub-range 33...160
                 getbits(7) -> 7 + 33 = 40
        Raises ValueError for magnitudes and bit patterns that are not
        implemented.
    '''
    if mag <= 10:
        raise ValueError('Anon9 unimplemented')
    if mag <= 20:
        raise ValueError('Anon10 unimplemented')
    # Small magnitudes: only the bit prefix "10" is understood.
    # (largest magnitude, value bits, first distance, error for other prefixes)
    partial_layouts = (
        (40, 4, 5, 'Unimplemented Anon11 distance encoding'),
        (80, 5, 9, 'Unimplemented Anon12 distance encoding'),
        (160, 6, 17, 'Unimplemented Anon13 distance encoding'),
    )
    for limit, nbits, first, message in partial_layouts:
        if mag > limit:
            continue
        # The second bit is only read when the first one is set.
        if bs.getbits(1) and bs.getbits(1) == 0:
            return bs.getbits(nbits) + first
        raise ValueError(message)
    # Larger magnitudes: three sub-ranges selected by the prefixes 0, 10, 11.
    # (largest magnitude, bits for prefix 0, bits for prefix 10,
    #  first distance for prefix 10, last distance covered by prefix 10)
    full_layouts = (
        (672, 5, 7, 33, 160),
        (1000, 6, 8, 65, 320),
        (2688, 7, 9, 129, 640),
        (5376, 8, 10, 257, 1280),
        (10752, 9, 11, 513, 2560),
    )
    for limit, near_bits, mid_bits, mid_first, mid_last in full_layouts:
        if mag > limit:
            continue
        if not bs.getbits(1):
            return bs.getbits(near_bits) + 1
        if bs.getbits(1):
            # Farthest sub-range: its width depends on how much output exists.
            return bs.getbits(next_pow2(mag - mid_last)) + mid_last + 1
        return bs.getbits(mid_bits) + mid_first
    raise ValueError('Unimplemented distance encoding, current dst mag: %d' % mag)
 def InstaCompDecompress(src, dst, unpackSize, pos=0):
    ''' Decompress an InstaCompOne bitstream from src, appending to dst.
        src        -- bytes-like object containing the compressed data
        dst        -- growable byte sequence (e.g. bytearray) receiving the
                      output; expected to be empty on entry, because
                      back-references are resolved as indexes counted from
                      the first byte produced by this call
        unpackSize -- number of bytes to produce
        pos        -- offset in src of the algorithm-specific fields that
                      precede the bitstream
        Raises struct.error if src is too short to hold those fields,
        ValueError if the stream cannot be decoded and IndexError if it ends
        prematurely.
    '''
    # skip unused algo specific fields; unpacking them still rejects input
    # that is too short to contain them
    struct.unpack_from(">HH", src, pos)
    pos += 4
    bs = BitStreamReader(src, len(src), pos)
    dstPos = 0
    mode = 1 # 1 - literal decoding, 0 - reference copying
    while dstPos < unpackSize:
        copyCount = bs.decodehuff(lenHuffTab, 2, 11)
        if copyCount > 0 or mode == 0:
            # Reference copy. The shortest copy is 2 bytes, or 3 bytes when it
            # directly follows a literal run (mode 0).
            copyCount += 2
            if mode == 0:
                copyCount += 1
            distance = DecodeDistance(bs, dstPos)
            refPos = dstPos - distance
            if refPos < 0:
                # A negative index would silently wrap around to the end of
                # dst and yield garbage instead of reporting the bad stream.
                raise ValueError('Invalid distance %d at dst pos %d' % (distance, dstPos))
            # Byte by byte on purpose: source and destination may overlap.
            for i in range(copyCount):
                dst.append(dst[refPos+i])
            dstPos += copyCount
            mode = 1
        else:
            # Literal run: litLen raw bytes follow in the bitstream.
            litLen = bs.decodehuff(litHuffTab, 1, 7)
            for i in range(litLen):
                dst.append(bs.getbits(8))
            dstPos += litLen
            # After a run shorter than the maximum the next token has to be a
            # reference copy; after a maximal run another literal run may follow.
            mode = 0 if litLen < LIT_MAX_LEN else 1
 # End of Max's unchanged code. Here is a simple wrapper...
 class WrongFormatError(ValueError):
    ''' Raised by unpack() for data that lacks a supported InstaCompOne header.
    '''
 def unpack(src):
    ''' Decompress a complete InstaCompOne resource and return the result.
        src is the whole resource as a bytes-like object, starting with its
        14-byte header. The unpacked data is returned as a bytearray.
        Raises WrongFormatError if the header is truncated or any of its
        identifying fields differs from the one supported combination.
        Errors raised while decoding the bitstream itself are passed through.
    '''
    try:
        header = struct.unpack_from(">LHBBLH", src)
    except struct.error:
        raise WrongFormatError
    magic, hdrLen, vers, iscmp, unpackSize, dcmp = header
    # Every field except the unpacked size must match exactly.
    supported = (0xA89F6572, 18, 9, 1, 3)
    if (magic, hdrLen, vers, iscmp, dcmp) != supported:
        raise WrongFormatError
    dst = bytearray()
    # 14 is the size of the header unpacked above.
    InstaCompDecompress(src, dst, unpackSize, 14)
    return dst
--- a/setup.py
+++ b/setup.py
@ -18,5 +18,5 @@ setup(
        'Development Status :: 3 - Alpha',
    ],
    packages=['macresources'],
-    scripts=['bin/SimpleRez', 'bin/SimpleDeRez', 'bin/hexrez', 'bin/rezhex', 'bin/sortrez', 'bin/rfx', 'bin/greggybits'],
+    scripts=['bin/SimpleRez', 'bin/SimpleDeRez', 'bin/hexrez', 'bin/rezhex', 'bin/sortrez', 'bin/rfx', 'bin/greggybits', 'bin/instacomp'],
 )