Merge pull request #1 from elliotnunn/master

Wrap a neat API around it
Maxim Poliakovski 2018-12-13 14:25:06 +01:00 committed by GitHub
commit 5449528a2d
3 changed files with 88 additions and 64 deletions

GreggBits.py

@@ -85,16 +85,19 @@ def DecodeMaskedWords(src, dst, pos, n, tab, mask):
     return pos
 
-def GreggDecompress(src, dst, unpackSize, tabSize, comprFlags):
+def GreggDecompress(src, dst, unpackSize, pos=0):
     '''Decompress resource data from src to dst.
 
        Parameters:
        src         source buffer containing compressed data
        dst         destination buffer, must be bytearray to work properly
        unpackSize  size in bytes of the unpacked resource data
-       tabSize     size of the embedded lookup table
-       comprFlags  compression flags from the extended resource header
+       pos         offset to my Gregg-specific buffer in src
     '''
+    _dcmp, _slop, tabSize, comprFlags = struct.unpack_from(">HHBB", src, pos)
+    pos += 6
+
     hasDynamicTab = comprFlags & 1
     isBitmapped = comprFlags & 2
 
     print("tabSize: %d" % tabSize)
@@ -103,8 +106,8 @@ def GreggDecompress(src, dst, unpackSize, tabSize, comprFlags):
     if hasDynamicTab:
         nEntries = tabSize + 1
-        pos = nEntries * 2
-        dynamicLUT = struct.unpack(">" + str(nEntries) + "H", src[:pos])
+        dynamicLUT = struct.unpack_from(">" + str(nEntries) + "H", src, pos)
+        pos += nEntries * 2
 
         # dump dynamic LUT
         if 0:
             for idx, elem in enumerate(dynamicLUT):
@@ -114,8 +117,6 @@ def GreggDecompress(src, dst, unpackSize, tabSize, comprFlags):
                     print(", ", end="")
                 print("0x%04X" % elem, end="")
             print("")
-    else:
-        pos = 0
 
     LUT = dynamicLUT if hasDynamicTab else GreggDefLUT
     nWords = unpackSize >> 1
@@ -143,16 +144,20 @@ def GreggDecompress(src, dst, unpackSize, tabSize, comprFlags):
         pos += 1
 
     if hasExtraByte: # have a got an extra byte at the end?
-        dst.expand(src[pos]) # copy it over
+        dst.append(src[pos]) # copy it over
         pos += 1
 
     #print("Last input position: %d" % pos)
 
-def GreggCompress(src, dst, unpackSize, customTab=False, isBitmapped=False):
-    if customTab:
+def GreggCompress(src, dst, customTab='auto', isBitmapped='auto'):
+    # future addition
+    customTab = True # so the big code path gets tested!
+    isBitmapped = True # required for now
+
+    if customTab: # calculate, and if necessary, resolve 'auto'
         # convert input bytes into an array of words
-        nWords = unpackSize >> 1
+        nWords = len(src) >> 1
         inWords = struct.unpack(">" + str(nWords) + "H", src[:nWords*2])
 
         # count occurence of each word
@@ -193,6 +198,14 @@ def GreggCompress(src, dst, unpackSize, customTab=False, isBitmapped=False):
                 print("0x%04X" % elem, end="")
             print("")
 
+    # here, decide whether 'auto' customTab should be on or off!
+
+    # write out the header
+    isBitmapped = True # will need to resolve this later
+    flags = customTab * 1 + isBitmapped * 2
+    dst.extend(struct.pack(">HHBB", 2, 0, len(embeddedTab)-1, flags))
+
+    if customTab:
         # write the constructed table into output
         for word in embeddedTab:
             dst.extend(word.to_bytes(2, 'big'))
@@ -207,7 +220,7 @@ def GreggCompress(src, dst, unpackSize, customTab=False, isBitmapped=False):
         if nWords & 7:
             pos = EncodeMaskedWords(inWords, dst, pos, nWords & 7, embeddedTab)
 
-        if unpackSize & 1: # copy over last byte in the case of odd length
+        if len(src) & 1: # copy over last byte in the case of odd length
             dst.append(src[-1])
     else:
         print("Non-bitmapped compression not yet implemented")

ResDecompress.py

@@ -3,66 +3,64 @@
    Author: Max Poliakovski 2018
 '''
 import struct
-import sys
 
 from GreggBits import GreggDecompress, GreggCompress
 
-def DecompressResource(inf):
-    # get the extended resource header first
-    hdrFields = struct.unpack(">IHBBI", inf.read(12))
-    if hdrFields[0] != 0xA89F6572:
-        print("Invalid extended resource header sig: 0x%X" % hdrFields[0])
-    if hdrFields[1] != 18:
-        print("Suspicious extended resource header length: %d" % hdrFields[1])
-    if hdrFields[2] != 8 and hdrFields[2] != 9:
-        print("Unknown ext res header format: %d" % hdrFields[2])
-    if (hdrFields[3] & 1) == 0:
+def GetEncoding(dat):
+    sig, hdrlen, vers, attrs, biglen = struct.unpack_from(">IHBBI", dat)
+    if sig != 0xA89F6572:
+        print("Invalid extended resource header sig: 0x%X" % sig)
+        return 'UnknownCompression'
+    if vers not in (8, 9):
+        print("Unknown ext res header format: %d" % vers)
+        return 'UnknownCompression'
+    if attrs & 1 == 0:
         print("extAttributes,bit0 isn't set. Treat this res as uncompressed.")
+        return 'UnknownCompression'
 
-    print("Uncompressed length: %d" % hdrFields[4])
+    print("Uncompressed length: %d" % biglen)
 
-    if hdrFields[2] == 8:
-        DonnSpecific = struct.unpack(">BBHH", inf.read(6))
-        print("DonnDecompress isn't supported yet.")
-        exit()
+    if vers == 8:
+        print('Donn unimplemented!'); return 'UnknownCompression'
+        return 'DonnBits'
+    elif vers == 9:
+        if dat[12:14] == b'\x00\x02':
+            return 'GreggyBits'
+        else:
+            return 'UnknownCompression'
     else:
-        GreggSpecific = struct.unpack(">HHBB", inf.read(6))
-        fsize = inf.seek(0, 2)
-        print("Compressed size: %d" % fsize)
-        inf.seek(hdrFields[1], 0) # rewind to the start of compressed data
-        dstBuf = bytearray()
-        srcBuf = inf.read(fsize - hdrFields[1])
-        # invoke GreggyBits decompressor and pass over required header data
-        GreggDecompress(srcBuf, dstBuf, hdrFields[4], GreggSpecific[2], GreggSpecific[3])
-        with open("Dump", 'wb') as outstream:
-            outstream.write(dstBuf)
-        # re-compress
-        recompBuf = bytearray()
-        # re-create extended resource header
-        recompBuf.extend([0xA8, 0x9F, 0x65, 0x72, 0x00, 0x12, 0x09, 0x01])
-        recompBuf.extend(hdrFields[4].to_bytes(4, 'big'))
-        recompBuf.extend([0x00, 0x02, 0x00, 0x00])
-        recompBuf.append(GreggSpecific[2])
-        recompBuf.append(GreggSpecific[3])
-        GreggCompress(dstBuf, recompBuf, hdrFields[4], customTab=True, isBitmapped=True)
-        with open("RecompDump", 'wb') as outstream:
-            outstream.write(recompBuf)
+        return 'UnknownCompression'
 
-if __name__ == "__main__":
-    if len(sys.argv) < 2:
-        file = "Compressed"
-    else:
-        file = sys.argv[1]
+def DecompressResource(dat):
+    encoding = GetEncoding(dat)
+    sig, hdrlen, vers, attrs, biglen = struct.unpack_from(">IHBBI", dat)
 
-    with open(file, 'rb') as instream:
-        DecompressResource(instream)
+    if encoding == 'DonnBits':
+        raise NotImplementedError('DonnBits')
+    elif encoding == 'GreggyBits':
+        dst = bytearray()
+        GreggDecompress(dat, dst, unpackSize=biglen, pos=12)
+        return bytes(dst)
+    elif encoding == 'UnknownCompression':
+        return dat # passthru
+
+def CompressResource(dat, encoding):
+    if encoding == 'UnknownCompression':
+        return dat
+    elif encoding == 'GreggyBits':
+        dst = bytearray()
+        # re-create extended resource header
+        dst.extend([0xA8, 0x9F, 0x65, 0x72, 0x00, 0x12, 0x09, 0x01])
+        dst.extend(len(dat).to_bytes(4, 'big'))
+        # leave Gregg-specific header to the compressor
+        GreggCompress(dat, dst)
+        return bytes(dst)
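
Taken together, the new module-level API in ResDecompress.py can be exercised roughly like this (a sketch; the 'Compressed' file name is only an example, echoing the default input of the removed __main__ block):

    from ResDecompress import GetEncoding, DecompressResource, CompressResource

    with open('Compressed', 'rb') as f:        # any dumped resource, extended header included
        raw = f.read()

    encoding = GetEncoding(raw)                # 'DonnBits', 'GreggyBits' or 'UnknownCompression'
    plain = DecompressResource(raw)            # passthrough for unknown, raises for DonnBits
    again = CompressResource(plain, encoding)  # re-pack using the same encoding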

test_everything.py (new file)

@@ -0,0 +1,13 @@
+from ResDecompress import GetEncoding, DecompressResource, CompressResource
+
+def compress_then_extract(dat, encoding):
+    a = CompressResource(dat, encoding);
+    if encoding != 'UnknownCompression': assert a.startswith(b'\xA8\x9F\x65\x72')
+    assert GetEncoding(a) == encoding
+    b = DecompressResource(a)
+    assert b == dat
+
+def test_all():
+    for enc in ['GreggyBits', 'UnknownCompression']:
+        compress_then_extract(bytes(100), encoding=enc)
+        compress_then_extract(b'The quick brown fox jumps over the lazy dog', encoding=enc)
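
The new file and its test_all function follow pytest naming conventions, so the round-trip check can presumably be run with:

    pytest test_everything.py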