diff --git a/README.md b/README.md
index 7412c9a..8a55e43 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,10 @@ deprecated `Rez` and `DeRez` utilities. They convert between raw resource
 forks and Rez-style `.rdump` files. To access a raw resource fork under Mac
 OS X, you can append `/..namedfork/rsrc` to a filename.
 
+Commands implementing Apple's [undocumented resource compression scheme](http://preserve.mactech.com/articles/mactech/Vol.09/09.01/ResCompression/index.html):
+
+- `greggybits` (in Python: `from macresources.greggybits import pack, unpack`)
+
 All utilities have online help.
 
 
diff --git a/bin/greggybits b/bin/greggybits
new file mode 100755
index 0000000..70a5618
--- /dev/null
+++ b/bin/greggybits
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+HELP = '''GreggyBits: (un)pack resources in the Macintosh System file (7.1)
+
+Algorithm by Greg Marriott
+Reimplemented by Maxim Poliakovski and Elliot Nunn
+
+Use the 'rfx' wrapper command to access resources inside a file'''
+
+def debug_round_trip(filename, packed):
+    print(filename.split('/')[-1], end=' ')
+
+    unpacked = unpack(packed)
+    repacked = pack(unpacked)
+
+    if packed == repacked:
+        print('good', end=' ')
+    elif packed[17] != repacked[17]:
+        print('WRONGMODE', repacked[17], 'not', packed[17], end=' ')
+        if pack_with_flags(unpacked, packed[17]) == packed:
+            print('(correct when mode is forced)', end=' ')
+    elif packed[:14] == repacked[:14] and packed[16:] == repacked[16:]:
+        print('sloppy', hex(struct.unpack_from('>H', repacked, 14)[0]), 'not', hex(struct.unpack_from('>H', packed, 14)[0]), end=' ')
+    else:
+        print('other', end=' ')
+
+    print()
+
+if __name__ == '__main__':
+    # Some cheeky debug code for testing... rfx greggybits --debug System//
+    from macresources.greggybits import pack, pack_with_flags, unpack
+    import struct
+    import sys
+
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description=HELP, formatter_class=argparse.RawTextHelpFormatter,
+    )
+    parser.prog = '[rfx] ' + parser.prog
+
+    parser.add_argument('path', nargs='+', metavar='file', action='store', help='Resource data')
+    parser.add_argument('-x', dest='do_compress', action='store_false', help='extract (default: compress)')
+    parser.add_argument('--debug', action='store_true', help='attempt to round-trip resources')
+
+    args = parser.parse_args()
+
+    for el in args.path:
+        from macresources.greggybits import pack, unpack
+
+        with open(el, 'r+b') as f:
+            already_compressed = (f.read(4) == b'\xA8\x9Fer')
+            if already_compressed == args.do_compress: continue
+
+            f.seek(0)
+            data = f.read()
+
+            try:
+                if args.do_compress:
+                    data = pack(data)
+                    if args.debug: debug_round_trip(el, data)
+                else:
+                    if args.debug: debug_round_trip(el, data)
+                    data = unpack(data)
+
+                f.seek(0)
+                f.write(data)
+                f.truncate()
+            except ValueError:
+                print('some kind of error')
+                raise
diff --git a/macresources/greggybits.py b/macresources/greggybits.py
new file mode 100644
index 0000000..4a052d2
--- /dev/null
+++ b/macresources/greggybits.py
@@ -0,0 +1,188 @@
+import struct
+
+# predefined lookup table of the most frequent words
+TABLE = (
+    0x0000, 0x0008, 0x4EBA, 0x206E, 0x4E75, 0x000C, 0x0004, 0x7000,
+    0x0010, 0x0002, 0x486E, 0xFFFC, 0x6000, 0x0001, 0x48E7, 0x2F2E,
+    0x4E56, 0x0006, 0x4E5E, 0x2F00, 0x6100, 0xFFF8, 0x2F0B, 0xFFFF,
+    0x0014, 0x000A, 0x0018, 0x205F, 0x000E, 0x2050, 0x3F3C, 0xFFF4,
+    0x4CEE, 0x302E, 0x6700, 0x4CDF, 0x266E, 0x0012, 0x001C, 0x4267,
+    0xFFF0, 0x303C, 0x2F0C, 0x0003, 0x4ED0, 0x0020, 0x7001, 0x0016,
+    0x2D40, 0x48C0, 0x2078, 0x7200, 0x588F, 0x6600, 0x4FEF, 0x42A7,
+    0x6706, 0xFFFA, 0x558F, 0x286E, 0x3F00, 0xFFFE, 0x2F3C, 0x6704,
+    0x598F, 0x206B, 0x0024, 0x201F, 0x41FA, 0x81E1, 0x6604, 0x6708,
+    0x001A, 0x4EB9, 0x508F, 0x202E, 0x0007, 0x4EB0, 0xFFF2, 0x3D40,
+    0x001E, 0x2068, 0x6606, 0xFFF6, 0x4EF9, 0x0800, 0x0C40, 0x3D7C,
+    0xFFEC, 0x0005, 0x203C, 0xFFE8, 0xDEFC, 0x4A2E, 0x0030, 0x0028,
+    0x2F08, 0x200B, 0x6002, 0x426E, 0x2D48, 0x2053, 0x2040, 0x1800,
+    0x6004, 0x41EE, 0x2F28, 0x2F01, 0x670A, 0x4840, 0x2007, 0x6608,
+    0x0118, 0x2F07, 0x3028, 0x3F2E, 0x302B, 0x226E, 0x2F2B, 0x002C,
+    0x670C, 0x225F, 0x6006, 0x00FF, 0x3007, 0xFFEE, 0x5340, 0x0040,
+    0xFFE4, 0x4A40, 0x660A, 0x000F, 0x4EAD, 0x70FF, 0x22D8, 0x486B,
+    0x0022, 0x204B, 0x670E, 0x4AAE, 0x4E90, 0xFFE0, 0xFFC0, 0x002A,
+    0x2740, 0x6702, 0x51C8, 0x02B6, 0x487A, 0x2278, 0xB06E, 0xFFE6,
+    0x0009, 0x322E, 0x3E00, 0x4841, 0xFFEA, 0x43EE, 0x4E71, 0x7400,
+    0x2F2C, 0x206C, 0x003C, 0x0026, 0x0050, 0x1880, 0x301F, 0x2200,
+    0x660C, 0xFFDA, 0x0038, 0x6602, 0x302C, 0x200C, 0x2D6E, 0x4240,
+    0xFFE2, 0xA9F0, 0xFF00, 0x377C, 0xE580, 0xFFDC, 0x4868, 0x594F,
+    0x0034, 0x3E1F, 0x6008, 0x2F06, 0xFFDE, 0x600A, 0x7002, 0x0032,
+    0xFFCC, 0x0080, 0x2251, 0x101F, 0x317C, 0xA029, 0xFFD8, 0x5240,
+    0x0100, 0x6710, 0xA023, 0xFFCE, 0xFFD4, 0x2006, 0x4878, 0x002E,
+    0x504F, 0x43FA, 0x6712, 0x7600, 0x41E8, 0x4A6E, 0x20D9, 0x005A,
+    0x7FFF, 0x51CA, 0x005C, 0x2E00, 0x0240, 0x48C7, 0x6714, 0x0C80,
+    0x2E9F, 0xFFD6, 0x8000, 0x1000, 0x4842, 0x4A6B, 0xFFD2, 0x0048,
+    0x4A47, 0x4ED1, 0x206F, 0x0041, 0x600C, 0x2A78, 0x422E, 0x3200,
+    0x6574, 0x6716, 0x0044, 0x486D, 0x2008, 0x486C, 0x0B7C, 0x2640,
+    0x0400, 0x0068, 0x206D, 0x000D, 0x2A40, 0x000B, 0x003E, 0x0220
+)
+
+TABLE_DICT = {word: idx for (idx, word) in enumerate(TABLE)}
+
+
+def unpack(src, _calculate_slop=False):
+    dst = bytearray()
+
+    pos = 0
+    magic, hdrLen, vers, iscmp, unpackSize, _dcmp, _slop, tabSize, comprFlags = struct.unpack_from(">LHBBLHHBB", src, pos)
+    pos += 18
+
+    hasDynamicTab = comprFlags & 1
+    isBitmapped = comprFlags & 2
+
+    if hasDynamicTab:
+        nEntries = tabSize + 1
+        dynamicLUT = struct.unpack_from(">" + str(nEntries) + "H", src, pos)
+        pos += nEntries * 2
+        # dump dynamic LUT
+        if 0:
+            for idx, elem in enumerate(dynamicLUT):
+                if idx and not idx & 3:
+                    print(",")
+                else:
+                    print(", ", end="")
+                print("0x%04X" % elem, end="")
+            print("")
+
+    LUT = dynamicLUT if hasDynamicTab else TABLE
+    nWords = unpackSize >> 1
+    hasExtraByte = unpackSize & 1
+
+    LUT = [word.to_bytes(2, 'big') for word in LUT]
+
+    slop = 0 # extra room beyond unpackSize needed while unpacking; stored at offset 14
+    slop = max(slop, len(dst) + (len(src) - pos) - unpackSize)
+
+    if isBitmapped:
+        evenUnpackLen = unpackSize - (unpackSize % 2)
+        while len(dst) < evenUnpackLen:
+            if not len(dst) & 0xF:
+                mask = src[pos]; pos += 1
+
+            if mask & 0x80:
+                dst.extend(LUT[src[pos]]); pos += 1
+            else:
+                dst.append(src[pos]); pos += 1
+                dst.append(src[pos]); pos += 1
+
+            mask <<= 1
+
+            slop = max(slop, len(dst) + (len(src) - pos) - unpackSize)
+    else:
+        while len(dst) < (unpackSize & ~1):
+            dst.extend(LUT[src[pos]]); pos += 1
+
+            slop = max(slop, len(dst) + (len(src) - pos) - unpackSize)
+
+    if hasExtraByte: # have we got an extra byte at the end?
+        dst.append(src[pos]) # copy it over
+        pos += 1
+
+    if _calculate_slop: return slop
+
+    return dst
+
+
+def pack_with_flags(src, flags, _defer_slop=False):
+    nWords = len(src) >> 1
+    inWords = struct.unpack(">" + str(nWords) + "H", src[:nWords*2])
+
+    dst = bytearray(b'\xA8\x9Fer') # magic 0xA89F6572
+    dst.extend(struct.pack('>HBBLH', 0x12, 9, 1, len(src), 2)) # hdrlen, 9=gregg, 1=compressed, size, 2=dcmp
+    dst.extend(bytes(4)) # slop, tabSize, comprFlags: filled in below
+    dst[17] = flags
+
+    # Create a custom lookup table instead of the one at the beginning of the file
+    LUT = TABLE_DICT
+    if flags & 1:
+        import collections
+        wordsCounts = collections.Counter(inWords)
+        customTab = sorted(wordsCounts, reverse=True, key=lambda word: (wordsCounts[word], word))
+
+        if len(customTab) > 256:
+            # The table has 8-bit indices. The Apple compressor short circuits inappropriately in this case.
+            del customTab[256:]
+        elif flags & 2:
+            # If we are able to encode words not in the table, then save space
+            # by removing rarely used entries from the table
+            while customTab and wordsCounts[customTab[-1]] <= 2:
+                del customTab[-1]
+
+        # Put the table after the header
+        dst[16] = len(customTab) - 1
+        dst.extend(struct.pack('>' + str(len(customTab)) + 'H', *customTab))
+
+        # Use our custom function for looking up words
+        LUT = {word: idx for (idx, word) in enumerate(customTab)}
+
+    if flags & 2:
+        # Use bitmaps to distinguish between verbatim copies and table lookups
+        LUT = LUT.get
+        for pos, word in enumerate(inWords):
+            if not (pos & 7):
+                mask = len(dst)
+                dst.append(0)
+
+            result = LUT(word, None)
+            if result is None:
+                dst.append(word >> 8)
+                dst.append(word & 0xFF)
+            else:
+                dst.append(result)
+                dst[mask] |= 0x80 >> (pos & 7)
+
+    else:
+        # Table lookups only
+        dst.extend(LUT[word] for word in inWords) # triggers https://bugs.python.org/issue37417
+
+    if len(src) & 1: # copy over last byte in the case of odd length
+        dst.append(src[-1])
+
+    if not _defer_slop:
+        slop = unpack(dst, _calculate_slop=True)
+        struct.pack_into('>H', dst, 14, slop)
+
+    return dst
+
+
+def pack(src):
+    if len(src) < 18: return src
+
+    bestCompress = src
+    bestSize = len(src)
+    for flags in (0, 1, 2, 3):
+        try:
+            thisCompress = pack_with_flags(src, flags, _defer_slop=True)
+        except Exception:
+            continue
+
+        thisSize = len(thisCompress)
+        # thisSize += struct.unpack_from('>H', thisCompress, 14)[0]
+        if thisSize < bestSize:
+            bestCompress = thisCompress
+            bestSize = thisSize
+
+    if bestCompress is not src:
+        slop = unpack(bestCompress, _calculate_slop=True)
+        struct.pack_into('>H', bestCompress, 14, slop)
+
+    return bestCompress
diff --git a/setup.py b/setup.py
index 8de1b86..df557dd 100644
--- a/setup.py
+++ b/setup.py
@@ -18,5 +18,5 @@ setup(
         'Development Status :: 3 - Alpha',
     ],
     packages=['macresources'],
-    scripts=['bin/SimpleRez', 'bin/SimpleDeRez', 'bin/hexrez', 'bin/rezhex', 'bin/sortrez', 'bin/rfx'],
+    scripts=['bin/SimpleRez', 'bin/SimpleDeRez', 'bin/hexrez', 'bin/rezhex', 'bin/sortrez', 'bin/rfx', 'bin/greggybits'],
 )
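A minimal usage sketch of the new `macresources.greggybits` API, assuming the package from this patch is installed; the file name `Example.rsrc` is a hypothetical stand-in for raw resource data:

```python
# Round-trip some resource bytes through the GreggyBits codec.
from macresources.greggybits import pack, unpack

with open('Example.rsrc', 'rb') as f:  # hypothetical raw resource data
    raw = f.read()

packed = pack(raw)
if packed is not raw:  # pack() hands back the input unchanged when no mode shrinks it
    assert bytes(unpack(packed)) == raw  # unpack() returns a bytearray
    print('compressed', len(raw), 'bytes down to', len(packed))
else:
    print('left uncompressed')
```

Note that `pack()` simply tries all four flag combinations (static vs. dynamic table, with or without the literal bitmap) and keeps the smallest result, so callers never choose a mode themselves; `pack_with_flags()` is exposed for tools such as the `--debug` round-trip check in `bin/greggybits`.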