#!/usr/bin/env python3

import sys
from os import path
import hashlib


# Thanks user25148: https://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python
def generate_file_md5(filename, blocksize=2**20):
    m = hashlib.md5()
    with open(filename, 'rb') as f:
        while True:
            buf = f.read(blocksize)
            if not buf:
                break
            m.update(buf)
    return m.hexdigest()
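
# Illustrative sanity check (assumes a Unix host, where /dev/null reads as an
# empty file; the digest below is MD5 of empty input):
#   generate_file_md5('/dev/null') == 'd41d8cd98f00b204e9800998ecf8427e'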


def split_args_by_delim(xx):
    retval = [[]]
    for el in xx:
        if el in ('', '--'):
            retval.append([])
        else:
            retval[-1].append(el)
    return retval
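
# For example, both '--' and an empty string start a new group:
#   split_args_by_delim(['a', 'b', '--', 'c']) == [['a', 'b'], ['c']]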


def group_to_dict(group):
    retval = {}
    for filename in group:
        key = path.basename(filename)
        if key in retval:
            raise ValueError('duplicate filename in same group: %r' % filename)
        value = generate_file_md5(filename)
        retval[key] = value
    return retval
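
# Illustrative shape (paths invented): group_to_dict(['d1/foo', 'd2/bar'])
# returns {'foo': <md5 of d1/foo>, 'bar': <md5 of d2/bar>}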


def indices(container, searchfor):
    retval = []
    for i, el in enumerate(container):
        if el == searchfor:
            retval.append(i)
    return retval
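
# For example: indices(['a', 'b', 'a'], 'a') == [0, 2]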


def where_do_snakes_go(hash_list):
    hash_do_list = list(set(hash_list))
    hash_do_list.sort(key=hash_list.index)

    hash_y_dict = {}
    occupied_points = set()

    # Unique hashes (and gaps) sit on the marker row; each duplicated hash is
    # greedily packed onto the lowest row where its whole column span is free.
    for the_hash in hash_do_list:
        if the_hash is None or hash_list.count(the_hash) == 1:
            hash_y_dict[the_hash] = 0

        else:
            first_x = hash_list.index(the_hash)
            last_x = len(hash_list) - list(reversed(hash_list)).index(the_hash) - 1

            y = 1
            while any((x, y) in occupied_points for x in range(first_x, last_x + 1)):
                y += 1

            hash_y_dict[the_hash] = y

            for x in range(first_x, last_x + 1):
                occupied_points.add((x, y))

    return hash_y_dict
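
# Worked example (derived from the rules above):
#   where_do_snakes_go(['a', 'b', 'a', 'b']) == {'a': 1, 'b': 2}
# 'a' claims row 1 across columns 0-2; 'b' spans columns 1-3, collides with 'a'
# at column 1, and drops to row 2.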


def one_snake(hash_list, col_width):
    col_x = [i * col_width + (col_width + 1) // 2 for i in range(len(hash_list))]

    locs = where_do_snakes_go(hash_list)

    lines = []
    for this_hash, hash_y in locs.items():
        if this_hash is None: continue

        while len(lines) <= hash_y:
            lines.append([' '] * (col_width * len(hash_list)))

        which = indices(hash_list, this_hash)
        which_x = [col_x[col] for col in which]

        for x in range(col_x[which[0]], col_x[which[-1]] + 1):
            if x == col_x[which[0]]:
                the_char = '\u2514' # '└'
            elif x == col_x[which[-1]]:
                the_char = '\u2518' # '┘'
            elif x in which_x:
                the_char = '\u2534' # '┴'
            else:
                the_char = '\u2500' # '─'

            lines[hash_y][x] = the_char

        for ind in which:
            lines[0][col_x[ind]] = '\u25CF' # '●'

        for ind in which:
            x = col_x[ind]
            for y in range(1, hash_y):
                lines[y][x] = '\u2502' # '│'

    return '\n'.join(''.join(l).rstrip() for l in lines).rstrip('\n')
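
# For instance (worked by hand, col_width=5), one_snake(['a', 'b', 'a'], 5) is:
#   ●    ●    ●
#   └─────────┘
# The two matching columns are tied together one row below the markers; the
# lone 'b' stays an unconnected dot.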


# uniques_by_name is a list of (name, [hash, ...])
def snake_table(uniques_by_name, headings=None):
    if headings:
        # Columns fit the longest heading, are at least 5 wide, and are kept odd.
        col_width = max(len(h) for h in headings) + 1
        col_width = max(col_width, 5)
        while col_width % 2 == 0: col_width += 1
    else:
        col_width = 5

    gutter_width = max(len(n) for n, uu in uniques_by_name)

    lines = []

    for i, (name, uniques) in enumerate(uniques_by_name):
        if lines: lines.append('')

        # Repeat the heading row every 20 entries so it stays in view.
        if i % 20 == 0 and headings:
            lines.append((' ' * gutter_width) + ' ' + ''.join(h.center(col_width) for h in headings).rstrip())
            lines.append('')

        gutter = name
        for line in one_snake(uniques, col_width).split('\n'):
            lines.append(gutter.rjust(gutter_width) + ' ' + line)
            gutter = ''

    return '\n'.join(lines)
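
# Sketch of a direct call (names invented for illustration); each entry draws
# one labelled snake, with heading rows repeated every 20 entries:
#   print(snake_table([('row1', ['h1', None, 'h1'])], headings=['a', 'b', 'c']))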


# Cheeky experiment
if __name__ == '__main__' and sys.argv[1:2] == ['-r']:
    import macresources

    files = sys.argv[2:]
    resfiles = [list(macresources.parse_rez_code(open(p, 'rb').read())) for p in files]
    headings = [path.basename(p) for p in files]
    headings = [h[:-6] if h.endswith('.rdump') else h for h in headings]

    every_typeid = list(set((r.type, r.id) for rf in resfiles for r in rf))
    every_typeid.sort(key=lambda rtypeid: (rtypeid[0].decode('mac_roman').upper(), rtypeid[0].decode('mac_roman'), rtypeid[1]))

    by_typeid = {typeid: [None] * len(resfiles) for typeid in every_typeid}

    for i, rf in enumerate(resfiles):
        for r in rf:
            by_typeid[(r.type, r.id)][i] = hash(bytes(r))

    uniques_by_name = []
    for rtype, rid in every_typeid:
        leftcol = rtype.decode('mac_roman') + str(rid).rjust(7)
        uniques_by_name.append((leftcol, by_typeid[(rtype, rid)]))

    print(snake_table(uniques_by_name, headings=headings))
    sys.exit()
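
# Hypothetical invocation of the -r mode above (file names invented):
#   ./<this script> -r System.rdump Finder.rdump
# One row per resource (type, ID); columns whose resource bytes hash alike are
# tied together.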


if __name__ == '__main__':
    group_lists = split_args_by_delim(sys.argv[1:])

    every_filename = sorted(set(path.basename(p) for g in group_lists for p in g))

    headings = [path.commonprefix([path.abspath(p) for p in group]) for group in group_lists]
    heading_strip = path.commonprefix(headings)
    headings = [h[len(heading_strip):].lstrip(path.sep) for h in headings]

    uniques_by_name = {fn: [None] * len(group_lists) for fn in every_filename}
    for i, group in enumerate(group_lists):
        for p in group:
            uniques_by_name[path.basename(p)][i] = generate_file_md5(p)

    uniques_by_name = list(uniques_by_name.items())

    print(snake_table(uniques_by_name, headings=headings))
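
# Typical default-mode invocation (paths invented): each '--'-separated group of
# files becomes a column, rows are basenames, and equal MD5s are tied together:
#   ./<this script> old/* -- new/*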