ToolboxToolbox/fsnek

#!/usr/bin/env python3

import sys
from os import path
import hashlib
import textwrap


# Thanks user25148: https://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python
def generate_file_md5(filename, blocksize=2**20):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and consumed *blocksize* bytes at a
    time, so the whole file is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel keeps reading until the first empty chunk,
        # which is how a binary read signals end of file.
        for chunk in iter(lambda: stream.read(blocksize), b''):
            digest.update(chunk)
    return digest.hexdigest()


def split_args_by_delim(xx):
    """Split the sequence *xx* into groups at every '' or '--' element.

    The separators themselves are dropped. The result always contains at
    least one group, and adjacent separators produce empty groups, e.g.
    ['a', '--', '--', 'b'] -> [['a'], [], ['b']].
    """
    groups = []
    current = []
    for arg in xx:
        if arg in ('', '--'):
            # A separator closes the group being built and opens a new one.
            groups.append(current)
            current = []
            continue
        current.append(arg)
    groups.append(current)
    return groups


def group_to_dict(group):
    """Map each path's basename to the MD5 digest of that file.

    *group* is an iterable of file paths. Raises ValueError as soon as a
    second path with an already-seen basename is encountered.
    """
    digest_by_name = {}
    for filename in group:
        basename = path.basename(filename)
        if basename in digest_by_name:
            raise ValueError('duplicate filename in same group: %r' % filename)
        digest_by_name[basename] = generate_file_md5(filename)
    return digest_by_name


def indices(container, searchfor):
    """Return, in ascending order, every index of *container* whose element
    compares equal to *searchfor* (an empty list when there is none)."""
    return [pos for pos, item in enumerate(container) if item == searchfor]


def where_do_snakes_go(hash_list):
    """Choose a row (y) for every distinct value in *hash_list*.

    Returns a dict mapping each distinct value to its row, with keys in order
    of first appearance (relies on insertion-ordered dicts, Python 3.7+).

    None and any value that occurs only once get row 0. A value that occurs
    more than once spans the columns from its first to its last occurrence
    and is given the lowest row >= 1 in which none of those columns has
    already been claimed by an earlier value.
    """
    # One pass collects, per distinct value: first column, last column, count.
    first_seen = {}
    last_seen = {}
    tally = {}
    for column, value in enumerate(hash_list):
        first_seen.setdefault(value, column)
        last_seen[value] = column
        tally[value] = tally.get(value, 0) + 1

    rows = {}
    taken = set()  # (column, row) cells already claimed by a span

    for value, start in first_seen.items():
        if value is None or tally[value] == 1:
            rows[value] = 0
            continue

        span = range(start, last_seen[value] + 1)

        # Move down one row at a time until the whole span is free.
        row = 1
        while not taken.isdisjoint((column, row) for column in span):
            row += 1

        rows[value] = row
        taken.update((column, row) for column in span)

    return rows


def one_snake(hash_list, col_width):
    """Draw one table row as box-drawing text.

    hash_list -- one entry per column; None means the column has no value.
    col_width -- width of each column in characters.

    Every column with a value gets a dot on the first line. Columns holding
    the same value are joined by a bracket drawn on the row chosen by
    where_do_snakes_go(), with vertical strokes linking each dot down to it.

    Returns the lines joined with newlines, trailing whitespace stripped.
    The result is '' when no column has a value, which includes an empty
    hash_list (this used to raise ValueError from an unused max() call).
    """
    # x position of the marker character inside each column.
    col_x = [i * col_width + (col_width + 1) // 2 for i in range(len(hash_list))]

    locs = where_do_snakes_go(hash_list)

    lines = []
    for this_hash, hash_y in locs.items():
        if this_hash is None:
            continue

        # Grow the canvas on demand so that row hash_y exists.
        while len(lines) <= hash_y:
            lines.append([' '] * (col_width * len(hash_list)))

        which = indices(hash_list, this_hash)
        which_x = [col_x[col] for col in which]
        left_x = which_x[0]
        right_x = which_x[-1]

        # Horizontal part of the bracket, corner to corner.
        for x in range(left_x, right_x + 1):
            if x == left_x:
                the_char = '\u2514' # '\'
            elif x == right_x:
                the_char = '\u2518' # '/'
            elif x in which_x:
                the_char = '\u2534' # inv 'T'
            else:
                the_char = '\u2500' # '-'

            lines[hash_y][x] = the_char

        # Dots go on the first line; for a value on row 0 this overwrites the
        # corner character that was just drawn there.
        for x in which_x:
            lines[0][x] = '\u25CF' # '*'

        # Vertical strokes from below the dot down to just above the bracket.
        # These are written unconditionally, so they replace whatever an
        # earlier value drew in those cells.
        for x in which_x:
            for y in range(1, hash_y):
                lines[y][x] = '\u2502' # '|'

    return '\n'.join(''.join(l).rstrip() for l in lines).rstrip('\n')


# uniques_by_name is a list of (name, [hash, ...])
def snake_table(uniques_by_name, headings=None):
    """Format a whole comparison table as one string.

    uniques_by_name -- sequence of (name, [hash, ...]) pairs, one per table
        row; each hash list is drawn by one_snake().
    headings -- optional column titles. When given they determine the column
        width, and a heading line is emitted before the first row and again
        before every 20th row after it.

    Returns the table lines joined with newlines, with a blank line between
    rows. An empty uniques_by_name yields '' (it used to raise ValueError
    from max() on an empty sequence).
    """
    if headings:
        # Longest heading plus one space, at least 5, then rounded up to an
        # odd number (presumably so each column has a middle character).
        col_width = max(max(len(h) for h in headings) + 1, 5)
        if col_width % 2 == 0:
            col_width += 1
    else:
        col_width = 5

    # default=0 keeps an empty table from crashing.
    gutter_width = max((len(name) for name, _ in uniques_by_name), default=0)

    lines = []

    for i, (name, uniques) in enumerate(uniques_by_name):
        if lines:
            lines.append('')

        if i % 20 == 0 and headings:
            heading_text = ''.join(h.center(col_width) for h in headings).rstrip()
            lines.append((' ' * gutter_width) + ' ' + heading_text)
            lines.append('')

        # The row name appears only beside the first line of its drawing.
        gutter = name
        for line in one_snake(uniques, col_width).split('\n'):
            lines.append(gutter.rjust(gutter_width) + ' ' + line)
            gutter = ''

    return '\n'.join(lines)


# Cheeky experiment
if __name__ == '__main__' and sys.argv[1:2] == ['-r']:
    # Resource mode: every remaining argument is a Rez-style dump; the table
    # has one row per (type, id) pair and one column per file.
    import macresources

    files = sys.argv[2:]

    # Read each file inside a with-block so the handle is closed promptly
    # instead of being left for the garbage collector.
    resfiles = []
    for p in files:
        with open(p, 'rb') as f:
            resfiles.append(list(macresources.parse_rez_code(f.read())))

    headings = [path.basename(p) for p in files]
    headings = [h[:-6] if h.endswith('.rdump') else h for h in headings]

    # NOTE(review): the parsed resources are used as objects with a bytes
    # `.type`, an `.id` and a bytes() conversion -- confirm against macresources.
    every_typeid = list(set((r.type, r.id) for rf in resfiles for r in rf))
    # Case-insensitive order by type, ties broken by exact type and then id.
    every_typeid.sort(key=lambda rtypeid: (rtypeid[0].decode('mac_roman').upper(), rtypeid[0].decode('mac_roman'), rtypeid[1]))

    by_typeid = {typeid: [None] * len(resfiles) for typeid in every_typeid}

    for i, rf in enumerate(resfiles):
        for r in rf:
            # The built-in hash() is enough here: values are only compared
            # with each other inside this one run.
            by_typeid[(r.type, r.id)][i] = hash(bytes(r))

    uniques_by_name = []
    for rtype, rid in every_typeid:
        leftcol = rtype.decode('mac_roman') + str(rid).rjust(7)
        uniques_by_name.append((leftcol, by_typeid[(rtype, rid)]))

    print(snake_table(uniques_by_name, headings=headings))
    # sys.exit() rather than exit(): the latter is injected by the site
    # module and is missing under `python -S`. This also stops the plain
    # file-comparison block below from running.
    sys.exit()


if __name__ == '__main__':
    # Each run of arguments between '' / '--' separators is one group of
    # files, i.e. one column of the table.
    group_lists = split_args_by_delim(sys.argv[1:])
    column_count = len(group_lists)

    # One row per distinct basename across all groups, in sorted order.
    basenames = set()
    for group in group_lists:
        basenames.update(path.basename(p) for p in group)

    # A column's heading is the common prefix of its absolute paths, with the
    # part that all columns share removed from the front.
    prefixes = []
    for group in group_lists:
        prefixes.append(path.commonprefix([path.abspath(p) for p in group]))
    shared_len = len(path.commonprefix(prefixes))
    headings = [prefix[shared_len:].lstrip(path.sep) for prefix in prefixes]

    # None marks "this group has no file with that name".
    digest_rows = {name: [None] * column_count for name in sorted(basenames)}
    for column, group in enumerate(group_lists):
        for p in group:
            digest_rows[path.basename(p)][column] = generate_file_md5(p)

    uniques_by_name = list(digest_rows.items())

    print(snake_table(uniques_by_name, headings=headings))
More junk 2020-05-13 11:14:40 +00:00			`#!/usr/bin/env python3`

			`import sys`
			`from os import path`
			`import hashlib`
			`import textwrap`


			`# Thanks user25148: https://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python`
			`def generate_file_md5(filename, blocksize=2**20):`
			`m = hashlib.md5()`
			`with open(filename, 'rb') as f:`
			`while True:`
			`buf = f.read(blocksize)`
			`if not buf:`
			`break`
			`m.update(buf)`
			`return m.hexdigest()`


			`def split_args_by_delim(xx):`
			`retval = [[]]`
			`for el in xx:`
			`if el in ('', '--'):`
			`retval.append([])`
			`else:`
			`retval[-1].append(el)`
			`return retval`


			`def group_to_dict(group):`
			`retval = {}`
			`for filename in group:`
			`key = path.basename(filename)`
			`if key in retval:`
			`raise ValueError('duplicate filename in same group: %r' % filename)`
			`value = generate_file_md5(filename)`
			`retval[key] = value`
			`return retval`


			`def indices(container, searchfor):`
			`retval = []`
			`for i, el in enumerate(container):`
			`if el == searchfor:`
			`retval.append(i)`
			`return retval`


			`def where_do_snakes_go(hash_list):`
			`hash_do_list = list(set(hash_list))`
			`hash_do_list.sort(key=hash_list.index)`

			`hash_y_dict = {}`
			`occupied_points = set()`

			`# Next, try to cram everything else onto other lines...`
			`for the_hash in hash_do_list:`
			`if the_hash is None or hash_list.count(the_hash) == 1:`
			`hash_y_dict[the_hash] = 0`

			`else:`
			`first_x = hash_list.index(the_hash)`
			`last_x = len(hash_list) - list(reversed(hash_list)).index(the_hash) - 1`

			`y = 1`
			`while any((x, y) in occupied_points for x in range(first_x, last_x + 1)):`
			`y += 1`

			`hash_y_dict[the_hash] = y`

			`for x in range(first_x, last_x + 1):`
			`occupied_points.add((x, y))`

			`return hash_y_dict`


			`def one_snake(hash_list, col_width):`
			`col_x = [i * col_width + (col_width + 1) // 2 for i in range(len(hash_list))]`

			`locs = where_do_snakes_go(hash_list)`

			`num_lines = max(locs.values())`

			`lines = []`
			`for this_hash, hash_y in locs.items():`
			`if this_hash is None: continue`

			`while len(lines) <= hash_y:`
			`lines.append([' '] * (col_width * len(hash_list)))`

			`which = indices(hash_list, this_hash)`
			`which_x = [col_x[col] for col in which]`

			`for x in range(col_x[which[0]], col_x[which[-1]] + 1):`
			`if x == col_x[which[0]]:`
			`the_char = '\u2514' # '\'`
			`elif x == col_x[which[-1]]:`
			`the_char = '\u2518' # '/'`
			`elif x in which_x:`
			`the_char = '\u2534' # inv 'T'`
			`else:`
			`the_char = '\u2500' # '-'`

			`lines[hash_y][x] = the_char`

			`for ind in which:`
			`lines[0][col_x[ind]] = '\u25CF' # '*'`

			`for ind in which:`
			`x = col_x[ind]`
			`for y in range(1, hash_y):`
			`lines[y][x] = '\u2502' # '\|'`

			`return '\n'.join(''.join(l).rstrip() for l in lines).rstrip('\n')`


			`# uniques_by_name is a list of (name, [hash, ...])`
			`def snake_table(uniques_by_name, headings=None):`
			`if headings:`
			`col_width = max(len(h) for h in headings) + 1`
			`col_width = max(col_width, 5)`
			`while col_width % 2 == 0: col_width += 1`
			`else:`
			`col_width = 5`

			`gutter_width = max(len(n) for n, uu in uniques_by_name)`

			`lines = []`

			`for i, (name, uniques) in enumerate(uniques_by_name):`
			`if lines: lines.append('')`

			`if i % 20 == 0 and headings:`
			`lines.append((' ' * gutter_width) + ' ' + ''.join(h.center(col_width) for h in headings).rstrip())`
			`lines.append('')`

			`gutter = name`
			`for line in one_snake(uniques, col_width).split('\n'):`
			`lines.append(gutter.rjust(gutter_width) + ' ' + line)`
			`gutter = ''`

			`return '\n'.join(lines)`


			`# Cheeky experiment`
			`if __name__ == '__main__' and sys.argv[1:2] == ['-r']:`
			`import macresources`

			`files = sys.argv[2:]`
			`resfiles = [list(macresources.parse_rez_code(open(p, 'rb').read())) for p in files]`
			`headings = [path.basename(p) for p in files]`
			`headings = [h[:-6] if h.endswith('.rdump') else h for h in headings]`

			`every_typeid = list(set((r.type, r.id) for rf in resfiles for r in rf))`
			`every_typeid.sort(key=lambda rtypeid: (rtypeid[0].decode('mac_roman').upper(), rtypeid[0].decode('mac_roman'), rtypeid[1]))`

			`by_typeid = {typeid: [None] * len(resfiles) for typeid in every_typeid}`

			`for i, rf in enumerate(resfiles):`
			`for r in rf:`
			`by_typeid[(r.type, r.id)][i] = hash(bytes(r))`

			`uniques_by_name = []`
			`for rtype, rid in every_typeid:`
			`leftcol = rtype.decode('mac_roman') + str(rid).rjust(7)`
			`uniques_by_name.append((leftcol, by_typeid[(rtype, rid)]))`

			`print(snake_table(uniques_by_name, headings=headings))`
			`exit()`


			`if __name__ == '__main__':`
			`group_lists = split_args_by_delim(sys.argv[1:])`

			`every_filename = sorted(set(path.basename(p) for g in group_lists for p in g))`

			`headings = [path.commonprefix([path.abspath(p) for p in group]) for group in group_lists]`
			`heading_strip = path.commonprefix(headings)`
			`headings = [h[len(heading_strip):].lstrip(path.sep) for h in headings]`

			`uniques_by_name = {fn: [None] * len(group_lists) for fn in every_filename}`
			`for i, group in enumerate(group_lists):`
			`for p in group:`
			`uniques_by_name[path.basename(p)][i] = generate_file_md5(p)`

			`uniques_by_name = list(uniques_by_name.items())`

			`print(snake_table(uniques_by_name, headings=headings))`