diff --git a/build_levenshtein_graph.py b/build_levenshtein_graph.py new file mode 100644 index 0000000..d438f21 --- /dev/null +++ b/build_levenshtein_graph.py @@ -0,0 +1,55 @@ +# Constructs JSON representing boot1 levenshtein distance data from DB + +import json +import sqlite3 + +DB_PATH = '/tank/apple2/data/apple2.db' + +def main(): + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + q = cursor.execute( + """ + select boot1_sha1, boot1.name, count(*) as c from disks + join + (select sha1, name from boot1) as boot1 + on disks.boot1_sha1 = boot1.sha1 group by 1; + """ + ) + + sha1_indexes = {} + graph = { + "nodes": [], + "links": [] + } + idx = 0 + for r in q: + (sha1, name, count) = r + if count < 10: + continue + sha1_indexes[sha1] = idx + idx += 1 + graph["nodes"].append({"sha1": sha1, "name": name, "radius": count, "group": idx}) + + q = cursor.execute( + """ + select source, target, distance from boot1distances; + """ + ) + + for r in q: + (source, target, distance) = r + if source > target: + try: + graph["links"].append( + {"source": sha1_indexes[source], "target": sha1_indexes[target], "distance": distance}) + except KeyError: + # Source or target is not common enough to include + continue + + out = file("levenshtein.json", "w+") + json.dump(graph, out, indent=4, separators=(',', ': ')) + +if __name__ == "__main__": + main() diff --git a/render.html b/render.html new file mode 100644 index 0000000..63f34f4 --- /dev/null +++ b/render.html @@ -0,0 +1,119 @@ + + + + +