Build a JSON graph of the Levenshtein distances between pairs of boot1
sectors and visualize it as a D3 force-directed graph.  This is pretty
cool.

There are two thresholds that control the graph size and connectivity:
- limit to boot1 images that are represented in at least 10 disks
- only render links for distances < 200 bits

Needs improvements, e.g.

- use human-readable names for the boot1 images
- potentially add controls for dynamically changing those thresholds
  and other D3 params
- display list of associated disks to each image
This commit is contained in:
kris 2017-05-11 23:54:59 +01:00
parent 8eab43e5ee
commit 6a4706d6b0
2 changed files with 174 additions and 0 deletions

View File

@ -0,0 +1,55 @@
# Constructs JSON representing boot1 levenshtein distance data from DB
import json
import sqlite3
# Filesystem path of the SQLite database that main() connects to.
DB_PATH = '/tank/apple2/data/apple2.db'
def main():
    """Export the boot1 distance graph from the database to levenshtein.json.

    Nodes are the boot1 images that at least 10 rows of ``disks`` refer to.
    Links are the rows of ``boot1distances`` with ``source > target`` whose
    two endpoints are both nodes. The graph is written as indented JSON to
    ``levenshtein.json`` in the current working directory.
    """
    graph = {
        "nodes": [],
        "links": []
    }
    # Maps a boot1 sha1 to its position in graph["nodes"]; links refer to
    # their endpoints by this position.
    sha1_indexes = {}

    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        q = cursor.execute(
            """
            select boot1_sha1, boot1.name, count(*) as c from disks
            join
            (select sha1, name from boot1) as boot1
            on disks.boot1_sha1 = boot1.sha1 group by 1;
            """
        )
        idx = 0
        for (sha1, name, count) in q:
            if count < 10:
                # Too few disks use this boot1 image; leave it out.
                continue
            sha1_indexes[sha1] = idx
            idx += 1
            # "group" is taken after the increment, so it is the node's
            # position plus one.
            graph["nodes"].append(
                {"sha1": sha1, "name": name, "radius": count, "group": idx})

        q = cursor.execute(
            """
            select source, target, distance from boot1distances;
            """
        )
        for (source, target, distance) in q:
            # Only rows with source > target are used (presumably the table
            # stores each pair in both directions -- TODO confirm).
            if not source > target:
                continue
            if source not in sha1_indexes or target not in sha1_indexes:
                # Source or target is not common enough to include
                continue
            graph["links"].append({
                "source": sha1_indexes[source],
                "target": sha1_indexes[target],
                "distance": distance,
            })
    finally:
        # Release the database handle even if a query fails.
        conn.close()

    # open() works on both Python 2 and 3 (file() is Python 2 only), and the
    # with-block guarantees the JSON is flushed and the handle is closed.
    with open("levenshtein.json", "w") as out:
        json.dump(graph, out, indent=4, separators=(',', ': '))
# Run the export only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

119
render.html Normal file
View File

@ -0,0 +1,119 @@
<!DOCTYPE html>
<meta charset="utf-8">
<canvas width="2000" height="2000"></canvas>
<script src="https://d3js.org/d3.v4.js"></script>
<script>
// The page's <canvas> element and its 2D drawing context, plus the canvas
// dimensions that the drawing and drag code use to centre the graph.
var canvas = document.querySelector("canvas");
var context = canvas.getContext("2d");
var width = canvas.width;
var height = canvas.height;
// TODO
// - hover text
// - distance on links
// - full file name on nodes
// - list of all disks on nodes
// - size nodes by number of matching disks
// Double-click on a node and sort it in relation to all of its links
d3.json("levenshtein.json", function(error, graph) {
// Abort if levenshtein.json could not be loaded.
if (error) throw error;

var nodes = graph.nodes;

// Exclusive upper bound on the distance of the links that are kept.
var MAX_LINK_DISTANCE = 200;

// Keep only links whose distance lies strictly between 0 and the threshold;
// all other links are neither simulated nor drawn.
var links = graph.links.filter(function(link) {
  return link.distance < MAX_LINK_DISTANCE && link.distance > 0;
});

// Force layout: nodes repel each other, each kept link is given its stored
// distance as target length, and the x/y forces use their default targets.
// ticked() runs on every simulation tick.
var simulation = d3.forceSimulation(nodes)
  .force("charge", d3.forceManyBody().strength(-500))
  .force("link", d3.forceLink(links)
    .strength(1)
    // TODO: Also add source and target radius so circles don't overlap
    .distance(function(d) { return d.distance; }))
  .force("x", d3.forceX())
  .force("y", d3.forceY())
  .on("tick", ticked);

// Attach drag handling to the canvas; dragsubject picks the node to drag.
d3.select(canvas)
  .call(d3.drag()
    .container(canvas)
    .subject(dragsubject)
    .on("start", dragstarted)
    .on("drag", dragged)
    .on("end", dragended));
// Redraws the whole canvas: clears it, moves the origin to the canvas
// centre, strokes all links in grey as one path, then draws every node.
function ticked() {
  var halfWidth = width / 2;
  var halfHeight = height / 2;

  context.clearRect(0, 0, width, height);
  context.save();
  context.translate(halfWidth, halfHeight);

  // All links are accumulated into a single path and stroked once.
  context.beginPath();
  links.forEach(function(link) {
    drawLink(link);
  });
  context.strokeStyle = "#aaa";
  context.stroke();

  nodes.forEach(function(node) {
    drawNode(node);
  });
  // Strokes whatever path is current after the last drawNode call in white.
  context.strokeStyle = "#fff";
  context.stroke();

  context.restore();
}
// Returns the node that simulation.find() reports for the pointer position.
// The pointer is shifted by half the canvas size because the graph is drawn
// with its origin at the canvas centre.
function dragsubject() {
  var pointer = d3.event;
  var graphX = pointer.x - width / 2;
  var graphY = pointer.y - height / 2;
  return simulation.find(graphX, graphY);
}
// Drag start: if no other drag is active, raise the simulation's alpha
// target to 0.3 and restart it, then pin the dragged node where it is.
function dragstarted() {
  var event = d3.event;
  if (!event.active) {
    simulation.alphaTarget(0.3).restart();
  }
  var node = event.subject;
  node.fx = node.x;
  node.fy = node.y;
}
// During a drag: keep the dragged node pinned at the event's position.
function dragged() {
  var event = d3.event;
  var node = event.subject;
  node.fx = event.x;
  node.fy = event.y;
}
// Drag end: if no other drag is active, drop the simulation's alpha target
// back to 0, then unpin the node by clearing its fixed position.
function dragended() {
  var event = d3.event;
  if (!event.active) {
    simulation.alphaTarget(0);
  }
  var node = event.subject;
  node.fx = null;
  node.fy = null;
}
// Adds one straight segment, from the link's source node to its target
// node, to the current canvas path. The caller strokes the path.
function drawLink(d) {
  var from = d.source;
  var to = d.target;
  context.moveTo(from.x, from.y);
  context.lineTo(to.x, to.y);
}
// Draws one node: a filled circle centred on (d.x, d.y), with the node's
// sha1 written to the right of it.
//   d.radius - value the circle size is derived from (log-scaled)
//   d.sha1   - label text; its first six characters are used as the hex
//              fill colour
function drawNode(d) {
  // Logarithmic scaling keeps large values from producing huge circles.
  var radius = (Math.log10(d.radius) + 1) * 5;

  context.beginPath();
  // Start the path at the arc's starting point. Starting at the centre
  // would make arc() add a centre-to-edge line, which shows up as a spoke
  // whenever this path is stroked.
  context.moveTo(d.x + radius, d.y);
  context.arc(d.x, d.y, radius, 0, 2 * Math.PI);

  context.strokeStyle = "#000000";
  context.strokeText(d.sha1, d.x + radius + 5, d.y);

  context.fillStyle = '#' + d.sha1.slice(0, 6).toUpperCase();
  context.fill();
}
// Hover handler stub: appends a <text> element showing d.x to the element
// bound as `this` and positions it via x() and y().
// NOTE(review): nothing in the visible script registers this handler, and
// no `x` or `y` functions are defined there either, so calling it as-is
// would throw a ReferenceError -- confirm before wiring it up.
function mouseOver(d, i) {
  var label = d3.select(this)
    .append("text")
    .text(d.x);
  label
    .attr("x", x(d.x))
    .attr("y", y(d.y));
}
// Placeholder hover-exit handler: intentionally does nothing yet.
// NOTE(review): not registered anywhere in the visible script.
function mouseOut(d) {
}
});
</script>