# Read all disk images under the path given in argv[1] and insert their metadata into the SQLite DB.
|
|
|
|
import binascii
|
|
import hashlib
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
|
|
# Path of the SQLite database file that main() connects to and updates.
DB_PATH = '/tank/apple2/data/apple2.db'
|
|
|
|
# File extensions (compared case-insensitively) that mark a disk image.
_DISK_EXTENSIONS = ('.dsk', '.do', '.po')
# Images shorter than this many bytes are reported as truncated and skipped.
_MIN_DISK_SIZE = 140 * 1024
# Image lengths must be a multiple of this; the first sector is kept as boot1.
_SECTOR_SIZE = 256


def _find_disk_images(root):
    """Return the paths of all disk images below *root* with ASCII-only paths."""
    disks = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            if not filename.lower().endswith(_DISK_EXTENSIONS):
                continue
            disk = os.path.join(dirpath, filename)
            try:
                # TODO: deal with filenames with 8-bit characters
                # Encoding to ASCII fails for any non-ASCII path on both
                # Python 2 (UnicodeDecodeError) and 3 (UnicodeEncodeError).
                disk.encode('ascii')
            except UnicodeError:
                print("Skipping disk %s" % disk)
                continue
            disks.append(disk)
    return disks


def _hash_disks(disks):
    """Read every image in *disks* and group the valid ones by content hash.

    Returns a ``(hashes, boot_sectors)`` pair: ``hashes`` maps the SHA-1 hex
    digest of a whole image to the list of paths with that content, and
    ``boot_sectors`` maps the same digest to the image's first 256 bytes.
    Truncated or non-sector-aligned images are reported and left out.
    """
    num_disks = len(disks)
    hashes = {}
    boot_sectors = {}
    for idx, path in enumerate(disks):
        print("(%d/%d:%d%%) %s" % (
            idx + 1, num_disks, (idx + 1) * 100 // num_disks, path))
        # Disk images are raw binary data, so read them in binary mode.
        with open(path, 'rb') as image:
            disk = bytearray(image.read())

        length = len(disk)
        if length < _MIN_DISK_SIZE:
            print("Disk %s truncated (%d bytes)" % (path, length))
            continue
        if length % _SECTOR_SIZE != 0:
            print("Disk length %d does not align to sector boundary" % length)
            continue

        sha1 = hashlib.sha1(disk).hexdigest()
        hashes.setdefault(sha1, []).append(path)
        boot_sectors[sha1] = disk[:_SECTOR_SIZE]
    return hashes, boot_sectors


def _store_disks(db_path, hashes, boot_sectors):
    """Write boot sectors and disk records into the SQLite DB at *db_path*.

    Uses the existing tables ``Boot1`` (columns ``sha1``, ``data``) and
    ``Disks`` (columns ``path``, ``sha1``, ``name``, ``boot1_sha1``).
    NOTE(review): INSERT OR IGNORE only avoids duplicate rows if
    ``Boot1.sha1`` and ``Disks.path`` are unique keys -- confirm in the schema.
    """
    num_unique_disks = len(boot_sectors)
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        boot1_hashes = set()
        for idx, (disk_hash, boot1) in enumerate(boot_sectors.items()):
            paths = hashes[disk_hash]
            # Pick first duplicate disk name
            disk_name = paths[0]
            print("(%d/%d %d%%): %s" % (
                idx + 1, num_unique_disks,
                100 * (idx + 1) // num_unique_disks, disk_name))
            boot1_hash = hashlib.sha1(boot1).hexdigest()

            if boot1_hash not in boot1_hashes:
                boot1_hashes.add(boot1_hash)
                # Insert first, then update, in case a record already exists.
                # INSERT OR REPLACE would clear the row's other columns, and
                # this two-step form needs no UPSERT support from SQLite.
                cursor.execute(
                    "INSERT OR IGNORE INTO Boot1 (sha1) VALUES (?)",
                    [boot1_hash]
                )
                cursor.execute(
                    "UPDATE Boot1 SET data=? WHERE sha1=?",
                    [sqlite3.Binary(boot1), boot1_hash]
                )

            for disk in paths:
                cursor.execute(
                    'INSERT OR IGNORE INTO Disks (path, sha1) VALUES (?, ?)',
                    [disk, disk_hash]
                )
                cursor.execute(
                    'UPDATE Disks set name=?, sha1=?, boot1_sha1=? WHERE path=?',
                    [os.path.basename(disk), disk_hash, boot1_hash, disk]
                )

            # Commit per unique image so completed work survives a later error.
            conn.commit()
    finally:
        conn.close()


def main(root=None, db_path=None):
    """Scan a directory tree for disk images and record them in SQLite.

    Args:
        root: Directory to walk. Defaults to ``sys.argv[1]``; if that is
            missing the process exits with a usage message.
        db_path: SQLite database file to update. Defaults to ``DB_PATH``.

    Files ending in .dsk, .do or .po (any case) are read; images shorter than
    140 KiB or not a multiple of 256 bytes are reported and skipped. Each
    remaining image is stored with the SHA-1 of its contents and of its first
    256 bytes. Nothing is written if no image files are found.
    """
    if root is None:
        if len(sys.argv) < 2:
            sys.exit("usage: %s <disk-image-directory>" % sys.argv[0])
        root = sys.argv[1]
    if db_path is None:
        db_path = DB_PATH

    disks = _find_disk_images(root)
    if not disks:
        return

    # TODO: no need for this first pass if we're going to insert duplicate
    # disks anyway
    hashes, boot_sectors = _hash_disks(disks)

    unique_disks = list(hashes)
    print(unique_disks)
    print("%d/%d unique files" % (len(unique_disks), len(disks)))

    _store_disks(db_path, hashes, boot_sectors)
|
|
|
|
if __name__ == "__main__":
    # Run the scan only when executed as a script, not when imported.
    main()
|