machfs/machfs/btree.py

188 lines
6.3 KiB
Python

import struct
import bitmanip as _bitmanip
class _Node:
"""Wrapper to use while serialising a B*-tree"""
def __init__(self, **kwargs):
self.records = []
self.ndFLink = self.ndBLink = self.ndType = self.ndNHeight = 0
self.__dict__.update(kwargs)
def __bytes__(self):
buf = bytearray(512)
next_left = 14
next_right = 510
for r in self.records:
if next_left + len(r) > next_right - 2:
raise ValueError('cannot fit these records in a B*-tree node')
buf[next_left:next_left+len(r)] = r
struct.pack_into('>H', buf, next_right, next_left)
next_left += len(r)
next_right -= 2
struct.pack_into('>H', buf, next_right, next_left) # offset of free space
struct.pack_into('>LLBBH', buf, 0,
self.ndFLink, self.ndBLink, self.ndType, self.ndNHeight&0xFF, len(self.records))
return bytes(buf)
def records_fit(self):
"""Tell whether the records will fit in 512 bytes"""
try:
self.__bytes__()
except ValueError:
return False
else:
return True
def unpack_extent_record(record):
"""Extract up to three (first_block, block_count) tuples from a 12-byte extent record"""
a, b, c, d, e, f = struct.unpack('>6H', record)
l = []
if b: l.append((a, b))
if d: l.append((c, d))
if f: l.append((e, f))
return l
def _unpack_btree_node(buf, start):
"""Slice a btree node into records, including the 14-byte node descriptor"""
ndFLink, ndBLink, ndType, ndNHeight, ndNRecs = struct.unpack_from('>LLBBH', buf, start)
offsets = list(reversed(struct.unpack_from('>%dH'%(ndNRecs+1), buf, start+512-2*(ndNRecs+1))))
starts = offsets[:-1]
stops = offsets[1:]
records = [bytes(buf[start+i_start:start+i_stop]) for (i_start, i_stop) in zip(starts, stops)]
return ndFLink, ndBLink, ndType, ndNHeight, records
def dump_btree(buf):
"""Walk an HFS B*-tree, returning an iterator of (key, value) tuples."""
# Get the header node
ndFLink, ndBLink, ndType, ndNHeight, (header_rec, unused_rec, map_rec) = _unpack_btree_node(buf, 0)
# Ask about the header record in the header node
bthDepth, bthRoot, bthNRecs, bthFNode, bthLNode, bthNodeSize, bthKeyLen, bthNNodes, bthFree = \
struct.unpack_from('>HLLLLHHLL', header_rec)
# print('btree', bthDepth, bthRoot, bthNRecs, bthFNode, bthLNode, bthNodeSize, bthKeyLen, bthNNodes, bthFree)
# And iterate through the linked list of leaf nodes
this_leaf = bthFNode
while True:
ndFLink, ndBLink, ndType, ndNHeight, records = _unpack_btree_node(buf, 512*this_leaf)
yield from records
if this_leaf == bthLNode:
break
this_leaf = ndFLink
def _pack_leaf_record(key, value):
"""Pack a key-value pair to go into a leaf node as a record"""
if len(value) & 1: value += b'\x00'
b = bytes([len(key)+1, 0, *key])
if len(b) & 1: b += bytes(1)
b += value
return b
def _make_index_record(rec, pointer):
"""Convert a key-value to a special key-pointer record"""
rec = rec[:1+rec[0]]
rec = b'\x25' + rec[1:]
rec += bytes(rec[0]+1-len(rec))
rec += struct.pack('>L', pointer)
return rec
def make_btree(records, bthKeyLen):
nodelist = [] # append to this as we go
# pointers per index node, range 2-11
index_step = 8 # not really worth tuning
# First node is always a header node, with three records:
# header records, reserved record, bitmap record
headnode = _Node(ndType=1, ndNHeight=0, records=['header placeholder', bytes(128), 'bitmap placeholder'])
nodelist.append(headnode)
# Followed (in our implementation) by leaf nodes
bthNRecs = 0
bthRoot = 0
bthDepth = 0
for key, val in records:
bthNRecs += 1
bthRoot = 1
bthDepth = 1
packed = _pack_leaf_record(key, val)
if bthNRecs == 1:
nodelist.append(_Node(ndType=0xFF, ndNHeight=1))
nodelist[-1].records.append(packed)
if not nodelist[-1].records_fit():
nodelist[-1].records.pop()
nodelist.append(_Node(ndType=0xFF, ndNHeight=1, records=[packed]))
# Create index nodes (some sort of Btree, they tell me)
while len(nodelist) - bthRoot > 1:
nums = list(range(bthRoot, len(nodelist)))
groups = [nums[i:i+index_step] for i in range(0, len(nums), index_step)]
bthRoot = len(nodelist)
bthDepth = nodelist[-1].ndNHeight + 1
# each element of groups will become a record:
# it is currently a list of node indices to point to
for g in groups:
newnode = _Node(ndType=0, ndNHeight=bthDepth)
for pointer in g:
rec = nodelist[pointer].records[0]
rec = _make_index_record(rec, pointer)
newnode.records.append(rec)
nodelist.append(newnode)
# Header node already has a 256-bit bitmap record (2048-bit)
# Add map nodes with 3952-bit bitmap recs to cover every node
bits_covered = 2048
mapnodes = []
while bits_covered < len(nodelist):
mapnode = _Node(ndType=2, ndNHeight=1)
nodelist.append(mapnode)
mapnodes.append(mapnode)
mapnode.records = [bytes(3952//8)]
bits_covered += len(mapnode.records[0]) * 8
# Populate the bitmap (1 = used)
headnode.records[2] = _bitmanip.bits(2048, len(nodelist))
for i, mnode in mapnodes:
nset = len(nodelist) - 2048 - i*3952
mnode.records = [_bitmanip.bits(3952, nset)]
# Run back and forth to join up one linked list for each type
most_recent = {}
for i, node in enumerate(nodelist):
node.ndBLink = most_recent.get(node.ndType, 0)
most_recent[node.ndType] = i
bthLNode = most_recent.get(0xFF, 0)
most_recent = {}
for i, node in reversed(list(enumerate(nodelist))):
node.ndFLink = most_recent.get(node.ndType, 0)
most_recent[node.ndType] = i
bthFNode = most_recent.get(0xFF, 0)
# Populate the first (header) record of the header node
bthNodeSize = 512
bthNNodes = len(nodelist); bthFree = 0
headnode.records[0] = struct.pack('>HLLLLHHLL76x',
bthDepth, bthRoot, bthNRecs, bthFNode, bthLNode,
bthNodeSize, bthKeyLen, bthNNodes, bthFree)
return b''.join(bytes(node) for node in nodelist)