Create a tree structure for storing file metadata

This adds a "simple" red-black tree that can store a record of each
file on both the local and remote sides.
This commit is contained in:
Dietrich Epp 2022-03-30 05:23:37 -04:00
parent 56c8229dd5
commit 70abe95ff1
7 changed files with 702 additions and 2 deletions

View File

@ -104,7 +104,10 @@ typedef enum
kErrorNoMemory,
/* Invalid table data. */
kErrorBadData
kErrorBadData,
/* Too many files in one directory. */
kErrorDirectoryTooLarge
} ErrorCode;
/*==============================================================================
@ -131,4 +134,14 @@ Boolean ResizeHandle(Handle h, Size newSize);
/* Fill memory with zeroes.
ptr: start of the region to clear.
size: length of the region; callers in this project pass sizeof-based byte
counts. */
void MemClear(void *ptr, Size size);
/*==============================================================================
Assertions
==============================================================================*/
/* Provide assert(). When NDEBUG is defined, assertions compile to nothing and
   <assert.h> is not included at all. The test uses #ifdef rather than #if so
   that a bare "#define NDEBUG" (no value) does not make the directive
   ill-formed. */
#ifdef NDEBUG
#define assert(x) ((void)0)
#else
#include <assert.h>
#endif
#endif

View File

@ -15,7 +15,12 @@ void Fatalf(const char *msg, ...)
exit(1);
}
static const char *const kErrorNames[] = {"ok", "no memory", "bad data"};
/* Human-readable descriptions of error codes. The entries after "ok" are listed
in the same order as the kErrorNoMemory, kErrorBadData and
kErrorDirectoryTooLarge enumerators; presumably the table is indexed by
ErrorCode, so a new error code needs a matching entry here. */
static const char *const kErrorNames[] = {
"ok",
"no memory",
"bad data",
"too many files in one directory",
};
const char *ErrorDescription(ErrorCode err)
{

32
sync/BUILD.bazel Normal file
View File

@ -0,0 +1,32 @@
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load("//bazel:copts.bzl", "COPTS")
# Library target "tree": compiles tree.c and exposes the meta.h and tree.h
# headers. Depends on //lib and is visible to every package.
cc_library(
name = "tree",
srcs = [
"tree.c",
],
hdrs = [
"meta.h",
"tree.h",
],
copts = COPTS,
visibility = ["//visibility:public"],
deps = [
"//lib",
],
)
# Test target "tree_test": builds tree_test.c against the :tree library, //lib
# and the //lib:test support library. Declared as a "small" test.
cc_test(
name = "tree_test",
size = "small",
srcs = [
"tree_test.c",
],
copts = COPTS,
deps = [
":tree",
"//lib",
"//lib:test",
],
)

69
sync/meta.h Normal file
View File

@ -0,0 +1,69 @@
#ifndef sync_meta_h
#define sync_meta_h
/* meta.h - file metadata */
#include "lib/defs.h"
/* Timestamp of a file. The representation depends on the target, but both
   variants use the tag "Timestamp" so that code which refers to
   struct Timestamp (such as struct Metadata) compiles on either target. */
#if TARGET_API_MAC_OS8
struct Timestamp {
    /* Time value in the Mac OS representation. */
    UInt32 mactime;
};
#else
struct Timestamp {
    /* Whole seconds. */
    int64_t sec;
    /* Nanoseconds. */
    int32_t nsec;
};
#endif
/* Kind of object recorded for a file entry. The values are consecutive,
   starting at zero. */
typedef enum
{
    kTypeNotExist = 0,
    kTypeFile = 1,
    kTypeDirectory = 2
} FileType;
/* Selectors for the local and remote side of a file. struct FileRec documents
   its meta array as holding the metadata for kLocal and kRemote. */
enum
{
    kLocal = 0,
    kRemote = 1
};
enum
{
    /* Maximum length of a filename. This is a Mac OS limitation. */
    kFilenameLength = 31,
    /* Size of a filename record in 32-bit units. The record holds the length
       byte followed by up to kFilenameLength characters, rounded up to a whole
       number of 32-bit words. The length byte is counted explicitly so the
       record stays large enough if kFilenameLength ever changes. */
    kFilenameSizeU32 = (kFilenameLength + 1 + 3) / 4
};
/* The name of a file, as a Pascal string -- the first byte is the string
length, 0-31. The name must be padded with zeroes.

The same storage can be viewed as bytes (u8) or as 32-bit words (u32); both
views cover kFilenameSizeU32 * 4 bytes. */
typedef union {
/* Byte view: length byte followed by the characters and zero padding. */
UInt8 u8[kFilenameSizeU32 * 4];
/* Word view of the same bytes. */
UInt32 u32[kFilenameSizeU32];
} FileName;
/* Metadata for a file or directory. */
struct Metadata {
/* Whether the entry is a file, a directory, or does not exist. */
FileType type;
/* Size of the file (presumably in bytes -- TODO confirm). */
UInt32 size;
/* Modification time. */
struct Timestamp mod_time;
};
/* A reference to a file node by index, or 0 for none. Indexes are 1-based: ref
N designates element N-1 of the node array. */
typedef int FileRef;
/* Information about a local and remote file or directory. */
struct FileRec {
/* File names; presumably indexed by kLocal and kRemote like meta below --
TODO confirm. */
FileName name[2];
/* Metadata for kLocal and kRemote. */
struct Metadata meta[2];
/* Type and creator code for local copy. */
UInt32 type_code;
UInt32 creator_code;
};
#endif

223
sync/tree.c Normal file
View File

@ -0,0 +1,223 @@
#include "sync/tree.h"
#include <stdio.h>
/* Convert a 1-based node reference into a pointer into the node array: ref 1 is
   the first element of nodes. */
#define GetNode(nodes, ref) ((nodes) + ((ref) - 1))
/* Order two filenames. The result is negative when x sorts before y, positive
   when x sorts after y, and zero when the names are identical. The comparison
   walks the 32-bit words of the names from first to last and is decided by the
   first pair of words that differ, so both names must be zero-padded.
   NOTE: because whole words are compared, the resulting order equals plain
   byte order only on big-endian hosts. */
static int CompareFilename(const FileName *x, const FileName *y)
{
    const UInt32 *px = x->u32;
    const UInt32 *py = y->u32;
    int remaining;

    for (remaining = kFilenameSizeU32; remaining > 0; remaining--) {
        if (*px < *py) {
            return -1;
        }
        if (*px > *py) {
            return 1;
        }
        px++;
        py++;
    }
    return 0;
}
/* Size limits for file trees. */
enum
{
    /*
      Maximum height of a file tree, which bounds the length of the path
      recorded during insertion.

      A red-black tree with N black nodes on every root-to-leaf path that also
      reaches the maximum possible height, 2N, contains at least 2^N+N-1 nodes.
      With a maximum height of 32 (N = 16), a tree can therefore hold at least
      65551 nodes.
    */
    kTreeMaxHeight = 32,

    /* Number of nodes allocated the first time a node is requested. */
    kTreeInitialSize = 16
};
/* Reserve the next unused node in the tree and return its 1-based reference.
   The node storage is created with kTreeInitialSize entries on first use and
   doubled whenever it is full. Returns a negative error code
   (-kErrorNoMemory) if the storage cannot be allocated or resized. The new
   node's contents are not initialized here. */
static FileRef TreeNewNode(struct FileTree *tree)
{
    Handle storage;
    Size capacity;

    /* Fast path: there is still a free slot. */
    if (tree->count < tree->alloc) {
        return ++tree->count;
    }

    if (tree->alloc != 0) {
        /* Storage exists but is full: double it. */
        capacity = tree->alloc * 2;
        if (!ResizeHandle((Handle)tree->nodes,
                          capacity * sizeof(struct FileNode))) {
            return -kErrorNoMemory;
        }
    } else {
        /* First allocation. */
        storage = NewHandle(kTreeInitialSize * sizeof(struct FileNode));
        if (storage == NULL) {
            return -kErrorNoMemory;
        }
        tree->nodes = (struct FileNode **)storage;
        capacity = kTreeInitialSize;
    }
    tree->alloc = capacity;

    return ++tree->count;
}
/* Initialize a node in a file tree. The key is set, and all other fields are
zeroed. */
static void TreeInitNode(struct FileNode *node, const FileName *key)
{
node->key = *key;
MemClear((char *)node + sizeof(FileName), sizeof(*node) - sizeof(FileName));
}
/* Find the node with the given key in one directory's tree, inserting a new
node if the key is not present.

tree: the tree that owns all nodes.
directory: the node whose directory_root tree is used, or 0 to use tree->root.
key: the key to find or insert.

Returns the ref of the existing node when the key is already present.
Otherwise returns the ref of a new node whose fields other than the key were
zeroed before it was linked into the tree and colored. On failure, returns a
negative error code: -kErrorNoMemory if node storage cannot be obtained, or
-kErrorDirectoryTooLarge if the search path would exceed kTreeMaxHeight. */
FileRef TreeInsert(struct FileTree *tree, FileRef directory,
const FileName *key)
{
/* path[i] is the node visited at depth i on the way down from the root. */
FileRef path[kTreeMaxHeight], root, cref, pref, sref, gref, nref;
int depth, cmp, cidx, pidx;
struct FileNode *nodes, *cnode, *pnode, *gnode, *snode;
if (directory == 0) {
root = tree->root;
} else {
root = GetNode(*tree->nodes, directory)->directory_root;
}
/* Empty tree. */
if (root == 0) {
cref = TreeNewNode(tree);
if (cref <= 0) {
return cref;
}
/* The zeroed color field is kNodeBlack, so the new root is black. */
TreeInitNode(GetNode(*tree->nodes, cref), key);
if (directory == 0) {
tree->root = cref;
} else {
GetNode(*tree->nodes, directory)->directory_root = cref;
}
return cref;
}
/* Find the node, or the parent where it will be inserted. */
nodes = *tree->nodes;
pref = root;
for (depth = 0; depth < kTreeMaxHeight; depth++) {
path[depth] = pref;
pnode = GetNode(nodes, pref);
cmp = CompareFilename(key, &pnode->key);
if (cmp == 0) {
return pref;
}
cidx = cmp < 0 ? 0 : 1;
cref = pnode->children[cidx];
if (cref == 0) {
goto insert;
}
pref = cref;
}
/* Maximum depth exceeded: too many files in one directory. */
return -kErrorDirectoryTooLarge;
insert:
/* Insert a new node into the tree. */
/* The new node is recorded at path[depth + 1], which must stay in bounds. */
if (depth + 1 >= kTreeMaxHeight) {
return -kErrorDirectoryTooLarge;
}
cref = TreeNewNode(tree);
if (cref <= 0) {
return cref;
}
nref = cref;
/* Reload the node array after TreeNewNode, which may have resized the handle
(presumably moving the storage). */
nodes = *tree->nodes;
pnode = GetNode(nodes, pref);
cnode = GetNode(nodes, cref);
TreeInitNode(cnode, key);
cnode->color = kNodeRed;
pnode->children[cidx] = cref;
path[depth + 1] = cref;
/* A red child under a red parent must be repaired; a black parent needs no
further work. */
if (pnode->color != kNodeBlack) {
assert(depth > 0);
depth--;
/*
Loop invariants:
path[depth+2] == cref, cnode: child node, red
path[depth+1] == pref, pnode: parent node, red
cidx: cref is which child of parent
*/
for (;;) {
/*
path[depth] == gref, gnode: grandparent node, black
pidx: pref is which child of grandparent
sref: sibling of parent node
*/
gref = path[depth];
gnode = GetNode(nodes, gref);
assert(gnode->color == kNodeBlack);
pidx = gnode->children[0] != pref;
assert(gnode->children[pidx] == pref);
sref = gnode->children[pidx ^ 1];
snode = sref == 0 ? NULL : GetNode(nodes, sref);
if (sref == 0 || snode->color == kNodeBlack) {
/* Rotate. */
/* After either rotation the subtree's new top is red with two black
children; cref/cnode are left naming that new top. */
if (cidx == pidx) {
/* Move parent to top. */
gnode->children[cidx] = pnode->children[cidx ^ 1];
pnode->children[cidx ^ 1] = gref;
cnode->color = kNodeBlack;
cref = pref;
cnode = pnode;
} else {
/* Move child to top. */
gnode->children[cidx ^ 1] = cnode->children[cidx];
pnode->children[cidx] = cnode->children[cidx ^ 1];
cnode->children[cidx] = gref;
cnode->children[cidx ^ 1] = pref;
pnode->color = kNodeBlack;
}
if (depth == 0) {
/* The rotated subtree was the whole tree: blacken the new top and
store it as the root. */
cnode->color = kNodeBlack;
if (directory == 0) {
tree->root = cref;
} else {
GetNode(nodes, directory)->directory_root = cref;
}
break;
}
/* Attach the new top where the grandparent used to hang. */
pref = path[depth - 1];
pnode = GetNode(nodes, pref);
cidx = pnode->children[0] != gref;
assert(pnode->children[cidx] == gref);
pnode->children[cidx] = cref;
if (depth == 1 || pnode->color == kNodeBlack) {
break;
}
} else {
/* Recolor. */
pnode->color = kNodeBlack;
snode->color = kNodeBlack;
if (depth <= 1) {
/* At depth 0 the grandparent is the root and is left black. */
if (depth == 1) {
gnode->color = kNodeRed;
}
break;
}
gnode->color = kNodeRed;
/* The grandparent is now red; continue with it as the child. */
cref = gref;
cnode = gnode;
pref = path[depth - 1];
pnode = GetNode(nodes, pref);
if (pnode->color == kNodeBlack) {
break;
}
cidx = pnode->children[0] != cref;
assert(pnode->children[cidx] == cref);
}
/* Move two levels up the recorded path. */
depth -= 2;
}
}
return nref;
}

46
sync/tree.h Normal file
View File

@ -0,0 +1,46 @@
#ifndef sync_tree_h
#define sync_tree_h
/* tree.h - binary search trees of file metadata */
#include "sync/meta.h"
/* FileRef -- a reference to a file node by 1-based index, or 0 for none -- is
   declared in sync/meta.h, which is included above. It is not declared a
   second time here because repeating a typedef in the same scope is only
   permitted as of C11; earlier C dialects reject it. */
/* Color of a node in the red-black tree. kNodeBlack is zero, so a node whose
   bookkeeping fields have been zeroed is black. No trailing comma follows the
   last enumerator: C90 does not allow one, and the other enums in this project
   do not use one. */
typedef enum
{
    kNodeBlack,
    kNodeRed
} NodeColor;
/* A file record stored in a binary search tree. */
struct FileNode {
/* The sort key. This is a case-folded version of the local filename.
This must remain the first member: node initialization in tree.c zeroes
everything after the first sizeof(FileName) bytes of the node. */
FileName key;
/* Local and remote information about the file. */
struct FileRec file;
/* The root of the tree for the directory contents, if this is a
directory. Zero when there is no such tree. */
FileRef directory_root;
/* Binary search tree bookkeeping. */
NodeColor color;
/* children[0] holds keys that compare less than this node's key,
children[1] holds keys that compare greater; 0 means no child. */
FileRef children[2];
};
/* Binary search tree of files. All nodes, for every directory, live in one
array referenced through a handle. */
struct FileTree {
/* Handle to the node array, or NULL before the first allocation. */
struct FileNode **nodes;
/* Number of nodes in use. Valid refs are 1..count. */
Size count;
/* Number of nodes the array has room for. */
Size alloc;
/* Root node of the top-level tree (used when the directory argument is 0),
or 0 if that tree is empty. */
FileRef root;
};
/* Insert a node into the tree with the given key. On success, return a positive
FileRef. On failure, return a negative error code. To get the error code
value, negate the function result.

If a node with the same key already exists in the selected directory, no node
is created and the existing node's FileRef is returned. The directory argument
selects which tree is used: 0 for the tree rooted at tree->root, otherwise the
directory_root tree of the node that directory refers to. */
FileRef TreeInsert(struct FileTree *tree, FileRef directory,
const FileName *key);
#endif

312
sync/tree_test.c Normal file
View File

@ -0,0 +1,312 @@
#include "sync/tree.h"
#include "lib/test.h"
#include "lib/util.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Test parameters. */
enum
{
    /* Number of keys inserted by each test case. */
    kFileCount = 200
};
/* Advance a linear congruential generator by one step and return the new
   state. */
static UInt32 Random(UInt32 state)
{
    /* Taken from Wikipedia's LCG article,
       https://en.wikipedia.org/wiki/Linear_congruential_generator
       According to that article, these are the constants used by Numerical
       Recipes. */
    const UInt32 multiplier = 1664525;
    const UInt32 increment = 1013904223;

    return multiplier * state + increment;
}
/* Build a pseudo-random permutation of 1..count, driven by seed. The array is
   allocated with malloc; the callers in this file release it with free. Each
   new value is placed by moving an earlier element to the end of the array and
   writing the new value into the vacated slot. */
static UInt32 *Shuffle(UInt32 seed, int count)
{
    UInt32 *perm, bucket;
    int pos, pick;

    perm = malloc(sizeof(*perm) * count);
    if (perm == NULL) {
        Fatalf("out of memory");
    }
    perm[0] = 1;
    pos = 1;
    while (pos < count) {
        /* Width of each of the pos + 1 buckets the 32-bit range is split
           into. */
        bucket = (0xffffffff - pos) / (pos + 1) + 1;
        /* Draw until the bucket index names an earlier slot. */
        for (;;) {
            pick = seed / bucket;
            seed = Random(seed);
            if (pick < pos) {
                break;
            }
        }
        perm[pos] = perm[pick];
        perm[pick] = pos + 1;
        pos++;
    }
    return perm;
}
/* Look up a node by its 1-based reference. A reference outside 1..tree->count
   is reported on stderr and terminates the process with status 1. */
static struct FileNode *GetNode(struct FileTree *tree, FileRef ref)
{
    if (ref > 0 && tree->count >= ref) {
        return &(*tree->nodes)[ref - 1];
    }
    fprintf(stderr, "Error: invalid ref: %d (size = %ld)\n", ref,
            tree->count);
    exit(1);
}
/* Write a test key derived from n into the first four bytes of key: a length
   byte of 3 followed by the low 24 bits of n, most significant byte first. The
   remaining bytes of key are left untouched. */
static void SetKey(FileName *key, UInt32 n)
{
    UInt8 *bytes = key->u8;

    bytes[0] = 3;
    bytes[1] = (UInt8)(n >> 16);
    bytes[2] = (UInt8)(n >> 8);
    bytes[3] = (UInt8)n;
}
/* Forward declaration: CheckTreeNode and CheckSubTree call each other. */
static int CheckSubTree(struct FileTree *tree, FileRef root);
/* Verify the red-black tree invariants for the subtree at ref. parent is the
   node above ref (NULL at the root) and cidx says which child of parent ref
   is. Every visited node is marked through file.type_code so that a node
   reached twice is detected. Returns the number of black nodes on each path
   from this node down to a leaf, or -1 after reporting a failure. */
static int CheckTreeNode(struct FileTree *tree, FileRef ref,
                         struct FileNode *parent, int cidx)
{
    struct FileNode *node;
    int order, side, black[2];

    /* An absent child contributes no black nodes. */
    if (ref == 0) {
        return 0;
    }

    node = GetNode(tree, ref);
    if (node->file.type_code != 0) {
        Failf("node visited twice: node=%u", node->file.creator_code);
        return -1;
    }
    node->file.type_code = 1;

    if (parent != NULL) {
        if (parent->color == kNodeRed && node->color == kNodeRed) {
            Failf("red child of red node: parent=%u, child=%u",
                  parent->file.creator_code, node->file.creator_code);
            return -1;
        }
        order = memcmp(&node->key, &parent->key, sizeof(node->key));
        if (order == 0) {
            Failf("child and parent have same key: parent=%u, child=%u",
                  parent->file.creator_code, node->file.creator_code);
            return -1;
        }
        /* Child 0 must sort before the parent, child 1 after it. */
        if ((order > 0) != cidx) {
            Failf("bad sort order: parent=%u, child=%u",
                  parent->file.creator_code, node->file.creator_code);
            return -1;
        }
    }

    /* A directory node carries its own tree, which is checked as well. */
    if (node->directory_root != 0 &&
        CheckSubTree(tree, node->directory_root)) {
        return -1;
    }

    for (side = 0; side < 2; side++) {
        black[side] = CheckTreeNode(tree, node->children[side], node, side);
        if (black[side] < 0) {
            return -1;
        }
    }
    if (black[0] != black[1]) {
        Failf("mismatched tree length: node=%u, len=%d,%d",
              node->file.creator_code, black[0], black[1]);
        return -1;
    }
    return black[0] + (node->color == kNodeBlack);
}
/* Print the subtree at ref to stderr, in order, one node per line, indented by
   depth. Black nodes are shown in brackets. Each node is identified by its
   file.creator_code, which is unsigned and therefore printed with %u, matching
   the failure messages elsewhere in this file. Printed nodes are marked by
   setting file.type_code to 2; a node reached a second time is printed with a
   trailing "..." and not descended into, so a tree containing a loop still
   terminates. */
static void PrintNode(struct FileTree *tree, FileRef ref, int indent)
{
    struct FileNode *node;
    int i;
    char bl, br;

    node = GetNode(tree, ref);
    if (node->color == kNodeBlack) {
        bl = '[';
        br = ']';
    } else {
        bl = ' ';
        br = ' ';
    }
    if (node->file.type_code > 1) {
        /* This node loops. */
        for (i = 0; i < indent; i++) {
            fputc(' ', stderr);
        }
        fprintf(stderr, "%c%u...%c\n", bl, node->file.creator_code, br);
        return;
    }
    node->file.type_code = 2;
    if (node->children[0] != 0) {
        PrintNode(tree, node->children[0], indent + 4);
    }
    for (i = 0; i < indent; i++) {
        fputc(' ', stderr);
    }
    fprintf(stderr, "%c%u%c\n", bl, node->file.creator_code, br);
    if (node->children[1] != 0) {
        PrintNode(tree, node->children[1], indent + 4);
    }
}
/* Print the tree rooted at root to stderr, or "(empty)" when there is no
   root. */
static void PrintTree(struct FileTree *tree, FileRef root)
{
    if (root != 0) {
        PrintNode(tree, root, 0);
        return;
    }
    fputs("(empty)\n", stderr);
}
/* Check one complete tree: the root, if any, must be black, and every node
   below it must satisfy the invariants verified by CheckTreeNode. On failure
   the tree is printed to stderr and -1 is returned; otherwise returns 0. */
static int CheckSubTree(struct FileTree *tree, FileRef root)
{
    int failed;

    if (root == 0 || GetNode(tree, root)->color == kNodeBlack) {
        failed = CheckTreeNode(tree, root, NULL, 0) < 0;
    } else {
        Failf("root is not black");
        failed = 1;
    }
    if (!failed) {
        return 0;
    }
    PrintTree(tree, root);
    return -1;
}
static int CheckTree(struct FileTree *tree)
{
struct FileNode *nodes;
int i, n;
/* Mark all as unvisited. */
nodes = *tree->nodes;
for (i = 0, n = tree->count; i < n; i++) {
nodes[i].file.type_code = 0;
}
return CheckSubTree(tree, tree->root);
}
/* Insert the given keys into one directory of the tree and verify the result.

   tree: tree under test.
   directory: directory node to insert into, or 0 for the top level.
   count: number of entries in files.
   files: key numbers to insert, in insertion order.

   The first pass inserts each key, tags the returned node with its key number
   through file.creator_code, and checks the tree invariants after every
   insertion; it stops at the first failure. The second pass inserts every key
   again and checks that the node created in the first pass is returned instead
   of a new one. A failed lookup in the second pass is reported and skipped, so
   the remaining keys are still checked. */
static void TestTree1(struct FileTree *tree, FileRef directory, int count,
                      const UInt32 *files)
{
    int i;
    UInt32 fl;
    FileName key;
    FileRef ref;
    struct FileNode *node;

    /* Insert into the tree in random order. */
    memset(&key, 0, sizeof(key));
    for (i = 0; i < count; i++) {
        fl = files[i];
        SetKey(&key, fl);
        ref = TreeInsert(tree, directory, &key);
        if (ref < 0) {
            Failf("TreeInsert (i=%d): %s", i, ErrorDescriptionOrDie(-ref));
            return;
        }
        if (ref == 0) {
            Failf("ref == 0 (i=%d)", i);
            return;
        }
        node = GetNode(tree, ref);
        node->file.creator_code = fl; /* To test we get same node back. */
        /* Check tree invariants. */
        if (CheckTree(tree)) {
            return;
        }
    }
    /* Test that we can find all nodes. */
    for (i = 0; i < count; i++) {
        fl = files[i];
        SetKey(&key, fl);
        ref = TreeInsert(tree, directory, &key);
        if (ref < 0) {
            Failf("second TreeInsert (i=%d): %s", i,
                  ErrorDescriptionOrDie(-ref));
            /* There is no node to inspect; GetNode would exit the process. */
            continue;
        }
        if (ref == 0) {
            Failf("second TreeInsert returned ref == 0 (i=%d)", i);
            continue;
        }
        node = GetNode(tree, ref);
        if (memcmp(&node->key, &key, sizeof(key)) != 0) {
            Failf("got node with wrong key");
            continue;
        }
        /* Check we don't get a new node. */
        if (node->file.creator_code != fl) {
            Failf(
                "got wrong node back from equery "
                "(i=%d, ref=%d, cc=%u, expect cc=%u)",
                i, ref, node->file.creator_code, fl);
        }
    }
}
/* Put a tree structure into the empty state: no root, no nodes in use and no
   node storage. Any storage previously referenced by the tree is not
   released. */
static void ClearTree(struct FileTree *tree)
{
    tree->root = 0;
    tree->count = 0;
    tree->alloc = 0;
    tree->nodes = NULL;
}
/* Insert kFileCount keys in a pseudo-random order chosen by seed, first at the
   top level and then into the directory of node 1 of the same tree. */
static void TestRandomTree(UInt32 seed)
{
    struct FileTree tree;
    UInt32 *order;

    order = Shuffle(seed, kFileCount);
    ClearTree(&tree);

    /* Top-level tree. */
    SetTestNamef("Random(%u,root)", seed);
    TestTree1(&tree, 0, kFileCount, order);

    /* Tree stored under node 1, which the first pass created. */
    SetTestNamef("Random(%u,subdir)", seed);
    TestTree1(&tree, 1, kFileCount, order);

    DisposeHandle((Handle)tree.nodes);
    free(order);
}
/* Insert kFileCount keys in strictly ascending order and then, into a logically
   emptied tree that reuses the same node storage, in strictly descending
   order. Each pass sets its own test name so that failures are not reported
   under the name left behind by an earlier test. */
static void TestLinearTree(void)
{
    UInt32 *files;
    struct FileTree tree;
    int i;

    files = malloc(kFileCount * sizeof(*files));
    if (files == NULL) {
        Fatalf("out of memory");
    }

    for (i = 0; i < kFileCount; i++) {
        files[i] = i + 1;
    }
    ClearTree(&tree);
    SetTestNamef("Linear(ascending)");
    TestTree1(&tree, 0, kFileCount, files);

    for (i = 0; i < kFileCount; i++) {
        files[i] = kFileCount - i;
    }
    /* Forget the existing nodes but keep the allocated storage. */
    tree.count = 0;
    tree.root = 0;
    SetTestNamef("Linear(descending)");
    TestTree1(&tree, 0, kFileCount, files);

    DisposeHandle((Handle)tree.nodes);
    free(files);
}
int main(int argc, char **argv)
{
(void)argc;
(void)argv;
TestRandomTree(123);
TestRandomTree(456);
TestLinearTree();
return TestsDone();
}