From 70abe95ff1cec5798b180b81a3db692988c238df Mon Sep 17 00:00:00 2001
From: Dietrich Epp
Date: Wed, 30 Mar 2022 05:23:37 -0400
Subject: [PATCH] Create a tree structure for storing file metadata

This adds a "simple" red-black tree that can store a record of each file
both on the local and remote side.
---
 lib/defs.h       |  15 ++-
 lib/util.c       |   7 +-
 sync/BUILD.bazel |  32 +++++
 sync/meta.h      |  69 +++++++++++
 sync/tree.c      | 229 +++++++++++++++++++++++++++++++++
 sync/tree.h      |  50 +++++++
 sync/tree_test.c | 336 +++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 736 insertions(+), 2 deletions(-)
 create mode 100644 sync/BUILD.bazel
 create mode 100644 sync/meta.h
 create mode 100644 sync/tree.c
 create mode 100644 sync/tree.h
 create mode 100644 sync/tree_test.c

diff --git a/lib/defs.h b/lib/defs.h
index 45df4dd..257a949 100644
--- a/lib/defs.h
+++ b/lib/defs.h
@@ -104,7 +104,10 @@ typedef enum
     kErrorNoMemory,
 
     /* Invaild table data. */
-    kErrorBadData
+    kErrorBadData,
+
+    /* Too many files in one directory. */
+    kErrorDirectoryTooLarge
 } ErrorCode;
 
 /*==============================================================================
@@ -131,4 +134,14 @@ Boolean ResizeHandle(Handle h, Size newSize);
 /* Fill memory with zeroes. */
 void MemClear(void *ptr, Size size);
 
+/*==============================================================================
+Assertions
+==============================================================================*/
+
+#ifdef NDEBUG
+#define assert(x) ((void)0)
+#else
+#include <assert.h>
+#endif
+
 #endif
diff --git a/lib/util.c b/lib/util.c
index 4c86e13..d2bd181 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -15,7 +15,12 @@ void Fatalf(const char *msg, ...)
     exit(1);
 }
 
-static const char *const kErrorNames[] = {"ok", "no memory", "bad data"};
+static const char *const kErrorNames[] = {
+    "ok",
+    "no memory",
+    "bad data",
+    "too many files in one directory",
+};
 
 const char *ErrorDescription(ErrorCode err)
 {
diff --git a/sync/BUILD.bazel b/sync/BUILD.bazel
new file mode 100644
index 0000000..b9f4022
--- /dev/null
+++ b/sync/BUILD.bazel
@@ -0,0 +1,32 @@
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
+load("//bazel:copts.bzl", "COPTS")
+
+cc_library(
+    name = "tree",
+    srcs = [
+        "tree.c",
+    ],
+    hdrs = [
+        "meta.h",
+        "tree.h",
+    ],
+    copts = COPTS,
+    visibility = ["//visibility:public"],
+    deps = [
+        "//lib",
+    ],
+)
+
+cc_test(
+    name = "tree_test",
+    size = "small",
+    srcs = [
+        "tree_test.c",
+    ],
+    copts = COPTS,
+    deps = [
+        ":tree",
+        "//lib",
+        "//lib:test",
+    ],
+)
diff --git a/sync/meta.h b/sync/meta.h
new file mode 100644
index 0000000..2dc3445
--- /dev/null
+++ b/sync/meta.h
@@ -0,0 +1,69 @@
+#ifndef sync_meta_h
+#define sync_meta_h
+/* meta.h - file metadata */
+
+#include "lib/defs.h"
+
+#if TARGET_API_MAC_OS8
+struct Timestamp {
+    UInt32 mactime;
+};
+#else
+struct Timestamp {
+    int64_t sec;
+    int32_t nsec;
+};
+#endif
+
+typedef enum
+{
+    kTypeNotExist,
+    kTypeFile,
+    kTypeDirectory
+} FileType;
+
+enum
+{
+    kLocal,
+    kRemote
+};
+
+enum
+{
+    /* Maximum length of a filename. This is a Mac OS limitation. */
+    kFilenameLength = 31,
+
+    /* Size of a filename record in 32-bit units. */
+    kFilenameSizeU32 = (kFilenameLength + 3) / 4
+};
+
+/* The name of a file, as a Pascal string -- the first byte is the string
+   length, 0-31. The name must be padded with zeroes. */
+typedef union {
+    UInt8 u8[kFilenameSizeU32 * 4];
+    UInt32 u32[kFilenameSizeU32];
+} FileName;
+
+/* Metadata for a file or directory. */
+struct Metadata {
+    FileType type;
+    UInt32 size;
+    struct Timestamp mod_time;
+};
+
+/* A reference to a file node by 1-based index, or 0 for none. */
+typedef int FileRef;
+
+/* Information about a local and remote file or directory. */
+struct FileRec {
+    FileName name[2];
+
+    /* Metadata for kLocal and kRemote. */
+    struct Metadata meta[2];
+
+    /* Type and creator code for local copy. */
+    UInt32 type_code;
+    UInt32 creator_code;
+};
+
+#endif
diff --git a/sync/tree.c b/sync/tree.c
new file mode 100644
index 0000000..e919dac
--- /dev/null
+++ b/sync/tree.c
@@ -0,0 +1,229 @@
+#include "sync/tree.h"
+
+#include <stddef.h> /* NULL. NOTE(review): original header name lost. */
+
+/* Resolve a 1-based FileRef to a pointer into the node array. */
+#define GetNode(nodes, ref) ((nodes) + (ref)-1)
+
+/* Compare two filenames. Return <0 if x comes before y, >0 if x comes after y,
+   or 0 if x is equal to y. Only compares the raw bytes in the filename. The
+   filenames must be zero-padded. */
+static int CompareFilename(const FileName *x, const FileName *y)
+{
+    UInt32 ux, uy;
+    int i;
+
+    for (i = 0; i < kFilenameSizeU32; i++) {
+        ux = x->u32[i];
+        uy = y->u32[i];
+        if (ux != uy) {
+            return ux < uy ? -1 : 1;
+        }
+    }
+    return 0;
+}
+
+enum
+{
+    /*
+      Maximum height of a file tree.
+
+      If a red-black tree has N black nodes in every path from root to leaf, and
+      has maximum height (which is 2N), then it contains at least 2^N+N-1 nodes.
+      Therefore, given a maximum height of 32, we can have at least 65551 nodes.
+    */
+    kTreeMaxHeight = 32,
+
+    /* Initial number of nodes allocated. */
+    kTreeInitialSize = 16
+};
+
+/* Allocate a new node in the tree. Return a negative error code on failure. */
+static FileRef TreeNewNode(struct FileTree *tree)
+{
+    Handle h;
+    Size newalloc;
+
+    if (tree->count >= tree->alloc) {
+        if (tree->alloc == 0) {
+            h = NewHandle(kTreeInitialSize * sizeof(struct FileNode));
+            if (h == NULL) {
+                return -kErrorNoMemory;
+            }
+            tree->nodes = (struct FileNode **)h;
+            tree->alloc = kTreeInitialSize;
+        } else {
+            newalloc = tree->alloc * 2;
+            if (!ResizeHandle((Handle)tree->nodes,
+                              newalloc * sizeof(struct FileNode))) {
+                return -kErrorNoMemory;
+            }
+            tree->alloc = newalloc;
+        }
+    }
+
+    return ++tree->count;
+}
+
+/* Initialize a node in a file tree. The key is set, and all other fields are
+   zeroed. */
+static void TreeInitNode(struct FileNode *node, const FileName *key)
+{
+    node->key = *key;
+    MemClear((char *)node + sizeof(FileName), sizeof(*node) - sizeof(FileName));
+}
+
+/* Find or insert the node for key. See tree.h for the contract.
+
+   The search records the path from the root so the tree can be rebalanced
+   without parent links. Node pointers are reloaded from the handle after
+   TreeNewNode, since that call can resize the node array. */
+FileRef TreeInsert(struct FileTree *tree, FileRef directory,
+                   const FileName *key)
+{
+    FileRef path[kTreeMaxHeight], root, cref, pref, sref, gref, nref;
+    int depth, cmp, cidx, pidx;
+    struct FileNode *nodes, *cnode, *pnode, *gnode, *snode;
+
+    if (directory == 0) {
+        root = tree->root;
+    } else {
+        root = GetNode(*tree->nodes, directory)->directory_root;
+    }
+
+    /* Empty tree. */
+    if (root == 0) {
+        cref = TreeNewNode(tree);
+        if (cref <= 0) {
+            return cref;
+        }
+        TreeInitNode(GetNode(*tree->nodes, cref), key);
+        if (directory == 0) {
+            tree->root = cref;
+        } else {
+            GetNode(*tree->nodes, directory)->directory_root = cref;
+        }
+        return cref;
+    }
+
+    /* Find the node, or the parent where it will be inserted. */
+    nodes = *tree->nodes;
+    pref = root;
+    for (depth = 0; depth < kTreeMaxHeight; depth++) {
+        path[depth] = pref;
+        pnode = GetNode(nodes, pref);
+        cmp = CompareFilename(key, &pnode->key);
+        if (cmp == 0) {
+            return pref;
+        }
+        cidx = cmp < 0 ? 0 : 1;
+        cref = pnode->children[cidx];
+        if (cref == 0) {
+            goto insert;
+        }
+        pref = cref;
+    }
+
+    /* Maximum depth exceeded: too many files in one directory. */
+    return -kErrorDirectoryTooLarge;
+
+insert:
+    /* Insert a new node into the tree. */
+    if (depth + 1 >= kTreeMaxHeight) {
+        return -kErrorDirectoryTooLarge;
+    }
+    cref = TreeNewNode(tree);
+    if (cref <= 0) {
+        return cref;
+    }
+    nref = cref;
+    nodes = *tree->nodes;
+    pnode = GetNode(nodes, pref);
+    cnode = GetNode(nodes, cref);
+    TreeInitNode(cnode, key);
+    cnode->color = kNodeRed;
+    pnode->children[cidx] = cref;
+    path[depth + 1] = cref;
+
+    if (pnode->color != kNodeBlack) {
+        assert(depth > 0);
+        depth--;
+        /*
+          Loop invariants:
+          path[depth+2] == cref, cnode: child node, red
+          path[depth+1] == pref, pnode: parent node, red
+          cidx: cref is which child of parent
+        */
+        for (;;) {
+            /*
+              path[depth] == gref, gnode: grandparent node, black
+              pidx: pref is which child of grandparent
+              sref: sibling of parent node
+            */
+            gref = path[depth];
+            gnode = GetNode(nodes, gref);
+            assert(gnode->color == kNodeBlack);
+            pidx = gnode->children[0] != pref;
+            assert(gnode->children[pidx] == pref);
+            sref = gnode->children[pidx ^ 1];
+            snode = sref == 0 ? NULL : GetNode(nodes, sref);
+            if (sref == 0 || snode->color == kNodeBlack) {
+                /* Rotate. */
+                if (cidx == pidx) {
+                    /* Move parent to top. */
+                    gnode->children[cidx] = pnode->children[cidx ^ 1];
+                    pnode->children[cidx ^ 1] = gref;
+                    cnode->color = kNodeBlack;
+                    cref = pref;
+                    cnode = pnode;
+                } else {
+                    /* Move child to top. */
+                    gnode->children[cidx ^ 1] = cnode->children[cidx];
+                    pnode->children[cidx] = cnode->children[cidx ^ 1];
+                    cnode->children[cidx] = gref;
+                    cnode->children[cidx ^ 1] = pref;
+                    pnode->color = kNodeBlack;
+                }
+                if (depth == 0) {
+                    cnode->color = kNodeBlack;
+                    if (directory == 0) {
+                        tree->root = cref;
+                    } else {
+                        GetNode(nodes, directory)->directory_root = cref;
+                    }
+                    break;
+                }
+                pref = path[depth - 1];
+                pnode = GetNode(nodes, pref);
+                cidx = pnode->children[0] != gref;
+                assert(pnode->children[cidx] == gref);
+                pnode->children[cidx] = cref;
+                if (depth == 1 || pnode->color == kNodeBlack) {
+                    break;
+                }
+            } else {
+                /* Recolor. */
+                pnode->color = kNodeBlack;
+                snode->color = kNodeBlack;
+                if (depth <= 1) {
+                    if (depth == 1) {
+                        gnode->color = kNodeRed;
+                    }
+                    break;
+                }
+                gnode->color = kNodeRed;
+                cref = gref;
+                cnode = gnode;
+                pref = path[depth - 1];
+                pnode = GetNode(nodes, pref);
+                if (pnode->color == kNodeBlack) {
+                    break;
+                }
+                cidx = pnode->children[0] != cref;
+                assert(pnode->children[cidx] == cref);
+            }
+            depth -= 2;
+        }
+    }
+    return nref;
+}
diff --git a/sync/tree.h b/sync/tree.h
new file mode 100644
index 0000000..08566bb
--- /dev/null
+++ b/sync/tree.h
@@ -0,0 +1,50 @@
+#ifndef sync_tree_h
+#define sync_tree_h
+/* tree.h - binary search trees of file metadata */
+
+#include "sync/meta.h"
+
+/* A reference to a file node by 1-based index, or 0 for none. */
+typedef int FileRef;
+
+/* Node colors. kNodeBlack must be 0, because new nodes are zero-filled and
+   the first node inserted into an empty tree is left that way. */
+typedef enum
+{
+    kNodeBlack,
+    kNodeRed
+} NodeColor;
+
+/* A file record stored in a binary search tree. */
+struct FileNode {
+    /* The sort key. This is a case-folded version of the local filename. */
+    FileName key;
+
+    struct FileRec file;
+
+    /* The root of the tree for the directory contents, if this is a
+       directory. */
+    FileRef directory_root;
+
+    /* Binary search tree bookkeeping. */
+    NodeColor color;
+    FileRef children[2];
+};
+
+/* Binary search tree of files. */
+struct FileTree {
+    struct FileNode **nodes;
+    Size count;
+    Size alloc;
+    FileRef root;
+};
+
+/* Find the node with the given key, inserting it if it does not exist yet. The
+   tree searched is the contents of the given directory node, or the top-level
+   tree if directory is 0. On success, return a positive FileRef. On failure,
+   return a negative error code. To get the error code value, negate the
+   function result. */
+FileRef TreeInsert(struct FileTree *tree, FileRef directory,
+                   const FileName *key);
+
+#endif
diff --git a/sync/tree_test.c b/sync/tree_test.c
new file mode 100644
index 0000000..bcdbee9
--- /dev/null
+++ b/sync/tree_test.c
@@ -0,0 +1,336 @@
+
+#include "sync/tree.h"
+
+#include "lib/test.h"
+#include "lib/util.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum
+{
+    kFileCount = 200
+};
+
+static UInt32 Random(UInt32 state)
+{
+    /* Taken from Wikipedia's LCG
+       https://en.wikipedia.org/wiki/Linear_congruential_generator
+       These are the constants used from Numerical Recipes, according to that
+       article. */
+    return 1664525 * state + 1013904223;
+}
+
+/* Return a random permutation of 1..count. */
+static UInt32 *Shuffle(UInt32 seed, int count)
+{
+    UInt32 *arr, div;
+    int i, j;
+
+    arr = malloc(sizeof(*arr) * count);
+    if (arr == 0) {
+        Fatalf("out of memory");
+    }
+    arr[0] = 1;
+    for (i = 1; i < count; i++) {
+        div = (0xffffffff - i) / (i + 1) + 1;
+        do {
+            j = seed / div;
+            seed = Random(seed);
+        } while (j >= i);
+        arr[i] = arr[j];
+        arr[j] = i + 1;
+    }
+    return arr;
+}
+
+/* Return the node for a 1-based ref. Print an error and exit if the ref is
+   out of range. */
+static struct FileNode *GetNode(struct FileTree *tree, FileRef ref)
+{
+    if (ref <= 0 || tree->count < ref) {
+        fprintf(stderr, "Error: invalid ref: %d (size = %ld)\n", ref,
+                tree->count);
+        exit(1);
+    }
+    return *tree->nodes + ref - 1;
+}
+
+/* Store n in key as a three-byte Pascal string, most significant byte first.
+   Only bytes 0-3 are written. */
+static void SetKey(FileName *key, UInt32 n)
+{
+    key->u8[0] = 3;
+    key->u8[1] = n >> 16;
+    key->u8[2] = n >> 8;
+    key->u8[3] = n;
+}
+
+static int CheckSubTree(struct FileTree *tree, FileRef root);
+
+/* Check tree invariants, return number of black nodes in path from here to
+   leaf. Return -1 if there is an error. Uses file.type_code as the "visited"
+   mark and file.creator_code to identify nodes in failure messages. */
+static int CheckTreeNode(struct FileTree *tree, FileRef ref,
+                         struct FileNode *parent, int cidx)
+{
+    struct FileNode *node;
+    int cmp, height[2], i;
+
+    if (ref == 0) {
+        return 0;
+    }
+    node = GetNode(tree, ref);
+    if (node->file.type_code != 0) {
+        Failf("node visited twice: node=%u", node->file.creator_code);
+        return -1;
+    }
+    node->file.type_code = 1;
+    if (parent != NULL) {
+        if (parent->color == kNodeRed && node->color == kNodeRed) {
+            Failf("red child of red node: parent=%u, child=%u",
+                  parent->file.creator_code, node->file.creator_code);
+            return -1;
+        }
+        cmp = memcmp(&node->key, &parent->key, sizeof(node->key));
+        if (cmp == 0) {
+            Failf("child and parent have same key: parent=%u, child=%u",
+                  parent->file.creator_code, node->file.creator_code);
+            return -1;
+        }
+        if ((cmp > 0) != cidx) {
+            Failf("bad sort order: parent=%u, child=%u",
+                  parent->file.creator_code, node->file.creator_code);
+            return -1;
+        }
+    }
+    if (node->directory_root != 0) {
+        if (CheckSubTree(tree, node->directory_root)) {
+            return -1;
+        }
+    }
+    for (i = 0; i < 2; i++) {
+        height[i] = CheckTreeNode(tree, node->children[i], node, i);
+        if (height[i] < 0) {
+            return -1;
+        }
+    }
+    if (height[0] != height[1]) {
+        Failf("mismatched tree length: node=%u, len=%d,%d",
+              node->file.creator_code, height[0], height[1]);
+        return -1;
+    }
+    return height[0] + (node->color == kNodeBlack);
+}
+
+/* Print the subtree at ref to stderr, left child first, indented four spaces
+   per level. Black nodes are shown in brackets. Nodes are marked as they are
+   printed, so a node reached twice is shown with "..." and not descended. */
+static void PrintNode(struct FileTree *tree, FileRef ref, int indent)
+{
+    struct FileNode *node;
+    int i;
+    char bl, br;
+
+    node = GetNode(tree, ref);
+    if (node->color == kNodeBlack) {
+        bl = '[';
+        br = ']';
+    } else {
+        bl = ' ';
+        br = ' ';
+    }
+    if (node->file.type_code > 1) {
+        /* This node loops. */
+        for (i = 0; i < indent; i++) {
+            fputc(' ', stderr);
+        }
+        fprintf(stderr, "%c%u...%c\n", bl, node->file.creator_code, br);
+        return;
+    }
+    node->file.type_code = 2;
+    if (node->children[0] != 0) {
+        PrintNode(tree, node->children[0], indent + 4);
+    }
+    for (i = 0; i < indent; i++) {
+        fputc(' ', stderr);
+    }
+    fprintf(stderr, "%c%u%c\n", bl, node->file.creator_code, br);
+    if (node->children[1] != 0) {
+        PrintNode(tree, node->children[1], indent + 4);
+    }
+}
+
+/* Print the tree at root to stderr, or "(empty)" if there is none. */
+static void PrintTree(struct FileTree *tree, FileRef root)
+{
+    if (root == 0) {
+        fputs("(empty)\n", stderr);
+    } else {
+        PrintNode(tree, root, 0);
+    }
+}
+
+/* Check one tree: the root must be black and every node must pass
+   CheckTreeNode. On failure, print the tree and return -1. */
+static int CheckSubTree(struct FileTree *tree, FileRef root)
+{
+    int r;
+    if (root != 0 && GetNode(tree, root)->color != kNodeBlack) {
+        Failf("root is not black");
+        r = -1;
+    } else {
+        r = CheckTreeNode(tree, root, NULL, 0);
+    }
+    if (r < 0) {
+        PrintTree(tree, root);
+        return -1;
+    }
+    return 0;
+}
+
+/* Clear every node's visited mark, then check the top-level tree, which also
+   checks the directory trees below it. Return 0 if all invariants hold. */
+static int CheckTree(struct FileTree *tree)
+{
+    struct FileNode *nodes;
+    int i, n;
+
+    /* Mark all as unvisited. */
+    nodes = *tree->nodes;
+    for (i = 0, n = tree->count; i < n; i++) {
+        nodes[i].file.type_code = 0;
+    }
+    return CheckSubTree(tree, tree->root);
+}
+
+/* Insert the given keys into one directory, checking the tree invariants after
+   each insertion, then look every key up again and check that the same node
+   comes back. */
+static void TestTree1(struct FileTree *tree, FileRef directory, int count,
+                      const UInt32 *files)
+{
+    int i;
+    UInt32 fl;
+    FileName key;
+    FileRef ref;
+    struct FileNode *node;
+
+    /* Insert into the tree in random order. */
+    memset(&key, 0, sizeof(key));
+    for (i = 0; i < count; i++) {
+        fl = files[i];
+        SetKey(&key, fl);
+        ref = TreeInsert(tree, directory, &key);
+        if (ref < 0) {
+            Failf("TreeInsert (i=%d): %s", i, ErrorDescriptionOrDie(-ref));
+            return;
+        }
+        if (ref == 0) {
+            Failf("ref == 0 (i=%d)", i);
+            return;
+        }
+        node = GetNode(tree, ref);
+        node->file.creator_code = fl; /* To test we get same node back. */
+
+        /* Check tree invariants. */
+        if (CheckTree(tree)) {
+            return;
+        }
+    }
+
+    /* Test that we can find all nodes. */
+    for (i = 0; i < count; i++) {
+        fl = files[i];
+        SetKey(&key, fl);
+        ref = TreeInsert(tree, directory, &key);
+        if (ref < 0) {
+            Failf("second TreeInsert (i=%d): %s", i,
+                  ErrorDescriptionOrDie(-ref));
+            continue; /* No node to inspect; GetNode would exit. */
+        }
+        node = GetNode(tree, ref);
+        if (memcmp(&node->key, &key, sizeof(key)) != 0) {
+            Failf("got node with wrong key");
+            continue;
+        }
+        /* Check we don't get a new node. */
+        if (node->file.creator_code != fl) {
+            Failf(
+                "got wrong node back from equery "
+                "(i=%d, ref=%d, cc=%u, expect cc=%u)",
+                i, ref, node->file.creator_code, fl);
+        }
+    }
+}
+
+/* Reset a tree to the empty state. Does not free the node array. */
+static void ClearTree(struct FileTree *tree)
+{
+    tree->nodes = NULL;
+    tree->count = 0;
+    tree->alloc = 0;
+    tree->root = 0;
+}
+
+/* Test insertion in shuffled order, first at the top level and then inside the
+   directory at node 1. */
+static void TestRandomTree(UInt32 seed)
+{
+    UInt32 *files;
+    struct FileTree tree;
+
+    files = Shuffle(seed, kFileCount);
+    ClearTree(&tree);
+
+    SetTestNamef("Random(%u,root)", seed);
+    TestTree1(&tree, 0, kFileCount, files);
+    SetTestNamef("Random(%u,subdir)", seed);
+    TestTree1(&tree, 1, kFileCount, files);
+
+    DisposeHandle((Handle)tree.nodes);
+    free(files);
+}
+
+/* Test insertion in ascending and then descending order. The second pass
+   reuses the node array allocated by the first. */
+static void TestLinearTree(void)
+{
+    UInt32 *files;
+    struct FileTree tree;
+    int i;
+
+    files = malloc(kFileCount * sizeof(*files));
+    if (files == NULL) {
+        Fatalf("out of memory");
+    }
+
+    for (i = 0; i < kFileCount; i++) {
+        files[i] = i + 1;
+    }
+    ClearTree(&tree);
+    SetTestNamef("Linear(ascending)");
+    TestTree1(&tree, 0, kFileCount, files);
+
+    for (i = 0; i < kFileCount; i++) {
+        files[i] = kFileCount - i;
+    }
+    tree.count = 0;
+    tree.root = 0;
+    SetTestNamef("Linear(descending)");
+    TestTree1(&tree, 0, kFileCount, files);
+
+    DisposeHandle((Handle)tree.nodes);
+    free(files);
+}
+
+int main(int argc, char **argv)
+{
+    (void)argc;
+    (void)argv;
+    TestRandomTree(123);
+    TestRandomTree(456);
+    TestLinearTree();
+    return TestsDone();
+}