diff options
author | David Robillard <d@drobilla.net> | 2014-10-01 15:50:28 +0000 |
---|---|---|
committer | David Robillard <d@drobilla.net> | 2014-10-01 15:50:28 +0000 |
commit | b10ae60816015d85ed9aa27fdb98d1b0f6bfdf64 (patch) | |
tree | 5e2307f443633b9c4bbf8bc96cfeaff5ff2337f8 | |
parent | dee23c2058fd33e14610ceda277c11e4d8ce768b (diff) | |
download | zix-b10ae60816015d85ed9aa27fdb98d1b0f6bfdf64.tar.gz zix-b10ae60816015d85ed9aa27fdb98d1b0f6bfdf64.tar.bz2 zix-b10ae60816015d85ed9aa27fdb98d1b0f6bfdf64.zip |
Improve ZixPatree with more compact index.
git-svn-id: http://svn.drobilla.net/zix/trunk@95 df6676b4-ccc9-40e5-b5d6-7c4628a128e3
-rw-r--r-- | test/dict_bench.c | 2 | ||||
-rw-r--r-- | test/patree_test.c | 8 | ||||
-rw-r--r-- | wscript | 7 | ||||
-rw-r--r-- | zix/patree.c | 353 | ||||
-rw-r--r-- | zix/patree.h | 17 |
5 files changed, 227 insertions, 160 deletions
diff --git a/test/dict_bench.c b/test/dict_bench.c index d7407d3..a940ed2 100644 --- a/test/dict_bench.c +++ b/test/dict_bench.c @@ -128,7 +128,7 @@ main(int argc, char** argv) // ZixPatree insert_start = bench_start(); for (size_t i = 0; i < n; ++i) { - ZixStatus st = zix_patree_insert(patree, strings[i]); + ZixStatus st = zix_patree_insert(patree, strings[i], lengths[i]); if (st && st != ZIX_STATUS_EXISTS) { return test_fail("Failed to insert `%s'\n", strings[i]); } diff --git a/test/patree_test.c b/test/patree_test.c index 6d30509..c28ec40 100644 --- a/test/patree_test.c +++ b/test/patree_test.c @@ -63,17 +63,19 @@ main(int argc, char** argv) // Insert each string for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_patree_insert(patree, strings[i]); + ZixStatus st = zix_patree_insert(patree, strings[i], strlen(strings[i])); if (st) { return test_fail("Failed to insert `%s'\n", strings[i]); } } - //zix_patree_print_dot(patree, stdout); + FILE* dot_file = fopen("patree.dot", "w"); + zix_patree_print_dot(patree, dot_file); + fclose(dot_file); // Attempt to insert each string again for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_patree_insert(patree, strings[i]); + ZixStatus st = zix_patree_insert(patree, strings[i], strlen(strings[i])); if (st != ZIX_STATUS_EXISTS) { return test_fail("Double inserted `%s'\n", strings[i]); } @@ -232,10 +232,11 @@ def bench(ctx): import random out = open(filename, 'w') for i in xrange(1 << 20): - wordlen = random.randrange(1, 128) + wordlen = random.randrange(1, 64) + word = '' for j in xrange(wordlen): - out.write(chr(random.randrange(ord('A'), ord('Z')))) - out.write(' ') + word += chr(random.randrange(ord('A'), min(ord('Z'), ord('A') + j + 1))) + out.write(word + ' ') out.close() subprocess.call(['test/dict_bench', 'gibberish.txt']) diff --git a/zix/patree.c b/zix/patree.c index efb0a26..9ad3981 100644 --- a/zix/patree.c +++ b/zix/patree.c @@ -1,5 +1,5 @@ /* - Copyright 2011 David Robillard <http://drobilla.net> + Copyright 2011-2014 David Robillard <http://drobilla.net> Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -32,33 +32,52 @@ // #warning SSE 4.2 disabled #endif -typedef uint_fast8_t n_edges_t; +// #define ZIX_PATREE_LINEAR_SEARCH 1 -struct _ZixPatreeNode; +//typedef uint_fast8_t n_edges_t; +typedef uintptr_t n_edges_t; typedef struct { - struct _ZixPatreeNode* child; - char first_char; -} ZixChildRef; - -typedef struct _ZixPatreeNode { const char* label_first; /* Start of incoming label */ const char* label_last; /* End of incoming label */ const char* str; /* String stored at this node */ n_edges_t num_children; /* Number of outgoing edges */ - ZixChildRef children[]; /* Children of this node */ + + /** Array of first child characters, followed by parallel array of pointers + to ZixPatreeNode pointers. */ + char children[]; } ZixPatreeNode; struct _ZixPatree { ZixPatreeNode* root; /* Root of the tree */ }; -static void +/** Round `size` up to the next multiple of the word size. */ +ZIX_PRIVATE uint32_t +zix_patree_pad(uint32_t size) +{ + return (size + sizeof(void*) - 1) & (~(sizeof(void*) - 1)); +} + +ZIX_PRIVATE ZixPatreeNode** +zix_patree_get_children(ZixPatreeNode* node) +{ + return (ZixPatreeNode**)(node->children + + zix_patree_pad(node->num_children)); +} + +ZIX_PRIVATE ZixPatreeNode** +zix_patree_get_child_ptr(ZixPatreeNode* node, n_edges_t index) +{ + return zix_patree_get_children(node) + index; +} + +ZIX_PRIVATE void zix_patree_print_rec(ZixPatreeNode* node, FILE* fd) { if (node->label_first) { size_t edge_label_len = node->label_last - node->label_first + 1; - char* edge_label = (char*)malloc(edge_label_len + 1); + char* edge_label = (char*)calloc(1, edge_label_len + 1); strncpy(edge_label, node->label_first, edge_label_len); fprintf(fd, "\t\"%p\" [label=<" "<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">" @@ -74,7 +93,7 @@ zix_patree_print_rec(ZixPatreeNode* node, FILE* fd) for (n_edges_t i = 0; i < node->num_children; ++i) { - ZixPatreeNode* const child = node->children[i].child; + ZixPatreeNode* const child = *zix_patree_get_child_ptr(node, i); zix_patree_print_rec(child, fd); fprintf(fd, "\t\"%p\" -> \"%p\" ;\n", (void*)node, (void*)child); } @@ -89,79 +108,119 @@ zix_patree_print_dot(const ZixPatree* t, FILE* fd) fprintf(fd, "}\n"); } -static inline ZixPatreeNode* +ZIX_PRIVATE size_t +zix_patree_node_size(n_edges_t num_children) +{ + return (sizeof(ZixPatreeNode) + + zix_patree_pad(num_children) + + (num_children * sizeof(ZixPatreeNode*))); +} + +ZIX_PRIVATE ZixPatreeNode* realloc_node(ZixPatreeNode* n, int num_children) { - return (ZixPatreeNode*)realloc(n, sizeof(ZixPatreeNode) - + num_children * sizeof(ZixChildRef)); + return (ZixPatreeNode*)realloc(n, zix_patree_node_size(num_children)); } -ZIX_API -ZixPatree* +ZIX_API ZixPatree* zix_patree_new(void) { - ZixPatree* t = (ZixPatree*)malloc(sizeof(ZixPatree)); - memset(t, '\0', sizeof(struct _ZixPatree)); + ZixPatree* t = (ZixPatree*)calloc(1, sizeof(ZixPatree)); - t->root = realloc_node(NULL, 0); - memset(t->root, '\0', sizeof(ZixPatreeNode)); + t->root = calloc(1, sizeof(ZixPatreeNode)); return t; } -static void +ZIX_PRIVATE void zix_patree_free_rec(ZixPatreeNode* n) { if (n) { for (n_edges_t i = 0; i < n->num_children; ++i) { - zix_patree_free_rec(n->children[i].child); + zix_patree_free_rec(*zix_patree_get_child_ptr(n, i)); } free(n); } } -ZIX_API -void +ZIX_API void zix_patree_free(ZixPatree* t) { zix_patree_free_rec(t->root); free(t); } -static inline bool +ZIX_PRIVATE inline bool patree_is_leaf(const ZixPatreeNode* n) { return n->num_children == 0; } -static bool +#ifndef ZIX_PATREE_LINEAR_SEARCH +ZIX_PRIVATE n_edges_t +zix_patree_lower_bound(const char* const children, + const n_edges_t n_children, + const char c) +{ + n_edges_t first = 0; + n_edges_t len = n_children; + while (len > 0) { + const n_edges_t half = len >> 1; + const n_edges_t i = first + half; + if (children[i] < c) { + const n_edges_t chop = half + 1; + first += chop; + len -= chop; + } else { + len = half; + } + } + assert(first == n_children || children[first] >= c); + return first; +} +#endif + +ZIX_PRIVATE bool patree_find_edge(const ZixPatreeNode* n, const char c, n_edges_t* const index) { +#ifdef ZIX_PATREE_LINEAR_SEARCH for (n_edges_t i = 0; i < n->num_children; ++i) { - if (n->children[i].first_char == c) { + if (n->children[i] == c) { *index = i; return true; } } return false; +#else + *index = zix_patree_lower_bound(n->children, n->num_children, c); + return *index < n->num_children && n->children[*index] == c; +#endif } -static void +#ifndef NDEBUG +ZIX_PRIVATE bool +patree_node_check(const ZixPatreeNode* const node) +{ + for (n_edges_t i = 0; i < n->num_children; ++i) { + assert(n->children[i] != '\0'); + assert(*zix_patree_get_child_ptr(n, i) != NULL); + } + for (n_edges_t i = 0; i < child->num_children; ++i) { + assert(child->children[i] != '\0'); + assert(*zix_patree_get_child_ptr(child, i) != NULL); + } + return true; +} +#endif + +ZIX_PRIVATE void patree_add_edge(ZixPatreeNode** n_ptr, const char* str, - const char* first) + const char* first, + const char* last) { ZixPatreeNode* n = *n_ptr; - const char* last = first; - for (; *last; ++last) ; - --last; - - /* Interesting performance note: building a sorted array here makes - the search considerably slower, regardless of whether binary search - or the existing search algorithm is used. I suppose moving things - around blows the cache for child->children which trumps everything. - */ assert(last >= first); ZixPatreeNode* const child = realloc_node(NULL, 0); @@ -170,83 +229,147 @@ patree_add_edge(ZixPatreeNode** n_ptr, child->str = str; child->num_children = 0; + n = realloc_node(n, n->num_children + 1); +#ifdef ZIX_PATREE_LINEAR_SEARCH + memmove(n->children + zix_patree_pad(n->num_children + 1), + n->children + zix_patree_pad(n->num_children), + n->num_children * sizeof(ZixPatreeNode*)); ++n->num_children; - n = realloc_node(n, n->num_children); - n->children[n->num_children - 1].first_char = first[0]; - n->children[n->num_children - 1].child = child; + n->children[n->num_children - 1] = first[0]; + *zix_patree_get_child_ptr(n, n->num_children - 1) = child; +#else + const n_edges_t l = zix_patree_lower_bound(n->children, n->num_children, first[0]); + ZixPatreeNode* m = malloc(zix_patree_node_size(n->num_children + 1)); + m->label_first = n->label_first; + m->label_last = n->label_last; + m->str = n->str; + m->num_children = n->num_children + 1; + + memcpy(m->children, n->children, l); + m->children[l] = first[0]; + memcpy(m->children + l + 1, n->children + l, n->num_children - l); + + memcpy(zix_patree_get_child_ptr(m, 0), + zix_patree_get_child_ptr(n, 0), + l * sizeof(ZixPatreeNode*)); + *zix_patree_get_child_ptr(m, l) = child; + memcpy(zix_patree_get_child_ptr(m, l + 1), + zix_patree_get_child_ptr(n, l), + (n->num_children - l) * sizeof(ZixPatreeNode*)); + free(n); + n = m; +#endif *n_ptr = n; - for (n_edges_t i = 0; i < n->num_children; ++i) { - assert(n->children[i].first_char != '\0'); - } - for (n_edges_t i = 0; i < child->num_children; ++i) { - assert(child->children[i].first_char != '\0'); - } + assert(zix_patree_node_check(n)); + assert(zix_patree_node_check(child)); } /* Split an outgoing edge (to a child) at the given index */ -static void +ZIX_PRIVATE void patree_split_edge(ZixPatreeNode** child_ptr, size_t index) { ZixPatreeNode* child = *child_ptr; - const size_t grandchild_size = sizeof(ZixPatreeNode) - + (child->num_children * sizeof(ZixChildRef)); + const size_t grandchild_size = zix_patree_node_size(child->num_children); ZixPatreeNode* const grandchild = realloc_node(NULL, child->num_children); memcpy(grandchild, child, grandchild_size); grandchild->label_first = child->label_first + index; - child = realloc_node(child, 1); - child->children[0].first_char = grandchild->label_first[0]; - child->children[0].child = grandchild; - child->label_last = child->label_first + (index - 1); // chop end - - child->num_children = 1; - - child->str = NULL; + child = realloc_node(child, 1); + child->num_children = 1; + child->children[0] = grandchild->label_first[0]; + *zix_patree_get_child_ptr(child, 0) = grandchild; + child->label_last = child->label_first + (index - 1); + child->str = NULL; *child_ptr = child; - for (n_edges_t i = 0; i < child->num_children; ++i) { - assert(child->children[i].first_char != '\0'); + assert(zix_patree_node_check(n)); + assert(zix_patree_node_check(child)); +} + +ZIX_PRIVATE int +change_index_c(const char* a, const char* b, size_t len) +{ + for (size_t i = 0; i < len; ++i) { + if (a[i] != b[i]) { + return i; + } } - for (n_edges_t i = 0; i < grandchild->num_children; ++i) { - assert(grandchild->children[i].first_char != '\0'); + return len; +} + +#ifdef __SSE4_2__ +ZIX_PRIVATE int +change_index_sse(const char* a, const char* b, const size_t len) +{ + const size_t veclen = len & ~(sizeof(__m128i) - 1); + size_t i = 0; + for (; i < veclen; i += sizeof(__m128i)) { + const __m128i r = _mm_loadu_si128((const __m128i*)(a + i)); + const __m128i s = _mm_loadu_si128((const __m128i*)(b + i)); + const int index = _mm_cmpistri( + r, s, _SIDD_SBYTE_OPS|_SIDD_CMP_EQUAL_EACH|_SIDD_NEGATIVE_POLARITY); + + if (index != sizeof(__m128i)) { + assert(i + index == change_index_c(a, b, len)); + return i + index; + } } + + const __m128i r = _mm_loadu_si128((const __m128i*)(a + i)); + const __m128i s = _mm_loadu_si128((const __m128i*)(b + i)); + const size_t l = len - i; + const int index = _mm_cmpestri( + r, l, s, l, + _SIDD_SBYTE_OPS|_SIDD_CMP_EQUAL_EACH|_SIDD_MASKED_NEGATIVE_POLARITY); + + assert(i + index == change_index_c(a, b, len)); + return i + index; +} +#endif + +ZIX_PRIVATE int +zix_patree_change_index(const char* a, const char* b, const size_t len) +{ +#ifdef __SSE4_2__ + return change_index_sse(a, b, len); +#else + return change_index_c(a, b, len); +#endif } -static ZixStatus -patree_insert_internal(ZixPatreeNode** n_ptr, const char* str, const char* first) +ZIX_PRIVATE ZixStatus +patree_insert_internal(ZixPatreeNode** n_ptr, + const char* str, + const char* first, + const size_t len) { ZixPatreeNode* n = *n_ptr; n_edges_t child_i; if (patree_find_edge(n, *first, &child_i)) { - ZixPatreeNode** child_ptr = &n->children[child_i].child; + ZixPatreeNode** child_ptr = zix_patree_get_child_ptr(n, child_i); ZixPatreeNode* child = *child_ptr; const char* const label_first = child->label_first; const char* const label_last = child->label_last; - size_t split_i = 0; const size_t label_len = label_last - label_first + 1; - - while (first[split_i] && split_i < label_len - && first[split_i] == label_first[split_i]) { - ++split_i; - } + const int split_i = zix_patree_change_index(first, label_first, label_len); if (first[split_i]) { if (split_i == label_len) { - return patree_insert_internal(child_ptr, str, first + label_len); + return patree_insert_internal(child_ptr, str, first + label_len, len); } else { patree_split_edge(child_ptr, split_i); - return patree_insert_internal(child_ptr, str, first + split_i); + return patree_insert_internal(child_ptr, str, first + split_i, len); } } else if (label_len != split_i) { patree_split_edge(child_ptr, split_i); if (first[split_i]) { - patree_add_edge(child_ptr, str, first + split_i); + patree_add_edge(child_ptr, str, first + split_i, str + len - 1); } else { assert(!(*child_ptr)->str); (*child_ptr)->str = str; @@ -258,55 +381,21 @@ patree_insert_internal(ZixPatreeNode** n_ptr, const char* str, const char* first return ZIX_STATUS_EXISTS; } } else { - patree_add_edge(n_ptr, str, first); + patree_add_edge(n_ptr, str, first, str + len - 1); } return ZIX_STATUS_SUCCESS; } -ZIX_API -ZixStatus -zix_patree_insert(ZixPatree* t, const char* str) -{ - return patree_insert_internal(&t->root, str, str); -} - -static inline int -change_index_c(const char* a, const char* b, size_t len) -{ - for (size_t i = 0; i < len; ++i) { - if (a[i] != b[i]) { - return i; - } - } - return len; -} - -#ifdef __SSE4_2__ -static inline int -change_index_sse(const char* a, const char* b, const size_t len) +ZIX_API ZixStatus +zix_patree_insert(ZixPatree* t, const char* str, size_t len) { - for (size_t i = 0; i < len; i += sizeof(__m128i)) { - const __m128i r = _mm_loadu_si128((const __m128i*)(a + i)); - const __m128i* s = (const __m128i*)(b + i); - const int index = _mm_cmpistri( - r, *s, _SIDD_SBYTE_OPS|_SIDD_CMP_EQUAL_EACH|_SIDD_NEGATIVE_POLARITY); - - if (index != sizeof(__m128i)) { - size_t ret = i + index; - if (ret > len) { - ret = len; - } - return ret; - } - } + assert(strlen(str) == len); - return len; + return patree_insert_internal(&t->root, str, str, len); } -#endif -ZIX_API -ZixStatus +ZIX_API ZixStatus zix_patree_find(const ZixPatree* t, const char* const str, const char** match) { ZixPatreeNode* n = t->root; @@ -316,40 +405,20 @@ zix_patree_find(const ZixPatree* t, const char* const str, const char** match) while (patree_find_edge(n, p[0], &child_i)) { assert(child_i <= n->num_children); - ZixPatreeNode* const child = n->children[child_i].child; - const char* const child_str = child->str; + ZixPatreeNode* const child = *zix_patree_get_child_ptr(n, child_i); /* Prefix compare search string and label */ - const char* l = child->label_first; - const char* const l_end = child->label_last; - const size_t len = l_end - l + 1; -#ifdef __SSE4_2__ - int change_index; - if (len >= sizeof(__m128i)) { - change_index = change_index_sse(p, l, len); - } else { - change_index = change_index_c(p, l, len); - } -#else - int change_index = change_index_c(p, l, len); -#endif + const char* const l = child->label_first; + const size_t len = child->label_last - l + 1; + const int change_index = zix_patree_change_index(p, l, len); - l += change_index; p += change_index; -#if 0 - while (l <= l_end) { - if (*l++ != *p++) { - return ZIX_STATUS_NOT_FOUND; - } - } -#endif - if (!*p) { /* Reached end of search string */ - if (l == l_end + 1) { + if (l + change_index == child->label_last + 1) { /* Reached end of label string as well, match */ - *match = child_str; + *match = child->str; return *match ? ZIX_STATUS_SUCCESS : ZIX_STATUS_NOT_FOUND; } else { /* Label string is longer, no match (prefix match) */ @@ -362,7 +431,7 @@ zix_patree_find(const ZixPatree* t, const char* const str, const char** match) return ZIX_STATUS_NOT_FOUND; } else { /* Match at this node, continue search downwards (or match) */ - n = child; + n = child; } } } diff --git a/zix/patree.h b/zix/patree.h index c89bc9b..35436c8 100644 --- a/zix/patree.h +++ b/zix/patree.h @@ -31,36 +31,31 @@ typedef struct _ZixPatree ZixPatree; /** Construct a new Patree. */ -ZIX_API -ZixPatree* +ZIX_API ZixPatree* zix_patree_new(void); /** Destroy `t`. */ -ZIX_API -void +ZIX_API void zix_patree_free(ZixPatree* t); /** Print a DOT description of `t` to `fd`. */ -ZIX_API -void +ZIX_API void zix_patree_print_dot(const ZixPatree* t, FILE* fd); /** Insert `str` into `t`. */ -ZIX_API -ZixStatus -zix_patree_insert(ZixPatree* t, const char* str); +ZIX_API ZixStatus +zix_patree_insert(ZixPatree* t, const char* str, size_t len); /** Search for `str` in `t`. */ -ZIX_API -ZixStatus +ZIX_API ZixStatus zix_patree_find(const ZixPatree* t, const char* str, const char** match); /** |