diff options
-rw-r--r-- | benchmark/dict_bench.c | 90 | ||||
-rw-r--r-- | test/ampatree_test.c | 117 | ||||
-rw-r--r-- | test/patree_test.c | 117 | ||||
-rw-r--r-- | test/trie_test.c | 117 | ||||
-rw-r--r-- | wscript | 8 | ||||
-rw-r--r-- | zix/ampatree.c | 321 | ||||
-rw-r--r-- | zix/ampatree.h | 68 | ||||
-rw-r--r-- | zix/patree.c | 347 | ||||
-rw-r--r-- | zix/patree.h | 68 | ||||
-rw-r--r-- | zix/trie.c | 353 | ||||
-rw-r--r-- | zix/trie.h | 68 | ||||
-rw-r--r-- | zix/trie_util.h | 127 |
12 files changed, 3 insertions, 1798 deletions
diff --git a/benchmark/dict_bench.c b/benchmark/dict_bench.c index e26f88a..3cb2f0d 100644 --- a/benchmark/dict_bench.c +++ b/benchmark/dict_bench.c @@ -16,12 +16,9 @@ #include "bench.h" -#include "zix/ampatree.h" #include "zix/chunk.h" #include "zix/common.h" #include "zix/hash.h" -#include "zix/patree.h" -#include "zix/trie.h" #include <glib.h> @@ -97,14 +94,11 @@ main(int argc, char** argv) FILE* insert_dat = fopen("dict_insert.txt", "w"); FILE* search_dat = fopen("dict_search.txt", "w"); - fprintf(insert_dat, "# n\tGHashTable\tZixHash\tZixPatree\tZixTrie\tZixAMPatree\n"); - fprintf(search_dat, "# n\tGHashTable\tZixHash\tZixPatree\tZixTrie\tZixAMPatree\n"); + fprintf(insert_dat, "# n\tGHashTable\tZixHash\n"); + fprintf(search_dat, "# n\tGHashTable\tZixHash\n"); for (size_t n = n_strings / 16; n <= n_strings; n *= 2) { printf("Benchmarking n = %zu\n", n); - ZixPatree* patree = zix_patree_new(); - ZixAMPatree* ampatree = zix_ampatree_new(); - ZixTrie* trie = zix_trie_new(); GHashTable* hash = g_hash_table_new(g_str_hash, g_str_equal); ZixHash* zhash = zix_hash_new((ZixHashFunc)zix_chunk_hash, (ZixEqualFunc)zix_chunk_equal, @@ -130,36 +124,6 @@ main(int argc, char** argv) return test_fail("Failed to insert `%s'\n", strings[i]); } } - fprintf(insert_dat, "\t%lf", bench_end(&insert_start)); - - // ZixPatree - insert_start = bench_start(); - for (size_t i = 0; i < n; ++i) { - ZixStatus st = zix_patree_insert(patree, strings[i], lengths[i]); - if (st && st != ZIX_STATUS_EXISTS) { - return test_fail("Failed to insert `%s'\n", strings[i]); - } - } - fprintf(insert_dat, "\t%lf", bench_end(&insert_start)); - - // ZixTrie - insert_start = bench_start(); - for (size_t i = 0; i < n; ++i) { - ZixStatus st = zix_trie_insert(trie, strings[i], lengths[i]); - if (st && st != ZIX_STATUS_EXISTS) { - return test_fail("Failed to insert `%s'\n", strings[i]); - } - } - fprintf(insert_dat, "\t%lf", bench_end(&insert_start)); - - // ZixAMPatree - insert_start = bench_start(); - for (size_t i = 0; i < n; ++i) { - ZixStatus st = zix_ampatree_insert(ampatree, strings[i], lengths[i]); - if (st && st != ZIX_STATUS_EXISTS) { - return test_fail("Failed to insert `%s'\n", strings[i]); - } - } fprintf(insert_dat, "\t%lf\n", bench_end(&insert_start)); // Benchmark search @@ -193,58 +157,8 @@ main(int argc, char** argv) (const char*)match->buf); } } - fprintf(search_dat, "\t%lf", bench_end(&search_start)); - - // ZixPatree - srand(seed); - search_start = bench_start(); - for (size_t i = 0; i < n; ++i) { - const size_t index = rand() % n; - const char* match = NULL; - if (zix_patree_find(patree, strings[index], &match)) { - return test_fail("Patree: Failed to find `%s'\n", strings[index]); - } - if (strcmp(match, strings[index])) { - return test_fail("Patree: Bad match for `%s'\n", strings[index]); - } - } - fprintf(search_dat, "\t%lf", bench_end(&search_start)); - - // ZixTrie - srand(seed); - search_start = bench_start(); - for (size_t i = 0; i < n; ++i) { - const size_t index = rand() % n; - const char* match = NULL; - if (zix_trie_find(trie, strings[index], &match)) { - return test_fail("Trie: Failed to find `%s'\n", strings[index]); - } - if (strcmp(match, strings[index])) { - return test_fail("Trie: Bad match `%s' for `%s'\n", - match, strings[index]); - } - } - fprintf(search_dat, "\t%lf", bench_end(&search_start)); - - // ZixAMPatree - srand(seed); - search_start = bench_start(); - for (size_t i = 0; i < n; ++i) { - const size_t index = rand() % n; - const char* match = NULL; - if (zix_ampatree_find(ampatree, strings[index], &match)) { - return test_fail("AMPatree: Failed to find `%s'\n", strings[index]); - } - if (strcmp(match, strings[index])) { - return test_fail("AMPatree: Bad match `%s' for `%s'\n", - match, strings[index]); - } - } fprintf(search_dat, "\t%lf\n", bench_end(&search_start)); - zix_patree_free(patree); - zix_ampatree_free(ampatree); - zix_trie_free(trie); zix_hash_free(zhash); g_hash_table_unref(hash); } diff --git a/test/ampatree_test.c b/test/ampatree_test.c deleted file mode 100644 index bc7d161..0000000 --- a/test/ampatree_test.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - Copyright 2011-2016 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#include "zix/ampatree.h" -#include "zix/common.h" - -#include <stdarg.h> -#include <stdio.h> -#include <string.h> - -static const char* strings[] = { - "http://example.org/foo", - "http://example.org/bar", - "http://example.org/baz", - "http://example.net/foo", - "http://example.net/bar", - "http://example.net/baz", - "http://drobilla.net/", - "http://drobilla.net/software/zix", - "http://www.gbengasesan.com/blog", - "http://www.gbengasesan.com", - "http://echo.jpl.nasa.gov/~lance/delta_v/delta_v.rendezvous.html", - "http://echo.jpl.nasa.gov/asteroids/1986da/1986DA.html", - "http://echo.jpl.nasa.gov/", - "http://echo.jpl.nasa.gov/asteroids/1620_Geographos/geographos.html", - "http://echo.jpl.nasa.gov/~ostro/KY26/", - "http://echo.jpl.nasa.gov/~ostro/KY26/JPL_press_release.text", - "http://echo.jpl.nasa.gov", - "http://echo.jpl.nasa.gov/asteroids/4179_Toutatis/toutatis.html", - "http://echo.jpl.nasa.gov/asteroids/4769_Castalia/cast01.html", - "http://echo.jpl.nasa.gov/publications/review_abs.html", -}; - -ZIX_LOG_FUNC(1, 2) -static int -test_fail(const char* fmt, ...) -{ - va_list args; - va_start(args, fmt); - fprintf(stderr, "error: "); - vfprintf(stderr, fmt, args); - va_end(args); - return 1; -} - -int -main(void) -{ - ZixAMPatree* patree = zix_ampatree_new(); - - const size_t n_strings = sizeof(strings) / sizeof(char*); - - // Insert each string - for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_ampatree_insert(patree, strings[i], strlen(strings[i])); - if (st) { - return test_fail("Failed to insert `%s'\n", strings[i]); - } - } - - FILE* dot_file = fopen("ampatree.dot", "w"); - zix_ampatree_print_dot(patree, dot_file); - fclose(dot_file); - - // Attempt to insert each string again - for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_ampatree_insert(patree, strings[i], strlen(strings[i])); - if (st != ZIX_STATUS_EXISTS) { - return test_fail("Double inserted `%s'\n", strings[i]); - } - } - - // Search for each string - for (size_t i = 0; i < n_strings; ++i) { - const char* match = NULL; - ZixStatus st = zix_ampatree_find(patree, strings[i], &match); - if (st) { - return test_fail("Failed to find `%s'\n", strings[i]); - } - if (match != strings[i]) { - return test_fail("Bad match for `%s'\n", strings[i]); - } - } - - // Try some false matches - const char* not_indexed[] = { - "ftp://example.org/not-there-at-all", - "http://example.org/foobar", - "http://", - "http://otherdomain.com" - }; - const size_t n_not_indexed = sizeof(not_indexed) / sizeof(char*); - for (size_t i = 0; i < n_not_indexed; ++i) { - const char* match = NULL; - ZixStatus st = zix_ampatree_find(patree, not_indexed[i], &match); - if (st != ZIX_STATUS_NOT_FOUND) { - return test_fail("Unexpectedly found `%s'\n", not_indexed[i]); - } - } - - zix_ampatree_free(patree); - - return 0; -} diff --git a/test/patree_test.c b/test/patree_test.c deleted file mode 100644 index d7def0c..0000000 --- a/test/patree_test.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - Copyright 2011 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#include "zix/common.h" -#include "zix/patree.h" - -#include <stdarg.h> -#include <stdio.h> -#include <string.h> - -static const char* strings[] = { - "http://example.org/foo", - "http://example.org/bar", - "http://example.org/baz", - "http://example.net/foo", - "http://example.net/bar", - "http://example.net/baz", - "http://drobilla.net/", - "http://drobilla.net/software/zix", - "http://www.gbengasesan.com/blog", - "http://www.gbengasesan.com", - "http://echo.jpl.nasa.gov/~lance/delta_v/delta_v.rendezvous.html", - "http://echo.jpl.nasa.gov/asteroids/1986da/1986DA.html", - "http://echo.jpl.nasa.gov/", - "http://echo.jpl.nasa.gov/asteroids/1620_Geographos/geographos.html", - "http://echo.jpl.nasa.gov/~ostro/KY26/", - "http://echo.jpl.nasa.gov/~ostro/KY26/JPL_press_release.text", - "http://echo.jpl.nasa.gov", - "http://echo.jpl.nasa.gov/asteroids/4179_Toutatis/toutatis.html", - "http://echo.jpl.nasa.gov/asteroids/4769_Castalia/cast01.html", - "http://echo.jpl.nasa.gov/publications/review_abs.html", -}; - -ZIX_LOG_FUNC(1, 2) -static int -test_fail(const char* fmt, ...) -{ - va_list args; - va_start(args, fmt); - fprintf(stderr, "error: "); - vfprintf(stderr, fmt, args); - va_end(args); - return 1; -} - -int -main(void) -{ - ZixPatree* patree = zix_patree_new(); - - const size_t n_strings = sizeof(strings) / sizeof(char*); - - // Insert each string - for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_patree_insert(patree, strings[i], strlen(strings[i])); - if (st) { - return test_fail("Failed to insert `%s'\n", strings[i]); - } - } - - FILE* dot_file = fopen("patree.dot", "w"); - zix_patree_print_dot(patree, dot_file); - fclose(dot_file); - - // Attempt to insert each string again - for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_patree_insert(patree, strings[i], strlen(strings[i])); - if (st != ZIX_STATUS_EXISTS) { - return test_fail("Double inserted `%s'\n", strings[i]); - } - } - - // Search for each string - for (size_t i = 0; i < n_strings; ++i) { - const char* match = NULL; - ZixStatus st = zix_patree_find(patree, strings[i], &match); - if (st) { - return test_fail("Failed to find `%s'\n", strings[i]); - } - if (match != strings[i]) { - return test_fail("Bad match for `%s'\n", strings[i]); - } - } - - // Try some false matches - const char* not_indexed[] = { - "ftp://example.org/not-there-at-all", - "http://example.org/foobar", - "http://", - "http://otherdomain.com" - }; - const size_t n_not_indexed = sizeof(not_indexed) / sizeof(char*); - for (size_t i = 0; i < n_not_indexed; ++i) { - const char* match = NULL; - ZixStatus st = zix_patree_find(patree, not_indexed[i], &match); - if (st != ZIX_STATUS_NOT_FOUND) { - return test_fail("Unexpectedly found `%s'\n", not_indexed[i]); - } - } - - zix_patree_free(patree); - - return 0; -} diff --git a/test/trie_test.c b/test/trie_test.c deleted file mode 100644 index f4b3eb2..0000000 --- a/test/trie_test.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - Copyright 2011-2016 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#include "zix/common.h" -#include "zix/trie.h" - -#include <stdarg.h> -#include <stdio.h> -#include <string.h> - -static const char* strings[] = { - "http://example.org/foo", - "http://example.org/bar", - "http://example.org/baz", - "http://example.net/foo", - "http://example.net/bar", - "http://example.net/baz", - "http://drobilla.net/", - "http://drobilla.net/software/zix", - "http://www.gbengasesan.com/blog", - "http://www.gbengasesan.com", - "http://echo.jpl.nasa.gov/~lance/delta_v/delta_v.rendezvous.html", - "http://echo.jpl.nasa.gov/asteroids/1986da/1986DA.html", - "http://echo.jpl.nasa.gov/", - "http://echo.jpl.nasa.gov/asteroids/1620_Geographos/geographos.html", - "http://echo.jpl.nasa.gov/~ostro/KY26/", - "http://echo.jpl.nasa.gov/~ostro/KY26/JPL_press_release.text", - "http://echo.jpl.nasa.gov", - "http://echo.jpl.nasa.gov/asteroids/4179_Toutatis/toutatis.html", - "http://echo.jpl.nasa.gov/asteroids/4769_Castalia/cast01.html", - "http://echo.jpl.nasa.gov/publications/review_abs.html", -}; - -ZIX_LOG_FUNC(1, 2) -static int -test_fail(const char* fmt, ...) -{ - va_list args; - va_start(args, fmt); - fprintf(stderr, "error: "); - vfprintf(stderr, fmt, args); - va_end(args); - return 1; -} - -int -main(void) -{ - ZixTrie* trie = zix_trie_new(); - - const size_t n_strings = sizeof(strings) / sizeof(char*); - - // Insert each string - for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_trie_insert(trie, strings[i], strlen(strings[i])); - if (st) { - return test_fail("Failed to insert `%s'\n", strings[i]); - } - } - - FILE* dot_file = fopen("trie.dot", "w"); - zix_trie_print_dot(trie, dot_file); - fclose(dot_file); - - // Attempt to insert each string again - for (size_t i = 0; i < n_strings; ++i) { - ZixStatus st = zix_trie_insert(trie, strings[i], strlen(strings[i])); - if (st != ZIX_STATUS_EXISTS) { - return test_fail("Double inserted `%s'\n", strings[i]); - } - } - - // Search for each string - for (size_t i = 0; i < n_strings; ++i) { - const char* match = NULL; - ZixStatus st = zix_trie_find(trie, strings[i], &match); - if (st) { - return test_fail("Failed to find `%s'\n", strings[i]); - } - if (match != strings[i]) { - return test_fail("Bad match for `%s'\n", strings[i]); - } - } - - // Try some false matches - const char* not_indexed[] = { - "ftp://example.org/not-there-at-all", - "http://example.org/foobar", - "http://", - "http://otherdomain.com" - }; - const size_t n_not_indexed = sizeof(not_indexed) / sizeof(char*); - for (size_t i = 0; i < n_not_indexed; ++i) { - const char* match = NULL; - ZixStatus st = zix_trie_find(trie, not_indexed[i], &match); - if (st != ZIX_STATUS_NOT_FOUND) { - return test_fail("Unexpectedly found `%s'\n", not_indexed[i]); - } - } - - zix_trie_free(trie); - - return 0; -} @@ -122,23 +122,18 @@ sources = [ 'zix/chunk.c', 'zix/digest.c', 'zix/hash.c', - 'zix/patree.c', - 'zix/trie.c', 'zix/ring.c', 'zix/sorted_array.c', 'zix/strindex.c', 'zix/tree.c', 'zix/btree.c', 'zix/bitset.c', - 'zix/ampatree.c' ] tests = [ 'digest_test', 'hash_test', 'inline_test', - 'patree_test', - 'trie_test', 'ring_test', 'sem_test', 'sorted_array_test', @@ -146,7 +141,6 @@ tests = [ 'tree_test', 'btree_test', 'bitset_test', - 'ampatree_test' ] @@ -325,7 +319,7 @@ def bench(ctx): # Benchmark trees - subprocess.call(['benchmark/tree_bench', '400000', '6400000']) + subprocess.call(['benchmark/tree_bench', '40000', '640000']) subprocess.call(['../plot.py', 'tree_bench.svg', 'tree_insert.txt', 'tree_search.txt', diff --git a/zix/ampatree.c b/zix/ampatree.c deleted file mode 100644 index bb67d06..0000000 --- a/zix/ampatree.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - Copyright 2011-2016 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#define _XOPEN_SOURCE 500 - -#include "zix/ampatree.h" - -#include "zix/bitset.h" -#include "zix/common.h" -#include "zix/trie_util.h" - -#include <assert.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#define ZIX_AMPATREE_BITS 256 -#define ZIX_AMPATREE_ELEMS (ZIX_BITSET_ELEMS(ZIX_AMPATREE_BITS)) - -typedef size_t n_edges_t; - -typedef struct ZixAMPatreeNode { - const uint8_t* label_first; /* Start of incoming label */ - const uint8_t* label_last; /* End of incoming label */ - const uint8_t* str; /* String stored at this node */ - n_edges_t n_children; /* Number of outgoing edges */ - - ZixBitsetTally tally[ZIX_AMPATREE_ELEMS]; - ZixBitset bits[ZIX_AMPATREE_ELEMS]; - - struct ZixAMPatreeNode* children[]; -} ZixAMPatreeNode; - -struct _ZixAMPatree { - ZixAMPatreeNode* root; /* Root of the tree */ -}; - -ZIX_PRIVATE ZixAMPatreeNode** -zix_ampatree_get_child_ptr(ZixAMPatreeNode* node, n_edges_t index) -{ - return node->children + index; -} - -ZIX_PRIVATE void -zix_ampatree_print_rec(ZixAMPatreeNode* node, FILE* fd) -{ - if (node->label_first) { - size_t edge_label_len = node->label_last - node->label_first + 1; - char* edge_label = (char*)calloc(1, edge_label_len + 1); - strncpy(edge_label, (const char*)node->label_first, edge_label_len); - fprintf(fd, "\t\"%p\" [label=<" - "<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">" - "<TR><TD>%s</TD></TR>", (void*)node, edge_label); - free(edge_label); - if (node->str) { - fprintf(fd, "<TR><TD>\"%s\"</TD></TR>", node->str); - } - fprintf(fd, "</TABLE>>,shape=\"plaintext\"] ;\n"); - } else { - fprintf(fd, "\t\"%p\" [label=\"\"] ;\n", (void*)node); - } - - for (n_edges_t i = 0; i < node->n_children; ++i) { - ZixAMPatreeNode* const child = *zix_ampatree_get_child_ptr(node, i); - zix_ampatree_print_rec(child, fd); - fprintf(fd, "\t\"%p\" -> \"%p\" ;\n", (void*)node, (void*)child); - } -} - -void -zix_ampatree_print_dot(const ZixAMPatree* t, FILE* fd) -{ - fprintf(fd, "digraph Patree { \n"); - zix_ampatree_print_rec(t->root, fd); - fprintf(fd, "}\n"); -} - -ZIX_PRIVATE size_t -zix_ampatree_node_size(n_edges_t n_children) -{ - return sizeof(ZixAMPatreeNode) + (n_children * sizeof(ZixAMPatreeNode*)); -} - -ZIX_PRIVATE ZixAMPatreeNode* -realloc_node(ZixAMPatreeNode* n, n_edges_t n_children) -{ - return (ZixAMPatreeNode*)realloc(n, zix_ampatree_node_size(n_children)); -} - -ZixAMPatree* -zix_ampatree_new(void) -{ - ZixAMPatree* t = (ZixAMPatree*)calloc(1, sizeof(ZixAMPatree)); - - t->root = (ZixAMPatreeNode*)calloc(1, sizeof(ZixAMPatreeNode)); - - return t; -} - -ZIX_PRIVATE void -zix_ampatree_free_rec(ZixAMPatreeNode* n) -{ - if (n) { - for (n_edges_t i = 0; i < n->n_children; ++i) { - zix_ampatree_free_rec(*zix_ampatree_get_child_ptr(n, i)); - } - free(n); - } -} - -void -zix_ampatree_free(ZixAMPatree* t) -{ - zix_ampatree_free_rec(t->root); - free(t); -} - -ZIX_PRIVATE inline bool -patree_is_leaf(const ZixAMPatreeNode* n) -{ - return n->n_children == 0; -} - -ZIX_PRIVATE size_t -ampatree_find_edge(const ZixAMPatreeNode* n, const uint8_t c) -{ - return zix_bitset_count_up_to_if(n->bits, n->tally, c); -} - -#ifndef NDEBUG -ZIX_PRIVATE bool -zix_ampatree_node_check(ZixAMPatreeNode* const n) -{ - for (n_edges_t i = 0; i < n->n_children; ++i) { - assert(*zix_ampatree_get_child_ptr(n, i) != NULL); - } - return true; -} -#endif - -ZIX_PRIVATE void -patree_add_edge(ZixAMPatreeNode** n_ptr, - const uint8_t* str, - const uint8_t* first, - const uint8_t* last) -{ - ZixAMPatreeNode* n = *n_ptr; - - assert(last >= first); - - ZixAMPatreeNode* const child = realloc_node(NULL, 0); - zix_bitset_clear(child->bits, child->tally, ZIX_AMPATREE_BITS); - child->label_first = first; - child->label_last = last; - child->str = str; - child->n_children = 0; - - n = (ZixAMPatreeNode*)realloc_node(n, n->n_children + 1u); - ZixAMPatreeNode* m = (ZixAMPatreeNode*)malloc(zix_ampatree_node_size(n->n_children + 1)); - m->label_first = n->label_first; - m->label_last = n->label_last; - m->str = n->str; - m->n_children = n->n_children + 1; - - memcpy(m->bits, n->bits, ZIX_AMPATREE_ELEMS * sizeof(ZixBitset)); - memcpy(m->tally, n->tally, ZIX_AMPATREE_ELEMS * sizeof(ZixBitsetTally)); - zix_bitset_set(m->bits, m->tally, first[0]); - - const n_edges_t l = ampatree_find_edge(m, first[0]); - memcpy(zix_ampatree_get_child_ptr(m, 0), - zix_ampatree_get_child_ptr(n, 0), - l * sizeof(ZixAMPatreeNode*)); - *zix_ampatree_get_child_ptr(m, l) = child; - memcpy(zix_ampatree_get_child_ptr(m, l + 1), - zix_ampatree_get_child_ptr(n, l), - (n->n_children - l) * sizeof(ZixAMPatreeNode*)); - free(n); - n = m; - - *n_ptr = n; - - assert(zix_ampatree_node_check(n)); - assert(zix_ampatree_node_check(child)); -} - -/* Split an outgoing edge (to a child) at the given index */ -ZIX_PRIVATE void -patree_split_edge(ZixAMPatreeNode** child_ptr, - size_t index) -{ - ZixAMPatreeNode* child = *child_ptr; - - const size_t grandchild_size = zix_ampatree_node_size(child->n_children); - - ZixAMPatreeNode* const grandchild = realloc_node(NULL, child->n_children); - memcpy(grandchild, child, grandchild_size); - grandchild->label_first = child->label_first + index; - - child = realloc_node(child, 1); - child->n_children = 1; - child->label_last = child->label_first + (index - 1); - child->str = NULL; - *zix_ampatree_get_child_ptr(child, 0) = grandchild; - zix_bitset_clear(child->bits, child->tally, ZIX_AMPATREE_BITS); - zix_bitset_set(child->bits, child->tally, grandchild->label_first[0]); - - *child_ptr = child; - - assert(zix_ampatree_node_check(child)); - assert(zix_ampatree_node_check(grandchild)); -} - -ZIX_PRIVATE ZixStatus -patree_insert_internal(ZixAMPatreeNode** n_ptr, - const uint8_t* str, - const uint8_t* first, - const size_t len) -{ - ZixAMPatreeNode* n = *n_ptr; - const n_edges_t child_i = ampatree_find_edge(n, *first); - if (child_i != (n_edges_t)-1) { - ZixAMPatreeNode** child_ptr = zix_ampatree_get_child_ptr(n, child_i); - ZixAMPatreeNode* child = *child_ptr; - const uint8_t* const label_first = child->label_first; - const uint8_t* const label_last = child->label_last; - const size_t label_len = label_last - label_first + 1; - const size_t split_i = zix_trie_change_index(first, label_first, label_len); - - if (first[split_i]) { - if (split_i == label_len) { - return patree_insert_internal(child_ptr, str, first + label_len, len); - } else { - patree_split_edge(child_ptr, split_i); - return patree_insert_internal(child_ptr, str, first + split_i, len); - } - } else if (label_len != split_i) { - patree_split_edge(child_ptr, split_i); - if (first[split_i]) { - patree_add_edge(child_ptr, str, first + split_i, str + len - 1); - } else { - assert(!(*child_ptr)->str); - (*child_ptr)->str = str; - } - return ZIX_STATUS_SUCCESS; - } else if (label_first[split_i] && !child->str) { - child->str = str; - } else { - return ZIX_STATUS_EXISTS; - } - } else { - patree_add_edge(n_ptr, str, first, str + len - 1); - } - - return ZIX_STATUS_SUCCESS; -} - -ZixStatus -zix_ampatree_insert(ZixAMPatree* t, const char* str, size_t len) -{ - assert(strlen(str) == len); - - const uint8_t* ustr = (const uint8_t*)str; - return patree_insert_internal(&t->root, ustr, ustr, len); -} - -ZixStatus -zix_ampatree_find(const ZixAMPatree* t, const char* const str, const char** match) -{ - ZixAMPatreeNode* n = t->root; - const uint8_t* p = (const uint8_t*)str; - n_edges_t child_i; - while ((child_i = ampatree_find_edge(n, p[0])) != (n_edges_t)-1) { - assert(child_i <= n->n_children); - ZixAMPatreeNode* const child = *zix_ampatree_get_child_ptr(n, child_i); - - /* Prefix compare search string and label */ - const uint8_t* const l = child->label_first; - const size_t len = child->label_last - l + 1; - const size_t change_index = zix_trie_change_index(p, l, len); - - p += change_index; - - if (!*p) { - /* Reached end of search string */ - if (l + change_index == child->label_last + 1) { - /* Reached end of label string as well, match */ - *match = (const char*)child->str; - return *match ? ZIX_STATUS_SUCCESS : ZIX_STATUS_NOT_FOUND; - } else { - /* Label string is longer, no match (prefix match) */ - return ZIX_STATUS_NOT_FOUND; - } - } else { - /* Didn't reach end of search string */ - if (patree_is_leaf(n)) { - /* Hit leaf early, no match */ - return ZIX_STATUS_NOT_FOUND; - } else { - /* Match at this node, continue search downwards (or match) */ - n = child; - } - } - } - - return ZIX_STATUS_NOT_FOUND; -} diff --git a/zix/ampatree.h b/zix/ampatree.h deleted file mode 100644 index c7f7eb7..0000000 --- a/zix/ampatree.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright 2011-2016 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#ifndef ZIX_AMPATREE_H -#define ZIX_AMPATREE_H - -#include "zix/common.h" - -#include <stdio.h> - -/** - @addtogroup zix - @{ - @name AMPatree - @{ -*/ - -typedef struct _ZixAMPatree ZixAMPatree; - -/** - Construct a new Patree. -*/ -ZIX_API ZixAMPatree* -zix_ampatree_new(void); - -/** - Destroy `t`. -*/ -ZIX_API void -zix_ampatree_free(ZixAMPatree* t); - -/** - Print a DOT description of `t` to `fd`. -*/ -ZIX_API void -zix_ampatree_print_dot(const ZixAMPatree* t, FILE* fd); - -/** - Insert `str` into `t`. -*/ -ZIX_API ZixStatus -zix_ampatree_insert(ZixAMPatree* t, const char* str, size_t len); - -/** - Search for `str` in `t`. -*/ -ZIX_API ZixStatus -zix_ampatree_find(const ZixAMPatree* t, const char* str, const char** match); - -/** - @} - @} -*/ - -#endif /* ZIX_AMPATREE_H */ diff --git a/zix/patree.c b/zix/patree.c deleted file mode 100644 index a7d7e45..0000000 --- a/zix/patree.c +++ /dev/null @@ -1,347 +0,0 @@ -/* - Copyright 2011-2014 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#define _XOPEN_SOURCE 500 - -#include "zix/patree.h" - -#include "zix/common.h" -#include "zix/trie_util.h" - -#include <assert.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -// #define ZIX_TRIE_LINEAR_SEARCH 1 - -typedef size_t n_edges_t; - -typedef struct { - const uint8_t* label_first; /* Start of incoming label */ - const uint8_t* label_last; /* End of incoming label */ - const uint8_t* str; /* String stored at this node */ - n_edges_t n_children; /* Number of outgoing edges */ - - /** Array of first child characters, followed by parallel array of pointers - to ZixPatreeNode pointers. */ - uint8_t children[]; -} ZixPatreeNode; - -struct _ZixPatree { - ZixPatreeNode* root; /* Root of the tree */ -}; - -/** Round `size` up to the next multiple of the word size. */ -ZIX_PRIVATE n_edges_t -zix_patree_pad(n_edges_t size) -{ - return (size + sizeof(void*) - 1) & (~(sizeof(void*) - 1)); -} - -ZIX_PRIVATE ZixPatreeNode** -zix_patree_get_children(ZixPatreeNode* node) -{ - return (ZixPatreeNode**)(node->children + - zix_patree_pad(node->n_children)); -} - -ZIX_PRIVATE ZixPatreeNode** -zix_patree_get_child_ptr(ZixPatreeNode* node, n_edges_t index) -{ - return zix_patree_get_children(node) + index; -} - -ZIX_PRIVATE void -zix_patree_print_rec(ZixPatreeNode* node, FILE* fd) -{ - if (node->label_first) { - size_t edge_label_len = node->label_last - node->label_first + 1; - char* edge_label = (char*)calloc(1, edge_label_len + 1); - strncpy(edge_label, (const char*)node->label_first, edge_label_len); - fprintf(fd, "\t\"%p\" [label=<" - "<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">" - "<TR><TD>%s</TD></TR>", (void*)node, edge_label); - free(edge_label); - if (node->str) { - fprintf(fd, "<TR><TD>\"%s\"</TD></TR>", node->str); - } - fprintf(fd, "</TABLE>>,shape=\"plaintext\"] ;\n"); - } else { - fprintf(fd, "\t\"%p\" [label=\"\"] ;\n", (void*)node); - } - - - for (n_edges_t i = 0; i < node->n_children; ++i) { - ZixPatreeNode* const child = *zix_patree_get_child_ptr(node, i); - zix_patree_print_rec(child, fd); - fprintf(fd, "\t\"%p\" -> \"%p\" ;\n", (void*)node, (void*)child); - } -} - -ZIX_API -void -zix_patree_print_dot(const ZixPatree* t, FILE* fd) -{ - fprintf(fd, "digraph Patree { \n"); - zix_patree_print_rec(t->root, fd); - fprintf(fd, "}\n"); -} - -ZIX_PRIVATE size_t -zix_patree_node_size(n_edges_t n_children) -{ - return (sizeof(ZixPatreeNode) + - zix_patree_pad(n_children) + - (n_children * sizeof(ZixPatreeNode*))); -} - -ZIX_PRIVATE ZixPatreeNode* -realloc_node(ZixPatreeNode* n, n_edges_t n_children) -{ - return (ZixPatreeNode*)realloc(n, zix_patree_node_size(n_children)); -} - -ZixPatree* -zix_patree_new(void) -{ - ZixPatree* t = (ZixPatree*)calloc(1, sizeof(ZixPatree)); - - t->root = (ZixPatreeNode*)calloc(1, sizeof(ZixPatreeNode)); - - return t; -} - -ZIX_PRIVATE void -zix_patree_free_rec(ZixPatreeNode* n) -{ - if (n) { - for (n_edges_t i = 0; i < n->n_children; ++i) { - zix_patree_free_rec(*zix_patree_get_child_ptr(n, i)); - } - free(n); - } -} - -void -zix_patree_free(ZixPatree* t) -{ - zix_patree_free_rec(t->root); - free(t); -} - -ZIX_PRIVATE inline bool -patree_is_leaf(const ZixPatreeNode* n) -{ - return n->n_children == 0; -} - -ZIX_PRIVATE bool -patree_find_edge(const ZixPatreeNode* n, const uint8_t c, n_edges_t* const index) -{ - *index = zix_trie_find_key(n->children, n->n_children, c); - return *index < n->n_children && n->children[*index] == c; -} - -#ifndef NDEBUG -ZIX_PRIVATE bool -zix_patree_node_check(ZixPatreeNode* const n) -{ - for (n_edges_t i = 0; i < n->n_children; ++i) { - assert(n->children[i] != '\0'); - assert(*zix_patree_get_child_ptr(n, i) != NULL); - } - return true; -} -#endif - -ZIX_PRIVATE void -patree_add_edge(ZixPatreeNode** n_ptr, - const uint8_t* str, - const uint8_t* first, - const uint8_t* last) -{ - ZixPatreeNode* n = *n_ptr; - - assert(last >= first); - - ZixPatreeNode* const child = realloc_node(NULL, 0); - child->label_first = first; - child->label_last = last; - child->str = str; - child->n_children = 0; - - n = realloc_node(n, n->n_children + 1u); -#ifdef ZIX_TRIE_LINEAR_SEARCH - memmove(n->children + zix_patree_pad(n->n_children + 1), - n->children + zix_patree_pad(n->n_children), - n->n_children * sizeof(ZixPatreeNode*)); - ++n->n_children; - n->children[n->n_children - 1] = first[0]; - *zix_patree_get_child_ptr(n, n->n_children - 1) = child; -#else - const n_edges_t l = zix_trie_find_key(n->children, n->n_children, first[0]); - ZixPatreeNode* m = (ZixPatreeNode*)malloc(zix_patree_node_size(n->n_children + 1)); - m->label_first = n->label_first; - m->label_last = n->label_last; - m->str = n->str; - m->n_children = n->n_children + 1; - - memcpy(m->children, n->children, l); - m->children[l] = first[0]; - memcpy(m->children + l + 1, n->children + l, n->n_children - l); - - memcpy(zix_patree_get_child_ptr(m, 0), - zix_patree_get_child_ptr(n, 0), - l * sizeof(ZixPatreeNode*)); - *zix_patree_get_child_ptr(m, l) = child; - memcpy(zix_patree_get_child_ptr(m, l + 1), - zix_patree_get_child_ptr(n, l), - (n->n_children - l) * sizeof(ZixPatreeNode*)); - free(n); - n = m; -#endif - - *n_ptr = n; - - assert(zix_patree_node_check(n)); - assert(zix_patree_node_check(child)); -} - -/* Split an outgoing edge (to a child) at the given index */ -ZIX_PRIVATE void -patree_split_edge(ZixPatreeNode** child_ptr, - size_t index) -{ - ZixPatreeNode* child = *child_ptr; - - const size_t grandchild_size = zix_patree_node_size(child->n_children); - - ZixPatreeNode* const grandchild = realloc_node(NULL, child->n_children); - memcpy(grandchild, child, grandchild_size); - grandchild->label_first = child->label_first + index; - - child = realloc_node(child, 1); - child->n_children = 1; - child->children[0] = grandchild->label_first[0]; - *zix_patree_get_child_ptr(child, 0) = grandchild; - child->label_last = child->label_first + (index - 1); - child->str = NULL; - - *child_ptr = child; - - assert(zix_patree_node_check(child)); - assert(zix_patree_node_check(grandchild)); -} - -ZIX_PRIVATE ZixStatus -patree_insert_internal(ZixPatreeNode** n_ptr, - const uint8_t* str, - const uint8_t* first, - const size_t len) -{ - ZixPatreeNode* n = *n_ptr; - n_edges_t child_i; - if (patree_find_edge(n, *first, &child_i)) { - ZixPatreeNode** child_ptr = zix_patree_get_child_ptr(n, child_i); - ZixPatreeNode* child = *child_ptr; - const uint8_t* const label_first = child->label_first; - const uint8_t* const label_last = child->label_last; - const size_t label_len = label_last - label_first + 1; - const size_t split_i = zix_trie_change_index(first, label_first, label_len); - - if (first[split_i]) { - if (split_i == label_len) { - return patree_insert_internal(child_ptr, str, first + label_len, len); - } else { - patree_split_edge(child_ptr, split_i); - return patree_insert_internal(child_ptr, str, first + split_i, len); - } - } else if (label_len != split_i) { - patree_split_edge(child_ptr, split_i); - if (first[split_i]) { - patree_add_edge(child_ptr, str, first + split_i, str + len - 1); - } else { - assert(!(*child_ptr)->str); - (*child_ptr)->str = str; - } - return ZIX_STATUS_SUCCESS; - } else if (label_first[split_i] && !child->str) { - child->str = str; - } else { - return ZIX_STATUS_EXISTS; - } - } else { - patree_add_edge(n_ptr, str, first, str + len - 1); - } - - return ZIX_STATUS_SUCCESS; -} - -ZixStatus -zix_patree_insert(ZixPatree* t, const char* str, size_t len) -{ - assert(strlen(str) == len); - - const uint8_t* ustr = (const uint8_t*)str; - return patree_insert_internal(&t->root, ustr, ustr, len); -} - -ZixStatus -zix_patree_find(const ZixPatree* t, const char* const str, const char** match) -{ - ZixPatreeNode* n = t->root; - n_edges_t child_i; - - const uint8_t* p = (const uint8_t*)str; - - while (patree_find_edge(n, p[0], &child_i)) { - assert(child_i <= n->n_children); - ZixPatreeNode* const child = *zix_patree_get_child_ptr(n, child_i); - - /* Prefix compare search string and label */ - const uint8_t* const l = child->label_first; - const size_t len = child->label_last - l + 1; - const size_t change_index = zix_trie_change_index(p, l, len); - - p += change_index; - - if (!*p) { - /* Reached end of search string */ - if (l + change_index == child->label_last + 1) { - /* Reached end of label string as well, match */ - *match = (const char*)child->str; - return *match ? ZIX_STATUS_SUCCESS : ZIX_STATUS_NOT_FOUND; - } else { - /* Label string is longer, no match (prefix match) */ - return ZIX_STATUS_NOT_FOUND; - } - } else { - /* Didn't reach end of search string */ - if (patree_is_leaf(n)) { - /* Hit leaf early, no match */ - return ZIX_STATUS_NOT_FOUND; - } else { - /* Match at this node, continue search downwards (or match) */ - n = child; - } - } - } - - return ZIX_STATUS_NOT_FOUND; -} diff --git a/zix/patree.h b/zix/patree.h deleted file mode 100644 index 406fbf0..0000000 --- a/zix/patree.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright 2011-2014 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#ifndef ZIX_PATREE_H -#define ZIX_PATREE_H - -#include "zix/common.h" - -#include <stdio.h> - -/** - @addtogroup zix - @{ - @name Patree - @{ -*/ - -typedef struct _ZixPatree ZixPatree; - -/** - Construct a new Patree. -*/ -ZIX_API ZixPatree* -zix_patree_new(void); - -/** - Destroy `t`. -*/ -ZIX_API void -zix_patree_free(ZixPatree* t); - -/** - Print a DOT description of `t` to `fd`. -*/ -ZIX_API void -zix_patree_print_dot(const ZixPatree* t, FILE* fd); - -/** - Insert `str` into `t`. -*/ -ZIX_API ZixStatus -zix_patree_insert(ZixPatree* t, const char* str, size_t len); - -/** - Search for `str` in `t`. -*/ -ZIX_API ZixStatus -zix_patree_find(const ZixPatree* t, const char* str, const char** match); - -/** - @} - @} -*/ - -#endif /* ZIX_PATREE_H */ diff --git a/zix/trie.c b/zix/trie.c deleted file mode 100644 index 1a2d3f6..0000000 --- a/zix/trie.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - Copyright 2011-2016 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#define _XOPEN_SOURCE 500 - -#include "zix/trie.h" - -#include "zix/common.h" -#include "zix/trie_util.h" - -#include <assert.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -// #define ZIX_TRIE_LINEAR_SEARCH 1 - -//typedef uint_fast8_t n_edges_t; -typedef uintptr_t n_edges_t; - -typedef struct { - const uint8_t* str; /* String stored at this node */ - n_edges_t num_children; /* Number of outgoing edges */ - - /** Array of first child characters, followed by parallel array of pointers - to ZixTrieNode pointers. */ - uint8_t children[]; -} ZixTrieNode; - -struct _ZixTrie { - ZixTrieNode* root; /* Root of the tree */ -}; - -/** Round `size` up to the next multiple of the word size. */ -ZIX_PRIVATE n_edges_t -zix_trie_pad(n_edges_t size) -{ - return (size + sizeof(void*) - 1) & (~(sizeof(void*) - 1)); -} - -ZIX_PRIVATE ZixTrieNode** -zix_trie_get_children(ZixTrieNode* node) -{ - return (ZixTrieNode**)(node->children + - zix_trie_pad(node->num_children)); -} - -ZIX_PRIVATE ZixTrieNode** -zix_trie_get_child_ptr(ZixTrieNode* node, n_edges_t index) -{ - return zix_trie_get_children(node) + index; -} - -ZIX_PRIVATE void -zix_trie_print_rec(ZixTrieNode* node, FILE* fd) -{ - if (node->str) { - fprintf(fd, "\t\"%p\" [label=\"%s\"] ;\n", (void*)node, node->str); - } else { - fprintf(fd, "\t\"%p\" [label=\"\"] ;\n", (void*)node); - } - - for (n_edges_t i = 0; i < node->num_children; ++i) { - ZixTrieNode* const child = *zix_trie_get_child_ptr(node, i); - zix_trie_print_rec(child, fd); - fprintf(fd, "\t\"%p\" -> \"%p\" [ label = \"%c\" ];\n", - (void*)node, (void*)child, node->children[i]); - } -} - -void -zix_trie_print_dot(const ZixTrie* t, FILE* fd) -{ - fprintf(fd, "digraph Trie { \n"); - zix_trie_print_rec(t->root, fd); - fprintf(fd, "}\n"); -} - -ZIX_PRIVATE size_t -zix_trie_node_size(n_edges_t num_children) -{ - return (sizeof(ZixTrieNode) + - zix_trie_pad(num_children) + - (num_children * sizeof(ZixTrieNode*))); -} - -ZIX_PRIVATE ZixTrieNode* -realloc_node(ZixTrieNode* n, n_edges_t num_children) -{ - return (ZixTrieNode*)realloc(n, zix_trie_node_size(num_children)); -} - -ZixTrie* -zix_trie_new(void) -{ - ZixTrie* t = (ZixTrie*)calloc(1, sizeof(ZixTrie)); - - t->root = (ZixTrieNode*)calloc(1, sizeof(ZixTrieNode)); - - return t; -} - -ZIX_PRIVATE void -zix_trie_free_rec(ZixTrieNode* n) -{ - if (n) { - for (n_edges_t i = 0; i < n->num_children; ++i) { - zix_trie_free_rec(*zix_trie_get_child_ptr(n, i)); - } - free(n); - } -} - -void -zix_trie_free(ZixTrie* t) -{ - zix_trie_free_rec(t->root); - free(t); -} - -ZIX_PRIVATE inline bool -trie_is_leaf(const ZixTrieNode* n) -{ - return n->num_children == 0; -} - -ZIX_PRIVATE bool -trie_find_edge(const ZixTrieNode* n, const uint8_t c, n_edges_t* const index) -{ - *index = zix_trie_find_key(n->children, n->num_children, c); - return *index < n->num_children && n->children[*index] == c; -} - -#ifndef NDEBUG -ZIX_PRIVATE bool -zix_trie_node_check(ZixTrieNode* const n) -{ - for (n_edges_t i = 0; i < n->num_children; ++i) { - assert(n->children[i] != '\0'); - assert(*zix_trie_get_child_ptr(n, i) != NULL); - } - return true; -} -#endif - -ZIX_PRIVATE size_t -zix_trie_add_child(ZixTrieNode** n_ptr, - ZixTrieNode* child, - const uint8_t first) -{ - ZixTrieNode* n = *n_ptr; - - n = realloc_node(n, n->num_children + 1); -#ifdef ZIX_TRIE_LINEAR_SEARCH - const n_edges_t l = n->num_children; - memmove(n->children + zix_trie_pad(n->num_children + 1), - n->children + zix_trie_pad(n->num_children), - n->num_children * sizeof(ZixTrieNode*)); - ++n->num_children; - n->children[n->num_children - 1] = first; - *zix_trie_get_child_ptr(n, n->num_children - 1) = child; -#else - const n_edges_t l = zix_trie_find_key(n->children, n->num_children, first); - ZixTrieNode* m = (ZixTrieNode*)malloc(zix_trie_node_size(n->num_children + 1)); - m->str = n->str; - m->num_children = n->num_children + 1; - - memcpy(m->children, n->children, l); - m->children[l] = first; - memcpy(m->children + l + 1, n->children + l, n->num_children - l); - - memcpy(zix_trie_get_child_ptr(m, 0), - zix_trie_get_child_ptr(n, 0), - l * sizeof(ZixTrieNode*)); - *zix_trie_get_child_ptr(m, l) = child; - memcpy(zix_trie_get_child_ptr(m, l + 1), - zix_trie_get_child_ptr(n, l), - (n->num_children - l) * sizeof(ZixTrieNode*)); - free(n); - n = m; -#endif - - *n_ptr = n; - - assert(zix_trie_node_check(n)); - //assert(zix_trie_node_check(child)); - - return l; -} - -ZIX_PRIVATE ZixTrieNode** -trie_insert_tail(ZixTrieNode** n_ptr, - const uint8_t* str, - const uint8_t* first, - const size_t ZIX_UNUSED(len)) -{ - assert(first[0]); - ZixTrieNode* c = NULL; - while (first[0]) { - assert(zix_trie_node_check(*n_ptr)); - - c = realloc_node(NULL, 1); - if (!c) { - return NULL; - } - - c->str = NULL; - c->num_children = 0; - - const size_t l = zix_trie_add_child(n_ptr, c, first[0]); - n_ptr = zix_trie_get_child_ptr(*n_ptr, l); - assert(*n_ptr == c); - - ++first; - } - - if (!c) { - return NULL; - } - - c->num_children = 0; - c->str = str; - assert(zix_trie_node_check(c)); - assert(*n_ptr == c); - return n_ptr; -} - -ZIX_PRIVATE ZixStatus -trie_insert_internal(ZixTrieNode** n_ptr, - const uint8_t* str, - const uint8_t* first, - const size_t len) -{ - assert(zix_trie_node_check(*n_ptr)); - ZixTrieNode* n = *n_ptr; - n_edges_t child_i; - while (trie_find_edge(n, first[0], &child_i)) { - assert(child_i <= n->num_children); - ZixTrieNode** const child_ptr = zix_trie_get_child_ptr(n, child_i); - ZixTrieNode* const child = *child_ptr; - assert(child_ptr); - - assert(zix_trie_node_check(child)); - - if (!first[0]) { - /* Reached end of search string */ - if (child->str) { - /* Found exact string in trie. */ - return ZIX_STATUS_EXISTS; - } else { - /* Set node value to inserted string */ - child->str = str; - return ZIX_STATUS_SUCCESS; - } - } else { - /* Didn't reach end of search string */ - if (trie_is_leaf(n)) { - /* Hit leaf early, insert underneath */ - ZixTrieNode** c = trie_insert_tail(n_ptr, str, first, len); - assert(zix_trie_node_check(*n_ptr)); - assert(zix_trie_node_check(*c)); - (void)c; - return ZIX_STATUS_SUCCESS; - } else { - /* Match at this node, continue search downwards (or match) */ - n_ptr = child_ptr; - assert(*n_ptr); - n = *n_ptr; - assert(n == child); - ++first; - } - } - } - - if (!first[0]) { - if ((*n_ptr)->str) { - return ZIX_STATUS_EXISTS; - } else { - (*n_ptr)->str = str; - return ZIX_STATUS_SUCCESS; - } - } - - /* Hit leaf early, insert underneath */ - assert(zix_trie_node_check(*n_ptr)); - ZixTrieNode** c = trie_insert_tail(n_ptr, str, first, len); - assert(zix_trie_node_check(*n_ptr)); - assert(zix_trie_node_check(*c)); - (void)c; - return ZIX_STATUS_SUCCESS; -} - -ZixStatus -zix_trie_insert(ZixTrie* t, const char* str, size_t len) -{ - assert(strlen(str) == len); - - const uint8_t* ustr = (const uint8_t*)str; - return trie_insert_internal(&t->root, ustr, ustr, len); -} - -ZixStatus -zix_trie_find(const ZixTrie* t, const char* const str, const char** match) -{ - ZixTrieNode* n = t->root; - const char* p = str; - n_edges_t child_i; - - while (trie_find_edge(n, p[0], &child_i)) { - assert(child_i <= n->num_children); - ZixTrieNode* const child = *zix_trie_get_child_ptr(n, child_i); - - ++p; - - if (!*p) { - /* Reached end of search string */ - if (child->str) { - /* Found exact string in trie. */ - *match = (const char*)child->str; - return ZIX_STATUS_SUCCESS; - } else { - /* Prefix match, but exact string not present. */ - return ZIX_STATUS_NOT_FOUND; - } - } else { - /* Didn't reach end of search string */ - if (trie_is_leaf(n)) { - /* Hit leaf early, no match */ - return ZIX_STATUS_NOT_FOUND; - } else { - /* Match at this node, continue search downwards (or match) */ - n = child; - } - } - } - - return ZIX_STATUS_NOT_FOUND; -} diff --git a/zix/trie.h b/zix/trie.h deleted file mode 100644 index 5e38df6..0000000 --- a/zix/trie.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright 2011-2016 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#ifndef ZIX_TRIE_H -#define ZIX_TRIE_H - -#include "zix/common.h" - -#include <stdio.h> - -/** - @addtogroup zix - @{ - @name Trie - @{ -*/ - -typedef struct _ZixTrie ZixTrie; - -/** - Construct a new Trie. -*/ -ZIX_API ZixTrie* -zix_trie_new(void); - -/** - Destroy `t`. -*/ -ZIX_API void -zix_trie_free(ZixTrie* t); - -/** - Print a DOT description of `t` to `fd`. -*/ -ZIX_API void -zix_trie_print_dot(const ZixTrie* t, FILE* fd); - -/** - Insert `str` into `t`. -*/ -ZIX_API ZixStatus -zix_trie_insert(ZixTrie* t, const char* str, size_t len); - -/** - Search for `str` in `t`. -*/ -ZIX_API ZixStatus -zix_trie_find(const ZixTrie* t, const char* str, const char** match); - -/** - @} - @} -*/ - -#endif /* ZIX_TRIE_H */ diff --git a/zix/trie_util.h b/zix/trie_util.h deleted file mode 100644 index 6ac8b35..0000000 --- a/zix/trie_util.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - Copyright 2014 David Robillard <http://drobilla.net> - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ - -#ifndef ZIX_TRIE_UTIL_H -#define ZIX_TRIE_UTIL_H - -#include "zix/common.h" - -#ifdef __SSE4_2__ -# include <smmintrin.h> -#endif - -#include <assert.h> -#include <stddef.h> -#include <stdint.h> - -#ifdef ZIX_TRIE_LINEAR_SEARCH - -/** Return the index of `k` in `keys`, or `n` if not found. */ -ZIX_PRIVATE size_t -zix_trie_find_key(const uint8_t* const keys, const size_t n, const uint8_t k) -{ - for (size_t i = 0; i < n; ++i) { - if (keys[i] == k) { - assert(keys[i] >= k); - return i; - } - } - return n; -} - -#else - -/** Return the index of the first element in `keys` >= `k`, or `n`. */ -static inline ZIX_PURE_FUNC size_t -zix_trie_find_key(const uint8_t* const keys, const size_t n, const uint8_t k) -{ - size_t first = 0; - size_t len = n; - while (len > 0) { - const size_t half = len >> 1u; - const size_t i = first + half; - if (keys[i] < k) { - const size_t chop = half + 1; - first += chop; - len -= chop; - } else { - len = half; - } - } - assert(first == n || keys[first] >= k); - return first; -} - -#endif - -#if !defined (__SSE4_2__) || !defined(NDEBUG) - -ZIX_PRIVATE size_t -zix_trie_change_index_c(const uint8_t* a, const uint8_t* b, size_t len) -{ - for (size_t i = 0; i < len; ++i) { - if (a[i] != b[i]) { - return i; - } - } - return len; -} - -#endif - -#ifdef __SSE4_2__ - -ZIX_PRIVATE size_t -zix_trie_change_index_sse(const uint8_t* a, const uint8_t* b, const size_t len) -{ - const size_t veclen = len & ~(sizeof(__m128i) - 1); - size_t i = 0; - for (; i < veclen; i += sizeof(__m128i)) { - const __m128i r = _mm_loadu_si128((const __m128i*)(a + i)); - const __m128i s = _mm_loadu_si128((const __m128i*)(b + i)); - const int index = _mm_cmpistri( - r, s, _SIDD_SBYTE_OPS|_SIDD_CMP_EQUAL_EACH|_SIDD_NEGATIVE_POLARITY); - - if (index != sizeof(__m128i)) { - assert(i + index == change_index_c(a, b, len)); - return i + index; - } - } - - const __m128i r = _mm_loadu_si128((const __m128i*)(a + i)); - const __m128i s = _mm_loadu_si128((const __m128i*)(b + i)); - const size_t l = len - i; - const int index = _mm_cmpestri( - r, l, s, l, - _SIDD_SBYTE_OPS|_SIDD_CMP_EQUAL_EACH|_SIDD_MASKED_NEGATIVE_POLARITY); - - assert(i + index == change_index_c(a, b, len)); - return i + index; -} - -#endif - -static inline size_t -zix_trie_change_index(const uint8_t* a, const uint8_t* b, const size_t len) -{ -#ifdef __SSE4_2__ - return zix_trie_change_index_sse(a, b, len); -#else - return zix_trie_change_index_c(a, b, len); -#endif -} - -#endif /* ZIX_TRIE_UTIL_H */ |