From 2b6c9941272b53c978d418c63669e52e39d9fe4a Mon Sep 17 00:00:00 2001 From: David Robillard Date: Mon, 19 Sep 2011 03:20:05 +0000 Subject: Add ZixPatree. git-svn-id: http://svn.drobilla.net/zix/trunk@21 df6676b4-ccc9-40e5-b5d6-7c4628a128e3 --- plot.py | 29 ++++-- src/patree.c | 274 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/strindex.c | 9 +- test/patree_bench.c | 141 +++++++++++++++++++++++++++ test/patree_test.c | 100 +++++++++++++++++++ wscript | 15 ++- zix/patree.h | 71 ++++++++++++++ 7 files changed, 622 insertions(+), 17 deletions(-) create mode 100644 src/patree.c create mode 100644 test/patree_bench.c create mode 100644 test/patree_test.c create mode 100644 zix/patree.h diff --git a/plot.py b/plot.py index 52e614d..1534bc4 100755 --- a/plot.py +++ b/plot.py @@ -14,13 +14,18 @@ for i in range(n_plots): filename = sys.argv[i+1] file = open(filename, 'r') - math.ceil + header = file.readline() + columns = header[1:].split() + pyplot.subplot(math.ceil(math.sqrt(n_plots)), math.ceil(math.sqrt(n_plots)), i + 1) pyplot.xlabel('# Elements') pyplot.ylabel('Time (s)') + times = [] + for i in columns: + times.append([]) ns = [] zix_tree_times = [] zix_sorted_array_times = [] @@ -28,16 +33,22 @@ for i in range(n_plots): for line in file: if line[0] == '#': continue; - (n, zix_tree, zix_sorted_array, glib) = line.split() - ns.append(int(n)) - zix_tree_times.append(float(zix_tree)) - zix_sorted_array_times.append(float(zix_sorted_array)) - glib_times.append(float(glib)) + #(n, zix_tree, zix_sorted_array, glib) = line.split() + fields = line.split() + num = 0 + for i in fields: + times[num].append([float(i)]) + num += 1 + + #ns.append(int(n)) + #zix_tree_times.append(float(zix_tree)) + #zix_sorted_array_times.append(float(zix_sorted_array)) + #glib_times.append(float(glib)) file.close() - matplotlib.pyplot.plot(ns, zix_tree_times, '-o', label='ZixTree') - matplotlib.pyplot.plot(ns, zix_sorted_array_times, '-o', label='ZixSortedArray') - matplotlib.pyplot.plot(ns, glib_times, '-x', label='GSequence') + for i in range(len(times) - 1): + matplotlib.pyplot.plot(times[0], times[i + 1], '-o', label=columns[i + 1]) + pyplot.legend(loc='upper left') pyplot.title(os.path.splitext(os.path.basename(filename))[0].title()) diff --git a/src/patree.c b/src/patree.c new file mode 100644 index 0000000..2ca0613 --- /dev/null +++ b/src/patree.c @@ -0,0 +1,274 @@ +/* + Copyright 2011 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#define _XOPEN_SOURCE 500 + +#include +#include +#include +#include +#include +#include + +#include "zix/common.h" +#include "zix/patree.h" + +typedef uint_fast8_t n_edges_t; + +typedef struct _ZixPatreeNode { + char* label_first; /* Start of incoming label */ + char* label_last; /* End of incoming label */ + char* str; /* String stored at this node */ + struct _ZixPatreeNode* children; /* Children of this node */ + n_edges_t num_children; /* Number of outgoing edges */ +} ZixPatreeNode; + +struct _ZixPatree { + ZixPatreeNode* root; /* Root of the tree */ +}; + +static void +zix_patree_print_rec(ZixPatreeNode* node, FILE* fd) +{ + if (node->label_first) { + size_t edge_label_len = node->label_last - node->label_first + 1; + char* edge_label = malloc(edge_label_len + 1); + strncpy(edge_label, node->label_first, edge_label_len); + fprintf(fd, "\t\"%p\" [label=<" + "" + "", (void*)node, edge_label); + if (node->str) { + fprintf(fd, "", node->str); + } + fprintf(fd, "
%s
\"%s\"
>,shape=\"plaintext\"] ;\n"); + } else { + fprintf(fd, "\t\"%p\" [label=\"\"] ;\n", (void*)node); + } + + + for (n_edges_t i = 0; i < node->num_children; ++i) { + ZixPatreeNode* const child = &node->children[i]; + zix_patree_print_rec(child, fd); + fprintf(fd, "\t\"%p\" -> \"%p\" ;\n", (void*)node, (void*)child); + } +} + +ZIX_API +void +zix_patree_print_dot(const ZixPatree* t, FILE* fd) +{ + fprintf(fd, "digraph Patree { \n"); + zix_patree_print_rec(t->root, fd); + fprintf(fd, "}\n"); +} + +ZIX_API +ZixPatree* +zix_patree_new(void) +{ + ZixPatree* t = (ZixPatree*)malloc(sizeof(ZixPatree)); + memset(t, '\0', sizeof(struct _ZixPatree)); + + t->root = (ZixPatreeNode*)malloc(sizeof(ZixPatreeNode)); + memset(t->root, '\0', sizeof(ZixPatreeNode)); + + return t; +} + +static void +zix_patree_free_rec(ZixPatreeNode* n) +{ + if (n) { + for (n_edges_t i = 0; i < n->num_children; ++i) { + zix_patree_free_rec(&n->children[i]); + } + free(n->children); + } +} + +ZIX_API +void +zix_patree_free(ZixPatree* t) +{ + zix_patree_free_rec(t->root); + free(t->root); + free(t); +} + +static inline bool +patree_is_leaf(ZixPatreeNode* n) +{ + return n->num_children == 0; +} + +static bool +patree_find_edge(ZixPatreeNode* n, char c, n_edges_t* index) +{ + for (n_edges_t i = 0; i < n->num_children; ++i) { + if (n->children[i].label_first[0] == c) { + *index = i; + return true; + } + } + return false; +} + +static void +patree_add_edge(ZixPatreeNode* n, + char* str, + char* first, + char* last) +{ + assert(last >= first); + n->children = realloc(n->children, + (n->num_children + 1) * sizeof(ZixPatreeNode)); + + n->children[n->num_children].label_first = first; + n->children[n->num_children].label_last = last; + n->children[n->num_children].str = str; + n->children[n->num_children].children = NULL; + n->children[n->num_children].num_children = 0; + ++n->num_children; +} + +/* Split an outgoing edge (to a child) at the given index */ +static void +patree_split_edge(ZixPatreeNode* child, + size_t index) +{ + ZixPatreeNode* const children = child->children; + const n_edges_t num_children = child->num_children; + + char* const first = child->label_first + index; + char* const last = child->label_last; + assert(last >= first); + + child->children = malloc(sizeof(ZixPatreeNode)); + child->children[0].children = children; + child->children[0].num_children = num_children; + child->children[0].str = child->str; + child->children[0].label_first = first; + child->children[0].label_last = last; + + child->label_last = child->label_first + (index - 1); // chop end + + child->num_children = 1; + + child->str = NULL; +} + +static ZixStatus +patree_insert_internal(ZixPatreeNode* n, char* str, char* first, char* last) +{ + n_edges_t child_i; + ZixPatreeNode* child; + assert(first <= last); + + if (patree_find_edge(n, *first, &child_i)) { + ZixPatreeNode* child = &n->children[child_i]; + char* const label_first = child->label_first; + char* const label_last = child->label_last; + size_t split_i = 0; + const size_t label_len = label_last - label_first + 1; + const size_t s_len = last - first + 1; + const size_t max_len = (s_len < label_len) ? s_len : label_len; + + while (split_i < max_len && first[split_i] == label_first[split_i]) { + ++split_i; + } + child = n->children + child_i; + + if (label_len < s_len) { + if (split_i == label_len) { + return patree_insert_internal(child, str, first + label_len, last); + } else { + patree_split_edge(child, split_i); + return patree_insert_internal(child, str, first + split_i, last); + } + } else if (label_len != split_i && split_i != s_len) { + patree_split_edge(child, split_i); + patree_add_edge(child, str, first + split_i, last); + return ZIX_STATUS_SUCCESS; + } else { + return ZIX_STATUS_EXISTS; + } + } else { + patree_add_edge(n, str, first, last); + } + + return ZIX_STATUS_SUCCESS; +} + +ZIX_API +ZixStatus +zix_patree_insert(ZixPatree* t, const char* str) +{ + const size_t len = strlen(str); + // FIXME: awful casts + return patree_insert_internal(t->root, (char*)str, (char*)str, (char*)str + len - 1); +} + +ZIX_API +ZixStatus +zix_patree_find(ZixPatree* t, const char* p, char** match) +{ + ZixPatreeNode* n = t->root; + n_edges_t child_i; + + *match = NULL; + + while (*p != '\0') { + if (patree_find_edge(n, p[0], &child_i)) { + assert(child_i <= n->num_children); + ZixPatreeNode* const child = &n->children[child_i]; + const char* const label_first = child->label_first; + const char* const label_last = child->label_last; + + /* Prefix compare search string and label */ + const char* l = label_first; + while (*p != '\0' && l <= label_last) { + if (*l++ != *p++) { + return ZIX_STATUS_NOT_FOUND; + } + } + + if (!*p) { + /* Reached end of search string */ + if (!*l) { + /* Reached end of label string as well, match */ + *match = child->str; + return *match ? ZIX_STATUS_SUCCESS : ZIX_STATUS_NOT_FOUND; + } else { + /* Label string is longer, no match (prefix match) */ + return ZIX_STATUS_NOT_FOUND; + } + } else { + /* Didn't reach end of search string */ + if (patree_is_leaf(n)) { + /* Hit leaf early, no match */ + return ZIX_STATUS_NOT_FOUND; + } else { + /* Match at this node, continue search downwards (or match) */ + n = child; + } + } + } else { + return ZIX_STATUS_NOT_FOUND; + } + } + + return ZIX_STATUS_NOT_FOUND; +} diff --git a/src/strindex.c b/src/strindex.c index 8df1fea..eae98b2 100644 --- a/src/strindex.c +++ b/src/strindex.c @@ -37,10 +37,11 @@ struct _ZixStrindex { ZixStrindexNode* root; /* Root of the tree */ }; -static ZixStatus strindex_insert(ZixStrindexNode* n, - char* suffix_first, - char* first, - char* last); +static ZixStatus +strindex_insert(ZixStrindexNode* n, + char* suffix_first, + char* first, + char* last); ZIX_API ZixStrindex* diff --git a/test/patree_bench.c b/test/patree_bench.c new file mode 100644 index 0000000..559110a --- /dev/null +++ b/test/patree_bench.c @@ -0,0 +1,141 @@ +/* + Copyright 2011 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include "bench.h" + +#include +#include +#include +#include + +#include + +#include "zix/patree.h" + +static int +test_fail(const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); + va_end(args); + return 1; +} + +int +main(int argc, char** argv) +{ + if (argc != 2) { + return test_fail("Usage: %s INPUT_FILE\n", argv[0]); + } + + const char* file = argv[1]; + FILE* fd = fopen(file, "r"); + if (!fd) { + return test_fail("Failed to open file %s\n", file); + } + + /* Read input strings */ + char** strings = NULL; + size_t n_strings = 0; + char* buf = calloc(1, 1); + size_t buf_len = 1; + size_t this_str_len = 0; + for (char c; (c = fgetc(fd)) != EOF;) { + if (c == '\n') { + if (this_str_len == 0) { + continue; + } + strings = realloc(strings, (n_strings + 1) * sizeof(char*)); + strings[n_strings] = malloc(buf_len); + memcpy(strings[n_strings], buf, buf_len); + this_str_len = 0; + ++n_strings; + } else { + ++this_str_len; + if (buf_len < this_str_len + 1) { + buf_len = this_str_len + 1; + buf = realloc(buf, buf_len); + } + buf[this_str_len - 1] = c; + buf[this_str_len] = '\0'; + } + } + + FILE* insert_dat = fopen("insert.dat", "w"); + FILE* search_dat = fopen("search.dat", "w"); + fprintf(insert_dat, "# n\tGHashTable\tZixPatree\n"); + fprintf(search_dat, "# n\tGHashTable\tZixPatree\n"); + + for (size_t n = 1; n <= n_strings; n *= 2) { + ZixPatree* patree = zix_patree_new(); + GHashTable* hash = g_hash_table_new(g_str_hash, g_str_equal); + fprintf(insert_dat, "%zu", n); + fprintf(search_dat, "%zu", n); + + // Benchmark insertion + + // GHashTable + struct timespec insert_start = bench_start(); + for (size_t i = 0; i < n; ++i) { + g_hash_table_insert(hash, strings[i], strings[i]); + } + fprintf(insert_dat, "\t%lf", bench_end(&insert_start)); + + // ZixPatree + insert_start = bench_start(); + for (size_t i = 0; i < n; ++i) { + if (zix_patree_insert(patree, strings[i])) { + return test_fail("Failed to insert `%s'\n", strings[i]); + } + } + fprintf(insert_dat, "\t%lf\n", bench_end(&insert_start)); + + // Benchmark search + + // GHashTable + struct timespec search_start = bench_start(); + for (size_t i = 0; i < n; ++i) { + char* match = g_hash_table_lookup(hash, strings[i]); + if (match != strings[i]) { + return test_fail("Bad match for `%s'\n", strings[i]); + } + } + fprintf(search_dat, "\t%lf", bench_end(&search_start)); + + // ZixPatree + search_start = bench_start(); + for (size_t i = 0; i < n; ++i) { + char* match = NULL; + if (zix_patree_find(patree, strings[i], &match)) { + return test_fail("Failed to find `%s'\n", strings[i]); + } + if (match != strings[i]) { + return test_fail("Bad match for `%s'\n", strings[i]); + } + } + fprintf(search_dat, "\t%lf\n", bench_end(&search_start)); + + zix_patree_free(patree); + g_hash_table_unref(hash); + } + + fclose(insert_dat); + fclose(search_dat); + + return 0; +} diff --git a/test/patree_test.c b/test/patree_test.c new file mode 100644 index 0000000..b1f94fe --- /dev/null +++ b/test/patree_test.c @@ -0,0 +1,100 @@ +/* + Copyright 2011 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include +#include +#include + +#include "zix/patree.h" + +static const char* strings[] = { + "http://example.org/foo", + "http://example.org/bar", + "http://example.org/baz", + "http://example.net/foo", + "http://example.net/bar", + "http://example.net/baz", + "http://drobilla.net/", + "http://drobilla.net/software/zix", +}; + +static int +test_fail(const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); + va_end(args); + return 1; +} + +int +main(int argc, char** argv) +{ + ZixPatree* patree = zix_patree_new(); + + const size_t n_strings = sizeof(strings) / sizeof(char*); + + // Insert each string + for (size_t i = 0; i < n_strings; ++i) { + ZixStatus st = zix_patree_insert(patree, strings[i]); + if (st) { + return test_fail("Failed to insert `%s'\n", strings[i]); + } + } + + //zix_patree_print_dot(patree, stdout); + + // Attempt to insert each string again + for (size_t i = 0; i < n_strings; ++i) { + ZixStatus st = zix_patree_insert(patree, strings[i]); + if (st != ZIX_STATUS_EXISTS) { + return test_fail("Double inserted `%s'\n", strings[i]); + } + } + + // Search for each string + for (size_t i = 0; i < n_strings; ++i) { + char* match = NULL; + ZixStatus st = zix_patree_find(patree, strings[i], &match); + if (st) { + return test_fail("Failed to find `%s'\n", strings[i]); + } + if (match != strings[i]) { + return test_fail("Bad match for `%s'\n", strings[i]); + } + } + + // Try some false matches + const char* not_indexed[] = { + "ftp://example.org/not-there-at-all", + "http://example.org/foobar", + "http://" + }; + const size_t n_not_indexed = sizeof(not_indexed) / sizeof(char*); + for (size_t i = 0; i < n_not_indexed; ++i) { + char* match = NULL; + ZixStatus st = zix_patree_find(patree, not_indexed[i], &match); + if (st != ZIX_STATUS_NOT_FOUND) { + return test_fail("Unexpectedly found `%s'\n", not_indexed[i]); + } + } + + zix_patree_free(patree); + + return 0; +} diff --git a/wscript b/wscript index 6e58123..d2d0d85 100644 --- a/wscript +++ b/wscript @@ -62,7 +62,13 @@ def configure(conf): autowaf.display_msg(conf, "Benchmarks", str(conf.env['BUILD_BENCHx'])) print('') -tests = ['ring_test', 'sorted_array_test', 'strindex_test', 'tree_test'] +tests = [ + 'patree_test', + 'ring_test', + 'sorted_array_test', + 'strindex_test', + 'tree_test' +] def build(bld): # C Headers @@ -72,10 +78,11 @@ def build(bld): autowaf.build_pc(bld, 'ZIX', ZIX_VERSION, []) lib_source = ''' + src/patree.c src/ring.c - src/tree.c - src/strindex.c src/sorted_array.c + src/strindex.c + src/tree.c ''' # Library @@ -112,7 +119,7 @@ def build(bld): if bld.env['BUILD_BENCH']: # Benchmark programs - for i in ['tree_bench']: + for i in ['tree_bench', 'patree_bench']: obj = bld(features = 'c cprogram') obj.source = 'test/%s.c' % i obj.includes = ['.'] diff --git a/zix/patree.h b/zix/patree.h new file mode 100644 index 0000000..2a5f6cf --- /dev/null +++ b/zix/patree.h @@ -0,0 +1,71 @@ +/* + Copyright 2011 David Robillard + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#ifndef ZIX_PATREE_H +#define ZIX_PATREE_H + +#include "zix/common.h" + +/** + @addtogroup zix + @{ + @name Patree + @{ +*/ + +typedef struct _ZixPatree ZixPatree; + +/** + Construct a new Patree. +*/ +ZIX_API +ZixPatree* +zix_patree_new(void); + +/** + Destroy @a t. +*/ +ZIX_API +void +zix_patree_free(ZixPatree* t); + +/** + Print a DOT description of @a t to @a fd. +*/ +ZIX_API +void +zix_patree_print_dot(const ZixPatree* t, FILE* fd); + +/** + Insert @a str into @a t. +*/ +ZIX_API +ZixStatus +zix_patree_insert(ZixPatree* t, const char* str); + +/** + Search for @a str in @a t. +*/ +ZIX_API +ZixStatus +zix_patree_find(ZixPatree* t, const char* str, char** match); + +/** + @} + @} +*/ + +#endif /* ZIX_PATREE_H */ -- cgit v1.2.1