summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Robillard <d@drobilla.net>2011-09-18 19:28:15 +0000
committerDavid Robillard <d@drobilla.net>2011-09-18 19:28:15 +0000
commite1808bb467f88679f3fae2bb220043b7cd6ba124 (patch)
treec7681efcdf6219407a675dc1523c28c3debcf16f
parent6651d4dbf74dc4591fbaf5250bfd6f7d58ce0363 (diff)
downloadzix-e1808bb467f88679f3fae2bb220043b7cd6ba124.tar.gz
zix-e1808bb467f88679f3fae2bb220043b7cd6ba124.tar.bz2
zix-e1808bb467f88679f3fae2bb220043b7cd6ba124.zip
Add ZixStrindex.
git-svn-id: http://svn.drobilla.net/zix/trunk@15 df6676b4-ccc9-40e5-b5d6-7c4628a128e3
-rw-r--r--src/strindex.c245
-rw-r--r--test/strindex_test.c59
-rw-r--r--wscript5
-rw-r--r--zix/strindex.h56
4 files changed, 363 insertions, 2 deletions
diff --git a/src/strindex.c b/src/strindex.c
new file mode 100644
index 0000000..8bb2f7a
--- /dev/null
+++ b/src/strindex.c
@@ -0,0 +1,245 @@
+/*
+ Copyright 2011 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#define _XOPEN_SOURCE 500
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "zix/common.h"
+#include "zix/strindex.h"
+
+typedef struct _ZixStrindexNode {
+ struct _ZixStrindexNode* children; /* Children of this node */
+ size_t num_children; /* Number of outgoing edges */
+ char* first; /* Pointer to start of this suffix */
+ char* label_first; /* First character of incoming label */
+ char* label_last; /* Last character of incoming label */
+} ZixStrindexNode;
+
+struct _ZixStrindex {
+ char* s; /* String contained in this tree */
+ ZixStrindexNode* root; /* Root of the tree */
+};
+
+static ZixStatus strindex_insert(ZixStrindexNode* n,
+ char* suffix_first,
+ char* first,
+ char* last);
+
+ZIX_API
+ZixStrindex*
+zix_strindex_new(const char* s)
+{
+ ZixStrindex* t = (ZixStrindex*)malloc(sizeof(ZixStrindex));
+ memset(t, '\0', sizeof(struct _ZixStrindex));
+ t->s = strdup(s);
+ t->root = (ZixStrindexNode*)malloc(sizeof(ZixStrindexNode));
+ memset(t->root, '\0', sizeof(ZixStrindexNode));
+ t->root->num_children = 0;
+ t->root->first = t->s;
+
+ const size_t len = strlen(t->s);
+ for (size_t i = 0; i < len; ++i) {
+ strindex_insert(t->root, t->s + i, t->s + i, t->s + (len - 1));
+ }
+
+ return t;
+}
+
+static void
+zix_strindex_free_rec(ZixStrindexNode* n)
+{
+ if (n) {
+ for (size_t i = 0; i < n->num_children; ++i) {
+ zix_strindex_free_rec(&n->children[i]);
+ }
+ free(n->children);
+ }
+}
+
+ZIX_API
+void
+zix_strindex_free(ZixStrindex* t)
+{
+ zix_strindex_free_rec(t->root);
+ free(t->s);
+ free(t->root);
+ free(t);
+}
+
+static inline int
+strindex_is_leaf(ZixStrindexNode* n)
+{
+ return n->num_children == 0;
+}
+
+static int
+strindex_find_edge(ZixStrindexNode* n, char c, size_t* index)
+{
+ for (size_t i = 0; i < n->num_children; ++i) {
+ if (n->children[i].label_first[0] == c) {
+ *index = i;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static void
+strindex_add_edge(ZixStrindexNode* n, char* suffix_first, char* first, char* last)
+{
+ assert(last > first);
+ n->children = realloc(n->children, (n->num_children + 1) * sizeof(ZixStrindexNode));
+
+ memset(&n->children[n->num_children], '\0', sizeof(ZixStrindexNode));
+
+ n->children[n->num_children].first = suffix_first;
+ n->children[n->num_children].label_first = first;
+ n->children[n->num_children].label_last = last;
+ ++n->num_children;
+}
+
+/* Split an outgoing edge (to a child) at the given index */
+static void
+strindex_split_edge(ZixStrindexNode* child,
+ size_t index)
+{
+ ZixStrindexNode* children = child->children;
+ size_t num_children = child->num_children;
+
+ char* first = child->label_first + index;
+ char* last = child->label_last;
+ assert(last > first);
+ assert(child->first);
+
+ child->children = malloc(sizeof(ZixStrindexNode));
+ child->children[0].children = children;
+ child->children[0].num_children = num_children;
+ child->children[0].first = child->first;
+ child->children[0].label_first = first;
+ child->children[0].label_last = last;
+
+ child->label_last = child->label_first + (index - 1); // chop end
+
+ child->num_children = 1;
+}
+
+ZIX_API
+ZixStatus
+strindex_insert(ZixStrindexNode* n, char* suffix_first, char* first, char* last)
+{
+ size_t child_i;
+ ZixStrindexNode* child;
+ assert(first <= last);
+
+ if (strindex_find_edge(n, *first, &child_i)) {
+ char* label_first = n->children[child_i].label_first;
+ char* label_last = n->children[child_i].label_last;
+ size_t split_i = 0;
+ const size_t label_len = label_last - label_first + 1;
+ const size_t s_len = last - first + 1;
+ const size_t max_len = (s_len < label_len) ? s_len : label_len;
+
+ while (split_i < max_len && first[split_i] == label_first[split_i]) {
+ ++split_i;
+ }
+ child = n->children + child_i;
+
+ if (label_len < s_len) {
+ if (split_i == label_len) {
+ strindex_insert(child, suffix_first, first + label_len, last);
+ } else {
+ strindex_split_edge(child, split_i);
+ strindex_insert(child, suffix_first, first + split_i, last);
+ }
+ } else if ((label_len != split_i) && (label_len != s_len) ) {
+ strindex_split_edge(child, split_i);
+ if (split_i != s_len) {
+ strindex_add_edge(child, suffix_first, first + split_i, last);
+ }
+ }
+ } else {
+ strindex_add_edge(n, suffix_first, first, last);
+ }
+
+ return ZIX_STATUS_SUCCESS;
+}
+
+ZIX_API
+ZixStatus
+zix_strindex_find(ZixStrindex* t, const char* p, char** match)
+{
+ ZixStrindexNode* n = t->root;
+ const char* orig_p = p;
+ size_t child_i;
+
+ *match = NULL;
+
+ while (*p != '\0') {
+ size_t p_len = strlen(p);
+ if (strindex_find_edge(n, p[0], &child_i)) {
+ char* label_first = n->children[child_i].label_first;
+ char* label_last = n->children[child_i].label_last;
+ size_t label_len = label_last - label_first + 1;
+ size_t max_len = (p_len < label_len) ? p_len : label_len;
+ assert(child_i <= n->num_children);
+ assert(max_len > 0);
+
+ /* Set match to the start of substring
+ (but may not actually be a complete match yet)
+ */
+ if (*match == NULL) {
+ assert(p[0] == orig_p[0]);
+ assert(orig_p[0] == label_first[0]);
+ *match = n->children[child_i].first;
+ assert((*match)[0] == orig_p[0]);
+ }
+
+ if (strncmp(p, label_first, max_len)) {
+ /* no match */
+ *match = NULL;
+ return ZIX_STATUS_NOT_FOUND;
+ }
+
+ if (p_len <= label_len) {
+ /* At the last node, match */
+ *match = n->children[child_i].first;
+ assert(!strncmp(*match, orig_p, strlen(orig_p)));
+ return ZIX_STATUS_SUCCESS;
+ } else if (strindex_is_leaf(n)) {
+ /* Hit leaf early, no match */
+ return ZIX_STATUS_NOT_FOUND;
+ } else {
+ /* Match at this node, continue search downwards (or match) */
+ p += label_len;
+ n = &n->children[child_i];
+ if (label_len >= p_len) {
+ assert(strstr(t->s, orig_p) != NULL);
+ assert(strncmp(orig_p, *match, max_len));
+ return ZIX_STATUS_SUCCESS;
+ }
+ }
+
+ } else {
+ assert(strstr(t->s, orig_p) == NULL);
+ return ZIX_STATUS_NOT_FOUND;
+ }
+ }
+ return ZIX_STATUS_NOT_FOUND;
+}
diff --git a/test/strindex_test.c b/test/strindex_test.c
new file mode 100644
index 0000000..91273b8
--- /dev/null
+++ b/test/strindex_test.c
@@ -0,0 +1,59 @@
+/*
+ Copyright 2011 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "zix/strindex.h"
+
+static int
+test_fail(const char* fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ fprintf(stderr, "error: ");
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+ return 1;
+}
+
+int
+main(int argc, char** argv)
+{
+ const char* str = "BANANA";
+ const size_t str_len = strlen(str);
+ ZixStrindex* strindex = zix_strindex_new(str);
+
+ for (size_t l = 1; l <= str_len; ++l) {
+ for (size_t i = 0; i < str_len - l; ++i) {
+ char* match = NULL;
+ ZixStatus ret = zix_strindex_find(strindex, str + i, &match);
+ if (ret) {
+ return test_fail("No match for substring at %zu length %zu\n",
+ i, l);
+ }
+
+ if (strncmp(str + i, match, l)) {
+ return test_fail("Bad match for substring at %zu length %zu\n",
+ i, l);
+ }
+ }
+ }
+
+ zix_strindex_free(strindex);
+ return 0;
+}
diff --git a/wscript b/wscript
index d3cc8dc..6e58123 100644
--- a/wscript
+++ b/wscript
@@ -62,7 +62,7 @@ def configure(conf):
autowaf.display_msg(conf, "Benchmarks", str(conf.env['BUILD_BENCHx']))
print('')
-tests = ['ring_test', 'sorted_array_test', 'tree_test']
+tests = ['ring_test', 'sorted_array_test', 'strindex_test', 'tree_test']
def build(bld):
# C Headers
@@ -73,8 +73,9 @@ def build(bld):
lib_source = '''
src/ring.c
- src/sorted_array.c
src/tree.c
+ src/strindex.c
+ src/sorted_array.c
'''
# Library
diff --git a/zix/strindex.h b/zix/strindex.h
new file mode 100644
index 0000000..e6195d7
--- /dev/null
+++ b/zix/strindex.h
@@ -0,0 +1,56 @@
+/*
+ Copyright 2011 David Robillard <http://drobilla.net>
+
+ Permission to use, copy, modify, and/or distribute this software for any
+ purpose with or without fee is hereby granted, provided that the above
+ copyright notice and this permission notice appear in all copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef ZIX_STRINDEX_H
+#define ZIX_STRINDEX_H
+
+#include "zix/common.h"
+
+/**
+ @addtogroup zix
+ @{
+ @name Strindex
+ @{
+*/
+
+typedef struct _ZixStrindex ZixStrindex;
+
+/**
+ Construct a new strindex that contains all suffixes of the string @a s.
+ A copy of @a s is taken and stored for the lifetime of the strindex.
+*/
+ZixStrindex*
+zix_strindex_new(const char* s);
+
+/**
+ Destroy @a t.
+*/
+void
+zix_strindex_free(ZixStrindex* strindex);
+
+/**
+ Check if the string in @a strindex contains the substring @a str.
+ If such a substring is found, @a match is pointed at an occurrence of it.
+*/
+ZixStatus
+zix_strindex_find(ZixStrindex* strindex, const char* str, char** match);
+
+/**
+ @}
+ @}
+*/
+
+#endif /* ZIX_STRINDEX_H */