diff options
Diffstat (limited to 'zix/strindex.c')
-rw-r--r-- | zix/strindex.c | 316 |
1 files changed, 159 insertions, 157 deletions
diff --git a/zix/strindex.c b/zix/strindex.c index 2fdd04f..8cb05c9 100644 --- a/zix/strindex.c +++ b/zix/strindex.c @@ -26,16 +26,16 @@ #include <string.h> typedef struct _ZixStrindexNode { - struct _ZixStrindexNode* children; /* Children of this node */ - size_t num_children; /* Number of outgoing edges */ - char* first; /* Start of this suffix */ - char* label_first; /* Start of incoming label */ - char* label_last; /* End of incoming label */ + struct _ZixStrindexNode* children; /* Children of this node */ + size_t num_children; /* Number of outgoing edges */ + char* first; /* Start of this suffix */ + char* label_first; /* Start of incoming label */ + char* label_last; /* End of incoming label */ } ZixStrindexNode; struct _ZixStrindex { - char* s; /* String contained in this tree */ - ZixStrindexNode* root; /* Root of the tree */ + char* s; /* String contained in this tree */ + ZixStrindexNode* root; /* Root of the tree */ }; static ZixStatus @@ -47,62 +47,64 @@ strindex_insert(ZixStrindexNode* n, ZixStrindex* zix_strindex_new(const char* s) { - const size_t len = strlen(s); + const size_t len = strlen(s); - ZixStrindex* t = (ZixStrindex*)malloc(sizeof(ZixStrindex)); - memset(t, '\0', sizeof(struct _ZixStrindex)); + ZixStrindex* t = (ZixStrindex*)malloc(sizeof(ZixStrindex)); + memset(t, '\0', sizeof(struct _ZixStrindex)); - t->s = (char*)calloc(1, len + 1); - memcpy(t->s, s, len); + t->s = (char*)calloc(1, len + 1); + memcpy(t->s, s, len); - t->root = (ZixStrindexNode*)malloc(sizeof(ZixStrindexNode)); - memset(t->root, '\0', sizeof(ZixStrindexNode)); - t->root->num_children = 0; - t->root->first = t->s; + t->root = (ZixStrindexNode*)malloc(sizeof(ZixStrindexNode)); + memset(t->root, '\0', sizeof(ZixStrindexNode)); + t->root->num_children = 0; + t->root->first = t->s; - for (size_t i = 0; i < len; ++i) { - strindex_insert(t->root, t->s + i, t->s + i, t->s + (len - 1)); - } + for (size_t i = 0; i < len; ++i) { + strindex_insert(t->root, t->s + i, t->s + i, t->s + (len - 1)); + } - return t; + return t; } static void zix_strindex_free_rec(ZixStrindexNode* n) { - if (n) { - for (size_t i = 0; i < n->num_children; ++i) { - zix_strindex_free_rec(&n->children[i]); - } - free(n->children); - } + if (n) { + for (size_t i = 0; i < n->num_children; ++i) { + zix_strindex_free_rec(&n->children[i]); + } + + free(n->children); + } } void zix_strindex_free(ZixStrindex* strindex) { - zix_strindex_free_rec(strindex->root); - free(strindex->s); - free(strindex->root); - free(strindex); + zix_strindex_free_rec(strindex->root); + free(strindex->s); + free(strindex->root); + free(strindex); } static inline int strindex_is_leaf(ZixStrindexNode* n) { - return n->num_children == 0; + return n->num_children == 0; } static int strindex_find_edge(ZixStrindexNode* n, char c, size_t* index) { - for (size_t i = 0; i < n->num_children; ++i) { - if (n->children[i].label_first[0] == c) { - *index = i; - return 1; - } - } - return 0; + for (size_t i = 0; i < n->num_children; ++i) { + if (n->children[i].label_first[0] == c) { + *index = i; + return 1; + } + } + + return 0; } static void @@ -111,149 +113,149 @@ strindex_add_edge(ZixStrindexNode* n, char* first, char* last) { - assert(last > first); - n->children = (ZixStrindexNode*)realloc( - n->children, (n->num_children + 1) * sizeof(ZixStrindexNode)); + assert(last > first); + n->children = (ZixStrindexNode*)realloc( + n->children, (n->num_children + 1) * sizeof(ZixStrindexNode)); - memset(&n->children[n->num_children], '\0', sizeof(ZixStrindexNode)); + memset(&n->children[n->num_children], '\0', sizeof(ZixStrindexNode)); - n->children[n->num_children].first = suffix_first; - n->children[n->num_children].label_first = first; - n->children[n->num_children].label_last = last; - ++n->num_children; + n->children[n->num_children].first = suffix_first; + n->children[n->num_children].label_first = first; + n->children[n->num_children].label_last = last; + ++n->num_children; } /* Split an outgoing edge (to a child) at the given index */ static void -strindex_split_edge(ZixStrindexNode* child, - size_t index) +strindex_split_edge(ZixStrindexNode* child, size_t index) { - ZixStrindexNode* children = child->children; - size_t num_children = child->num_children; + ZixStrindexNode* children = child->children; + size_t num_children = child->num_children; - char* first = child->label_first + index; - char* last = child->label_last; - assert(last > first); - assert(child->first); + char* first = child->label_first + index; + char* last = child->label_last; + assert(last > first); + assert(child->first); - child->children = (ZixStrindexNode*)malloc(sizeof(ZixStrindexNode)); + child->children = (ZixStrindexNode*)malloc(sizeof(ZixStrindexNode)); - child->children[0].children = children; - child->children[0].num_children = num_children; - child->children[0].first = child->first; - child->children[0].label_first = first; - child->children[0].label_last = last; + child->children[0].children = children; + child->children[0].num_children = num_children; + child->children[0].first = child->first; + child->children[0].label_first = first; + child->children[0].label_last = last; - child->label_last = child->label_first + (index - 1); // chop end + child->label_last = child->label_first + (index - 1); // chop end - child->num_children = 1; + child->num_children = 1; } static ZixStatus strindex_insert(ZixStrindexNode* n, char* suffix_first, char* first, char* last) { - size_t child_i; - ZixStrindexNode* child; - assert(first <= last); - - if (strindex_find_edge(n, *first, &child_i)) { - char* label_first = n->children[child_i].label_first; - char* label_last = n->children[child_i].label_last; - size_t split_i = 0; - const size_t label_len = label_last - label_first + 1; - const size_t s_len = last - first + 1; - const size_t max_len = (s_len < label_len) ? s_len : label_len; - - while (split_i < max_len && first[split_i] == label_first[split_i]) { - ++split_i; - } - child = n->children + child_i; - - if (label_len < s_len) { - if (split_i == label_len) { - strindex_insert(child, suffix_first, first + label_len, last); - } else { - strindex_split_edge(child, split_i); - strindex_insert(child, suffix_first, first + split_i, last); - } - } else if ((label_len != split_i) && (label_len != s_len)) { - strindex_split_edge(child, split_i); - if (split_i != s_len) { - strindex_add_edge(child, suffix_first, first + split_i, last); - } - } - } else { - strindex_add_edge(n, suffix_first, first, last); - } - - return ZIX_STATUS_SUCCESS; + size_t child_i; + ZixStrindexNode* child; + assert(first <= last); + + if (strindex_find_edge(n, *first, &child_i)) { + char* label_first = n->children[child_i].label_first; + char* label_last = n->children[child_i].label_last; + size_t split_i = 0; + const size_t label_len = label_last - label_first + 1; + const size_t s_len = last - first + 1; + const size_t max_len = (s_len < label_len) ? s_len : label_len; + + while (split_i < max_len && first[split_i] == label_first[split_i]) { + ++split_i; + } + child = n->children + child_i; + + if (label_len < s_len) { + if (split_i == label_len) { + strindex_insert(child, suffix_first, first + label_len, last); + } else { + strindex_split_edge(child, split_i); + strindex_insert(child, suffix_first, first + split_i, last); + } + } else if ((label_len != split_i) && (label_len != s_len)) { + strindex_split_edge(child, split_i); + if (split_i != s_len) { + strindex_add_edge(child, suffix_first, first + split_i, last); + } + } + } else { + strindex_add_edge(n, suffix_first, first, last); + } + + return ZIX_STATUS_SUCCESS; } ZixStatus zix_strindex_find(ZixStrindex* strindex, const char* const str, char** match) { - const char* p = str; + const char* p = str; #ifndef NDEBUG - const char* orig_p = p; + const char* orig_p = p; #endif - ZixStrindexNode* n = strindex->root; - size_t child_i; - - *match = NULL; - - while (*p != '\0') { - size_t p_len = strlen(p); - if (strindex_find_edge(n, p[0], &child_i)) { - char* label_first = n->children[child_i].label_first; - char* label_last = n->children[child_i].label_last; - size_t label_len = label_last - label_first + 1; - size_t max_len = (p_len < label_len) ? p_len : label_len; - assert(child_i <= n->num_children); - assert(max_len > 0); - - /* Set match to the start of substring - (but may not actually be a complete match yet) - */ - if (*match == NULL) { - assert(p[0] == orig_p[0]); - assert(orig_p[0] == label_first[0]); - *match = n->children[child_i].first; - assert((*match)[0] == orig_p[0]); - } - - if (strncmp(p, label_first, max_len)) { - /* no match */ - *match = NULL; - return ZIX_STATUS_NOT_FOUND; - } - - if (p_len <= label_len) { - /* At the last node, match */ - *match = n->children[child_i].first; - assert(!strncmp(*match, orig_p, strlen(orig_p))); - return ZIX_STATUS_SUCCESS; - } - - if (strindex_is_leaf(n)) { - /* Hit leaf early, no match */ - return ZIX_STATUS_NOT_FOUND; - } - - /* Match at this node, continue search downwards (or match) */ - p += label_len; - n = &n->children[child_i]; - if (label_len >= p_len) { - assert(strstr(strindex->s, orig_p) != NULL); - assert(strncmp(orig_p, *match, max_len)); - return ZIX_STATUS_SUCCESS; - } - - } else { - assert(strstr(strindex->s, orig_p) == NULL); - return ZIX_STATUS_NOT_FOUND; - } - } - return ZIX_STATUS_NOT_FOUND; + ZixStrindexNode* n = strindex->root; + size_t child_i; + + *match = NULL; + + while (*p != '\0') { + size_t p_len = strlen(p); + if (strindex_find_edge(n, p[0], &child_i)) { + char* label_first = n->children[child_i].label_first; + char* label_last = n->children[child_i].label_last; + size_t label_len = label_last - label_first + 1; + size_t max_len = (p_len < label_len) ? p_len : label_len; + assert(child_i <= n->num_children); + assert(max_len > 0); + + /* Set match to the start of substring + (but may not actually be a complete match yet) + */ + if (*match == NULL) { + assert(p[0] == orig_p[0]); + assert(orig_p[0] == label_first[0]); + *match = n->children[child_i].first; + assert((*match)[0] == orig_p[0]); + } + + if (strncmp(p, label_first, max_len)) { + /* no match */ + *match = NULL; + return ZIX_STATUS_NOT_FOUND; + } + + if (p_len <= label_len) { + /* At the last node, match */ + *match = n->children[child_i].first; + assert(!strncmp(*match, orig_p, strlen(orig_p))); + return ZIX_STATUS_SUCCESS; + } + + if (strindex_is_leaf(n)) { + /* Hit leaf early, no match */ + return ZIX_STATUS_NOT_FOUND; + } + + /* Match at this node, continue search downwards (or match) */ + p += label_len; + n = &n->children[child_i]; + if (label_len >= p_len) { + assert(strstr(strindex->s, orig_p) != NULL); + assert(strncmp(orig_p, *match, max_len)); + return ZIX_STATUS_SUCCESS; + } + + } else { + assert(strstr(strindex->s, orig_p) == NULL); + return ZIX_STATUS_NOT_FOUND; + } + } + + return ZIX_STATUS_NOT_FOUND; } |