# HG changeset patch # User Eris Caffee # Date 1349226038 18000 # Node ID ef73b96fdcaefe85ac854dd21bb80e8851399c77 # Parent 7886d2da8cc4714407195fb9dd42589ea721066b Many ctrie updates diff -r 7886d2da8cc4 -r ef73b96fdcae CMakeLists.txt --- a/CMakeLists.txt Tue Oct 02 10:13:07 2012 -0500 +++ b/CMakeLists.txt Tue Oct 02 20:00:38 2012 -0500 @@ -104,3 +104,10 @@ ${trie_SRCS} ${list_SRCS} ) + +set (ctrie_SRCS src/ctrie.h src/ctrie.c) +add_executable (ctrie_test + src/ctrie_test.c + ${ctrie_SRCS} + ${list_SRCS} + ) diff -r 7886d2da8cc4 -r ef73b96fdcae src/ctrie.c --- a/src/ctrie.c Tue Oct 02 10:13:07 2012 -0500 +++ b/src/ctrie.c Tue Oct 02 20:00:38 2012 -0500 @@ -27,19 +27,22 @@ static struct list * subkey_list = NULL; static size_t subkey_list_limit = 0; -struct ctrie_node * ctrie_search_children(struct ctrie_node * node, char k); +char * ctrie_insert_node(struct ctrie * ctrie, struct ctrie_node * node, char * key, void * data); +struct ctrie_node * ctrie_longest_matching_child(struct ctrie_node * node, char * key); +struct ctrie_node * ctrie_node_new(char * key, void * data); +int ctrie_str_common(char * s1, char * s2); + +void ctrie_print_node(struct ctrie_node * node); +void ctrie_print_key (struct ctrie_node * node); +void ctrie_node_dump (struct ctrie_node * node, int depth, void (*op)(struct ctrie_node *)); + void ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * key); -void ctrie_print_node(struct ctrie_node * node); -void ctrie_print_node(struct ctrie_node * node); -void ctrie_node_dump_preorder(struct ctrie * ctrie, struct ctrie_node * node, int depth); -void ctrie_node_dump_raw(struct ctrie_node * node, int depth); + +struct ctrie_node * ctrie_find_node (struct ctrie_node * node, char * key); +struct ctrie_node * ctrie_search_children(struct ctrie_node * node, char * k); + + void ctrie_subtree_list_push(char * k, void * d); -struct ctrie_node * ctrie_find_node(struct ctrie * ctrie, char * key); - -struct ctrie_node * ctrie_search_children_longest_match(struct ctrie_node * node, char * k); - -#undef MAX -#define MAX (a, b) ((a) > (b) ? (a) : (b)) //////////////////////////////////////////////////////////////////////////////// struct ctrie * @@ -54,10 +57,11 @@ return NULL; ctrie->root->parent = ctrie->root->child = ctrie->root->next_sibling = NULL; - ctrie->root->key = NULL; + ctrie->root->key = ""; ctrie->root->data = NULL; ctrie->size = ctrie->count = 0; + printf("The root node is %p\n", ctrie->root); return ctrie; } @@ -72,6 +76,43 @@ } //////////////////////////////////////////////////////////////////////////////// +struct ctrie_node * +ctrie_node_new(char * key, void * data) + { + if (NULL == key) + return NULL; + + struct ctrie_node * new = malloc(sizeof(struct ctrie_node)); + if (NULL == new) + return NULL; + + new->key = malloc(strlen(key)); + if (NULL == new->key) + { + free(new); + return NULL; + } + strcpy(new->key, key); + + new->data = data; + new->parent = new->child = new->prev_sibling = new->next_sibling = NULL; + + return new; + } + +//////////////////////////////////////////////////////////////////////////////// +void * +ctrie_node_free(struct ctrie_node * node) + { + if (NULL == node) + return NULL; + void * data = node->data; + free(node->key); + free(node); + return data; + } + +//////////////////////////////////////////////////////////////////////////////// size_t ctrie_size(struct ctrie * ctrie) { @@ -96,131 +137,342 @@ if ( (NULL == ctrie) || (NULL == key) ) return NULL; - struct ctrie_node * curr = ctrie->root; - struct ctrie_node * result; + printf("root node is "); + ctrie_print_node(ctrie->root); + return ctrie_insert_node(ctrie, ctrie->root, key, data); + } - size_t len = strlen(key); - for (int i = 0; i <= len; ++i) +//////////////////////////////////////////////////////////////////////////////// +char * +ctrie_insert_node(struct ctrie * ctrie, struct ctrie_node * node, char * key, void * data) + { + if ( (NULL == node) || (NULL == key) ) + return NULL; + + struct ctrie_node * p = ctrie_longest_matching_child(node, key); + + /* + printf("Longest match is "); + if (NULL != p) + ctrie_print_node(p); + else + printf("(nil)\n"); + */ + + if (NULL == p) { - result = ctrie_search_children_longest_match(curr, key+i); - if (NULL == result) + // No matching child found. Create new(key, data) as child of node + struct ctrie_node * new = ctrie_node_new(key, data); + if (NULL == new) + return NULL; + new->parent = node; + + if (NULL == node->child) + node->child = new; + else { - // current char is not in the ctrie yet, allocate a new node and attach it to the children - struct ctrie_node * new = malloc(sizeof(struct ctrie_node)); - if (NULL == new) - return NULL; - new->key = tolower(key[i]); - new->child = new->next_sibling = new->prev_sibling = NULL; - new->data = NULL; + struct ctrie_node * s = node->child; + while (NULL != s->next_sibling) + s = s->next_sibling; + s->next_sibling = new; + new->prev_sibling = s; + } + + ctrie->size++; + ctrie->count++; - if (NULL == curr->child) - curr->child = new; - else - { - struct ctrie_node * p = curr->child; - while (p->next_sibling) - p = p->next_sibling; - new->prev_sibling = p; - p->next_sibling = new; - } + /* + printf("Added "); + ctrie_print_node(new); + */ - new->parent = curr; - curr = new; - ctrie->size++; + return key; + } + else if (ctrie_str_common(p->key, key) == strlen(p->key)) + { + // Found a node whose key matches for it's entire length + if (0 == strcmp(p->key, key)) + { + // Key already exists. Error + return NULL; } else - curr = result; + { + // The node found has a key that is a prefix of the key we are inserting. + // Recurse with the remainder of the key + // printf("Recursing because node's entire key %s is a prefix of %s\n", p->key, key); + return ctrie_insert_node(ctrie, p, key+strlen(p->key), data); + } + } + else + { + // Found a node that matches for only part of it's length. + // Split the node and then recurse. + + /* + printf("splitting this node: "); + ctrie_print_node(p); + */ + + // Get the new shorter key for p. + int matchlen = ctrie_str_common(p->key, key); + char * prefix = malloc(matchlen+1); + if (NULL == prefix) + return NULL; + + strncpy(prefix, key, matchlen); + prefix[matchlen] = '\0'; + + // Create a new node to hold the remainder of the old key and take over the existing children. + struct ctrie_node * new = ctrie_node_new((p->key)+matchlen, p->data); + if (NULL == new) + { + free(prefix); + return NULL; + } + new->parent = p; + new->child = p->child; + new->data = p->data; + + // Update p to point to it's new child and have it's new (shorter) key + free(p->key); + p->key = prefix; + p->child = new; + p->data = NULL; + + ctrie->size++; + + /* + printf("Node was split into these\n"); + ctrie_print_node(p); + ctrie_print_node(new); + */ + + // Now add the user requested new key as a child of p + // printf("Recursing with key %s as a prefix of %s\n", p->key, key); + return ctrie_insert_node(ctrie, p, key + matchlen, data); } - // At this point, curr is either pointing to a new \0 node with a NULL data pointer, - // or an existing \0 node with a valid data pointer (in which case we were asked to "insert" - // a key that was already present in the ctrie. - // For now I am simply ignoring collisions. We will update the data. - - curr->data = data; - - ctrie->count++; - - return key; + // should not get here + return NULL; } //////////////////////////////////////////////////////////////////////////////// struct ctrie_node * -ctrie_search_children_longest_match(struct ctrie_node * node, char * k) +ctrie_longest_matching_child(struct ctrie_node * node, char * key) { + struct ctrie_node * longest = NULL; + int len = 0; + int l; + + printf("Searching for match in children of %p\n", node); + struct ctrie_node * p = node->child; - if (NULL == p) - return NULL; - - size_t klen = strlen(k); - struct ctrie_node * longest = NULL; - size_t length = 0; while (NULL != p) { - size_t len = strlen(p->key); - while (len > 0) + l = ctrie_str_common(p->key, key); + printf("common length is %d\n", l); + if (l > len) { - if ( (0 == strncmp(p->key, k, MAX(len, klen))) && - (length < MAX(len, klen)) ) - { - longest = p; - length = MAX(len, klen); - break; - } - len--; + len = l; + longest = p; } p = p->next_sibling; } - - return p; + return longest; } //////////////////////////////////////////////////////////////////////////////// -struct ctrie_node * -ctrie_search_children(struct ctrie_node * node, char k) +// Returns the number of characters of s1 (starting at the beginning) +// that match the corresponding characters in s2. +// +// eg: ctrie_str_common("asdf", "asdf") returns 4 +// ctrie_str_common("asdf", "asdfer") returns 4 +// ctrie_str_common("asdf", "asgg") returns 2 +// ctrie_str_common("asdf", "gggg") returns 0 + +int +ctrie_str_common(char * s1, char * s2) { - struct ctrie_node * p = node->child; - if (NULL == p) - return NULL; + int i = 0; + while ( (*s1 != '\0') && (*s2 != '\0') && + (*(s1+i) == *(s2+i)) ) + i++; - while ( (NULL != p) && (p->key != tolower(k)) ) - p = p->next_sibling; - return p; + return i; } //////////////////////////////////////////////////////////////////////////////// -struct ctrie_node * -ctrie_find_node(struct ctrie * ctrie, char * key) +void +ctrie_print_node(struct ctrie_node * node) { - if ( (NULL == ctrie) || (NULL == key) ) - return NULL; + if (NULL == node) + printf("(nil node)\n"); + else + printf("%s \t%p\t parent:%p \tchild:%p \tprev:%p \tnext:%p \tdata:%p\n", node->key, node, node->parent, node->child, node->prev_sibling, node->next_sibling, node->data); - struct ctrie_node * node = ctrie->root; - size_t len = strlen(key); - for (int i = 0; i <= len; i++) + } + +//////////////////////////////////////////////////////////////////////////////// +void +ctrie_print_key(struct ctrie_node * node) + { + if (NULL == node) + printf("(nil node)\n"); + else if (NULL == node->key) + printf("(nil key)\n"); + else + printf("%s\n", node->key); + } + +//////////////////////////////////////////////////////////////////////////////// +void ctrie_dump(struct ctrie * ctrie) + { + if (NULL == ctrie) + return; + + int depth = 0; + ctrie_node_dump(ctrie->root, depth, ctrie_print_key); + } + +//////////////////////////////////////////////////////////////////////////////// +void ctrie_dump_raw(struct ctrie * ctrie) + { + if (NULL == ctrie) + return; + + int depth = 0; + ctrie_node_dump(ctrie->root, depth, ctrie_print_node); + } + +//////////////////////////////////////////////////////////////////////////////// +void +ctrie_node_dump(struct ctrie_node * node, int depth, void (*op)(struct ctrie_node *)) + { + for (int i=0; i < depth; i++) + printf(" "); + op(node); + if (node->child) { - node = ctrie_search_children(node, key[i]); - if (NULL == node) - return NULL; - if ('\0' == key[i]) - return node; + int len = 0; + if (NULL != node->key) + len = strlen(node->key); + ctrie_node_dump(node->child, depth+len, op); + } + if (NULL != node->next_sibling) + ctrie_node_dump(node->next_sibling, depth, op); + } + +//////////////////////////////////////////////////////////////////////////////// +void +ctrie_walk_keys(struct ctrie * ctrie, void (*op)(char * k, void * d)) + { + if (NULL == ctrie) + return; + ctrie_visit_node(ctrie, ctrie->root, op, ""); + } + +//////////////////////////////////////////////////////////////////////////////// +void +ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * prefix) + { + if (NULL != node->data) + { + char * k = malloc(strlen(prefix) + strlen(node->key)); + if (NULL == k) + return; + k[0] = '\0'; + strcat(k, prefix); + strcat(k, node->key); + op(k, node->data); + free(k); } - // Should never get here - return NULL; + if(NULL != node->child) + { + char * k = malloc(strlen(prefix) + strlen(node->key)); + if (NULL == k) + return; + k[0] = '\0'; + strcat(k, prefix); + strcat(k, node->key); + struct ctrie_node * p = node->child; + while (NULL != p) + { + ctrie_visit_node(ctrie, p, op, k); + p = p->next_sibling; + } + free(k); + } } //////////////////////////////////////////////////////////////////////////////// void * ctrie_find(struct ctrie * ctrie, char * key) { - struct ctrie_node * node = ctrie_find_node(ctrie, key); + struct ctrie_node * node = ctrie_find_node(ctrie->root, key); if (NULL == node) return NULL; return node->data; } //////////////////////////////////////////////////////////////////////////////// +struct ctrie_node * +ctrie_find_node(struct ctrie_node * node, char * key) + { + if ( (NULL == node) || (NULL == key) ) + return NULL; + + size_t len = strlen(key); + for (int i = 0; i <= len; i += strlen(node->key)) + { + node = ctrie_search_children(node, key+i); + if (NULL == node) + return NULL; + if (NULL != node->data) + return node; + } + + // Should never get here + return NULL; + + struct ctrie_node * p = ctrie_longest_matching_child(node, key); + } + +//////////////////////////////////////////////////////////////////////////////// +struct ctrie_node * +ctrie_search_children(struct ctrie_node * node, char * k) + { + struct ctrie_node * p = node->child; + if (NULL == p) + return NULL; + + size_t len = strlen(node->key); + while ( (NULL != p) && (0 != strncmp(node->key, k, len)) ) + p = p->next_sibling; + return p; + } + + + + + + + + + + + + + + + + + + + + +//////////////////////////////////////////////////////////////////////////////// void * ctrie_remove(struct ctrie * ctrie, char * key) { @@ -261,98 +513,6 @@ //////////////////////////////////////////////////////////////////////////////// void -ctrie_walk_keys(struct ctrie * ctrie, void (*op)(char * k, void * d)) - { - if (NULL == ctrie) - return; - ctrie_visit_node(ctrie, ctrie->root, op, ""); - } - -//////////////////////////////////////////////////////////////////////////////// -void -ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * key) - { - if ( (node->key == '\0') && (node != ctrie->root) ) - op(key, node->data); - else - { - size_t len = strlen(key); - char * k = malloc(len+2); - if (NULL == k) - return; - strcpy(k, key); - k[len] = node->key; - k[len+1] = '\0'; - struct ctrie_node * p = node->child; - while (NULL != p) - { - ctrie_visit_node(ctrie, p, op, k); - p = p->next_sibling; - } - free(k); - } - } - -//////////////////////////////////////////////////////////////////////////////// -void ctrie_dump_raw(struct ctrie * ctrie) - { - if (NULL == ctrie) - return; - - int depth = -1; - ctrie_node_dump_raw(ctrie->root, depth); - } - -//////////////////////////////////////////////////////////////////////////////// -void -ctrie_node_dump_raw(struct ctrie_node * node, int depth) - { - for (int i=0; i < depth; i++) - printf(" "); - ctrie_print_node(node); - if (node->child) - ctrie_node_dump_raw(node->child, depth+1); - if (NULL != node->next_sibling) - ctrie_node_dump_raw(node->next_sibling, depth+1); - } - -//////////////////////////////////////////////////////////////////////////////// -void ctrie_dump(struct ctrie * ctrie) - { - if (NULL == ctrie) - return; - - int depth = -1; - ctrie_node_dump_preorder(ctrie, ctrie->root, depth); - } - -//////////////////////////////////////////////////////////////////////////////// -void -ctrie_node_dump_preorder(struct ctrie * ctrie, struct ctrie_node * node, int depth) - { - printf("%c", node->key); - if ( (node != ctrie->root) && ('\0' == node->key) ) - printf("\n"); - if (node->child) - ctrie_node_dump_preorder(ctrie, node->child, depth+1); - if (NULL != node->next_sibling) - { - for (int i=0; i < depth; i++) - printf(" "); - ctrie_node_dump_preorder(ctrie, node->next_sibling, depth); - } - } - -//////////////////////////////////////////////////////////////////////////////// -void -ctrie_print_node(struct ctrie_node * node) - { - printf("%p\tchild %p\tnext_sibling %p\tkey %c\tdata %p\n", (void *) node, (void *) node->child, - (void *) node->next_sibling, node->key, node->data); - } - -//////////////////////////////////////////////////////////////////////////////// -void ctrie_subtree_list_push(char * k, void * d) { if (NULL == subkey_list) diff -r 7886d2da8cc4 -r ef73b96fdcae src/ctrie_test.c --- a/src/ctrie_test.c Tue Oct 02 10:13:07 2012 -0500 +++ b/src/ctrie_test.c Tue Oct 02 20:00:38 2012 -0500 @@ -28,11 +28,14 @@ "anybody", "any person", "Apple", "A computer manufacturer.", "ABLE", "having necessary power, skill, resources, or qualifications", + /* + */ NULL, NULL }; int i = 0; while (words[i]) { + printf("Adding %s\n", words[i]); ctrie_insert(ctrie, words[i], words[i+1]); i += 2; } @@ -42,6 +45,8 @@ ctrie_walk_keys(ctrie, print_val); printf("ctrie is storing %zd words in %zd keys\n", ctrie_count(ctrie), ctrie_size(ctrie)); + exit(EXIT_SUCCESS); + //////////////////////////////////////////// printf("===================================================\n"); char word[100];;