Mercurial > data_structures
changeset 13:7886d2da8cc4
Trie working OK. Started on compact trie, ctrie.
author | Eris Caffee <discordia@eldalin.com> |
---|---|
date | Tue, 02 Oct 2012 10:13:07 -0500 |
parents | d359966ed8de |
children | ef73b96fdcae |
files | CMakeLists.txt src/ctrie.c src/ctrie.h src/ctrie_test.c src/trie.c src/trie.h src/trie_test.c |
diffstat | 7 files changed, 814 insertions(+), 24 deletions(-) [+] |
line diff
1.1 --- a/CMakeLists.txt Mon Oct 01 15:50:30 2012 -0500 1.2 +++ b/CMakeLists.txt Tue Oct 02 10:13:07 2012 -0500 1.3 @@ -102,4 +102,5 @@ 1.4 add_executable (trie_test 1.5 src/trie_test.c 1.6 ${trie_SRCS} 1.7 + ${list_SRCS} 1.8 )
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/src/ctrie.c Tue Oct 02 10:13:07 2012 -0500 2.3 @@ -0,0 +1,410 @@ 2.4 +#include <ctype.h> 2.5 +#include <stdlib.h> 2.6 +#include <stdbool.h> 2.7 +#include <string.h> 2.8 +#include <stdio.h> 2.9 + 2.10 +#include "list.h" 2.11 + 2.12 +struct ctrie_node 2.13 + { 2.14 + char * key; 2.15 + struct ctrie_node * parent; 2.16 + struct ctrie_node * child; 2.17 + struct ctrie_node * next_sibling; 2.18 + struct ctrie_node * prev_sibling; 2.19 + void * data; 2.20 + }; 2.21 + 2.22 +struct ctrie 2.23 + { 2.24 + struct ctrie_node * root; 2.25 + size_t size; 2.26 + size_t count; 2.27 + }; 2.28 + 2.29 + 2.30 +static struct list * subkey_list = NULL; 2.31 +static size_t subkey_list_limit = 0; 2.32 + 2.33 +struct ctrie_node * ctrie_search_children(struct ctrie_node * node, char k); 2.34 +void ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * key); 2.35 +void ctrie_print_node(struct ctrie_node * node); 2.36 +void ctrie_print_node(struct ctrie_node * node); 2.37 +void ctrie_node_dump_preorder(struct ctrie * ctrie, struct ctrie_node * node, int depth); 2.38 +void ctrie_node_dump_raw(struct ctrie_node * node, int depth); 2.39 +void ctrie_subtree_list_push(char * k, void * d); 2.40 +struct ctrie_node * ctrie_find_node(struct ctrie * ctrie, char * key); 2.41 + 2.42 +struct ctrie_node * ctrie_search_children_longest_match(struct ctrie_node * node, char * k); 2.43 + 2.44 +#undef MAX 2.45 +#define MAX (a, b) ((a) > (b) ? (a) : (b)) 2.46 + 2.47 +//////////////////////////////////////////////////////////////////////////////// 2.48 +struct ctrie * 2.49 +ctrie_new() 2.50 + { 2.51 + struct ctrie * ctrie = malloc(sizeof(struct ctrie)); 2.52 + if (NULL == ctrie) 2.53 + return NULL; 2.54 + 2.55 + ctrie->root = malloc(sizeof(struct ctrie_node)); 2.56 + if (NULL == ctrie->root) 2.57 + return NULL; 2.58 + 2.59 + ctrie->root->parent = ctrie->root->child = ctrie->root->next_sibling = NULL; 2.60 + ctrie->root->key = NULL; 2.61 + ctrie->root->data = NULL; 2.62 + ctrie->size = ctrie->count = 0; 2.63 + 2.64 + return ctrie; 2.65 + } 2.66 + 2.67 +//////////////////////////////////////////////////////////////////////////////// 2.68 +void 2.69 +ctrie_delete(struct ctrie * ctrie) 2.70 + { 2.71 + if (NULL == ctrie) 2.72 + return; 2.73 + 2.74 + free(ctrie); 2.75 + } 2.76 + 2.77 +//////////////////////////////////////////////////////////////////////////////// 2.78 +size_t 2.79 +ctrie_size(struct ctrie * ctrie) 2.80 + { 2.81 + if (NULL == ctrie) 2.82 + return 0; 2.83 + return ctrie->size; 2.84 + } 2.85 + 2.86 +//////////////////////////////////////////////////////////////////////////////// 2.87 +size_t 2.88 +ctrie_count(struct ctrie * ctrie) 2.89 + { 2.90 + if (NULL == ctrie) 2.91 + return 0; 2.92 + return ctrie->count; 2.93 + } 2.94 + 2.95 +//////////////////////////////////////////////////////////////////////////////// 2.96 +char * 2.97 +ctrie_insert(struct ctrie * ctrie, char * key, void * data) 2.98 + { 2.99 + if ( (NULL == ctrie) || (NULL == key) ) 2.100 + return NULL; 2.101 + 2.102 + struct ctrie_node * curr = ctrie->root; 2.103 + struct ctrie_node * result; 2.104 + 2.105 + size_t len = strlen(key); 2.106 + for (int i = 0; i <= len; ++i) 2.107 + { 2.108 + result = ctrie_search_children_longest_match(curr, key+i); 2.109 + if (NULL == result) 2.110 + { 2.111 + // current char is not in the ctrie yet, allocate a new node and attach it to the children 2.112 + struct ctrie_node * new = malloc(sizeof(struct ctrie_node)); 2.113 + if (NULL == new) 2.114 + return NULL; 2.115 + new->key = tolower(key[i]); 2.116 + new->child = new->next_sibling = new->prev_sibling = NULL; 2.117 + new->data = NULL; 2.118 + 2.119 + if (NULL == curr->child) 2.120 + curr->child = new; 2.121 + else 2.122 + { 2.123 + struct ctrie_node * p = curr->child; 2.124 + while (p->next_sibling) 2.125 + p = p->next_sibling; 2.126 + new->prev_sibling = p; 2.127 + p->next_sibling = new; 2.128 + } 2.129 + 2.130 + new->parent = curr; 2.131 + curr = new; 2.132 + ctrie->size++; 2.133 + } 2.134 + else 2.135 + curr = result; 2.136 + } 2.137 + 2.138 + // At this point, curr is either pointing to a new \0 node with a NULL data pointer, 2.139 + // or an existing \0 node with a valid data pointer (in which case we were asked to "insert" 2.140 + // a key that was already present in the ctrie. 2.141 + // For now I am simply ignoring collisions. We will update the data. 2.142 + 2.143 + curr->data = data; 2.144 + 2.145 + ctrie->count++; 2.146 + 2.147 + return key; 2.148 + } 2.149 + 2.150 +//////////////////////////////////////////////////////////////////////////////// 2.151 +struct ctrie_node * 2.152 +ctrie_search_children_longest_match(struct ctrie_node * node, char * k) 2.153 + { 2.154 + struct ctrie_node * p = node->child; 2.155 + if (NULL == p) 2.156 + return NULL; 2.157 + 2.158 + size_t klen = strlen(k); 2.159 + struct ctrie_node * longest = NULL; 2.160 + size_t length = 0; 2.161 + while (NULL != p) 2.162 + { 2.163 + size_t len = strlen(p->key); 2.164 + while (len > 0) 2.165 + { 2.166 + if ( (0 == strncmp(p->key, k, MAX(len, klen))) && 2.167 + (length < MAX(len, klen)) ) 2.168 + { 2.169 + longest = p; 2.170 + length = MAX(len, klen); 2.171 + break; 2.172 + } 2.173 + len--; 2.174 + } 2.175 + p = p->next_sibling; 2.176 + } 2.177 + 2.178 + return p; 2.179 + } 2.180 + 2.181 +//////////////////////////////////////////////////////////////////////////////// 2.182 +struct ctrie_node * 2.183 +ctrie_search_children(struct ctrie_node * node, char k) 2.184 + { 2.185 + struct ctrie_node * p = node->child; 2.186 + if (NULL == p) 2.187 + return NULL; 2.188 + 2.189 + while ( (NULL != p) && (p->key != tolower(k)) ) 2.190 + p = p->next_sibling; 2.191 + return p; 2.192 + } 2.193 + 2.194 +//////////////////////////////////////////////////////////////////////////////// 2.195 +struct ctrie_node * 2.196 +ctrie_find_node(struct ctrie * ctrie, char * key) 2.197 + { 2.198 + if ( (NULL == ctrie) || (NULL == key) ) 2.199 + return NULL; 2.200 + 2.201 + struct ctrie_node * node = ctrie->root; 2.202 + size_t len = strlen(key); 2.203 + for (int i = 0; i <= len; i++) 2.204 + { 2.205 + node = ctrie_search_children(node, key[i]); 2.206 + if (NULL == node) 2.207 + return NULL; 2.208 + if ('\0' == key[i]) 2.209 + return node; 2.210 + } 2.211 + 2.212 + // Should never get here 2.213 + return NULL; 2.214 + } 2.215 + 2.216 +//////////////////////////////////////////////////////////////////////////////// 2.217 +void * 2.218 +ctrie_find(struct ctrie * ctrie, char * key) 2.219 + { 2.220 + struct ctrie_node * node = ctrie_find_node(ctrie, key); 2.221 + if (NULL == node) 2.222 + return NULL; 2.223 + return node->data; 2.224 + } 2.225 + 2.226 +//////////////////////////////////////////////////////////////////////////////// 2.227 +void * 2.228 +ctrie_remove(struct ctrie * ctrie, char * key) 2.229 + { 2.230 + if ( (NULL == ctrie) || (NULL == key) ) 2.231 + return NULL; 2.232 + 2.233 + struct ctrie_node * node = ctrie_find_node(ctrie, key); 2.234 + if (NULL == node) 2.235 + return NULL; 2.236 + 2.237 + void * data = node->data; 2.238 + 2.239 + while ( (NULL == node->prev_sibling) && (NULL == node->prev_sibling) ) 2.240 + { 2.241 + struct ctrie_node * p = node; 2.242 + node = node->parent; 2.243 + free(p); 2.244 + ctrie->size--; 2.245 + } 2.246 + 2.247 + if (node == ctrie->root) 2.248 + return data; 2.249 + 2.250 + if (node->prev_sibling) 2.251 + node->prev_sibling->next_sibling = node->next_sibling; 2.252 + else 2.253 + node->parent->child = node->next_sibling; 2.254 + 2.255 + if (node->next_sibling) 2.256 + node->next_sibling->prev_sibling = node->prev_sibling; 2.257 + 2.258 + free(node); 2.259 + 2.260 + ctrie->count--; 2.261 + 2.262 + return data; 2.263 + } 2.264 + 2.265 +//////////////////////////////////////////////////////////////////////////////// 2.266 +void 2.267 +ctrie_walk_keys(struct ctrie * ctrie, void (*op)(char * k, void * d)) 2.268 + { 2.269 + if (NULL == ctrie) 2.270 + return; 2.271 + ctrie_visit_node(ctrie, ctrie->root, op, ""); 2.272 + } 2.273 + 2.274 +//////////////////////////////////////////////////////////////////////////////// 2.275 +void 2.276 +ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * key) 2.277 + { 2.278 + if ( (node->key == '\0') && (node != ctrie->root) ) 2.279 + op(key, node->data); 2.280 + else 2.281 + { 2.282 + size_t len = strlen(key); 2.283 + char * k = malloc(len+2); 2.284 + if (NULL == k) 2.285 + return; 2.286 + strcpy(k, key); 2.287 + k[len] = node->key; 2.288 + k[len+1] = '\0'; 2.289 + struct ctrie_node * p = node->child; 2.290 + while (NULL != p) 2.291 + { 2.292 + ctrie_visit_node(ctrie, p, op, k); 2.293 + p = p->next_sibling; 2.294 + } 2.295 + free(k); 2.296 + } 2.297 + } 2.298 + 2.299 +//////////////////////////////////////////////////////////////////////////////// 2.300 +void ctrie_dump_raw(struct ctrie * ctrie) 2.301 + { 2.302 + if (NULL == ctrie) 2.303 + return; 2.304 + 2.305 + int depth = -1; 2.306 + ctrie_node_dump_raw(ctrie->root, depth); 2.307 + } 2.308 + 2.309 +//////////////////////////////////////////////////////////////////////////////// 2.310 +void 2.311 +ctrie_node_dump_raw(struct ctrie_node * node, int depth) 2.312 + { 2.313 + for (int i=0; i < depth; i++) 2.314 + printf(" "); 2.315 + ctrie_print_node(node); 2.316 + if (node->child) 2.317 + ctrie_node_dump_raw(node->child, depth+1); 2.318 + if (NULL != node->next_sibling) 2.319 + ctrie_node_dump_raw(node->next_sibling, depth+1); 2.320 + } 2.321 + 2.322 +//////////////////////////////////////////////////////////////////////////////// 2.323 +void ctrie_dump(struct ctrie * ctrie) 2.324 + { 2.325 + if (NULL == ctrie) 2.326 + return; 2.327 + 2.328 + int depth = -1; 2.329 + ctrie_node_dump_preorder(ctrie, ctrie->root, depth); 2.330 + } 2.331 + 2.332 +//////////////////////////////////////////////////////////////////////////////// 2.333 +void 2.334 +ctrie_node_dump_preorder(struct ctrie * ctrie, struct ctrie_node * node, int depth) 2.335 + { 2.336 + printf("%c", node->key); 2.337 + if ( (node != ctrie->root) && ('\0' == node->key) ) 2.338 + printf("\n"); 2.339 + if (node->child) 2.340 + ctrie_node_dump_preorder(ctrie, node->child, depth+1); 2.341 + if (NULL != node->next_sibling) 2.342 + { 2.343 + for (int i=0; i < depth; i++) 2.344 + printf(" "); 2.345 + ctrie_node_dump_preorder(ctrie, node->next_sibling, depth); 2.346 + } 2.347 + } 2.348 + 2.349 +//////////////////////////////////////////////////////////////////////////////// 2.350 +void 2.351 +ctrie_print_node(struct ctrie_node * node) 2.352 + { 2.353 + printf("%p\tchild %p\tnext_sibling %p\tkey %c\tdata %p\n", (void *) node, (void *) node->child, 2.354 + (void *) node->next_sibling, node->key, node->data); 2.355 + } 2.356 + 2.357 +//////////////////////////////////////////////////////////////////////////////// 2.358 +void 2.359 +ctrie_subtree_list_push(char * k, void * d) 2.360 + { 2.361 + if (NULL == subkey_list) 2.362 + return; 2.363 + 2.364 + if ( (0 == subkey_list_limit) || (list_size(subkey_list) < subkey_list_limit) ) 2.365 + list_push_front(subkey_list, k); 2.366 + } 2.367 + 2.368 +//////////////////////////////////////////////////////////////////////////////// 2.369 +// Not reentrant since it uses the package static subkeys_list variable. 2.370 +// However, that variable is only used while building a subkey list, so you can 2.371 +// get more than one subkey if you want. Just don't try to get them simultaneously. 2.372 +struct list * 2.373 +ctrie_get_subkeys(struct ctrie * ctrie, char * prefix, size_t limit) 2.374 + { 2.375 + if ( (NULL == ctrie) || (NULL == prefix) ) 2.376 + return NULL; 2.377 + 2.378 + struct ctrie_node * node = ctrie_find_node(ctrie, prefix); 2.379 + if (NULL == node) 2.380 + return NULL; 2.381 + 2.382 + subkey_list = list_new(); 2.383 + if (NULL == subkey_list) 2.384 + return NULL; 2.385 + subkey_list_limit = limit; 2.386 + 2.387 + size_t len = strlen(prefix); 2.388 + char * pref = malloc(len+1); 2.389 + if (NULL == pref) 2.390 + return NULL; 2.391 + strncpy(pref, prefix, len-1); 2.392 + pref[len-1] = '\0'; 2.393 + 2.394 + ctrie_visit_node(ctrie, node->parent, ctrie_subtree_list_push, pref); 2.395 + free(pref); 2.396 + 2.397 + return subkey_list; 2.398 + } 2.399 + 2.400 +//////////////////////////////////////////////////////////////////////////////// 2.401 +void 2.402 +ctrie_free_subkey_list(struct list * list) 2.403 + { 2.404 + if (NULL == list) 2.405 + return; 2.406 + 2.407 + struct list_iterator * it = list_begin(list); 2.408 + void * data; 2.409 + while (data = list_remove(it)) 2.410 + free(data); 2.411 + 2.412 + list_delete(list); 2.413 + }
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/src/ctrie.h Tue Oct 02 10:13:07 2012 -0500 3.3 @@ -0,0 +1,27 @@ 3.4 +#ifndef CTRIE_H_ 3.5 +#define CTRIE_H_ 3.6 + 3.7 +// Compact trie - internal nodes are collapsed together if they have no siblings. 3.8 + 3.9 +struct ctrie; 3.10 + 3.11 +struct ctrie * ctrie_new(); 3.12 +void ctrie_delete(struct ctrie * ctrie); 3.13 + 3.14 +size_t ctrie_size(struct ctrie * ctrie); 3.15 +size_t ctrie_count(struct ctrie * ctrie); 3.16 + 3.17 +char * ctrie_insert(struct ctrie * ctrie, char * key, void * data); 3.18 +void * ctrie_find(struct ctrie * ctrie, char * key); 3.19 +void * ctrie_remove(struct ctrie * ctrie, char * key); 3.20 + 3.21 +void ctrie_walk_keys(struct ctrie * ctrie, void (*op)(char * k, void * d) ); 3.22 + 3.23 +void ctrie_dump(struct ctrie * ctrie); 3.24 +void ctrie_dump_raw(struct ctrie * ctrie); 3.25 + 3.26 +struct list * ctrie_get_subkeys(struct ctrie * ctrie, char * prefix, size_t limit); 3.27 +void ctrie_free_subkey_list(struct list * list); 3.28 + 3.29 + 3.30 +#endif
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/src/ctrie_test.c Tue Oct 02 10:13:07 2012 -0500 4.3 @@ -0,0 +1,128 @@ 4.4 +#include <stdio.h> 4.5 +#include <stdlib.h> 4.6 +#include <string.h> 4.7 + 4.8 +#include "list.h" 4.9 +#include "ctrie.h" 4.10 + 4.11 +void print_val(char * key, void * data) 4.12 + { 4.13 + printf("%s\t\t%s\n", key, data); 4.14 + } 4.15 + 4.16 +int main(int argc, char ** argv) 4.17 + { 4.18 + struct ctrie * ctrie = ctrie_new(); 4.19 + 4.20 + char * words[] = { "hello", "interjection (used to express a greeting, answer a telephone, or attract attention.)", 4.21 + "help", "to give or provide what is necessary to accomplish a task or satisfy a need.", 4.22 + "helper", "a person or thing that helps or gives assistance, support, etc", 4.23 + "helping", "the act of a person or thing that helps.", 4.24 + "an", "indefinite article. the form of a before an initial vowel sound.", 4.25 + "a", "indefinite article. Used before an initial consonant sound.", 4.26 + "anne", "a female name", 4.27 + "any", "one, a, an, or some; one or more without specification or identification", 4.28 + "anyway", "in any case; anyhow; nonetheless; regardless", 4.29 + "anna", "a female name", 4.30 + "anyhow", "in any way whatever", 4.31 + "anybody", "any person", 4.32 + "Apple", "A computer manufacturer.", 4.33 + "ABLE", "having necessary power, skill, resources, or qualifications", 4.34 + NULL, NULL 4.35 + }; 4.36 + int i = 0; 4.37 + while (words[i]) 4.38 + { 4.39 + ctrie_insert(ctrie, words[i], words[i+1]); 4.40 + i += 2; 4.41 + } 4.42 + 4.43 + ctrie_dump_raw(ctrie); 4.44 + ctrie_dump(ctrie); 4.45 + ctrie_walk_keys(ctrie, print_val); 4.46 + printf("ctrie is storing %zd words in %zd keys\n", ctrie_count(ctrie), ctrie_size(ctrie)); 4.47 + 4.48 + //////////////////////////////////////////// 4.49 + printf("===================================================\n"); 4.50 + char word[100];; 4.51 + strcpy(word, "any"); 4.52 + printf("searching for %s\n", word); 4.53 + 4.54 + void * data = ctrie_find(ctrie, word); 4.55 + if (NULL == data) 4.56 + printf("Failed to find 'any'\n"); 4.57 + else 4.58 + printf("any: %s\n", (char *) data); 4.59 + 4.60 + 4.61 + //////////////////////////////////////////// 4.62 + printf("===================================================\n"); 4.63 + strcpy(word, "an"); 4.64 + printf("Looking for completions of '%s':\n", word); 4.65 + struct list * list = ctrie_get_subkeys(ctrie, word, 0); 4.66 + if (NULL == list) 4.67 + printf("Nothing found for '%s'\n", word); 4.68 + else 4.69 + { 4.70 + printf("list size is %d\n", list_size(list)); 4.71 + struct list_iterator * it = list_begin(list); 4.72 + struct list_iterator * next = it; 4.73 + while (NULL != next) 4.74 + { 4.75 + printf("%s\n", list_value(it)); 4.76 + next = list_next(it); 4.77 + } 4.78 + ctrie_free_subkey_list(list); 4.79 + } 4.80 + 4.81 + //////////////////////////////////////////// 4.82 + printf("===================================================\n"); 4.83 + strcpy(word, "a"); 4.84 + printf("Looking for first 5 completions of '%s':\n", word); 4.85 + list = ctrie_get_subkeys(ctrie, word, 5); 4.86 + if (NULL == list) 4.87 + printf("Nothing found for '%s'\n", word); 4.88 + else 4.89 + { 4.90 + printf("list size is %d\n", list_size(list)); 4.91 + struct list_iterator * it = list_begin(list); 4.92 + struct list_iterator * next = it; 4.93 + while (NULL != next) 4.94 + { 4.95 + printf("%s\n", list_value(it)); 4.96 + next = list_next(it); 4.97 + } 4.98 + ctrie_free_subkey_list(list); 4.99 + } 4.100 + 4.101 + //////////////////////////////////////////// 4.102 + printf("===================================================\n"); 4.103 + strcpy(word, "anybody"); 4.104 + printf("remove '%s'\n", word); 4.105 + ctrie_remove(ctrie, word); 4.106 + 4.107 + strcpy(word, "ann"); 4.108 + printf("remove '%s'\n", word); 4.109 + ctrie_remove(ctrie, word); 4.110 + 4.111 + strcpy(word, "help"); 4.112 + printf("remove '%s'\n", word); 4.113 + ctrie_remove(ctrie, word); 4.114 + 4.115 + strcpy(word, "a"); 4.116 + printf("remove '%s'\n", word); 4.117 + ctrie_remove(ctrie, word); 4.118 + 4.119 + ctrie_dump(ctrie); 4.120 + printf("ctrie is storing %zd words in %zd keys\n", ctrie_count(ctrie), ctrie_size(ctrie)); 4.121 + 4.122 + //////////////////////////////////////////// 4.123 + printf("===================================================\n"); 4.124 + strcpy(word, "anybody"); 4.125 + printf("re-inserting '%s'\n", word); 4.126 + ctrie_insert(ctrie, word, "fake definition"); 4.127 + ctrie_dump(ctrie); 4.128 + printf("ctrie is storing %zd words in %zd keys\n", ctrie_count(ctrie), ctrie_size(ctrie)); 4.129 + 4.130 + exit(EXIT_SUCCESS); 4.131 + }
5.1 --- a/src/trie.c Mon Oct 01 15:50:30 2012 -0500 5.2 +++ b/src/trie.c Tue Oct 02 10:13:07 2012 -0500 5.3 @@ -2,30 +2,39 @@ 5.4 #include <stdlib.h> 5.5 #include <stdbool.h> 5.6 #include <string.h> 5.7 - 5.8 - 5.9 #include <stdio.h> 5.10 5.11 +#include "list.h" 5.12 5.13 struct trie_node 5.14 { 5.15 char key; 5.16 + struct trie_node * parent; 5.17 struct trie_node * child; 5.18 - struct trie_node * sibling; 5.19 + struct trie_node * next_sibling; 5.20 + struct trie_node * prev_sibling; 5.21 void * data; 5.22 }; 5.23 5.24 struct trie 5.25 { 5.26 struct trie_node * root; 5.27 + size_t size; 5.28 + size_t count; 5.29 }; 5.30 5.31 + 5.32 +static struct list * subkey_list = NULL; 5.33 +static size_t subkey_list_limit = 0; 5.34 + 5.35 struct trie_node * trie_search_children(struct trie_node * node, char k); 5.36 void trie_visit_node(struct trie * trie, struct trie_node * node, void (*op)(char * k, void * d), char * key); 5.37 void trie_print_node(struct trie_node * node); 5.38 void trie_print_node(struct trie_node * node); 5.39 void trie_node_dump_preorder(struct trie * trie, struct trie_node * node, int depth); 5.40 void trie_node_dump_raw(struct trie_node * node, int depth); 5.41 +void trie_subtree_list_push(char * k, void * d); 5.42 +struct trie_node * trie_find_node(struct trie * trie, char * key); 5.43 5.44 //////////////////////////////////////////////////////////////////////////////// 5.45 struct trie * 5.46 @@ -39,9 +48,10 @@ 5.47 if (NULL == trie->root) 5.48 return NULL; 5.49 5.50 - trie->root->child = trie->root->sibling = NULL; 5.51 + trie->root->parent = trie->root->child = trie->root->next_sibling = NULL; 5.52 trie->root->key = '\0'; 5.53 trie->root->data = NULL; 5.54 + trie->size = trie->count = 0; 5.55 5.56 return trie; 5.57 } 5.58 @@ -57,6 +67,24 @@ 5.59 } 5.60 5.61 //////////////////////////////////////////////////////////////////////////////// 5.62 +size_t 5.63 +trie_size(struct trie * trie) 5.64 + { 5.65 + if (NULL == trie) 5.66 + return 0; 5.67 + return trie->size; 5.68 + } 5.69 + 5.70 +//////////////////////////////////////////////////////////////////////////////// 5.71 +size_t 5.72 +trie_count(struct trie * trie) 5.73 + { 5.74 + if (NULL == trie) 5.75 + return 0; 5.76 + return trie->count; 5.77 + } 5.78 + 5.79 +//////////////////////////////////////////////////////////////////////////////// 5.80 char * 5.81 trie_insert(struct trie * trie, char * key, void * data) 5.82 { 5.83 @@ -77,7 +105,7 @@ 5.84 if (NULL == new) 5.85 return NULL; 5.86 new->key = tolower(key[i]); 5.87 - new->child = new->sibling = NULL; 5.88 + new->child = new->next_sibling = new->prev_sibling = NULL; 5.89 new->data = NULL; 5.90 5.91 if (NULL == curr->child) 5.92 @@ -85,12 +113,15 @@ 5.93 else 5.94 { 5.95 struct trie_node * p = curr->child; 5.96 - while (p->sibling) 5.97 - p = p->sibling; 5.98 - p->sibling = new; 5.99 + while (p->next_sibling) 5.100 + p = p->next_sibling; 5.101 + new->prev_sibling = p; 5.102 + p->next_sibling = new; 5.103 } 5.104 5.105 + new->parent = curr; 5.106 curr = new; 5.107 + trie->size++; 5.108 } 5.109 else 5.110 curr = result; 5.111 @@ -102,6 +133,9 @@ 5.112 // For now I am simply ignoring collisions. We will update the data. 5.113 5.114 curr->data = data; 5.115 + 5.116 + trie->count++; 5.117 + 5.118 return key; 5.119 } 5.120 5.121 @@ -114,13 +148,13 @@ 5.122 return NULL; 5.123 5.124 while ( (NULL != p) && (p->key != tolower(k)) ) 5.125 - p = p->sibling; 5.126 + p = p->next_sibling; 5.127 return p; 5.128 } 5.129 5.130 //////////////////////////////////////////////////////////////////////////////// 5.131 -void * 5.132 -trie_find(struct trie * trie, char * key) 5.133 +struct trie_node * 5.134 +trie_find_node(struct trie * trie, char * key) 5.135 { 5.136 if ( (NULL == trie) || (NULL == key) ) 5.137 return NULL; 5.138 @@ -133,7 +167,7 @@ 5.139 if (NULL == node) 5.140 return NULL; 5.141 if ('\0' == key[i]) 5.142 - return node->data; 5.143 + return node; 5.144 } 5.145 5.146 // Should never get here 5.147 @@ -142,9 +176,51 @@ 5.148 5.149 //////////////////////////////////////////////////////////////////////////////// 5.150 void * 5.151 +trie_find(struct trie * trie, char * key) 5.152 + { 5.153 + struct trie_node * node = trie_find_node(trie, key); 5.154 + if (NULL == node) 5.155 + return NULL; 5.156 + return node->data; 5.157 + } 5.158 + 5.159 +//////////////////////////////////////////////////////////////////////////////// 5.160 +void * 5.161 trie_remove(struct trie * trie, char * key) 5.162 { 5.163 - return NULL; 5.164 + if ( (NULL == trie) || (NULL == key) ) 5.165 + return NULL; 5.166 + 5.167 + struct trie_node * node = trie_find_node(trie, key); 5.168 + if (NULL == node) 5.169 + return NULL; 5.170 + 5.171 + void * data = node->data; 5.172 + 5.173 + while ( (NULL == node->prev_sibling) && (NULL == node->prev_sibling) ) 5.174 + { 5.175 + struct trie_node * p = node; 5.176 + node = node->parent; 5.177 + free(p); 5.178 + trie->size--; 5.179 + } 5.180 + 5.181 + if (node == trie->root) 5.182 + return data; 5.183 + 5.184 + if (node->prev_sibling) 5.185 + node->prev_sibling->next_sibling = node->next_sibling; 5.186 + else 5.187 + node->parent->child = node->next_sibling; 5.188 + 5.189 + if (node->next_sibling) 5.190 + node->next_sibling->prev_sibling = node->prev_sibling; 5.191 + 5.192 + free(node); 5.193 + 5.194 + trie->count--; 5.195 + 5.196 + return data; 5.197 } 5.198 5.199 //////////////////////////////////////////////////////////////////////////////// 5.200 @@ -175,8 +251,9 @@ 5.201 while (NULL != p) 5.202 { 5.203 trie_visit_node(trie, p, op, k); 5.204 - p = p->sibling; 5.205 + p = p->next_sibling; 5.206 } 5.207 + free(k); 5.208 } 5.209 } 5.210 5.211 @@ -199,8 +276,8 @@ 5.212 trie_print_node(node); 5.213 if (node->child) 5.214 trie_node_dump_raw(node->child, depth+1); 5.215 - if (NULL != node->sibling) 5.216 - trie_node_dump_raw(node->sibling, depth+1); 5.217 + if (NULL != node->next_sibling) 5.218 + trie_node_dump_raw(node->next_sibling, depth+1); 5.219 } 5.220 5.221 //////////////////////////////////////////////////////////////////////////////// 5.222 @@ -222,11 +299,11 @@ 5.223 printf("\n"); 5.224 if (node->child) 5.225 trie_node_dump_preorder(trie, node->child, depth+1); 5.226 - if (NULL != node->sibling) 5.227 + if (NULL != node->next_sibling) 5.228 { 5.229 for (int i=0; i < depth; i++) 5.230 printf(" "); 5.231 - trie_node_dump_preorder(trie, node->sibling, depth); 5.232 + trie_node_dump_preorder(trie, node->next_sibling, depth); 5.233 } 5.234 } 5.235 5.236 @@ -234,5 +311,64 @@ 5.237 void 5.238 trie_print_node(struct trie_node * node) 5.239 { 5.240 - printf("%p\tchild %p\tsibling %p\tkey %c\tdata %p\n", node, node->child, node->sibling, node->key, node->data); 5.241 + printf("%p\tchild %p\tnext_sibling %p\tkey %c\tdata %p\n", (void *) node, (void *) node->child, 5.242 + (void *) node->next_sibling, node->key, node->data); 5.243 } 5.244 + 5.245 +//////////////////////////////////////////////////////////////////////////////// 5.246 +void 5.247 +trie_subtree_list_push(char * k, void * d) 5.248 + { 5.249 + if (NULL == subkey_list) 5.250 + return; 5.251 + 5.252 + if ( (0 == subkey_list_limit) || (list_size(subkey_list) < subkey_list_limit) ) 5.253 + list_push_front(subkey_list, k); 5.254 + } 5.255 + 5.256 +//////////////////////////////////////////////////////////////////////////////// 5.257 +// Not reentrant since it uses the package static subkeys_list variable. 5.258 +// However, that variable is only used while building a subkey list, so you can 5.259 +// get more than one subkey if you want. Just don't try to get them simultaneously. 5.260 +struct list * 5.261 +trie_get_subkeys(struct trie * trie, char * prefix, size_t limit) 5.262 + { 5.263 + if ( (NULL == trie) || (NULL == prefix) ) 5.264 + return NULL; 5.265 + 5.266 + struct trie_node * node = trie_find_node(trie, prefix); 5.267 + if (NULL == node) 5.268 + return NULL; 5.269 + 5.270 + subkey_list = list_new(); 5.271 + if (NULL == subkey_list) 5.272 + return NULL; 5.273 + subkey_list_limit = limit; 5.274 + 5.275 + size_t len = strlen(prefix); 5.276 + char * pref = malloc(len+1); 5.277 + if (NULL == pref) 5.278 + return NULL; 5.279 + strncpy(pref, prefix, len-1); 5.280 + pref[len-1] = '\0'; 5.281 + 5.282 + trie_visit_node(trie, node->parent, trie_subtree_list_push, pref); 5.283 + free(pref); 5.284 + 5.285 + return subkey_list; 5.286 + } 5.287 + 5.288 +//////////////////////////////////////////////////////////////////////////////// 5.289 +void 5.290 +trie_free_subkey_list(struct list * list) 5.291 + { 5.292 + if (NULL == list) 5.293 + return; 5.294 + 5.295 + struct list_iterator * it = list_begin(list); 5.296 + void * data; 5.297 + while (data = list_remove(it)) 5.298 + free(data); 5.299 + 5.300 + list_delete(list); 5.301 + }
6.1 --- a/src/trie.h Mon Oct 01 15:50:30 2012 -0500 6.2 +++ b/src/trie.h Tue Oct 02 10:13:07 2012 -0500 6.3 @@ -6,6 +6,9 @@ 6.4 struct trie * trie_new(); 6.5 void trie_delete(struct trie * trie); 6.6 6.7 +size_t trie_size(struct trie * trie); 6.8 +size_t trie_count(struct trie * trie); 6.9 + 6.10 char * trie_insert(struct trie * trie, char * key, void * data); 6.11 void * trie_find(struct trie * trie, char * key); 6.12 void * trie_remove(struct trie * trie, char * key); 6.13 @@ -15,4 +18,8 @@ 6.14 void trie_dump(struct trie * trie); 6.15 void trie_dump_raw(struct trie * trie); 6.16 6.17 +struct list * trie_get_subkeys(struct trie * trie, char * prefix, size_t limit); 6.18 +void trie_free_subkey_list(struct list * list); 6.19 + 6.20 + 6.21 #endif
7.1 --- a/src/trie_test.c Mon Oct 01 15:50:30 2012 -0500 7.2 +++ b/src/trie_test.c Tue Oct 02 10:13:07 2012 -0500 7.3 @@ -1,11 +1,13 @@ 7.4 #include <stdio.h> 7.5 #include <stdlib.h> 7.6 +#include <string.h> 7.7 7.8 +#include "list.h" 7.9 #include "trie.h" 7.10 7.11 void print_val(char * key, void * data) 7.12 { 7.13 - printf("%s\n", key); 7.14 + printf("%s\t\t%s\n", key, data); 7.15 } 7.16 7.17 int main(int argc, char ** argv) 7.18 @@ -16,12 +18,12 @@ 7.19 "help", "to give or provide what is necessary to accomplish a task or satisfy a need.", 7.20 "helper", "a person or thing that helps or gives assistance, support, etc", 7.21 "helping", "the act of a person or thing that helps.", 7.22 + "an", "indefinite article. the form of a before an initial vowel sound.", 7.23 "a", "indefinite article. Used before an initial consonant sound.", 7.24 - "an", "indefinite article. the form of a before an initial vowel sound.", 7.25 - "anna", "a female name", 7.26 "anne", "a female name", 7.27 "any", "one, a, an, or some; one or more without specification or identification", 7.28 - "anyway", "in any case; anyhow; nonetheless; regardless", 7.29 + "anyway", "in any case; anyhow; nonetheless; regardless", 7.30 + "anna", "a female name", 7.31 "anyhow", "in any way whatever", 7.32 "anybody", "any person", 7.33 "Apple", "A computer manufacturer.", 7.34 @@ -35,13 +37,92 @@ 7.35 i += 2; 7.36 } 7.37 7.38 + trie_dump_raw(trie); 7.39 trie_dump(trie); 7.40 + trie_walk_keys(trie, print_val); 7.41 + printf("trie is storing %zd words in %zd keys\n", trie_count(trie), trie_size(trie)); 7.42 7.43 - void * data = trie_find(trie, "any"); 7.44 + //////////////////////////////////////////// 7.45 + printf("===================================================\n"); 7.46 + char word[100];; 7.47 + strcpy(word, "any"); 7.48 + printf("searching for %s\n", word); 7.49 + 7.50 + void * data = trie_find(trie, word); 7.51 if (NULL == data) 7.52 printf("Failed to find 'any'\n"); 7.53 else 7.54 printf("any: %s\n", (char *) data); 7.55 7.56 + 7.57 + //////////////////////////////////////////// 7.58 + printf("===================================================\n"); 7.59 + strcpy(word, "an"); 7.60 + printf("Looking for completions of '%s':\n", word); 7.61 + struct list * list = trie_get_subkeys(trie, word, 0); 7.62 + if (NULL == list) 7.63 + printf("Nothing found for '%s'\n", word); 7.64 + else 7.65 + { 7.66 + printf("list size is %d\n", list_size(list)); 7.67 + struct list_iterator * it = list_begin(list); 7.68 + struct list_iterator * next = it; 7.69 + while (NULL != next) 7.70 + { 7.71 + printf("%s\n", list_value(it)); 7.72 + next = list_next(it); 7.73 + } 7.74 + trie_free_subkey_list(list); 7.75 + } 7.76 + 7.77 + //////////////////////////////////////////// 7.78 + printf("===================================================\n"); 7.79 + strcpy(word, "a"); 7.80 + printf("Looking for first 5 completions of '%s':\n", word); 7.81 + list = trie_get_subkeys(trie, word, 5); 7.82 + if (NULL == list) 7.83 + printf("Nothing found for '%s'\n", word); 7.84 + else 7.85 + { 7.86 + printf("list size is %d\n", list_size(list)); 7.87 + struct list_iterator * it = list_begin(list); 7.88 + struct list_iterator * next = it; 7.89 + while (NULL != next) 7.90 + { 7.91 + printf("%s\n", list_value(it)); 7.92 + next = list_next(it); 7.93 + } 7.94 + trie_free_subkey_list(list); 7.95 + } 7.96 + 7.97 + //////////////////////////////////////////// 7.98 + printf("===================================================\n"); 7.99 + strcpy(word, "anybody"); 7.100 + printf("remove '%s'\n", word); 7.101 + trie_remove(trie, word); 7.102 + 7.103 + strcpy(word, "ann"); 7.104 + printf("remove '%s'\n", word); 7.105 + trie_remove(trie, word); 7.106 + 7.107 + strcpy(word, "help"); 7.108 + printf("remove '%s'\n", word); 7.109 + trie_remove(trie, word); 7.110 + 7.111 + strcpy(word, "a"); 7.112 + printf("remove '%s'\n", word); 7.113 + trie_remove(trie, word); 7.114 + 7.115 + trie_dump(trie); 7.116 + printf("trie is storing %zd words in %zd keys\n", trie_count(trie), trie_size(trie)); 7.117 + 7.118 + //////////////////////////////////////////// 7.119 + printf("===================================================\n"); 7.120 + strcpy(word, "anybody"); 7.121 + printf("re-inserting '%s'\n", word); 7.122 + trie_insert(trie, word, "fake definition"); 7.123 + trie_dump(trie); 7.124 + printf("trie is storing %zd words in %zd keys\n", trie_count(trie), trie_size(trie)); 7.125 + 7.126 exit(EXIT_SUCCESS); 7.127 }