changeset 13:7886d2da8cc4

Trie working OK. Started on compact trie, ctrie.
author Eris Caffee <discordia@eldalin.com>
date Tue, 02 Oct 2012 10:13:07 -0500
parents d359966ed8de
children ef73b96fdcae
files CMakeLists.txt src/ctrie.c src/ctrie.h src/ctrie_test.c src/trie.c src/trie.h src/trie_test.c
diffstat 7 files changed, 814 insertions(+), 24 deletions(-) [+]
line diff
     1.1 --- a/CMakeLists.txt	Mon Oct 01 15:50:30 2012 -0500
     1.2 +++ b/CMakeLists.txt	Tue Oct 02 10:13:07 2012 -0500
     1.3 @@ -102,4 +102,5 @@
     1.4  add_executable (trie_test
     1.5    src/trie_test.c
     1.6    ${trie_SRCS}
     1.7 +  ${list_SRCS}
     1.8    )
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/src/ctrie.c	Tue Oct 02 10:13:07 2012 -0500
     2.3 @@ -0,0 +1,410 @@
     2.4 +#include <ctype.h>
     2.5 +#include <stdlib.h>
     2.6 +#include <stdbool.h>
     2.7 +#include <string.h>
     2.8 +#include <stdio.h>
     2.9 +
    2.10 +#include "list.h"
    2.11 +
    2.12 +struct ctrie_node
    2.13 +   {
    2.14 +   char * key;
    2.15 +   struct ctrie_node * parent;
    2.16 +   struct ctrie_node * child;
    2.17 +   struct ctrie_node * next_sibling;
    2.18 +   struct ctrie_node * prev_sibling;
    2.19 +   void * data;
    2.20 +   };
    2.21 +
    2.22 +struct ctrie
    2.23 +   {
    2.24 +   struct ctrie_node * root;
    2.25 +   size_t size;
    2.26 +   size_t count;
    2.27 +   };
    2.28 +
    2.29 +
    2.30 +static struct list * subkey_list = NULL;
    2.31 +static size_t subkey_list_limit = 0;
    2.32 +
    2.33 +struct ctrie_node * ctrie_search_children(struct ctrie_node * node, char k);
    2.34 +void ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * key);
    2.35 +void ctrie_print_node(struct ctrie_node * node);
    2.36 +void ctrie_print_node(struct ctrie_node * node);
    2.37 +void ctrie_node_dump_preorder(struct ctrie * ctrie, struct ctrie_node * node, int depth);
    2.38 +void ctrie_node_dump_raw(struct ctrie_node * node, int depth);
    2.39 +void ctrie_subtree_list_push(char * k, void * d);
    2.40 +struct ctrie_node * ctrie_find_node(struct ctrie * ctrie, char * key);
    2.41 +
    2.42 +struct ctrie_node * ctrie_search_children_longest_match(struct ctrie_node * node, char * k);
    2.43 +
    2.44 +#undef MAX
    2.45 +#define MAX (a, b) ((a) > (b) ? (a) : (b))
    2.46 +
    2.47 +////////////////////////////////////////////////////////////////////////////////
    2.48 +struct ctrie *
    2.49 +ctrie_new()
    2.50 +   {
    2.51 +   struct ctrie * ctrie = malloc(sizeof(struct ctrie));
    2.52 +   if (NULL == ctrie)
    2.53 +      return NULL;
    2.54 +
    2.55 +   ctrie->root = malloc(sizeof(struct ctrie_node));
    2.56 +   if (NULL == ctrie->root)
    2.57 +      return NULL;
    2.58 +
    2.59 +   ctrie->root->parent = ctrie->root->child = ctrie->root->next_sibling = NULL;
    2.60 +   ctrie->root->key = NULL;
    2.61 +   ctrie->root->data = NULL;
    2.62 +   ctrie->size = ctrie->count = 0;
    2.63 +
    2.64 +   return ctrie;
    2.65 +   }
    2.66 +
    2.67 +////////////////////////////////////////////////////////////////////////////////
    2.68 +void
    2.69 +ctrie_delete(struct ctrie * ctrie)
    2.70 +   {
    2.71 +   if (NULL == ctrie)
    2.72 +      return;
    2.73 +
    2.74 +   free(ctrie);
    2.75 +   }
    2.76 +
    2.77 +////////////////////////////////////////////////////////////////////////////////
    2.78 +size_t
    2.79 +ctrie_size(struct ctrie * ctrie)
    2.80 +   {
    2.81 +   if (NULL == ctrie)
    2.82 +      return 0;
    2.83 +   return ctrie->size;
    2.84 +   }
    2.85 +
    2.86 +////////////////////////////////////////////////////////////////////////////////
    2.87 +size_t
    2.88 +ctrie_count(struct ctrie * ctrie)
    2.89 +   {
    2.90 +   if (NULL == ctrie)
    2.91 +      return 0;
    2.92 +   return ctrie->count;
    2.93 +   }
    2.94 +
    2.95 +////////////////////////////////////////////////////////////////////////////////
    2.96 +char *
    2.97 +ctrie_insert(struct ctrie * ctrie, char * key, void * data)
    2.98 +   {
    2.99 +   if ( (NULL == ctrie) || (NULL == key) )
   2.100 +      return NULL;
   2.101 +
   2.102 +   struct ctrie_node * curr = ctrie->root;
   2.103 +   struct ctrie_node * result;
   2.104 +
   2.105 +   size_t len = strlen(key);
   2.106 +   for (int i = 0; i <= len; ++i)
   2.107 +      {
   2.108 +      result = ctrie_search_children_longest_match(curr, key+i);
   2.109 +      if (NULL == result)
   2.110 +	 {
   2.111 +	 // current char is not in the ctrie yet, allocate a new node and attach it to the children
   2.112 +	 struct ctrie_node * new = malloc(sizeof(struct ctrie_node));
   2.113 +	 if (NULL == new)
   2.114 +	    return NULL;
   2.115 +	 new->key = tolower(key[i]);
   2.116 +	 new->child = new->next_sibling = new->prev_sibling = NULL;
   2.117 +	 new->data = NULL;
   2.118 +
   2.119 +	 if (NULL == curr->child)
   2.120 +	    curr->child = new;
   2.121 +	 else
   2.122 +	    {
   2.123 +	    struct ctrie_node * p = curr->child;
   2.124 +	    while (p->next_sibling)
   2.125 +	       p = p->next_sibling;
   2.126 +	    new->prev_sibling = p;
   2.127 +	    p->next_sibling = new;
   2.128 +	    }
   2.129 +
   2.130 +	 new->parent = curr;
   2.131 +	 curr = new;
   2.132 +	 ctrie->size++;
   2.133 +	 }
   2.134 +      else
   2.135 +	 curr = result;
   2.136 +      }
   2.137 +
   2.138 +   // At this point, curr is either pointing to a new \0 node with a NULL data pointer,
   2.139 +   // or an existing \0 node with a valid data pointer (in which case we were asked to "insert" 
   2.140 +   // a key that was already present in the ctrie.
   2.141 +   // For now I am simply ignoring collisions.  We will update the data.
   2.142 +
   2.143 +   curr->data = data;
   2.144 +
   2.145 +   ctrie->count++;
   2.146 +
   2.147 +   return key;
   2.148 +   }
   2.149 +
   2.150 +////////////////////////////////////////////////////////////////////////////////
   2.151 +struct ctrie_node *
   2.152 +ctrie_search_children_longest_match(struct ctrie_node * node, char * k)
   2.153 +   {
   2.154 +   struct ctrie_node * p = node->child;
   2.155 +   if (NULL == p)
   2.156 +      return NULL;
   2.157 +
   2.158 +   size_t klen = strlen(k);
   2.159 +   struct ctrie_node * longest = NULL;
   2.160 +   size_t length = 0;
   2.161 +   while (NULL != p)
   2.162 +      {
   2.163 +      size_t len = strlen(p->key);
   2.164 +      while (len > 0)
   2.165 +	 {
   2.166 +	 if ( (0 == strncmp(p->key, k, MAX(len, klen))) &&
   2.167 +	      (length < MAX(len, klen)) )
   2.168 +	    {
   2.169 +	    longest = p;
   2.170 +	    length = MAX(len, klen);
   2.171 +	    break;
   2.172 +	    }
   2.173 +	 len--;
   2.174 +	 }
   2.175 +      p = p->next_sibling;
   2.176 +      }
   2.177 +
   2.178 +   return p;
   2.179 +   }
   2.180 +
   2.181 +////////////////////////////////////////////////////////////////////////////////
   2.182 +struct ctrie_node *
   2.183 +ctrie_search_children(struct ctrie_node * node, char k)
   2.184 +   {
   2.185 +   struct ctrie_node * p = node->child;
   2.186 +   if (NULL == p)
   2.187 +      return NULL;
   2.188 +
   2.189 +   while ( (NULL != p) && (p->key != tolower(k)) )
   2.190 +      p = p->next_sibling;
   2.191 +   return p;
   2.192 +   }
   2.193 +
   2.194 +////////////////////////////////////////////////////////////////////////////////
   2.195 +struct ctrie_node *
   2.196 +ctrie_find_node(struct ctrie * ctrie, char * key)
   2.197 +   {
   2.198 +   if ( (NULL == ctrie) || (NULL == key) )
   2.199 +      return NULL;
   2.200 +
   2.201 +   struct ctrie_node * node = ctrie->root;
   2.202 +   size_t len = strlen(key);
   2.203 +   for (int i = 0; i <= len; i++)
   2.204 +      {
   2.205 +      node = ctrie_search_children(node, key[i]);
   2.206 +      if (NULL == node)
   2.207 +	 return NULL;
   2.208 +      if ('\0' == key[i])
   2.209 +	 return node;
   2.210 +      }
   2.211 +
   2.212 +   // Should never get here
   2.213 +   return NULL;
   2.214 +   }
   2.215 +
   2.216 +////////////////////////////////////////////////////////////////////////////////
   2.217 +void *
   2.218 +ctrie_find(struct ctrie * ctrie, char * key)
   2.219 +   {
   2.220 +   struct ctrie_node * node = ctrie_find_node(ctrie, key);
   2.221 +   if (NULL == node)
   2.222 +      return NULL;
   2.223 +   return node->data;
   2.224 +   }
   2.225 +
   2.226 +////////////////////////////////////////////////////////////////////////////////
   2.227 +void *
   2.228 +ctrie_remove(struct ctrie * ctrie, char * key)
   2.229 +   {
   2.230 +   if ( (NULL == ctrie) || (NULL == key) )
   2.231 +      return NULL;
   2.232 +
   2.233 +   struct ctrie_node * node = ctrie_find_node(ctrie, key);
   2.234 +   if (NULL == node)
   2.235 +      return NULL;
   2.236 +
   2.237 +   void * data = node->data;
   2.238 +
   2.239 +   while ( (NULL == node->prev_sibling) && (NULL == node->prev_sibling) ) 
   2.240 +      {
   2.241 +      struct ctrie_node * p = node;
   2.242 +      node = node->parent;
   2.243 +      free(p);
   2.244 +      ctrie->size--;
   2.245 +      }
   2.246 +
   2.247 +   if (node == ctrie->root)
   2.248 +      return data;
   2.249 +
   2.250 +   if (node->prev_sibling)
   2.251 +      node->prev_sibling->next_sibling = node->next_sibling;
   2.252 +   else
   2.253 +      node->parent->child = node->next_sibling;
   2.254 +
   2.255 +   if (node->next_sibling)
   2.256 +      node->next_sibling->prev_sibling = node->prev_sibling;
   2.257 +
   2.258 +   free(node);
   2.259 +
   2.260 +   ctrie->count--;
   2.261 +
   2.262 +   return data;
   2.263 +   }
   2.264 +
   2.265 +////////////////////////////////////////////////////////////////////////////////
   2.266 +void
   2.267 +ctrie_walk_keys(struct ctrie * ctrie, void (*op)(char * k, void * d))
   2.268 +   {
   2.269 +   if (NULL == ctrie)
   2.270 +      return;
   2.271 +   ctrie_visit_node(ctrie, ctrie->root, op, "");
   2.272 +   }
   2.273 +
   2.274 +////////////////////////////////////////////////////////////////////////////////
   2.275 +void
   2.276 +ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * key)
   2.277 +   {
   2.278 +   if ( (node->key == '\0') && (node != ctrie->root) )
   2.279 +      op(key, node->data);
   2.280 +   else
   2.281 +      {
   2.282 +      size_t len = strlen(key);
   2.283 +      char * k = malloc(len+2);
   2.284 +      if (NULL == k)
   2.285 +	 return;
   2.286 +      strcpy(k, key);
   2.287 +      k[len] = node->key;
   2.288 +      k[len+1] = '\0';
   2.289 +      struct ctrie_node * p = node->child;
   2.290 +      while (NULL != p)
   2.291 +	 {
   2.292 +	 ctrie_visit_node(ctrie, p, op, k);
   2.293 +	 p = p->next_sibling;
   2.294 +	 }
   2.295 +      free(k);
   2.296 +      }
   2.297 +   }
   2.298 +
   2.299 +////////////////////////////////////////////////////////////////////////////////
   2.300 +void ctrie_dump_raw(struct ctrie * ctrie)
   2.301 +   { 
   2.302 +   if (NULL == ctrie)
   2.303 +      return;
   2.304 +  
   2.305 +   int depth = -1;
   2.306 +   ctrie_node_dump_raw(ctrie->root, depth);
   2.307 +   }
   2.308 +
   2.309 +////////////////////////////////////////////////////////////////////////////////
   2.310 +void
   2.311 +ctrie_node_dump_raw(struct ctrie_node * node, int depth)
   2.312 +   {
   2.313 +   for (int i=0; i < depth; i++)
   2.314 +      printf(" ");
   2.315 +   ctrie_print_node(node);
   2.316 +   if (node->child)
   2.317 +      ctrie_node_dump_raw(node->child, depth+1); 
   2.318 +   if (NULL != node->next_sibling)
   2.319 +      ctrie_node_dump_raw(node->next_sibling, depth+1);
   2.320 +   }
   2.321 +
   2.322 +////////////////////////////////////////////////////////////////////////////////
   2.323 +void ctrie_dump(struct ctrie * ctrie)
   2.324 +   { 
   2.325 +   if (NULL == ctrie)
   2.326 +      return;
   2.327 +  
   2.328 +   int depth = -1;
   2.329 +   ctrie_node_dump_preorder(ctrie, ctrie->root, depth);
   2.330 +   }
   2.331 +
   2.332 +////////////////////////////////////////////////////////////////////////////////
   2.333 +void
   2.334 +ctrie_node_dump_preorder(struct ctrie * ctrie, struct ctrie_node * node, int depth)
   2.335 +   {
   2.336 +   printf("%c", node->key);
   2.337 +   if ( (node != ctrie->root) && ('\0' == node->key) )
   2.338 +      printf("\n");
   2.339 +   if (node->child)
   2.340 +      ctrie_node_dump_preorder(ctrie, node->child, depth+1);
   2.341 +   if (NULL != node->next_sibling)
   2.342 +      {
   2.343 +      for (int i=0; i < depth; i++)
   2.344 +	 printf(" ");
   2.345 +      ctrie_node_dump_preorder(ctrie, node->next_sibling, depth);
   2.346 +      }
   2.347 +   }
   2.348 +
   2.349 +////////////////////////////////////////////////////////////////////////////////
   2.350 +void
   2.351 +ctrie_print_node(struct ctrie_node * node)
   2.352 +   {
   2.353 +   printf("%p\tchild %p\tnext_sibling %p\tkey %c\tdata %p\n", (void *) node, (void *) node->child, 
   2.354 +	  (void *) node->next_sibling, node->key, node->data);
   2.355 +   }
   2.356 +
   2.357 +////////////////////////////////////////////////////////////////////////////////
   2.358 +void
   2.359 +ctrie_subtree_list_push(char * k, void * d)
   2.360 +   {
   2.361 +   if (NULL == subkey_list)
   2.362 +      return;
   2.363 +
   2.364 +   if ( (0 == subkey_list_limit) || (list_size(subkey_list) < subkey_list_limit) )
   2.365 +      list_push_front(subkey_list, k);
   2.366 +   }
   2.367 +
   2.368 +////////////////////////////////////////////////////////////////////////////////
   2.369 +// Not reentrant since it uses the package static subkeys_list variable.
   2.370 +// However, that variable is only used while building a subkey list, so you can
   2.371 +// get more than one subkey if you want.  Just don't try to get them simultaneously.
   2.372 +struct list *
   2.373 +ctrie_get_subkeys(struct ctrie * ctrie, char * prefix, size_t limit)
   2.374 +   {
   2.375 +   if ( (NULL == ctrie) || (NULL == prefix) )
   2.376 +      return NULL;
   2.377 +
   2.378 +   struct ctrie_node * node = ctrie_find_node(ctrie, prefix);
   2.379 +   if (NULL == node)
   2.380 +      return NULL;
   2.381 +
   2.382 +   subkey_list = list_new();
   2.383 +   if (NULL == subkey_list)
   2.384 +      return NULL;
   2.385 +   subkey_list_limit = limit;
   2.386 +
   2.387 +   size_t len = strlen(prefix);
   2.388 +   char * pref = malloc(len+1);
   2.389 +   if (NULL == pref)
   2.390 +      return NULL;
   2.391 +   strncpy(pref, prefix, len-1);
   2.392 +   pref[len-1] = '\0';
   2.393 +
   2.394 +   ctrie_visit_node(ctrie, node->parent, ctrie_subtree_list_push, pref);
   2.395 +   free(pref);
   2.396 +
   2.397 +   return subkey_list;
   2.398 +   }
   2.399 +
   2.400 +////////////////////////////////////////////////////////////////////////////////
   2.401 +void
   2.402 +ctrie_free_subkey_list(struct list * list)
   2.403 +   {
   2.404 +   if (NULL == list)
   2.405 +      return;
   2.406 +
   2.407 +   struct list_iterator * it = list_begin(list);
   2.408 +   void * data;
   2.409 +   while (data = list_remove(it))
   2.410 +      free(data);
   2.411 +
   2.412 +   list_delete(list);
   2.413 +   }
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/ctrie.h	Tue Oct 02 10:13:07 2012 -0500
     3.3 @@ -0,0 +1,27 @@
     3.4 +#ifndef CTRIE_H_
     3.5 +#define CTRIE_H_
     3.6 +
     3.7 +// Compact trie - internal nodes are collapsed together if they have no siblings.
     3.8 +
     3.9 +struct ctrie;
    3.10 +
    3.11 +struct ctrie * ctrie_new();
    3.12 +void ctrie_delete(struct ctrie * ctrie);
    3.13 +
    3.14 +size_t ctrie_size(struct ctrie * ctrie);
    3.15 +size_t ctrie_count(struct ctrie * ctrie);
    3.16 +
    3.17 +char * ctrie_insert(struct ctrie * ctrie, char * key, void * data);
    3.18 +void * ctrie_find(struct ctrie * ctrie, char * key);
    3.19 +void * ctrie_remove(struct ctrie * ctrie, char * key);
    3.20 +
    3.21 +void ctrie_walk_keys(struct ctrie * ctrie, void (*op)(char * k, void * d) );
    3.22 +
    3.23 +void ctrie_dump(struct ctrie * ctrie);
    3.24 +void ctrie_dump_raw(struct ctrie * ctrie);
    3.25 +
    3.26 +struct list * ctrie_get_subkeys(struct ctrie * ctrie, char * prefix, size_t limit);
    3.27 +void          ctrie_free_subkey_list(struct list * list);
    3.28 +
    3.29 +
    3.30 +#endif
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/ctrie_test.c	Tue Oct 02 10:13:07 2012 -0500
     4.3 @@ -0,0 +1,128 @@
     4.4 +#include <stdio.h>
     4.5 +#include <stdlib.h>
     4.6 +#include <string.h>
     4.7 +
     4.8 +#include "list.h"
     4.9 +#include "ctrie.h"
    4.10 +
    4.11 +void print_val(char * key, void * data)
    4.12 +   {
    4.13 +   printf("%s\t\t%s\n", key, data);
    4.14 +   }
    4.15 +
    4.16 +int main(int argc, char ** argv)
    4.17 +   {
    4.18 +   struct ctrie * ctrie = ctrie_new();
    4.19 +
    4.20 +   char * words[] = { "hello", "interjection (used to express a greeting, answer a telephone, or attract attention.)",
    4.21 +		      "help", "to give or provide what is necessary to accomplish a task or satisfy a need.",
    4.22 +		      "helper", "a person or thing that helps  or gives assistance, support, etc",
    4.23 +		      "helping", "the act of a person or thing that helps.",
    4.24 +		      "an", "indefinite article. the form of a before an initial vowel sound.",
    4.25 +		      "a", "indefinite article.  Used before an initial consonant sound.",
    4.26 +		      "anne", "a female name",
    4.27 +		      "any", "one, a, an, or some; one or more without specification or identification",
    4.28 +		      "anyway", "in any case; anyhow; nonetheless; regardless",	
    4.29 +		      "anna", "a female name",
    4.30 +		      "anyhow", "in any  way whatever",
    4.31 +		      "anybody", "any person",
    4.32 +		      "Apple", "A computer manufacturer.",
    4.33 +		      "ABLE", "having necessary power, skill, resources, or qualifications",
    4.34 +		      NULL, NULL
    4.35 +   };
    4.36 +   int i = 0;
    4.37 +   while (words[i])
    4.38 +      {
    4.39 +      ctrie_insert(ctrie, words[i], words[i+1]);
    4.40 +      i += 2;
    4.41 +      }
    4.42 +
    4.43 +   ctrie_dump_raw(ctrie);
    4.44 +   ctrie_dump(ctrie);
    4.45 +   ctrie_walk_keys(ctrie, print_val);
    4.46 +   printf("ctrie is storing %zd words in %zd keys\n", ctrie_count(ctrie), ctrie_size(ctrie));
    4.47 +
    4.48 +   ////////////////////////////////////////////
    4.49 +   printf("===================================================\n");
    4.50 +   char word[100];;
    4.51 +   strcpy(word, "any");
    4.52 +   printf("searching for %s\n", word);
    4.53 +
    4.54 +   void * data = ctrie_find(ctrie, word);
    4.55 +   if (NULL == data)
    4.56 +      printf("Failed to find 'any'\n");
    4.57 +   else
    4.58 +      printf("any: %s\n", (char *) data);
    4.59 +
    4.60 +
    4.61 +   ////////////////////////////////////////////
    4.62 +   printf("===================================================\n");
    4.63 +   strcpy(word, "an");
    4.64 +   printf("Looking for completions of '%s':\n", word);
    4.65 +   struct list * list = ctrie_get_subkeys(ctrie, word, 0);
    4.66 +   if (NULL == list)
    4.67 +      printf("Nothing found for '%s'\n", word);
    4.68 +   else
    4.69 +      {
    4.70 +      printf("list size is %d\n", list_size(list));
    4.71 +      struct list_iterator * it = list_begin(list);
    4.72 +      struct list_iterator * next = it;
    4.73 +      while (NULL != next)
    4.74 +	 {
    4.75 +	 printf("%s\n", list_value(it));
    4.76 +	 next = list_next(it);
    4.77 +	 }
    4.78 +      ctrie_free_subkey_list(list);
    4.79 +      }
    4.80 +
    4.81 +   ////////////////////////////////////////////
    4.82 +   printf("===================================================\n");
    4.83 +   strcpy(word, "a");
    4.84 +   printf("Looking for first 5 completions of '%s':\n", word);
    4.85 +   list = ctrie_get_subkeys(ctrie, word, 5);
    4.86 +   if (NULL == list)
    4.87 +      printf("Nothing found for '%s'\n", word);
    4.88 +   else
    4.89 +      {
    4.90 +      printf("list size is %d\n", list_size(list));
    4.91 +      struct list_iterator * it = list_begin(list);
    4.92 +      struct list_iterator * next = it;
    4.93 +      while (NULL != next)
    4.94 +	 {
    4.95 +	 printf("%s\n", list_value(it));
    4.96 +	 next = list_next(it);
    4.97 +	 }
    4.98 +      ctrie_free_subkey_list(list);
    4.99 +      }
   4.100 +
   4.101 +   ////////////////////////////////////////////
   4.102 +   printf("===================================================\n");
   4.103 +   strcpy(word, "anybody");
   4.104 +   printf("remove '%s'\n", word);
   4.105 +   ctrie_remove(ctrie, word);
   4.106 +
   4.107 +   strcpy(word, "ann");
   4.108 +   printf("remove '%s'\n", word);
   4.109 +   ctrie_remove(ctrie, word);
   4.110 +
   4.111 +   strcpy(word, "help");
   4.112 +   printf("remove '%s'\n", word);
   4.113 +   ctrie_remove(ctrie, word);
   4.114 +
   4.115 +   strcpy(word, "a");
   4.116 +   printf("remove '%s'\n", word);
   4.117 +   ctrie_remove(ctrie, word);
   4.118 +
   4.119 +   ctrie_dump(ctrie);
   4.120 +   printf("ctrie is storing %zd words in %zd keys\n", ctrie_count(ctrie), ctrie_size(ctrie));
   4.121 +
   4.122 +   ////////////////////////////////////////////
   4.123 +   printf("===================================================\n");
   4.124 +   strcpy(word, "anybody");
   4.125 +   printf("re-inserting '%s'\n", word);
   4.126 +   ctrie_insert(ctrie, word, "fake definition");
   4.127 +   ctrie_dump(ctrie);
   4.128 +   printf("ctrie is storing %zd words in %zd keys\n", ctrie_count(ctrie), ctrie_size(ctrie));
   4.129 +
   4.130 +   exit(EXIT_SUCCESS);
   4.131 +   }
     5.1 --- a/src/trie.c	Mon Oct 01 15:50:30 2012 -0500
     5.2 +++ b/src/trie.c	Tue Oct 02 10:13:07 2012 -0500
     5.3 @@ -2,30 +2,39 @@
     5.4  #include <stdlib.h>
     5.5  #include <stdbool.h>
     5.6  #include <string.h>
     5.7 -
     5.8 -
     5.9  #include <stdio.h>
    5.10  
    5.11 +#include "list.h"
    5.12  
    5.13  struct trie_node
    5.14     {
    5.15     char key;
    5.16 +   struct trie_node * parent;
    5.17     struct trie_node * child;
    5.18 -   struct trie_node * sibling;
    5.19 +   struct trie_node * next_sibling;
    5.20 +   struct trie_node * prev_sibling;
    5.21     void * data;
    5.22     };
    5.23  
    5.24  struct trie
    5.25     {
    5.26     struct trie_node * root;
    5.27 +   size_t size;
    5.28 +   size_t count;
    5.29     };
    5.30  
    5.31 +
    5.32 +static struct list * subkey_list = NULL;
    5.33 +static size_t subkey_list_limit = 0;
    5.34 +
    5.35  struct trie_node * trie_search_children(struct trie_node * node, char k);
    5.36  void trie_visit_node(struct trie * trie, struct trie_node * node, void (*op)(char * k, void * d), char * key);
    5.37  void trie_print_node(struct trie_node * node);
    5.38  void trie_print_node(struct trie_node * node);
    5.39  void trie_node_dump_preorder(struct trie * trie, struct trie_node * node, int depth);
    5.40  void trie_node_dump_raw(struct trie_node * node, int depth);
    5.41 +void trie_subtree_list_push(char * k, void * d);
    5.42 +struct trie_node * trie_find_node(struct trie * trie, char * key);
    5.43  
    5.44  ////////////////////////////////////////////////////////////////////////////////
    5.45  struct trie *
    5.46 @@ -39,9 +48,10 @@
    5.47     if (NULL == trie->root)
    5.48        return NULL;
    5.49  
    5.50 -   trie->root->child = trie->root->sibling = NULL;
    5.51 +   trie->root->parent = trie->root->child = trie->root->next_sibling = NULL;
    5.52     trie->root->key = '\0';
    5.53     trie->root->data = NULL;
    5.54 +   trie->size = trie->count = 0;
    5.55  
    5.56     return trie;
    5.57     }
    5.58 @@ -57,6 +67,24 @@
    5.59     }
    5.60  
    5.61  ////////////////////////////////////////////////////////////////////////////////
    5.62 +size_t
    5.63 +trie_size(struct trie * trie)
    5.64 +   {
    5.65 +   if (NULL == trie)
    5.66 +      return 0;
    5.67 +   return trie->size;
    5.68 +   }
    5.69 +
    5.70 +////////////////////////////////////////////////////////////////////////////////
    5.71 +size_t
    5.72 +trie_count(struct trie * trie)
    5.73 +   {
    5.74 +   if (NULL == trie)
    5.75 +      return 0;
    5.76 +   return trie->count;
    5.77 +   }
    5.78 +
    5.79 +////////////////////////////////////////////////////////////////////////////////
    5.80  char *
    5.81  trie_insert(struct trie * trie, char * key, void * data)
    5.82     {
    5.83 @@ -77,7 +105,7 @@
    5.84  	 if (NULL == new)
    5.85  	    return NULL;
    5.86  	 new->key = tolower(key[i]);
    5.87 -	 new->child = new->sibling = NULL;
    5.88 +	 new->child = new->next_sibling = new->prev_sibling = NULL;
    5.89  	 new->data = NULL;
    5.90  
    5.91  	 if (NULL == curr->child)
    5.92 @@ -85,12 +113,15 @@
    5.93  	 else
    5.94  	    {
    5.95  	    struct trie_node * p = curr->child;
    5.96 -	    while (p->sibling)
    5.97 -	       p = p->sibling;
    5.98 -	    p->sibling = new;
    5.99 +	    while (p->next_sibling)
   5.100 +	       p = p->next_sibling;
   5.101 +	    new->prev_sibling = p;
   5.102 +	    p->next_sibling = new;
   5.103  	    }
   5.104  
   5.105 +	 new->parent = curr;
   5.106  	 curr = new;
   5.107 +	 trie->size++;
   5.108  	 }
   5.109        else
   5.110  	 curr = result;
   5.111 @@ -102,6 +133,9 @@
   5.112     // For now I am simply ignoring collisions.  We will update the data.
   5.113  
   5.114     curr->data = data;
   5.115 +
   5.116 +   trie->count++;
   5.117 +
   5.118     return key;
   5.119     }
   5.120  
   5.121 @@ -114,13 +148,13 @@
   5.122        return NULL;
   5.123  
   5.124     while ( (NULL != p) && (p->key != tolower(k)) )
   5.125 -      p = p->sibling;
   5.126 +      p = p->next_sibling;
   5.127     return p;
   5.128     }
   5.129  
   5.130  ////////////////////////////////////////////////////////////////////////////////
   5.131 -void *
   5.132 -trie_find(struct trie * trie, char * key)
   5.133 +struct trie_node *
   5.134 +trie_find_node(struct trie * trie, char * key)
   5.135     {
   5.136     if ( (NULL == trie) || (NULL == key) )
   5.137        return NULL;
   5.138 @@ -133,7 +167,7 @@
   5.139        if (NULL == node)
   5.140  	 return NULL;
   5.141        if ('\0' == key[i])
   5.142 -	 return node->data;
   5.143 +	 return node;
   5.144        }
   5.145  
   5.146     // Should never get here
   5.147 @@ -142,9 +176,51 @@
   5.148  
   5.149  ////////////////////////////////////////////////////////////////////////////////
   5.150  void *
   5.151 +trie_find(struct trie * trie, char * key)
   5.152 +   {
   5.153 +   struct trie_node * node = trie_find_node(trie, key);
   5.154 +   if (NULL == node)
   5.155 +      return NULL;
   5.156 +   return node->data;
   5.157 +   }
   5.158 +
   5.159 +////////////////////////////////////////////////////////////////////////////////
   5.160 +void *
   5.161  trie_remove(struct trie * trie, char * key)
   5.162     {
   5.163 -   return NULL;
   5.164 +   if ( (NULL == trie) || (NULL == key) )
   5.165 +      return NULL;
   5.166 +
   5.167 +   struct trie_node * node = trie_find_node(trie, key);
   5.168 +   if (NULL == node)
   5.169 +      return NULL;
   5.170 +
   5.171 +   void * data = node->data;
   5.172 +
   5.173 +   while ( (NULL == node->prev_sibling) && (NULL == node->prev_sibling) ) 
   5.174 +      {
   5.175 +      struct trie_node * p = node;
   5.176 +      node = node->parent;
   5.177 +      free(p);
   5.178 +      trie->size--;
   5.179 +      }
   5.180 +
   5.181 +   if (node == trie->root)
   5.182 +      return data;
   5.183 +
   5.184 +   if (node->prev_sibling)
   5.185 +      node->prev_sibling->next_sibling = node->next_sibling;
   5.186 +   else
   5.187 +      node->parent->child = node->next_sibling;
   5.188 +
   5.189 +   if (node->next_sibling)
   5.190 +      node->next_sibling->prev_sibling = node->prev_sibling;
   5.191 +
   5.192 +   free(node);
   5.193 +
   5.194 +   trie->count--;
   5.195 +
   5.196 +   return data;
   5.197     }
   5.198  
   5.199  ////////////////////////////////////////////////////////////////////////////////
   5.200 @@ -175,8 +251,9 @@
   5.201        while (NULL != p)
   5.202  	 {
   5.203  	 trie_visit_node(trie, p, op, k);
   5.204 -	 p = p->sibling;
   5.205 +	 p = p->next_sibling;
   5.206  	 }
   5.207 +      free(k);
   5.208        }
   5.209     }
   5.210  
   5.211 @@ -199,8 +276,8 @@
   5.212     trie_print_node(node);
   5.213     if (node->child)
   5.214        trie_node_dump_raw(node->child, depth+1); 
   5.215 -   if (NULL != node->sibling)
   5.216 -      trie_node_dump_raw(node->sibling, depth+1);
   5.217 +   if (NULL != node->next_sibling)
   5.218 +      trie_node_dump_raw(node->next_sibling, depth+1);
   5.219     }
   5.220  
   5.221  ////////////////////////////////////////////////////////////////////////////////
   5.222 @@ -222,11 +299,11 @@
   5.223        printf("\n");
   5.224     if (node->child)
   5.225        trie_node_dump_preorder(trie, node->child, depth+1);
   5.226 -   if (NULL != node->sibling)
   5.227 +   if (NULL != node->next_sibling)
   5.228        {
   5.229        for (int i=0; i < depth; i++)
   5.230  	 printf(" ");
   5.231 -      trie_node_dump_preorder(trie, node->sibling, depth);
   5.232 +      trie_node_dump_preorder(trie, node->next_sibling, depth);
   5.233        }
   5.234     }
   5.235  
   5.236 @@ -234,5 +311,64 @@
   5.237  void
   5.238  trie_print_node(struct trie_node * node)
   5.239     {
   5.240 -   printf("%p\tchild %p\tsibling %p\tkey %c\tdata %p\n", node, node->child, node->sibling, node->key, node->data);
   5.241 +   printf("%p\tchild %p\tnext_sibling %p\tkey %c\tdata %p\n", (void *) node, (void *) node->child, 
   5.242 +	  (void *) node->next_sibling, node->key, node->data);
   5.243     }
   5.244 +
   5.245 +////////////////////////////////////////////////////////////////////////////////
   5.246 +void
   5.247 +trie_subtree_list_push(char * k, void * d)
   5.248 +   {
   5.249 +   if (NULL == subkey_list)
   5.250 +      return;
   5.251 +
   5.252 +   if ( (0 == subkey_list_limit) || (list_size(subkey_list) < subkey_list_limit) )
   5.253 +      list_push_front(subkey_list, k);
   5.254 +   }
   5.255 +
   5.256 +////////////////////////////////////////////////////////////////////////////////
   5.257 +// Not reentrant since it uses the package static subkeys_list variable.
   5.258 +// However, that variable is only used while building a subkey list, so you can
   5.259 +// get more than one subkey if you want.  Just don't try to get them simultaneously.
   5.260 +struct list *
   5.261 +trie_get_subkeys(struct trie * trie, char * prefix, size_t limit)
   5.262 +   {
   5.263 +   if ( (NULL == trie) || (NULL == prefix) )
   5.264 +      return NULL;
   5.265 +
   5.266 +   struct trie_node * node = trie_find_node(trie, prefix);
   5.267 +   if (NULL == node)
   5.268 +      return NULL;
   5.269 +
   5.270 +   subkey_list = list_new();
   5.271 +   if (NULL == subkey_list)
   5.272 +      return NULL;
   5.273 +   subkey_list_limit = limit;
   5.274 +
   5.275 +   size_t len = strlen(prefix);
   5.276 +   char * pref = malloc(len+1);
   5.277 +   if (NULL == pref)
   5.278 +      return NULL;
   5.279 +   strncpy(pref, prefix, len-1);
   5.280 +   pref[len-1] = '\0';
   5.281 +
   5.282 +   trie_visit_node(trie, node->parent, trie_subtree_list_push, pref);
   5.283 +   free(pref);
   5.284 +
   5.285 +   return subkey_list;
   5.286 +   }
   5.287 +
   5.288 +////////////////////////////////////////////////////////////////////////////////
   5.289 +void
   5.290 +trie_free_subkey_list(struct list * list)
   5.291 +   {
   5.292 +   if (NULL == list)
   5.293 +      return;
   5.294 +
   5.295 +   struct list_iterator * it = list_begin(list);
   5.296 +   void * data;
   5.297 +   while (data = list_remove(it))
   5.298 +      free(data);
   5.299 +
   5.300 +   list_delete(list);
   5.301 +   }
     6.1 --- a/src/trie.h	Mon Oct 01 15:50:30 2012 -0500
     6.2 +++ b/src/trie.h	Tue Oct 02 10:13:07 2012 -0500
     6.3 @@ -6,6 +6,9 @@
     6.4  struct trie * trie_new();
     6.5  void trie_delete(struct trie * trie);
     6.6  
     6.7 +size_t trie_size(struct trie * trie);
     6.8 +size_t trie_count(struct trie * trie);
     6.9 +
    6.10  char * trie_insert(struct trie * trie, char * key, void * data);
    6.11  void * trie_find(struct trie * trie, char * key);
    6.12  void * trie_remove(struct trie * trie, char * key);
    6.13 @@ -15,4 +18,8 @@
    6.14  void trie_dump(struct trie * trie);
    6.15  void trie_dump_raw(struct trie * trie);
    6.16  
    6.17 +struct list * trie_get_subkeys(struct trie * trie, char * prefix, size_t limit);
    6.18 +void          trie_free_subkey_list(struct list * list);
    6.19 +
    6.20 +
    6.21  #endif
     7.1 --- a/src/trie_test.c	Mon Oct 01 15:50:30 2012 -0500
     7.2 +++ b/src/trie_test.c	Tue Oct 02 10:13:07 2012 -0500
     7.3 @@ -1,11 +1,13 @@
     7.4  #include <stdio.h>
     7.5  #include <stdlib.h>
     7.6 +#include <string.h>
     7.7  
     7.8 +#include "list.h"
     7.9  #include "trie.h"
    7.10  
    7.11  void print_val(char * key, void * data)
    7.12     {
    7.13 -   printf("%s\n", key);
    7.14 +   printf("%s\t\t%s\n", key, data);
    7.15     }
    7.16  
    7.17  int main(int argc, char ** argv)
    7.18 @@ -16,12 +18,12 @@
    7.19  		      "help", "to give or provide what is necessary to accomplish a task or satisfy a need.",
    7.20  		      "helper", "a person or thing that helps  or gives assistance, support, etc",
    7.21  		      "helping", "the act of a person or thing that helps.",
    7.22 +		      "an", "indefinite article. the form of a before an initial vowel sound.",
    7.23  		      "a", "indefinite article.  Used before an initial consonant sound.",
    7.24 -		      "an", "indefinite article. the form of a before an initial vowel sound.",
    7.25 -		      "anna", "a female name",
    7.26  		      "anne", "a female name",
    7.27  		      "any", "one, a, an, or some; one or more without specification or identification",
    7.28 -		      "anyway", "in any case; anyhow; nonetheless; regardless",
    7.29 +		      "anyway", "in any case; anyhow; nonetheless; regardless",	
    7.30 +		      "anna", "a female name",
    7.31  		      "anyhow", "in any  way whatever",
    7.32  		      "anybody", "any person",
    7.33  		      "Apple", "A computer manufacturer.",
    7.34 @@ -35,13 +37,92 @@
    7.35        i += 2;
    7.36        }
    7.37  
    7.38 +   trie_dump_raw(trie);
    7.39     trie_dump(trie);
    7.40 +   trie_walk_keys(trie, print_val);
    7.41 +   printf("trie is storing %zd words in %zd keys\n", trie_count(trie), trie_size(trie));
    7.42  
    7.43 -   void * data = trie_find(trie, "any");
    7.44 +   ////////////////////////////////////////////
    7.45 +   printf("===================================================\n");
    7.46 +   char word[100];;
    7.47 +   strcpy(word, "any");
    7.48 +   printf("searching for %s\n", word);
    7.49 +
    7.50 +   void * data = trie_find(trie, word);
    7.51     if (NULL == data)
    7.52        printf("Failed to find 'any'\n");
    7.53     else
    7.54        printf("any: %s\n", (char *) data);
    7.55  
    7.56 +
    7.57 +   ////////////////////////////////////////////
    7.58 +   printf("===================================================\n");
    7.59 +   strcpy(word, "an");
    7.60 +   printf("Looking for completions of '%s':\n", word);
    7.61 +   struct list * list = trie_get_subkeys(trie, word, 0);
    7.62 +   if (NULL == list)
    7.63 +      printf("Nothing found for '%s'\n", word);
    7.64 +   else
    7.65 +      {
    7.66 +      printf("list size is %d\n", list_size(list));
    7.67 +      struct list_iterator * it = list_begin(list);
    7.68 +      struct list_iterator * next = it;
    7.69 +      while (NULL != next)
    7.70 +	 {
    7.71 +	 printf("%s\n", list_value(it));
    7.72 +	 next = list_next(it);
    7.73 +	 }
    7.74 +      trie_free_subkey_list(list);
    7.75 +      }
    7.76 +
    7.77 +   ////////////////////////////////////////////
    7.78 +   printf("===================================================\n");
    7.79 +   strcpy(word, "a");
    7.80 +   printf("Looking for first 5 completions of '%s':\n", word);
    7.81 +   list = trie_get_subkeys(trie, word, 5);
    7.82 +   if (NULL == list)
    7.83 +      printf("Nothing found for '%s'\n", word);
    7.84 +   else
    7.85 +      {
    7.86 +      printf("list size is %d\n", list_size(list));
    7.87 +      struct list_iterator * it = list_begin(list);
    7.88 +      struct list_iterator * next = it;
    7.89 +      while (NULL != next)
    7.90 +	 {
    7.91 +	 printf("%s\n", list_value(it));
    7.92 +	 next = list_next(it);
    7.93 +	 }
    7.94 +      trie_free_subkey_list(list);
    7.95 +      }
    7.96 +
    7.97 +   ////////////////////////////////////////////
    7.98 +   printf("===================================================\n");
    7.99 +   strcpy(word, "anybody");
   7.100 +   printf("remove '%s'\n", word);
   7.101 +   trie_remove(trie, word);
   7.102 +
   7.103 +   strcpy(word, "ann");
   7.104 +   printf("remove '%s'\n", word);
   7.105 +   trie_remove(trie, word);
   7.106 +
   7.107 +   strcpy(word, "help");
   7.108 +   printf("remove '%s'\n", word);
   7.109 +   trie_remove(trie, word);
   7.110 +
   7.111 +   strcpy(word, "a");
   7.112 +   printf("remove '%s'\n", word);
   7.113 +   trie_remove(trie, word);
   7.114 +
   7.115 +   trie_dump(trie);
   7.116 +   printf("trie is storing %zd words in %zd keys\n", trie_count(trie), trie_size(trie));
   7.117 +
   7.118 +   ////////////////////////////////////////////
   7.119 +   printf("===================================================\n");
   7.120 +   strcpy(word, "anybody");
   7.121 +   printf("re-inserting '%s'\n", word);
   7.122 +   trie_insert(trie, word, "fake definition");
   7.123 +   trie_dump(trie);
   7.124 +   printf("trie is storing %zd words in %zd keys\n", trie_count(trie), trie_size(trie));
   7.125 +
   7.126     exit(EXIT_SUCCESS);
   7.127     }