view src/ctrie.c @ 14:ef73b96fdcae

Many ctrie updates
author Eris Caffee <discordia@eldalin.com>
date Tue, 02 Oct 2012 20:00:38 -0500
parents 7886d2da8cc4
children d11dfc49b559
line source
1 #include <ctype.h>
2 #include <stdlib.h>
3 #include <stdbool.h>
4 #include <string.h>
5 #include <stdio.h>
7 #include "list.h"
9 struct ctrie_node
10 {
11 char * key;
12 struct ctrie_node * parent;
13 struct ctrie_node * child;
14 struct ctrie_node * next_sibling;
15 struct ctrie_node * prev_sibling;
16 void * data;
17 };
19 struct ctrie
20 {
21 struct ctrie_node * root;
22 size_t size;
23 size_t count;
24 };
27 static struct list * subkey_list = NULL;
28 static size_t subkey_list_limit = 0;
30 char * ctrie_insert_node(struct ctrie * ctrie, struct ctrie_node * node, char * key, void * data);
31 struct ctrie_node * ctrie_longest_matching_child(struct ctrie_node * node, char * key);
32 struct ctrie_node * ctrie_node_new(char * key, void * data);
33 int ctrie_str_common(char * s1, char * s2);
35 void ctrie_print_node(struct ctrie_node * node);
36 void ctrie_print_key (struct ctrie_node * node);
37 void ctrie_node_dump (struct ctrie_node * node, int depth, void (*op)(struct ctrie_node *));
39 void ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * key);
41 struct ctrie_node * ctrie_find_node (struct ctrie_node * node, char * key);
42 struct ctrie_node * ctrie_search_children(struct ctrie_node * node, char * k);
45 void ctrie_subtree_list_push(char * k, void * d);
47 ////////////////////////////////////////////////////////////////////////////////
48 struct ctrie *
49 ctrie_new()
50 {
51 struct ctrie * ctrie = malloc(sizeof(struct ctrie));
52 if (NULL == ctrie)
53 return NULL;
55 ctrie->root = malloc(sizeof(struct ctrie_node));
56 if (NULL == ctrie->root)
57 return NULL;
59 ctrie->root->parent = ctrie->root->child = ctrie->root->next_sibling = NULL;
60 ctrie->root->key = "";
61 ctrie->root->data = NULL;
62 ctrie->size = ctrie->count = 0;
64 printf("The root node is %p\n", ctrie->root);
65 return ctrie;
66 }
68 ////////////////////////////////////////////////////////////////////////////////
69 void
70 ctrie_delete(struct ctrie * ctrie)
71 {
72 if (NULL == ctrie)
73 return;
75 free(ctrie);
76 }
78 ////////////////////////////////////////////////////////////////////////////////
79 struct ctrie_node *
80 ctrie_node_new(char * key, void * data)
81 {
82 if (NULL == key)
83 return NULL;
85 struct ctrie_node * new = malloc(sizeof(struct ctrie_node));
86 if (NULL == new)
87 return NULL;
89 new->key = malloc(strlen(key));
90 if (NULL == new->key)
91 {
92 free(new);
93 return NULL;
94 }
95 strcpy(new->key, key);
97 new->data = data;
98 new->parent = new->child = new->prev_sibling = new->next_sibling = NULL;
100 return new;
101 }
103 ////////////////////////////////////////////////////////////////////////////////
104 void *
105 ctrie_node_free(struct ctrie_node * node)
106 {
107 if (NULL == node)
108 return NULL;
109 void * data = node->data;
110 free(node->key);
111 free(node);
112 return data;
113 }
115 ////////////////////////////////////////////////////////////////////////////////
116 size_t
117 ctrie_size(struct ctrie * ctrie)
118 {
119 if (NULL == ctrie)
120 return 0;
121 return ctrie->size;
122 }
124 ////////////////////////////////////////////////////////////////////////////////
125 size_t
126 ctrie_count(struct ctrie * ctrie)
127 {
128 if (NULL == ctrie)
129 return 0;
130 return ctrie->count;
131 }
133 ////////////////////////////////////////////////////////////////////////////////
134 char *
135 ctrie_insert(struct ctrie * ctrie, char * key, void * data)
136 {
137 if ( (NULL == ctrie) || (NULL == key) )
138 return NULL;
140 printf("root node is ");
141 ctrie_print_node(ctrie->root);
142 return ctrie_insert_node(ctrie, ctrie->root, key, data);
143 }
145 ////////////////////////////////////////////////////////////////////////////////
146 char *
147 ctrie_insert_node(struct ctrie * ctrie, struct ctrie_node * node, char * key, void * data)
148 {
149 if ( (NULL == node) || (NULL == key) )
150 return NULL;
152 struct ctrie_node * p = ctrie_longest_matching_child(node, key);
154 /*
155 printf("Longest match is ");
156 if (NULL != p)
157 ctrie_print_node(p);
158 else
159 printf("(nil)\n");
160 */
162 if (NULL == p)
163 {
164 // No matching child found. Create new(key, data) as child of node
165 struct ctrie_node * new = ctrie_node_new(key, data);
166 if (NULL == new)
167 return NULL;
168 new->parent = node;
170 if (NULL == node->child)
171 node->child = new;
172 else
173 {
174 struct ctrie_node * s = node->child;
175 while (NULL != s->next_sibling)
176 s = s->next_sibling;
177 s->next_sibling = new;
178 new->prev_sibling = s;
179 }
181 ctrie->size++;
182 ctrie->count++;
184 /*
185 printf("Added ");
186 ctrie_print_node(new);
187 */
189 return key;
190 }
191 else if (ctrie_str_common(p->key, key) == strlen(p->key))
192 {
193 // Found a node whose key matches for it's entire length
194 if (0 == strcmp(p->key, key))
195 {
196 // Key already exists. Error
197 return NULL;
198 }
199 else
200 {
201 // The node found has a key that is a prefix of the key we are inserting.
202 // Recurse with the remainder of the key
203 // printf("Recursing because node's entire key %s is a prefix of %s\n", p->key, key);
204 return ctrie_insert_node(ctrie, p, key+strlen(p->key), data);
205 }
206 }
207 else
208 {
209 // Found a node that matches for only part of it's length.
210 // Split the node and then recurse.
212 /*
213 printf("splitting this node: ");
214 ctrie_print_node(p);
215 */
217 // Get the new shorter key for p.
218 int matchlen = ctrie_str_common(p->key, key);
219 char * prefix = malloc(matchlen+1);
220 if (NULL == prefix)
221 return NULL;
223 strncpy(prefix, key, matchlen);
224 prefix[matchlen] = '\0';
226 // Create a new node to hold the remainder of the old key and take over the existing children.
227 struct ctrie_node * new = ctrie_node_new((p->key)+matchlen, p->data);
228 if (NULL == new)
229 {
230 free(prefix);
231 return NULL;
232 }
233 new->parent = p;
234 new->child = p->child;
235 new->data = p->data;
237 // Update p to point to it's new child and have it's new (shorter) key
238 free(p->key);
239 p->key = prefix;
240 p->child = new;
241 p->data = NULL;
243 ctrie->size++;
245 /*
246 printf("Node was split into these\n");
247 ctrie_print_node(p);
248 ctrie_print_node(new);
249 */
251 // Now add the user requested new key as a child of p
252 // printf("Recursing with key %s as a prefix of %s\n", p->key, key);
253 return ctrie_insert_node(ctrie, p, key + matchlen, data);
254 }
256 // should not get here
257 return NULL;
258 }
260 ////////////////////////////////////////////////////////////////////////////////
261 struct ctrie_node *
262 ctrie_longest_matching_child(struct ctrie_node * node, char * key)
263 {
264 struct ctrie_node * longest = NULL;
265 int len = 0;
266 int l;
268 printf("Searching for match in children of %p\n", node);
270 struct ctrie_node * p = node->child;
271 while (NULL != p)
272 {
273 l = ctrie_str_common(p->key, key);
274 printf("common length is %d\n", l);
275 if (l > len)
276 {
277 len = l;
278 longest = p;
279 }
280 p = p->next_sibling;
281 }
282 return longest;
283 }
285 ////////////////////////////////////////////////////////////////////////////////
286 // Returns the number of characters of s1 (starting at the beginning)
287 // that match the corresponding characters in s2.
288 //
289 // eg: ctrie_str_common("asdf", "asdf") returns 4
290 // ctrie_str_common("asdf", "asdfer") returns 4
291 // ctrie_str_common("asdf", "asgg") returns 2
292 // ctrie_str_common("asdf", "gggg") returns 0
294 int
295 ctrie_str_common(char * s1, char * s2)
296 {
297 int i = 0;
298 while ( (*s1 != '\0') && (*s2 != '\0') &&
299 (*(s1+i) == *(s2+i)) )
300 i++;
302 return i;
303 }
305 ////////////////////////////////////////////////////////////////////////////////
306 void
307 ctrie_print_node(struct ctrie_node * node)
308 {
309 if (NULL == node)
310 printf("(nil node)\n");
311 else
312 printf("%s \t%p\t parent:%p \tchild:%p \tprev:%p \tnext:%p \tdata:%p\n", node->key, node, node->parent, node->child, node->prev_sibling, node->next_sibling, node->data);
314 }
316 ////////////////////////////////////////////////////////////////////////////////
317 void
318 ctrie_print_key(struct ctrie_node * node)
319 {
320 if (NULL == node)
321 printf("(nil node)\n");
322 else if (NULL == node->key)
323 printf("(nil key)\n");
324 else
325 printf("%s\n", node->key);
326 }
328 ////////////////////////////////////////////////////////////////////////////////
329 void ctrie_dump(struct ctrie * ctrie)
330 {
331 if (NULL == ctrie)
332 return;
334 int depth = 0;
335 ctrie_node_dump(ctrie->root, depth, ctrie_print_key);
336 }
338 ////////////////////////////////////////////////////////////////////////////////
339 void ctrie_dump_raw(struct ctrie * ctrie)
340 {
341 if (NULL == ctrie)
342 return;
344 int depth = 0;
345 ctrie_node_dump(ctrie->root, depth, ctrie_print_node);
346 }
348 ////////////////////////////////////////////////////////////////////////////////
349 void
350 ctrie_node_dump(struct ctrie_node * node, int depth, void (*op)(struct ctrie_node *))
351 {
352 for (int i=0; i < depth; i++)
353 printf(" ");
354 op(node);
355 if (node->child)
356 {
357 int len = 0;
358 if (NULL != node->key)
359 len = strlen(node->key);
360 ctrie_node_dump(node->child, depth+len, op);
361 }
362 if (NULL != node->next_sibling)
363 ctrie_node_dump(node->next_sibling, depth, op);
364 }
366 ////////////////////////////////////////////////////////////////////////////////
367 void
368 ctrie_walk_keys(struct ctrie * ctrie, void (*op)(char * k, void * d))
369 {
370 if (NULL == ctrie)
371 return;
372 ctrie_visit_node(ctrie, ctrie->root, op, "");
373 }
375 ////////////////////////////////////////////////////////////////////////////////
376 void
377 ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * prefix)
378 {
379 if (NULL != node->data)
380 {
381 char * k = malloc(strlen(prefix) + strlen(node->key));
382 if (NULL == k)
383 return;
384 k[0] = '\0';
385 strcat(k, prefix);
386 strcat(k, node->key);
387 op(k, node->data);
388 free(k);
389 }
391 if(NULL != node->child)
392 {
393 char * k = malloc(strlen(prefix) + strlen(node->key));
394 if (NULL == k)
395 return;
396 k[0] = '\0';
397 strcat(k, prefix);
398 strcat(k, node->key);
399 struct ctrie_node * p = node->child;
400 while (NULL != p)
401 {
402 ctrie_visit_node(ctrie, p, op, k);
403 p = p->next_sibling;
404 }
405 free(k);
406 }
407 }
409 ////////////////////////////////////////////////////////////////////////////////
410 void *
411 ctrie_find(struct ctrie * ctrie, char * key)
412 {
413 struct ctrie_node * node = ctrie_find_node(ctrie->root, key);
414 if (NULL == node)
415 return NULL;
416 return node->data;
417 }
419 ////////////////////////////////////////////////////////////////////////////////
420 struct ctrie_node *
421 ctrie_find_node(struct ctrie_node * node, char * key)
422 {
423 if ( (NULL == node) || (NULL == key) )
424 return NULL;
426 size_t len = strlen(key);
427 for (int i = 0; i <= len; i += strlen(node->key))
428 {
429 node = ctrie_search_children(node, key+i);
430 if (NULL == node)
431 return NULL;
432 if (NULL != node->data)
433 return node;
434 }
436 // Should never get here
437 return NULL;
439 struct ctrie_node * p = ctrie_longest_matching_child(node, key);
440 }
442 ////////////////////////////////////////////////////////////////////////////////
443 struct ctrie_node *
444 ctrie_search_children(struct ctrie_node * node, char * k)
445 {
446 struct ctrie_node * p = node->child;
447 if (NULL == p)
448 return NULL;
450 size_t len = strlen(node->key);
451 while ( (NULL != p) && (0 != strncmp(node->key, k, len)) )
452 p = p->next_sibling;
453 return p;
454 }
475 ////////////////////////////////////////////////////////////////////////////////
476 void *
477 ctrie_remove(struct ctrie * ctrie, char * key)
478 {
479 if ( (NULL == ctrie) || (NULL == key) )
480 return NULL;
482 struct ctrie_node * node = ctrie_find_node(ctrie, key);
483 if (NULL == node)
484 return NULL;
486 void * data = node->data;
488 while ( (NULL == node->prev_sibling) && (NULL == node->prev_sibling) )
489 {
490 struct ctrie_node * p = node;
491 node = node->parent;
492 free(p);
493 ctrie->size--;
494 }
496 if (node == ctrie->root)
497 return data;
499 if (node->prev_sibling)
500 node->prev_sibling->next_sibling = node->next_sibling;
501 else
502 node->parent->child = node->next_sibling;
504 if (node->next_sibling)
505 node->next_sibling->prev_sibling = node->prev_sibling;
507 free(node);
509 ctrie->count--;
511 return data;
512 }
514 ////////////////////////////////////////////////////////////////////////////////
515 void
516 ctrie_subtree_list_push(char * k, void * d)
517 {
518 if (NULL == subkey_list)
519 return;
521 if ( (0 == subkey_list_limit) || (list_size(subkey_list) < subkey_list_limit) )
522 list_push_front(subkey_list, k);
523 }
525 ////////////////////////////////////////////////////////////////////////////////
526 // Not reentrant since it uses the package static subkeys_list variable.
527 // However, that variable is only used while building a subkey list, so you can
528 // get more than one subkey if you want. Just don't try to get them simultaneously.
529 struct list *
530 ctrie_get_subkeys(struct ctrie * ctrie, char * prefix, size_t limit)
531 {
532 if ( (NULL == ctrie) || (NULL == prefix) )
533 return NULL;
535 struct ctrie_node * node = ctrie_find_node(ctrie, prefix);
536 if (NULL == node)
537 return NULL;
539 subkey_list = list_new();
540 if (NULL == subkey_list)
541 return NULL;
542 subkey_list_limit = limit;
544 size_t len = strlen(prefix);
545 char * pref = malloc(len+1);
546 if (NULL == pref)
547 return NULL;
548 strncpy(pref, prefix, len-1);
549 pref[len-1] = '\0';
551 ctrie_visit_node(ctrie, node->parent, ctrie_subtree_list_push, pref);
552 free(pref);
554 return subkey_list;
555 }
557 ////////////////////////////////////////////////////////////////////////////////
558 void
559 ctrie_free_subkey_list(struct list * list)
560 {
561 if (NULL == list)
562 return;
564 struct list_iterator * it = list_begin(list);
565 void * data;
566 while (data = list_remove(it))
567 free(data);
569 list_delete(list);
570 }