view src/ctrie.c @ 15:d11dfc49b559

Seems to be working, but I need to prove the algorithms and also make ctrie_remove compress down, too. See the final output where the standalone 'n' node could be pushed into it's children
author Eris Caffee <discordia@eldalin.com>
date Wed, 03 Oct 2012 01:07:04 -0500
parents ef73b96fdcae
children
line source
1 #include <ctype.h>
2 #include <stdlib.h>
3 #include <stdbool.h>
4 #include <string.h>
5 #include <stdio.h>
7 #include "list.h"
9 struct ctrie_node
10 {
11 char * key;
12 struct ctrie_node * parent;
13 struct ctrie_node * child;
14 struct ctrie_node * next_sibling;
15 struct ctrie_node * prev_sibling;
16 void * data;
17 };
19 struct ctrie
20 {
21 struct ctrie_node * root;
22 size_t size;
23 size_t count;
24 };
27 static struct list * subkey_list = NULL;
28 static size_t subkey_list_limit = 0;
30 char * ctrie_insert_node(struct ctrie * ctrie, struct ctrie_node * node, char * key, void * data);
31 struct ctrie_node * ctrie_longest_matching_child(struct ctrie_node * node, char * key);
32 struct ctrie_node * ctrie_node_new(char * key, void * data);
33 int ctrie_str_common(char * s1, char * s2);
35 void ctrie_print_node(struct ctrie_node * node);
36 void ctrie_print_key (struct ctrie_node * node);
37 void ctrie_node_dump (struct ctrie_node * node, int depth, void (*op)(struct ctrie_node *));
39 void ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * key);
41 struct ctrie_node * ctrie_find_node (struct ctrie_node * node, char * key);
42 struct ctrie_node * ctrie_search_children(struct ctrie_node * node, char * k);
45 void ctrie_subtree_list_push(char * k, void * d);
47 ////////////////////////////////////////////////////////////////////////////////
48 struct ctrie *
49 ctrie_new()
50 {
51 struct ctrie * ctrie = malloc(sizeof(struct ctrie));
52 if (NULL == ctrie)
53 return NULL;
55 ctrie->root = malloc(sizeof(struct ctrie_node));
56 if (NULL == ctrie->root)
57 return NULL;
59 ctrie->root->parent = ctrie->root->child = ctrie->root->next_sibling = NULL;
60 ctrie->root->key = "";
61 ctrie->root->data = NULL;
62 ctrie->size = ctrie->count = 0;
64 // printf("The root node is %p\n", (void *) ctrie->root);
65 return ctrie;
66 }
68 ////////////////////////////////////////////////////////////////////////////////
69 void
70 ctrie_delete(struct ctrie * ctrie)
71 {
72 if (NULL == ctrie)
73 return;
75 free(ctrie);
76 }
78 ////////////////////////////////////////////////////////////////////////////////
79 struct ctrie_node *
80 ctrie_node_new(char * key, void * data)
81 {
82 if (NULL == key)
83 return NULL;
85 struct ctrie_node * new = malloc(sizeof(struct ctrie_node));
86 if (NULL == new)
87 return NULL;
89 new->key = malloc(strlen(key));
90 if (NULL == new->key)
91 {
92 free(new);
93 return NULL;
94 }
95 strcpy(new->key, key);
97 new->data = data;
98 new->parent = new->child = new->prev_sibling = new->next_sibling = NULL;
100 return new;
101 }
103 ////////////////////////////////////////////////////////////////////////////////
104 void *
105 ctrie_node_free(struct ctrie_node * node)
106 {
107 if (NULL == node)
108 return NULL;
109 void * data = node->data;
110 free(node->key);
111 free(node);
112 return data;
113 }
115 ////////////////////////////////////////////////////////////////////////////////
116 size_t
117 ctrie_size(struct ctrie * ctrie)
118 {
119 if (NULL == ctrie)
120 return 0;
121 return ctrie->size;
122 }
124 ////////////////////////////////////////////////////////////////////////////////
125 size_t
126 ctrie_count(struct ctrie * ctrie)
127 {
128 if (NULL == ctrie)
129 return 0;
130 return ctrie->count;
131 }
133 ////////////////////////////////////////////////////////////////////////////////
134 char *
135 ctrie_insert(struct ctrie * ctrie, char * key, void * data)
136 {
137 if ( (NULL == ctrie) || (NULL == key) )
138 return NULL;
140 // printf("root node is "); ctrie_print_node(ctrie->root);
141 return ctrie_insert_node(ctrie, ctrie->root, key, data);
142 }
144 ////////////////////////////////////////////////////////////////////////////////
145 char *
146 ctrie_insert_node(struct ctrie * ctrie, struct ctrie_node * node, char * key, void * data)
147 {
148 if ( (NULL == node) || (NULL == key) )
149 return NULL;
151 struct ctrie_node * p = ctrie_longest_matching_child(node, key);
154 // printf("Longest match is ");
155 if (NULL != p)
156 ctrie_print_node(p);
157 else
158 printf("(nil)\n");
161 if (NULL == p)
162 {
163 // No matching child found. Create new(key, data) as child of node
164 struct ctrie_node * new = ctrie_node_new(key, data);
165 if (NULL == new)
166 return NULL;
167 new->parent = node;
169 if (NULL == node->child)
170 node->child = new;
171 else
172 {
173 struct ctrie_node * s = node->child;
174 while (NULL != s->next_sibling)
175 s = s->next_sibling;
176 s->next_sibling = new;
177 new->prev_sibling = s;
178 }
180 ctrie->size++;
181 ctrie->count++;
184 // printf("Added "); ctrie_print_node(new);
187 return key;
188 }
189 else if (ctrie_str_common(p->key, key) == strlen(p->key))
190 {
191 // Found a node whose key matches for it's entire length
192 // Does it match the entire rest of the key we are inserting?
193 if (ctrie_str_common(p->key, key) == strlen(key))
194 {
195 if (p->data)
196 {
197 // printf("This node already exists\n");
198 // Key already exists. Error
199 return NULL;
200 }
201 else
202 {
203 p->data = data;
204 ctrie->count++;
205 return key;
206 }
207 }
208 else
209 {
210 // The node found has a key that is a prefix of the key we are inserting.
211 // Recurse with the remainder of the key
212 // printf("Recursing because node's entire key %s is a prefix of %s\n", p->key, key);
213 return ctrie_insert_node(ctrie, p, key+strlen(p->key), data);
214 }
215 }
216 else
217 {
218 // Found a node that matches for only part of it's length.
219 // Split the node and then recurse.
221 // printf("splitting this node: "); ctrie_print_node(p);
223 // Get the new shorter key for p.
224 int matchlen = ctrie_str_common(p->key, key);
225 char * prefix = malloc(matchlen+1);
226 if (NULL == prefix)
227 return NULL;
229 strncpy(prefix, key, matchlen);
230 prefix[matchlen] = '\0';
232 // Create a new node to hold the remainder of the old key and take over the existing children.
233 struct ctrie_node * new = ctrie_node_new((p->key)+matchlen, p->data);
234 if (NULL == new)
235 {
236 free(prefix);
237 return NULL;
238 }
239 new->parent = p;
240 new->child = p->child;
241 new->data = p->data;
243 // Update p to point to it's new child and have it's new (shorter) key
244 free(p->key);
245 p->key = prefix;
246 p->child = new;
247 p->data = NULL;
249 ctrie->size++;
251 /*
252 printf("Node was split into these\n");
253 ctrie_print_node(p);
254 ctrie_print_node(new);
255 */
257 // Now add the user requested new key as a child of p
258 // printf("Recursing with node %p key %s as a prefix of %s\n", p->parent, p->parent->key, key);
259 return ctrie_insert_node(ctrie, p->parent, key, data);
260 }
262 // should not get here
263 return NULL;
264 }
266 ////////////////////////////////////////////////////////////////////////////////
267 struct ctrie_node *
268 ctrie_longest_matching_child(struct ctrie_node * node, char * key)
269 {
270 struct ctrie_node * longest = NULL;
271 int len = 0;
272 int l;
274 //printf("Searching for >%s< in children of %p (%s)\n", key, (void *) node, node->key);
276 struct ctrie_node * p = node->child;
277 while (NULL != p)
278 {
279 l = ctrie_str_common(p->key, key);
280 //printf(" Checking >%s< common length is %d\n", p->key, l);
281 if (l > len)
282 {
283 len = l;
284 longest = p;
285 }
286 p = p->next_sibling;
287 }
289 //printf("Returning longest match %p\n", longest);
290 return longest;
291 }
293 ////////////////////////////////////////////////////////////////////////////////
294 // Returns the number of characters of s1 (starting at the beginning)
295 // that match the corresponding characters in s2.
296 //
297 // eg: ctrie_str_common("asdf", "asdf") returns 4
298 // ctrie_str_common("asdf", "asdfer") returns 4
299 // ctrie_str_common("asdf", "asgg") returns 2
300 // ctrie_str_common("asdf", "gggg") returns 0
302 int
303 ctrie_str_common(char * s1, char * s2)
304 {
305 // printf("s1 >%s< s1 >%s<\n", s1, s2);
306 int i = 0;
307 while ( (*(s1+i) != '\0') && (*(s2+i) != '\0') &&
308 (*(s1+i) == *(s2+i)) )
309 {
310 // printf("*s1+%d %c *s2+%d %c\n", i, (*(s1+i) == '\0' ? "." : *(s1+i)), i, (*(s2+i) == '\0' ? "." : *(s2+i)));
311 i++;
312 }
314 return i;
315 }
317 ////////////////////////////////////////////////////////////////////////////////
318 void
319 ctrie_print_node(struct ctrie_node * node)
320 {
321 if (NULL == node)
322 printf("(nil node)\n");
323 else
324 printf("%s \t%p\t parent:%p \tchild:%p \tprev:%p \tnext:%p \tdata:%p\n",
325 node->key, (void *) node, (void *) node->parent, (void *) node->child,
326 (void *) node->prev_sibling, (void *) node->next_sibling, node->data);
328 }
330 ////////////////////////////////////////////////////////////////////////////////
331 void
332 ctrie_print_key(struct ctrie_node * node)
333 {
334 if (NULL == node)
335 printf("(nil node)\n");
336 else if (NULL == node->key)
337 printf("(nil key)\n");
338 else
339 printf("%s\n", node->key);
340 }
342 ////////////////////////////////////////////////////////////////////////////////
343 void ctrie_dump(struct ctrie * ctrie)
344 {
345 if (NULL == ctrie)
346 return;
348 int depth = 0;
349 ctrie_node_dump(ctrie->root, depth, ctrie_print_key);
350 }
352 ////////////////////////////////////////////////////////////////////////////////
353 void ctrie_dump_raw(struct ctrie * ctrie)
354 {
355 if (NULL == ctrie)
356 return;
358 int depth = 0;
359 ctrie_node_dump(ctrie->root, depth, ctrie_print_node);
360 }
362 ////////////////////////////////////////////////////////////////////////////////
363 void
364 ctrie_node_dump(struct ctrie_node * node, int depth, void (*op)(struct ctrie_node *))
365 {
366 for (int i=0; i < depth; i++)
367 printf(" ");
368 op(node);
369 if (node->child)
370 {
371 int len = 0;
372 if (NULL != node->key)
373 len = strlen(node->key);
374 ctrie_node_dump(node->child, depth+len, op);
375 }
376 if (NULL != node->next_sibling)
377 ctrie_node_dump(node->next_sibling, depth, op);
378 }
380 ////////////////////////////////////////////////////////////////////////////////
381 void
382 ctrie_walk_keys(struct ctrie * ctrie, void (*op)(char * k, void * d))
383 {
384 if (NULL == ctrie)
385 return;
386 ctrie_visit_node(ctrie, ctrie->root, op, "");
387 }
389 ////////////////////////////////////////////////////////////////////////////////
390 void
391 ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * prefix)
392 {
393 // printf("Visiting node %p with prefix >%s<\n", node, prefix);
394 if (NULL != node->data)
395 {
396 // printf("node has data\n");
397 char * k = malloc(strlen(prefix) + strlen(node->key));
398 if (NULL == k)
399 return;
400 k[0] = '\0';
401 strcat(k, prefix);
402 strcat(k, node->key);
403 op(k, node->data);
404 free(k);
405 }
407 if(NULL != node->child)
408 {
409 // printf("node has children\n");
410 char * k = malloc(strlen(prefix) + strlen(node->key));
411 if (NULL == k)
412 return;
413 k[0] = '\0';
414 strcat(k, prefix);
415 strcat(k, node->key);
416 struct ctrie_node * p = node->child;
417 while (NULL != p)
418 {
419 ctrie_visit_node(ctrie, p, op, k);
420 p = p->next_sibling;
421 }
422 free(k);
423 }
424 }
426 ////////////////////////////////////////////////////////////////////////////////
427 void *
428 ctrie_find(struct ctrie * ctrie, char * key)
429 {
430 struct ctrie_node * node = ctrie_find_node(ctrie->root, key);
431 if (NULL == node)
432 return NULL;
433 return node->data;
434 }
436 ////////////////////////////////////////////////////////////////////////////////
437 struct ctrie_node *
438 ctrie_find_node(struct ctrie_node * node, char * key)
439 {
440 if ( (NULL == node) || (NULL == key) )
441 return NULL;
443 size_t len = strlen(key);
444 for (int i = 0; i <= len; i += strlen(node->key))
445 {
446 node = ctrie_longest_matching_child(node, key+i);
447 if (NULL == node)
448 return NULL;
449 if (strlen(key+i) == strlen(node->key))
450 return node;
451 }
453 // Should never get here
454 return NULL;
455 }
457 ////////////////////////////////////////////////////////////////////////////////
458 struct ctrie_node *
459 ctrie_search_children(struct ctrie_node * node, char * k)
460 {
461 struct ctrie_node * p = node->child;
462 if (NULL == p)
463 return NULL;
465 size_t len = strlen(node->key);
466 while ( (NULL != p) && (0 != strncmp(node->key, k, len)) )
467 p = p->next_sibling;
468 return p;
469 }
471 ////////////////////////////////////////////////////////////////////////////////
472 void
473 ctrie_subtree_list_push(char * key, void * data)
474 {
475 if (NULL == subkey_list)
476 return;
477 char * k = malloc(strlen(key)+1);
478 if (NULL == k)
479 return;
480 strcpy(k, key);
481 if ( (0 == subkey_list_limit) || (list_size(subkey_list) < subkey_list_limit) )
482 {
483 list_push_front(subkey_list, k);
484 // printf("list push >%s<\n", k);
485 }
486 }
488 ////////////////////////////////////////////////////////////////////////////////
489 // Not reentrant since it uses the package static subkeys_list variable.
490 // However, that variable is only used while building a subkey list, so you can
491 // get more than one subkey if you want. Just don't try to get them simultaneously.
492 struct list *
493 ctrie_get_subkeys(struct ctrie * ctrie, char * prefix, size_t limit)
494 {
495 if ( (NULL == ctrie) || (NULL == prefix) )
496 return NULL;
498 // struct ctrie_node * node = ctrie_find_node(ctrie, prefix);
500 struct ctrie_node * p = NULL;
501 struct ctrie_node * node = ctrie->root;
502 size_t len = strlen(prefix);
503 // printf("len %d\n", len);
504 int index;
505 for (int i = 0; i < len; i += ctrie_str_common(prefix+i, node->key))
506 {
507 // printf("i %d\n", i);
508 p = node;
509 node = ctrie_longest_matching_child(node, prefix+i);
510 // printf("longest returned %p %d\n", node, strlen(node->key));
511 if (NULL == node)
512 {
513 // printf("breaking\n");
514 break;
515 }
516 index = i;
517 }
519 if (NULL == node)
520 return NULL;
522 // printf("Found longest match as %p >%s< index is %d\n", node, node->key, index);
524 subkey_list = list_new();
525 if (NULL == subkey_list)
526 return NULL;
527 subkey_list_limit = limit;
529 len = strlen(prefix);
530 char * pref = malloc(len+1);
531 if (NULL == pref)
532 return NULL;
533 strncpy(pref, prefix, index);
534 pref[index] = '\0';
535 // printf("Built pref as >%s<\n", pref);
536 ctrie_visit_node(ctrie, node, ctrie_subtree_list_push, pref);
537 free(pref);
538 // printf("Returning\n");
539 return subkey_list;
540 }
542 ////////////////////////////////////////////////////////////////////////////////
543 void
544 ctrie_free_subkey_list(struct list * list)
545 {
546 if (NULL == list)
547 return;
549 struct list_iterator * it = list_begin(list);
550 void * data;
551 while (data = list_remove(it))
552 free(data);
554 list_delete(list);
555 }
557 ////////////////////////////////////////////////////////////////////////////////
558 void *
559 ctrie_remove(struct ctrie * ctrie, char * key)
560 {
561 if ( (NULL == ctrie) || (NULL == key) )
562 return NULL;
564 // printf("Looking for %s\n", key);
565 struct ctrie_node * node = ctrie_find_node(ctrie->root, key);
566 if (NULL == node)
567 return NULL;
569 // printf("Found node %p ", node); ctrie_print_node(node);
571 // We found a node, but is it an actual data node or just a prefix?
572 if (NULL == node->data)
573 return NULL;
575 // OK, so it's a data node. If it has no children we can delete it.
576 // Otherwise we just remove the data and maybe collapse nodes
578 void * data = node->data;
579 struct ctrie_node * parent = node->parent;
581 if (NULL == node->child)
582 {
583 // Remove from sibling list.
584 if (node->prev_sibling)
585 node->prev_sibling->next_sibling = node->next_sibling;
586 if (node->next_sibling)
587 node->next_sibling->prev_sibling = node->prev_sibling;
589 // Point the parent to the new first child, if needed.
590 if (parent->child == node)
591 {
592 if (node->prev_sibling)
593 parent->child = node->prev_sibling;
594 else
595 parent->child = node->next_sibling;
596 }
598 free(node->key);
599 free(node);
600 ctrie->size--;
601 ctrie->count--;
602 }
603 else
604 {
605 node->data = NULL;
606 ctrie->count--;
607 }
610 // printf("merging nodes\n"); ctrie_dump_raw(ctrie);
612 // At this point we may have nodes that need to be collapsed together.
613 // This happens when the parent has only one child.
614 if (NULL == parent->child->next_sibling)
615 {
616 char * k = malloc(strlen(parent->key) + strlen(parent->child->key) + 1);
617 // UGH! I need a real error signalling mechanism to handle this.
618 if (NULL == k)
619 return data;
620 k[0] = '\0';
621 strcat(k, parent->key);
622 strcat(k, parent->child->key);
623 free(parent->key);
624 parent->key = k;
626 node = parent->child;
627 parent->child = node->child;
628 free(node->key);
629 free(node);
630 ctrie->size--;
631 }
633 return data;
634 }