view src/ctrie.c @ 13:7886d2da8cc4

Trie working OK. Started on compact trie, ctrie.
author Eris Caffee <discordia@eldalin.com>
date Tue, 02 Oct 2012 10:13:07 -0500
parents
children ef73b96fdcae
line source
1 #include <ctype.h>
2 #include <stdlib.h>
3 #include <stdbool.h>
4 #include <string.h>
5 #include <stdio.h>
7 #include "list.h"
9 struct ctrie_node
10 {
11 char * key;
12 struct ctrie_node * parent;
13 struct ctrie_node * child;
14 struct ctrie_node * next_sibling;
15 struct ctrie_node * prev_sibling;
16 void * data;
17 };
19 struct ctrie
20 {
21 struct ctrie_node * root;
22 size_t size;
23 size_t count;
24 };
27 static struct list * subkey_list = NULL;
28 static size_t subkey_list_limit = 0;
30 struct ctrie_node * ctrie_search_children(struct ctrie_node * node, char k);
31 void ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * key);
32 void ctrie_print_node(struct ctrie_node * node);
33 void ctrie_print_node(struct ctrie_node * node);
34 void ctrie_node_dump_preorder(struct ctrie * ctrie, struct ctrie_node * node, int depth);
35 void ctrie_node_dump_raw(struct ctrie_node * node, int depth);
36 void ctrie_subtree_list_push(char * k, void * d);
37 struct ctrie_node * ctrie_find_node(struct ctrie * ctrie, char * key);
39 struct ctrie_node * ctrie_search_children_longest_match(struct ctrie_node * node, char * k);
41 #undef MAX
42 #define MAX (a, b) ((a) > (b) ? (a) : (b))
44 ////////////////////////////////////////////////////////////////////////////////
45 struct ctrie *
46 ctrie_new()
47 {
48 struct ctrie * ctrie = malloc(sizeof(struct ctrie));
49 if (NULL == ctrie)
50 return NULL;
52 ctrie->root = malloc(sizeof(struct ctrie_node));
53 if (NULL == ctrie->root)
54 return NULL;
56 ctrie->root->parent = ctrie->root->child = ctrie->root->next_sibling = NULL;
57 ctrie->root->key = NULL;
58 ctrie->root->data = NULL;
59 ctrie->size = ctrie->count = 0;
61 return ctrie;
62 }
64 ////////////////////////////////////////////////////////////////////////////////
65 void
66 ctrie_delete(struct ctrie * ctrie)
67 {
68 if (NULL == ctrie)
69 return;
71 free(ctrie);
72 }
74 ////////////////////////////////////////////////////////////////////////////////
75 size_t
76 ctrie_size(struct ctrie * ctrie)
77 {
78 if (NULL == ctrie)
79 return 0;
80 return ctrie->size;
81 }
83 ////////////////////////////////////////////////////////////////////////////////
84 size_t
85 ctrie_count(struct ctrie * ctrie)
86 {
87 if (NULL == ctrie)
88 return 0;
89 return ctrie->count;
90 }
92 ////////////////////////////////////////////////////////////////////////////////
93 char *
94 ctrie_insert(struct ctrie * ctrie, char * key, void * data)
95 {
96 if ( (NULL == ctrie) || (NULL == key) )
97 return NULL;
99 struct ctrie_node * curr = ctrie->root;
100 struct ctrie_node * result;
102 size_t len = strlen(key);
103 for (int i = 0; i <= len; ++i)
104 {
105 result = ctrie_search_children_longest_match(curr, key+i);
106 if (NULL == result)
107 {
108 // current char is not in the ctrie yet, allocate a new node and attach it to the children
109 struct ctrie_node * new = malloc(sizeof(struct ctrie_node));
110 if (NULL == new)
111 return NULL;
112 new->key = tolower(key[i]);
113 new->child = new->next_sibling = new->prev_sibling = NULL;
114 new->data = NULL;
116 if (NULL == curr->child)
117 curr->child = new;
118 else
119 {
120 struct ctrie_node * p = curr->child;
121 while (p->next_sibling)
122 p = p->next_sibling;
123 new->prev_sibling = p;
124 p->next_sibling = new;
125 }
127 new->parent = curr;
128 curr = new;
129 ctrie->size++;
130 }
131 else
132 curr = result;
133 }
135 // At this point, curr is either pointing to a new \0 node with a NULL data pointer,
136 // or an existing \0 node with a valid data pointer (in which case we were asked to "insert"
137 // a key that was already present in the ctrie.
138 // For now I am simply ignoring collisions. We will update the data.
140 curr->data = data;
142 ctrie->count++;
144 return key;
145 }
147 ////////////////////////////////////////////////////////////////////////////////
148 struct ctrie_node *
149 ctrie_search_children_longest_match(struct ctrie_node * node, char * k)
150 {
151 struct ctrie_node * p = node->child;
152 if (NULL == p)
153 return NULL;
155 size_t klen = strlen(k);
156 struct ctrie_node * longest = NULL;
157 size_t length = 0;
158 while (NULL != p)
159 {
160 size_t len = strlen(p->key);
161 while (len > 0)
162 {
163 if ( (0 == strncmp(p->key, k, MAX(len, klen))) &&
164 (length < MAX(len, klen)) )
165 {
166 longest = p;
167 length = MAX(len, klen);
168 break;
169 }
170 len--;
171 }
172 p = p->next_sibling;
173 }
175 return p;
176 }
178 ////////////////////////////////////////////////////////////////////////////////
179 struct ctrie_node *
180 ctrie_search_children(struct ctrie_node * node, char k)
181 {
182 struct ctrie_node * p = node->child;
183 if (NULL == p)
184 return NULL;
186 while ( (NULL != p) && (p->key != tolower(k)) )
187 p = p->next_sibling;
188 return p;
189 }
191 ////////////////////////////////////////////////////////////////////////////////
192 struct ctrie_node *
193 ctrie_find_node(struct ctrie * ctrie, char * key)
194 {
195 if ( (NULL == ctrie) || (NULL == key) )
196 return NULL;
198 struct ctrie_node * node = ctrie->root;
199 size_t len = strlen(key);
200 for (int i = 0; i <= len; i++)
201 {
202 node = ctrie_search_children(node, key[i]);
203 if (NULL == node)
204 return NULL;
205 if ('\0' == key[i])
206 return node;
207 }
209 // Should never get here
210 return NULL;
211 }
213 ////////////////////////////////////////////////////////////////////////////////
214 void *
215 ctrie_find(struct ctrie * ctrie, char * key)
216 {
217 struct ctrie_node * node = ctrie_find_node(ctrie, key);
218 if (NULL == node)
219 return NULL;
220 return node->data;
221 }
223 ////////////////////////////////////////////////////////////////////////////////
224 void *
225 ctrie_remove(struct ctrie * ctrie, char * key)
226 {
227 if ( (NULL == ctrie) || (NULL == key) )
228 return NULL;
230 struct ctrie_node * node = ctrie_find_node(ctrie, key);
231 if (NULL == node)
232 return NULL;
234 void * data = node->data;
236 while ( (NULL == node->prev_sibling) && (NULL == node->prev_sibling) )
237 {
238 struct ctrie_node * p = node;
239 node = node->parent;
240 free(p);
241 ctrie->size--;
242 }
244 if (node == ctrie->root)
245 return data;
247 if (node->prev_sibling)
248 node->prev_sibling->next_sibling = node->next_sibling;
249 else
250 node->parent->child = node->next_sibling;
252 if (node->next_sibling)
253 node->next_sibling->prev_sibling = node->prev_sibling;
255 free(node);
257 ctrie->count--;
259 return data;
260 }
262 ////////////////////////////////////////////////////////////////////////////////
263 void
264 ctrie_walk_keys(struct ctrie * ctrie, void (*op)(char * k, void * d))
265 {
266 if (NULL == ctrie)
267 return;
268 ctrie_visit_node(ctrie, ctrie->root, op, "");
269 }
271 ////////////////////////////////////////////////////////////////////////////////
272 void
273 ctrie_visit_node(struct ctrie * ctrie, struct ctrie_node * node, void (*op)(char * k, void * d), char * key)
274 {
275 if ( (node->key == '\0') && (node != ctrie->root) )
276 op(key, node->data);
277 else
278 {
279 size_t len = strlen(key);
280 char * k = malloc(len+2);
281 if (NULL == k)
282 return;
283 strcpy(k, key);
284 k[len] = node->key;
285 k[len+1] = '\0';
286 struct ctrie_node * p = node->child;
287 while (NULL != p)
288 {
289 ctrie_visit_node(ctrie, p, op, k);
290 p = p->next_sibling;
291 }
292 free(k);
293 }
294 }
296 ////////////////////////////////////////////////////////////////////////////////
297 void ctrie_dump_raw(struct ctrie * ctrie)
298 {
299 if (NULL == ctrie)
300 return;
302 int depth = -1;
303 ctrie_node_dump_raw(ctrie->root, depth);
304 }
306 ////////////////////////////////////////////////////////////////////////////////
307 void
308 ctrie_node_dump_raw(struct ctrie_node * node, int depth)
309 {
310 for (int i=0; i < depth; i++)
311 printf(" ");
312 ctrie_print_node(node);
313 if (node->child)
314 ctrie_node_dump_raw(node->child, depth+1);
315 if (NULL != node->next_sibling)
316 ctrie_node_dump_raw(node->next_sibling, depth+1);
317 }
319 ////////////////////////////////////////////////////////////////////////////////
320 void ctrie_dump(struct ctrie * ctrie)
321 {
322 if (NULL == ctrie)
323 return;
325 int depth = -1;
326 ctrie_node_dump_preorder(ctrie, ctrie->root, depth);
327 }
329 ////////////////////////////////////////////////////////////////////////////////
330 void
331 ctrie_node_dump_preorder(struct ctrie * ctrie, struct ctrie_node * node, int depth)
332 {
333 printf("%c", node->key);
334 if ( (node != ctrie->root) && ('\0' == node->key) )
335 printf("\n");
336 if (node->child)
337 ctrie_node_dump_preorder(ctrie, node->child, depth+1);
338 if (NULL != node->next_sibling)
339 {
340 for (int i=0; i < depth; i++)
341 printf(" ");
342 ctrie_node_dump_preorder(ctrie, node->next_sibling, depth);
343 }
344 }
346 ////////////////////////////////////////////////////////////////////////////////
347 void
348 ctrie_print_node(struct ctrie_node * node)
349 {
350 printf("%p\tchild %p\tnext_sibling %p\tkey %c\tdata %p\n", (void *) node, (void *) node->child,
351 (void *) node->next_sibling, node->key, node->data);
352 }
354 ////////////////////////////////////////////////////////////////////////////////
355 void
356 ctrie_subtree_list_push(char * k, void * d)
357 {
358 if (NULL == subkey_list)
359 return;
361 if ( (0 == subkey_list_limit) || (list_size(subkey_list) < subkey_list_limit) )
362 list_push_front(subkey_list, k);
363 }
365 ////////////////////////////////////////////////////////////////////////////////
366 // Not reentrant since it uses the package static subkeys_list variable.
367 // However, that variable is only used while building a subkey list, so you can
368 // get more than one subkey if you want. Just don't try to get them simultaneously.
369 struct list *
370 ctrie_get_subkeys(struct ctrie * ctrie, char * prefix, size_t limit)
371 {
372 if ( (NULL == ctrie) || (NULL == prefix) )
373 return NULL;
375 struct ctrie_node * node = ctrie_find_node(ctrie, prefix);
376 if (NULL == node)
377 return NULL;
379 subkey_list = list_new();
380 if (NULL == subkey_list)
381 return NULL;
382 subkey_list_limit = limit;
384 size_t len = strlen(prefix);
385 char * pref = malloc(len+1);
386 if (NULL == pref)
387 return NULL;
388 strncpy(pref, prefix, len-1);
389 pref[len-1] = '\0';
391 ctrie_visit_node(ctrie, node->parent, ctrie_subtree_list_push, pref);
392 free(pref);
394 return subkey_list;
395 }
397 ////////////////////////////////////////////////////////////////////////////////
398 void
399 ctrie_free_subkey_list(struct list * list)
400 {
401 if (NULL == list)
402 return;
404 struct list_iterator * it = list_begin(list);
405 void * data;
406 while (data = list_remove(it))
407 free(data);
409 list_delete(list);
410 }