view src/trie.c @ 13:7886d2da8cc4

Trie working OK. Started on compact trie, ctrie.
author Eris Caffee <discordia@eldalin.com>
date Tue, 02 Oct 2012 10:13:07 -0500
parents d359966ed8de
children
line source
1 #include <ctype.h>
2 #include <stdlib.h>
3 #include <stdbool.h>
4 #include <string.h>
5 #include <stdio.h>
7 #include "list.h"
9 struct trie_node
10 {
11 char key;
12 struct trie_node * parent;
13 struct trie_node * child;
14 struct trie_node * next_sibling;
15 struct trie_node * prev_sibling;
16 void * data;
17 };
19 struct trie
20 {
21 struct trie_node * root;
22 size_t size;
23 size_t count;
24 };
27 static struct list * subkey_list = NULL;
28 static size_t subkey_list_limit = 0;
30 struct trie_node * trie_search_children(struct trie_node * node, char k);
31 void trie_visit_node(struct trie * trie, struct trie_node * node, void (*op)(char * k, void * d), char * key);
32 void trie_print_node(struct trie_node * node);
33 void trie_print_node(struct trie_node * node);
34 void trie_node_dump_preorder(struct trie * trie, struct trie_node * node, int depth);
35 void trie_node_dump_raw(struct trie_node * node, int depth);
36 void trie_subtree_list_push(char * k, void * d);
37 struct trie_node * trie_find_node(struct trie * trie, char * key);
39 ////////////////////////////////////////////////////////////////////////////////
40 struct trie *
41 trie_new()
42 {
43 struct trie * trie = malloc(sizeof(struct trie));
44 if (NULL == trie)
45 return NULL;
47 trie->root = malloc(sizeof(struct trie_node));
48 if (NULL == trie->root)
49 return NULL;
51 trie->root->parent = trie->root->child = trie->root->next_sibling = NULL;
52 trie->root->key = '\0';
53 trie->root->data = NULL;
54 trie->size = trie->count = 0;
56 return trie;
57 }
59 ////////////////////////////////////////////////////////////////////////////////
60 void
61 trie_delete(struct trie * trie)
62 {
63 if (NULL == trie)
64 return;
66 free(trie);
67 }
69 ////////////////////////////////////////////////////////////////////////////////
70 size_t
71 trie_size(struct trie * trie)
72 {
73 if (NULL == trie)
74 return 0;
75 return trie->size;
76 }
78 ////////////////////////////////////////////////////////////////////////////////
79 size_t
80 trie_count(struct trie * trie)
81 {
82 if (NULL == trie)
83 return 0;
84 return trie->count;
85 }
87 ////////////////////////////////////////////////////////////////////////////////
88 char *
89 trie_insert(struct trie * trie, char * key, void * data)
90 {
91 if ( (NULL == trie) || (NULL == key) )
92 return NULL;
94 struct trie_node * curr = trie->root;
95 struct trie_node * result;
97 size_t len = strlen(key);
98 for (int i = 0; i <= len; ++i)
99 {
100 result = trie_search_children(curr, key[i]);
101 if (NULL == result)
102 {
103 // current char is not in the trie yet, allocate a new node and attach it to the children
104 struct trie_node * new = malloc(sizeof(struct trie_node));
105 if (NULL == new)
106 return NULL;
107 new->key = tolower(key[i]);
108 new->child = new->next_sibling = new->prev_sibling = NULL;
109 new->data = NULL;
111 if (NULL == curr->child)
112 curr->child = new;
113 else
114 {
115 struct trie_node * p = curr->child;
116 while (p->next_sibling)
117 p = p->next_sibling;
118 new->prev_sibling = p;
119 p->next_sibling = new;
120 }
122 new->parent = curr;
123 curr = new;
124 trie->size++;
125 }
126 else
127 curr = result;
128 }
130 // At this point, curr is either pointing to a new \0 node with a NULL data pointer,
131 // or an existing \0 node with a valid data pointer (in which case we were asked to "insert"
132 // a key that was already present in the trie.
133 // For now I am simply ignoring collisions. We will update the data.
135 curr->data = data;
137 trie->count++;
139 return key;
140 }
142 ////////////////////////////////////////////////////////////////////////////////
143 struct trie_node *
144 trie_search_children(struct trie_node * node, char k)
145 {
146 struct trie_node * p = node->child;
147 if (NULL == p)
148 return NULL;
150 while ( (NULL != p) && (p->key != tolower(k)) )
151 p = p->next_sibling;
152 return p;
153 }
155 ////////////////////////////////////////////////////////////////////////////////
156 struct trie_node *
157 trie_find_node(struct trie * trie, char * key)
158 {
159 if ( (NULL == trie) || (NULL == key) )
160 return NULL;
162 struct trie_node * node = trie->root;
163 size_t len = strlen(key);
164 for (int i = 0; i <= len; i++)
165 {
166 node = trie_search_children(node, key[i]);
167 if (NULL == node)
168 return NULL;
169 if ('\0' == key[i])
170 return node;
171 }
173 // Should never get here
174 return NULL;
175 }
177 ////////////////////////////////////////////////////////////////////////////////
178 void *
179 trie_find(struct trie * trie, char * key)
180 {
181 struct trie_node * node = trie_find_node(trie, key);
182 if (NULL == node)
183 return NULL;
184 return node->data;
185 }
187 ////////////////////////////////////////////////////////////////////////////////
188 void *
189 trie_remove(struct trie * trie, char * key)
190 {
191 if ( (NULL == trie) || (NULL == key) )
192 return NULL;
194 struct trie_node * node = trie_find_node(trie, key);
195 if (NULL == node)
196 return NULL;
198 void * data = node->data;
200 while ( (NULL == node->prev_sibling) && (NULL == node->prev_sibling) )
201 {
202 struct trie_node * p = node;
203 node = node->parent;
204 free(p);
205 trie->size--;
206 }
208 if (node == trie->root)
209 return data;
211 if (node->prev_sibling)
212 node->prev_sibling->next_sibling = node->next_sibling;
213 else
214 node->parent->child = node->next_sibling;
216 if (node->next_sibling)
217 node->next_sibling->prev_sibling = node->prev_sibling;
219 free(node);
221 trie->count--;
223 return data;
224 }
226 ////////////////////////////////////////////////////////////////////////////////
227 void
228 trie_walk_keys(struct trie * trie, void (*op)(char * k, void * d))
229 {
230 if (NULL == trie)
231 return;
232 trie_visit_node(trie, trie->root, op, "");
233 }
235 ////////////////////////////////////////////////////////////////////////////////
236 void
237 trie_visit_node(struct trie * trie, struct trie_node * node, void (*op)(char * k, void * d), char * key)
238 {
239 if ( (node->key == '\0') && (node != trie->root) )
240 op(key, node->data);
241 else
242 {
243 size_t len = strlen(key);
244 char * k = malloc(len+2);
245 if (NULL == k)
246 return;
247 strcpy(k, key);
248 k[len] = node->key;
249 k[len+1] = '\0';
250 struct trie_node * p = node->child;
251 while (NULL != p)
252 {
253 trie_visit_node(trie, p, op, k);
254 p = p->next_sibling;
255 }
256 free(k);
257 }
258 }
260 ////////////////////////////////////////////////////////////////////////////////
261 void trie_dump_raw(struct trie * trie)
262 {
263 if (NULL == trie)
264 return;
266 int depth = -1;
267 trie_node_dump_raw(trie->root, depth);
268 }
270 ////////////////////////////////////////////////////////////////////////////////
271 void
272 trie_node_dump_raw(struct trie_node * node, int depth)
273 {
274 for (int i=0; i < depth; i++)
275 printf(" ");
276 trie_print_node(node);
277 if (node->child)
278 trie_node_dump_raw(node->child, depth+1);
279 if (NULL != node->next_sibling)
280 trie_node_dump_raw(node->next_sibling, depth+1);
281 }
283 ////////////////////////////////////////////////////////////////////////////////
284 void trie_dump(struct trie * trie)
285 {
286 if (NULL == trie)
287 return;
289 int depth = -1;
290 trie_node_dump_preorder(trie, trie->root, depth);
291 }
293 ////////////////////////////////////////////////////////////////////////////////
294 void
295 trie_node_dump_preorder(struct trie * trie, struct trie_node * node, int depth)
296 {
297 printf("%c", node->key);
298 if ( (node != trie->root) && ('\0' == node->key) )
299 printf("\n");
300 if (node->child)
301 trie_node_dump_preorder(trie, node->child, depth+1);
302 if (NULL != node->next_sibling)
303 {
304 for (int i=0; i < depth; i++)
305 printf(" ");
306 trie_node_dump_preorder(trie, node->next_sibling, depth);
307 }
308 }
310 ////////////////////////////////////////////////////////////////////////////////
311 void
312 trie_print_node(struct trie_node * node)
313 {
314 printf("%p\tchild %p\tnext_sibling %p\tkey %c\tdata %p\n", (void *) node, (void *) node->child,
315 (void *) node->next_sibling, node->key, node->data);
316 }
318 ////////////////////////////////////////////////////////////////////////////////
319 void
320 trie_subtree_list_push(char * k, void * d)
321 {
322 if (NULL == subkey_list)
323 return;
325 if ( (0 == subkey_list_limit) || (list_size(subkey_list) < subkey_list_limit) )
326 list_push_front(subkey_list, k);
327 }
329 ////////////////////////////////////////////////////////////////////////////////
330 // Not reentrant since it uses the package static subkeys_list variable.
331 // However, that variable is only used while building a subkey list, so you can
332 // get more than one subkey if you want. Just don't try to get them simultaneously.
333 struct list *
334 trie_get_subkeys(struct trie * trie, char * prefix, size_t limit)
335 {
336 if ( (NULL == trie) || (NULL == prefix) )
337 return NULL;
339 struct trie_node * node = trie_find_node(trie, prefix);
340 if (NULL == node)
341 return NULL;
343 subkey_list = list_new();
344 if (NULL == subkey_list)
345 return NULL;
346 subkey_list_limit = limit;
348 size_t len = strlen(prefix);
349 char * pref = malloc(len+1);
350 if (NULL == pref)
351 return NULL;
352 strncpy(pref, prefix, len-1);
353 pref[len-1] = '\0';
355 trie_visit_node(trie, node->parent, trie_subtree_list_push, pref);
356 free(pref);
358 return subkey_list;
359 }
361 ////////////////////////////////////////////////////////////////////////////////
362 void
363 trie_free_subkey_list(struct list * list)
364 {
365 if (NULL == list)
366 return;
368 struct list_iterator * it = list_begin(list);
369 void * data;
370 while (data = list_remove(it))
371 free(data);
373 list_delete(list);
374 }