view src/btree_mem.c @ 22:e56a76090d80

Deletion seems to be working. Still need to clean up memory usage, though.
author Eris Caffee <discordia@eldalin.com>
date Fri, 19 Oct 2012 16:07:18 -0500
parents ce502d2caaea
children 36018adade6d
line source
1 /*
2 This is a fairly straightforward implementation of a B-Tree in memory modeled after
3 Cormen, et al., Introduction to Algorithms, Chapter 18
4 */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <stdbool.h>
10 #include "btree_mem.h"
12 struct btree_mem_node
13 {
14 int nkeys;
15 void ** key;
16 void ** data;
17 struct btree_mem_node ** child;
18 bool leaf;
19 };
21 struct btree_mem
22 {
23 int mindeg;
24 struct btree_mem_node * root;
25 int (*cmp)(void *, void *);
26 };
28 struct btree_mem_node * btree_mem_node_new (struct btree_mem * bt);
29 void btree_mem_node_delete (struct btree_mem_node * node);
31 struct btree_mem_node * btree_mem_node_find (struct btree_mem * bt, struct btree_mem_node * node, void * key, int * i);
33 void * btree_mem_node_insert (struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data);
34 void * btree_mem_node_insert_nonfull (struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data);
36 struct btree_mem_node * btree_mem_split_child (struct btree_mem * bt, struct btree_mem_node * x, int i);
38 void btree_mem_node_walk_inorder (struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth);
39 void btree_mem_node_dump (struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth);
41 void * btree_mem_node_remove (struct btree_mem * bt, struct btree_mem_node * node, void * key,
42 void ** key_value, void ** data_value);
44 void btree_mem_node_add_key (struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data);
45 void * btree_mem_node_remove_key (struct btree_mem_node * node, int i);
46 struct btree_mem_node * btree_mem_node_find_prev (struct btree_mem_node * node);
47 struct btree_mem_node * btree_mem_node_find_next (struct btree_mem_node * node);
48 void btree_mem_node_merge (struct btree_mem_node * a, struct btree_mem_node * b);
50 #ifdef DEBUG
51 #define DEBUG_BLOCK(x) x
53 void btree_mem_node_print_keys(struct btree_mem_node * node);
55 #else
56 #define DEBUG_BLOCK(x)
57 #endif
59 void printit(int h, void * k, void * d)
60 {
61 for (int i = 0; i < h; i++)
62 printf("\t");
63 printf("%d %s\n", *((int*)k), (char *) d);
64 }
67 ////////////////////////////////////////////////////////////////////////////////
68 struct btree_mem *
69 btree_mem_new(int min_degree, int (*cmp)(void *, void *))
70 {
71 struct btree_mem * bt = malloc(sizeof(struct btree_mem));
72 if (NULL == bt)
73 return NULL;
75 bt->mindeg = min_degree;
76 bt->cmp = cmp;
78 bt->root = btree_mem_node_new(bt);
79 if (NULL == bt->root)
80 {
81 free(bt);
82 return NULL;
83 }
85 bt->root->leaf = true;
86 return bt;
87 }
89 ////////////////////////////////////////////////////////////////////////////////
90 // IMPORTANT NOTE: All keys and data must be removed first or else memory
91 // will leak.
92 void
93 btree_mem_delete(struct btree_mem * bt)
94 {
95 // TODO: walk down the tree and delete all nodes.
96 free(bt);
97 }
99 ////////////////////////////////////////////////////////////////////////////////
100 struct btree_mem_node *
101 btree_mem_node_new(struct btree_mem * bt)
102 {
103 struct btree_mem_node * node = malloc(sizeof(struct btree_mem_node));
104 if (NULL == node)
105 return NULL;
107 int maxchild = 2*bt->mindeg;
109 node->key = malloc( (maxchild - 1) * sizeof(void *));
110 if (NULL == node->key)
111 {
112 free(node);
113 return NULL;
114 }
116 node->data = malloc( (maxchild - 1) * sizeof(void *));
117 if (NULL == node->data)
118 {
119 free(node->key);
120 free(node);
121 return NULL;
122 }
124 node->child = malloc( maxchild * sizeof(struct btree_mem_node *));
125 if (NULL == node->data)
126 {
127 free(node->data);
128 free(node->key);
129 free(node);
130 return NULL;
131 }
133 for (int i = 0; i < maxchild - 1; i++)
134 {
135 node->key[i] = NULL;
136 node->data[i] = NULL;
137 node->child[i] = NULL;
138 }
139 node->child[maxchild-1] = NULL;
141 node->nkeys = 0;
142 node->leaf = false;
144 return node;
145 }
147 ////////////////////////////////////////////////////////////////////////////////
148 // IMPORTANT NOTE: All keys and data must be removed first or else memory
149 // will leak.
150 void
151 btree_mem_node_delete(struct btree_mem_node * node)
152 {
153 free(node->child);
154 free(node->data);
155 free(node->key);
156 free(node);
157 }
159 ////////////////////////////////////////////////////////////////////////////////
160 void *
161 btree_mem_find(struct btree_mem * bt, void * key)
162 {
163 int i;
164 struct btree_mem_node * node = btree_mem_node_find(bt, bt->root, key, &i);
165 if (NULL == node)
166 return NULL;
167 return node->data[i];
168 }
170 ////////////////////////////////////////////////////////////////////////////////
171 struct btree_mem_node *
172 btree_mem_node_find(struct btree_mem * bt, struct btree_mem_node * node, void * key, int * i)
173 {
174 *i = 0;
175 while (( *i < node->nkeys) && (bt->cmp(key, node->key[*i]) > 0) )
176 (*i)++;
177 if ( (*i < node->nkeys) && (bt->cmp(key, node->key[*i]) == 0) )
178 return node;
179 if (node->leaf)
180 return NULL;
181 else
182 return btree_mem_node_find(bt, node->child[*i], key, i);
183 }
185 ////////////////////////////////////////////////////////////////////////////////
186 void *
187 btree_mem_insert(struct btree_mem * bt, void * key, void * data)
188 {
189 DEBUG_BLOCK( printf("Inserting key %d data >%s<\n", *((int *) key), (char *) data); )
190 struct btree_mem_node * s = bt->root;
192 // If the root node is full, split it here.
193 if ((2 * bt->mindeg - 1) == s->nkeys)
194 {
195 s = btree_mem_node_new(bt);
196 if (NULL == s)
197 return NULL;
198 s->child[0] = bt->root;
199 bt->root = s;
200 btree_mem_split_child(bt, s, 0);
201 }
202 return btree_mem_node_insert_nonfull(bt, s, key, data);
203 }
205 ////////////////////////////////////////////////////////////////////////////////
206 void *
207 btree_mem_node_insert_nonfull(struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data)
208 {
209 int i = node->nkeys - 1;
210 if (node->leaf)
211 {
212 //DEBUG_BLOCK( printf("node->nkeys: %d\n", node->nkeys); )
213 while ( (i >= 0) && (bt->cmp(key, node->key[i]) < 0) )
214 {
215 node->key[i+1] = node->key[i];
216 node->data[i+1] = node->data[i];
217 i--;
218 }
219 node->key[i+1] = key;
220 node->data[i+1] = data;
221 node->nkeys++;
222 return key;
223 }
224 else
225 {
226 while ( (i >= 0) && (bt->cmp(key, node->key[i]) < 0) )
227 i--;
228 i++;
230 if (node->child[i]->nkeys == (2 * bt->mindeg - 1))
231 {
232 if (NULL == btree_mem_split_child(bt, node, i))
233 return NULL;
234 if (bt->cmp(key, node->key[i]) > 0)
235 i++;
236 }
237 return btree_mem_node_insert_nonfull(bt, node->child[i], key, data);
238 }
239 }
241 ////////////////////////////////////////////////////////////////////////////////
242 // x - a non-full node
243 // i - The index of a full node y in x's child list.
244 //
245 // Split y in half, adding the newly created node z as a new child of x.
246 // z will be placed after y in the child list.
248 struct btree_mem_node *
249 btree_mem_split_child(struct btree_mem * bt, struct btree_mem_node * x, int i)
250 {
252 struct btree_mem_node * y = x->child[i];
254 // Allocate new node.
255 struct btree_mem_node * z = btree_mem_node_new(bt);
256 if (NULL == z)
257 return NULL;
259 // Load z with the second half of y's data;
260 z->leaf = y->leaf;
261 z->nkeys = bt->mindeg - 1;
262 for (int j = 0; j < bt->mindeg - 1; j++)
263 {
264 z->key[j] = y->key[j + bt->mindeg];
265 z->data[j] = y->data[j + bt->mindeg];
266 }
267 if (! y->leaf)
268 {
269 for (int j = 0; j < bt->mindeg; j++)
270 z->child[j] = y->child[j + bt->mindeg];
271 }
273 // Update y's nkeys.
274 y->nkeys = bt->mindeg - 1;
276 // Make a hole in x's child list for the new node z.
277 for (int j = x->nkeys; j >= i + 1; j--)
278 x->child[j+1] = x->child[j];
279 x->child[i+1] = z;
281 // Make a hole in x's key list for the new node z.
282 for (int j = x->nkeys - 1; j >= i; j--)
283 {
284 x->key[j+1] = x->key[j];
285 x->data[j+1] = x->data[j];
286 }
287 x->key[i] = y->key[bt->mindeg-1];
288 x->data[i] = y->data[bt->mindeg-1];
290 x->nkeys += 1;
292 return z;
293 }
295 ////////////////////////////////////////////////////////////////////////////////
296 void
297 btree_mem_walk_inorder(struct btree_mem * bt, void (*f)(int, void const * const, void *))
298 {
299 btree_mem_node_walk_inorder(bt->root, f, 0);
300 }
302 ////////////////////////////////////////////////////////////////////////////////
303 void
304 btree_mem_node_walk_inorder(struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth)
305 {
307 if (0 == node->nkeys)
308 {
309 if (NULL != node->child[0])
310 btree_mem_node_walk_inorder(node->child[0], f, depth+1);
311 return;
312 }
314 for (int i = 0; i < node->nkeys; i++)
315 {
316 if (! node->leaf)
317 btree_mem_node_walk_inorder(node->child[i], f, depth+1);
318 f(depth, node->key[i], node->data[i]);
319 }
320 if (! node->leaf)
321 btree_mem_node_walk_inorder(node->child[node->nkeys], f, depth+1);
322 }
324 ////////////////////////////////////////////////////////////////////////////////
325 void
326 btree_mem_dump(struct btree_mem * bt, void (*f)(int, void const * const, void *))
327 {
328 btree_mem_node_dump(bt->root, f, 0);
329 }
331 ////////////////////////////////////////////////////////////////////////////////
332 void
333 btree_mem_node_dump(struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth)
334 {
336 if (0 == node->nkeys)
337 {
338 if (NULL != node->child[0])
339 btree_mem_node_dump(node->child[0], f, depth+1);
340 return;
341 }
343 for (int i = 0; i < node->nkeys; i++)
344 {
345 if (! node->leaf)
346 btree_mem_node_dump(node->child[i], f, depth+1);
347 printf("node: %p nkeys %10d \t", node, node->nkeys);
348 f(depth, node->key[i], node->data[i]);
349 }
350 if (! node->leaf)
351 btree_mem_node_dump(node->child[node->nkeys], f, depth+1);
352 }
354 ////////////////////////////////////////////////////////////////////////////////
355 void *
356 btree_mem_remove(struct btree_mem * bt, void * key, void ** key_value, void ** data_value)
357 {
358 if ( (NULL == bt) || (NULL == key) || (NULL == key_value) || (NULL == data_value) )
359 return NULL;
361 DEBUG_BLOCK( printf("Removing %d\n", *((int *) key)); )
362 void * data = btree_mem_node_remove(bt, bt->root, key, key_value, data_value);
363 if (NULL == data)
364 return NULL;
366 if ( (0 == bt->root->nkeys) && (bt->root->child[0]) )
367 {
368 DEBUG_BLOCK( printf("Freeing the root node\n"); )
369 struct btree_mem_node * n = bt->root;
370 bt->root = bt->root->child[0];
371 free(n);
372 }
373 return data;
374 }
376 ////////////////////////////////////////////////////////////////////////////////
377 // Assumption: node has bt->mindeg keys or is the root.
379 void *
380 btree_mem_node_remove(struct btree_mem * bt, struct btree_mem_node * node, void * key, void ** key_value, void ** data_value)
381 {
383 DEBUG_BLOCK( btree_mem_node_print_keys(node); )
385 int i = node->nkeys - 1;
386 while ( (i >= 0) && (bt->cmp(key, node->key[i]) < 0) )
387 i--;
389 if ( (i >= 0) && (bt->cmp(key, node->key[i]) == 0) )
390 {
391 // key is in node at key[i]
392 *key_value = node->key[i];
393 *data_value = node->data[i];
394 // printf("Will return key_value %p (%d) data_value %p (%s)\n", *key_value, *((int*)(*key_value)), *data_value, (char*)(*data_value));
395 if (node->leaf)
396 {
397 // case 1
398 DEBUG_BLOCK( printf("remove: case 1\n"); )
399 btree_mem_node_remove_key(node, i);
400 return *data_value;
401 }
402 else
403 {
404 struct btree_mem_node * prev = node->child[i];
405 struct btree_mem_node * next = node->child[i+1];
407 // if (child prev that precedes key in node has at least bt->mindeg keys) ||
408 // (child next that follows key in node has at least bt->mindeg keys)
409 if ( (prev->nkeys >= bt->mindeg) || (next->nkeys >= bt->mindeg) )
410 {
411 struct btree_mem_node * kp;
412 if (prev->nkeys >= bt->mindeg)
413 {
414 // case 2a
415 DEBUG_BLOCK( printf("remove: case 2a\n"); );
416 kp = btree_mem_node_find_prev(prev);
417 node->key[i] = kp->key[kp->nkeys-1];
418 node->data[i] = kp->data[kp->nkeys];
419 void *kv, *dv;
420 node->data[i] = btree_mem_node_remove(bt, prev, kp->key[kp->nkeys-1], kv, dv);
421 }
422 else
423 {
424 // case 2b
425 DEBUG_BLOCK( printf("remove: case 2b\n"); )
426 kp = btree_mem_node_find_next(next);
427 node->key[i] = kp->key[0];
428 node->data[i] = kp->data[0];
429 void *kv, *dv;
430 node->data[i] = btree_mem_node_remove(bt, next, kp->key[0], kv, dv);
431 }
432 node->key[i] = kp;
433 return *data_value;
434 }
435 else
436 {
437 // case 2c
438 DEBUG_BLOCK( printf("remove: case 2c\n"); )
439 btree_mem_node_add_key(bt, prev, node->key[i], node->data[i]);
440 btree_mem_node_remove_key(node, i);
441 for (int j = i+1; j <= node->nkeys+1; j++)
442 node->child[j] = node->child[j+1];
443 btree_mem_node_merge(prev, next);
444 void *kv, *dv;
445 btree_mem_node_remove(bt, prev, key, &kv, &dv);
446 return *data_value;
447 }
448 }
449 }
450 else
451 {
452 if (node->leaf)
453 {
454 DEBUG_BLOCK( printf("remove: key not found\n"); )
455 return NULL;
456 }
458 // key is in child[i+1], if anywhere
459 struct btree_mem_node * c = node->child[i+1];
461 DEBUG_BLOCK( printf("remove: key should be in node %p\n", c); )
462 if (c->nkeys == bt->mindeg - 1)
463 {
464 struct btree_mem_node * prev = NULL;
465 if (i >= 0)
466 prev = node->child[i];
468 struct btree_mem_node * next = NULL;
469 if (i+2 <= node->nkeys)
470 next = node->child[i+2];
472 DEBUG_BLOCK( printf("remove: prev %p next %p\n", prev, next); )
474 if ( (prev) && (prev->nkeys >= bt->mindeg) )
475 {
476 // case 3a (with prev sibling)
477 // Move node->key[i] into c->key[0] shifting over existing keys
478 // Move prev->key[prev->nkeys-1] into node->key[i]
479 // if (prev->leaf)
480 // move prev->child[prev->nkeys] into c->child[0] shifting over existing children
481 // prev->nkeys--;
483 DEBUG_BLOCK( printf("remove: case 3a.1\n"); )
485 btree_mem_node_add_key(bt, c, node->key[i-1], node->data[i-1]);
487 node->key[i-1] = prev->key[prev->nkeys-1];
488 node->data[i-1] = prev->data[prev->nkeys-1];
490 if (! prev->leaf)
491 {
492 for (int j = c->nkeys; j >= 0; j--)
493 c->child[j+1] = c->child[j];
494 c->child[0] = prev->child[prev->nkeys];
495 }
497 prev->nkeys--;
498 }
499 else if ( (next) && (next->nkeys >= bt->mindeg) )
500 {
501 // case 3a (with next sibling)
502 // Symmetric with the above
504 DEBUG_BLOCK( printf("remove: case 3a.2\n"); )
505 btree_mem_node_add_key(bt, c, node->key[i+1], node->data[i+1]);
507 node->key[i+1] = next->key[0];
508 node->data[i+1] = next->data[0];
509 for (int j = 0; j < next->nkeys - 1; j++)
510 {
511 next->key[j] = next->key[j+1];
512 next->data[j] = next->data[j+1];
513 }
515 if (! next->leaf)
516 {
517 c->child[c->nkeys] = next->child[0];
518 for (int j = 0; j < next->nkeys; j++)
519 next->child[j] = next->child[j+1];
520 }
522 next->nkeys--;
524 }
525 else
526 {
527 // case 3b
528 DEBUG_BLOCK( printf("remove: case 3b\n"); )
529 // printf("=====\n"); btree_mem_node_dump(node, printit, 0); printf("=====\n");
530 // printf("node %p i %d\n", node, i); fflush(stdout);
531 // printf("adding %d to %p\n", *((int*)node->key[i+1]), (next ? next : prev)); fflush(stdout);
532 if (next)
533 btree_mem_node_merge(c, next);
534 else
535 btree_mem_node_merge(prev, c);
537 btree_mem_node_add_key(bt, (next ? c : prev), node->key[i+1], node->data[i+1]);
539 btree_mem_node_remove_key(node, i+1);
541 if (next)
542 {
543 for (int j = i+2; j <= node->nkeys+1; j++)
544 node->child[j] = node->child[j+1];
545 }
546 else
547 {
548 for (int j = i; j <= node->nkeys+1; j++)
549 node->child[j] = node->child[j+1];
550 }
551 }
552 }
553 return btree_mem_node_remove(bt, c, key, key_value, data_value);
554 }
555 }
557 ////////////////////////////////////////////////////////////////////////////////
558 // Assumption: node is not full
559 // Assumption: key is not already in node
560 void
561 btree_mem_node_add_key(struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data)
562 {
563 int i = node->nkeys;
564 while ( (i > 0) && (bt->cmp(key, node->key[i-1]) < 0) )
565 i--;
567 for (int j = node->nkeys - 1; j >= i; j--)
568 {
569 node->key[j+1] = node->key[j];
570 node->data[j+1] = node->data[j];
571 }
573 node->key[i] = key;
574 node->data[i] = data;
575 node->nkeys++;
576 }
578 ////////////////////////////////////////////////////////////////////////////////
579 // Assumption: node has at least mindeg keys.
580 // Does not remove any children.
581 void *
582 btree_mem_node_remove_key(struct btree_mem_node * node, int i)
583 {
584 void * data = node->key[i];
585 for (int j = i; j < node->nkeys; j++)
586 {
587 node->key[j] = node->key[j+1];
588 node->data[j] = node->data[j+1];
589 }
590 node->nkeys--;
591 return data;
592 }
594 ////////////////////////////////////////////////////////////////////////////////
595 // Return the node of the lowest valued key of the subtree rooted at node
596 struct btree_mem_node *
597 btree_mem_node_find_prev(struct btree_mem_node * node)
598 {
599 if (0 == node->nkeys)
600 return NULL;
602 if (node->leaf)
603 return node;
605 return btree_mem_node_find_prev(node->child[0]);
606 }
608 ////////////////////////////////////////////////////////////////////////////////
609 // Return the node of the highest valued key of the subtree rooted at node
610 struct btree_mem_node *
611 btree_mem_node_find_next(struct btree_mem_node * node)
612 {
613 if (0 == node->nkeys)
614 return NULL;
616 if (node->leaf)
617 return node->key[node->nkeys-1];
619 return btree_mem_node_find_next(node->child[node->nkeys]);
620 }
622 /////////////////////////////////////////////////////////////////////////////////
623 // move all keys/data/children from node b into node a
624 // Assumption: all keys in b come after the keys in a
625 void
626 btree_mem_node_merge(struct btree_mem_node * a, struct btree_mem_node * b)
627 {
628 int i = a->nkeys;
629 int j = 0;
630 for (; j < b->nkeys; i++, j++)
631 {
632 a->key[i] = b->key[j];
633 a->data[i] = b->data[j];
634 }
636 if (! a->leaf)
637 {
638 for (i = a->nkeys + 1 , j = 0; j <= b->nkeys; i++, j++)
639 a->child[i] = b->child[j];
640 }
642 a->nkeys += b->nkeys;
643 b->nkeys = 0;
644 }
646 /////////////////////////////////////////////////////////////////////////////////
647 #ifdef DEBUG
648 void
649 btree_mem_node_print_keys(struct btree_mem_node * node)
650 {
651 printf("node: %p nkeys %10d keys: ", node, node->nkeys);
652 for (int i = 0; i < node->nkeys; i++)
653 printf("%d ", *((int *) node->key[i]));
654 printf("\n");
655 }
656 #endif