view src/btree_mem.c @ 24:f27bdd5d40d0

Ran more tests. Found another leak. Fixed it. Now it's really good. for (( MinDeg=2; MinDeg<=100; MinDeg++ )) ; do echo MinDeg 101 ; numwords 1000 | rev | valgrind --tool=memcheck --leak-check=full --show-reachable=yes ./btree_mem_test 101 2>&1 | grep "ERROR SUMMARY" ; done
author Eris Caffee <discordia@eldalin.com>
date Fri, 19 Oct 2012 19:42:17 -0500
parents 36018adade6d
children
line source
1 /*
2 This is a fairly straightforward implementation of a B-Tree in memory modeled after
3 Cormen, et al., Introduction to Algorithms, Chapter 18
4 */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <stdbool.h>
10 #include "btree_mem.h"
12 struct btree_mem_node
13 {
14 int nkeys;
15 void ** key;
16 void ** data;
17 struct btree_mem_node ** child;
18 bool leaf;
19 };
21 struct btree_mem
22 {
23 int mindeg;
24 struct btree_mem_node * root;
25 int (*cmp)(void *, void *);
26 };
28 struct btree_mem_node * btree_mem_node_new (struct btree_mem * bt);
29 void btree_mem_node_delete (struct btree_mem_node * node);
31 struct btree_mem_node * btree_mem_node_find (struct btree_mem * bt, struct btree_mem_node * node, void * key, int * i);
33 void * btree_mem_node_insert (struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data);
34 void * btree_mem_node_insert_nonfull (struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data);
36 struct btree_mem_node * btree_mem_split_child (struct btree_mem * bt, struct btree_mem_node * x, int i);
38 void btree_mem_node_walk_nodes_preorder (struct btree_mem_node * node, void (*f)(struct btree_mem_node *));
39 void btree_mem_node_walk_inorder (struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth);
40 void btree_mem_node_dump (struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth);
42 void * btree_mem_node_remove (struct btree_mem * bt, struct btree_mem_node * node, void * key,
43 void ** key_value, void ** data_value);
45 void btree_mem_node_add_key (struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data);
46 void * btree_mem_node_remove_key (struct btree_mem_node * node, int i);
47 struct btree_mem_node * btree_mem_node_find_prev (struct btree_mem_node * node);
48 struct btree_mem_node * btree_mem_node_find_next (struct btree_mem_node * node);
49 void btree_mem_node_merge (struct btree_mem_node * a, struct btree_mem_node * b);
51 #ifdef DEBUG
52 #define DEBUG_BLOCK(x) x
54 void btree_mem_node_print_keys(struct btree_mem_node * node);
56 #else
57 #define DEBUG_BLOCK(x)
58 #endif
62 ////////////////////////////////////////////////////////////////////////////////
63 struct btree_mem *
64 btree_mem_new(int min_degree, int (*cmp)(void *, void *))
65 {
66 struct btree_mem * bt = malloc(sizeof(struct btree_mem));
67 if (NULL == bt)
68 return NULL;
70 bt->mindeg = min_degree;
71 bt->cmp = cmp;
73 DEBUG_BLOCK( printf("Getting new root node\n"); );
74 bt->root = btree_mem_node_new(bt);
75 if (NULL == bt->root)
76 {
77 free(bt);
78 return NULL;
79 }
81 bt->root->leaf = true;
82 return bt;
83 }
85 ////////////////////////////////////////////////////////////////////////////////
86 // IMPORTANT NOTE: All keys and data must be removed first or else memory
87 // will leak.
88 void
89 btree_mem_delete(struct btree_mem * bt)
90 {
91 // TODO: walk down the tree and delete all nodes.
93 btree_mem_node_walk_nodes_preorder(bt->root, btree_mem_node_delete);
94 free(bt);
95 }
97 ////////////////////////////////////////////////////////////////////////////////
98 struct btree_mem_node *
99 btree_mem_node_new(struct btree_mem * bt)
100 {
101 struct btree_mem_node * node = malloc(sizeof(struct btree_mem_node));
102 if (NULL == node)
103 return NULL;
105 DEBUG_BLOCK( printf("Allocated node %p\n", node); );
106 int maxchild = 2*bt->mindeg;
108 node->key = malloc( (maxchild - 1) * sizeof(void *));
109 if (NULL == node->key)
110 {
111 free(node);
112 return NULL;
113 }
115 node->data = malloc( (maxchild - 1) * sizeof(void *));
116 if (NULL == node->data)
117 {
118 free(node->key);
119 free(node);
120 return NULL;
121 }
123 node->child = malloc( maxchild * sizeof(struct btree_mem_node *));
124 if (NULL == node->data)
125 {
126 free(node->data);
127 free(node->key);
128 free(node);
129 return NULL;
130 }
132 for (int i = 0; i < maxchild - 1; i++)
133 {
134 node->key[i] = NULL;
135 node->data[i] = NULL;
136 node->child[i] = NULL;
137 }
138 node->child[maxchild-1] = NULL;
140 node->nkeys = 0;
141 node->leaf = false;
143 return node;
144 }
146 ////////////////////////////////////////////////////////////////////////////////
147 // IMPORTANT NOTE: All keys and data must be removed first or else memory
148 // will leak.
149 void
150 btree_mem_node_delete(struct btree_mem_node * node)
151 {
152 DEBUG_BLOCK( printf("Freeing node %p\n", node); );
153 free(node->child);
154 free(node->data);
155 free(node->key);
156 free(node);
157 }
159 ////////////////////////////////////////////////////////////////////////////////
160 void *
161 btree_mem_find(struct btree_mem * bt, void * key)
162 {
163 int i;
164 struct btree_mem_node * node = btree_mem_node_find(bt, bt->root, key, &i);
165 if (NULL == node)
166 return NULL;
167 return node->data[i];
168 }
170 ////////////////////////////////////////////////////////////////////////////////
171 struct btree_mem_node *
172 btree_mem_node_find(struct btree_mem * bt, struct btree_mem_node * node, void * key, int * i)
173 {
174 *i = 0;
175 while (( *i < node->nkeys) && (bt->cmp(key, node->key[*i]) > 0) )
176 (*i)++;
177 if ( (*i < node->nkeys) && (bt->cmp(key, node->key[*i]) == 0) )
178 return node;
179 if (node->leaf)
180 return NULL;
181 else
182 return btree_mem_node_find(bt, node->child[*i], key, i);
183 }
185 ////////////////////////////////////////////////////////////////////////////////
186 void *
187 btree_mem_insert(struct btree_mem * bt, void * key, void * data)
188 {
189 DEBUG_BLOCK( printf("Inserting key %d data >%s<\n", *((int *) key), (char *) data); );
190 struct btree_mem_node * s = bt->root;
192 // If the root node is full, split it here.
193 if ((2 * bt->mindeg - 1) == s->nkeys)
194 {
195 DEBUG_BLOCK( printf("Splitting root node. Allocating new node\n"); );
196 s = btree_mem_node_new(bt);
197 if (NULL == s)
198 return NULL;
199 s->child[0] = bt->root;
200 bt->root = s;
201 btree_mem_split_child(bt, s, 0);
202 }
203 return btree_mem_node_insert_nonfull(bt, s, key, data);
204 }
206 ////////////////////////////////////////////////////////////////////////////////
207 void *
208 btree_mem_node_insert_nonfull(struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data)
209 {
210 int i = node->nkeys - 1;
211 if (node->leaf)
212 {
213 while ( (i >= 0) && (bt->cmp(key, node->key[i]) < 0) )
214 {
215 node->key[i+1] = node->key[i];
216 node->data[i+1] = node->data[i];
217 i--;
218 }
219 node->key[i+1] = key;
220 node->data[i+1] = data;
221 node->nkeys++;
222 return key;
223 }
224 else
225 {
226 while ( (i >= 0) && (bt->cmp(key, node->key[i]) < 0) )
227 i--;
228 i++;
230 if (node->child[i]->nkeys == (2 * bt->mindeg - 1))
231 {
232 if (NULL == btree_mem_split_child(bt, node, i))
233 return NULL;
234 if (bt->cmp(key, node->key[i]) > 0)
235 i++;
236 }
237 return btree_mem_node_insert_nonfull(bt, node->child[i], key, data);
238 }
239 }
241 ////////////////////////////////////////////////////////////////////////////////
242 // x - a non-full node
243 // i - The index of a full node y in x's child list.
244 //
245 // Split y in half, adding the newly created node z as a new child of x.
246 // z will be placed after y in the child list.
248 struct btree_mem_node *
249 btree_mem_split_child(struct btree_mem * bt, struct btree_mem_node * x, int i)
250 {
252 struct btree_mem_node * y = x->child[i];
254 // Allocate new node.
255 DEBUG_BLOCK( printf("splitting child allocating new node\n"); );
256 struct btree_mem_node * z = btree_mem_node_new(bt);
257 if (NULL == z)
258 return NULL;
260 // Load z with the second half of y's data;
261 z->leaf = y->leaf;
262 z->nkeys = bt->mindeg - 1;
263 for (int j = 0; j < bt->mindeg - 1; j++)
264 {
265 z->key[j] = y->key[j + bt->mindeg];
266 z->data[j] = y->data[j + bt->mindeg];
267 }
268 if (! y->leaf)
269 {
270 for (int j = 0; j < bt->mindeg; j++)
271 z->child[j] = y->child[j + bt->mindeg];
272 }
274 // Update y's nkeys.
275 y->nkeys = bt->mindeg - 1;
277 // Make a hole in x's child list for the new node z.
278 for (int j = x->nkeys; j >= i + 1; j--)
279 x->child[j+1] = x->child[j];
280 x->child[i+1] = z;
282 // Make a hole in x's key list for the new node z.
283 for (int j = x->nkeys - 1; j >= i; j--)
284 {
285 x->key[j+1] = x->key[j];
286 x->data[j+1] = x->data[j];
287 }
288 x->key[i] = y->key[bt->mindeg-1];
289 x->data[i] = y->data[bt->mindeg-1];
291 x->nkeys += 1;
293 return z;
294 }
296 ////////////////////////////////////////////////////////////////////////////////
297 void
298 btree_mem_node_walk_nodes_preorder(struct btree_mem_node * node, void (*f)(struct btree_mem_node *))
299 {
300 if (! node->leaf)
301 {
302 for (int i = 0; i < node->nkeys; i++)
303 btree_mem_node_walk_nodes_preorder(node->child[i], f);
304 }
305 f(node);
306 }
308 ////////////////////////////////////////////////////////////////////////////////
309 void
310 btree_mem_walk_inorder(struct btree_mem * bt, void (*f)(int, void const * const, void *))
311 {
312 btree_mem_node_walk_inorder(bt->root, f, 0);
313 }
315 ////////////////////////////////////////////////////////////////////////////////
316 void
317 btree_mem_node_walk_inorder(struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth)
318 {
320 if (0 == node->nkeys)
321 {
322 if (NULL != node->child[0])
323 btree_mem_node_walk_inorder(node->child[0], f, depth+1);
324 return;
325 }
327 for (int i = 0; i < node->nkeys; i++)
328 {
329 if (! node->leaf)
330 btree_mem_node_walk_inorder(node->child[i], f, depth+1);
331 f(depth, node->key[i], node->data[i]);
332 }
333 if (! node->leaf)
334 btree_mem_node_walk_inorder(node->child[node->nkeys], f, depth+1);
335 }
337 ////////////////////////////////////////////////////////////////////////////////
338 void
339 btree_mem_dump(struct btree_mem * bt, void (*f)(int, void const * const, void *))
340 {
341 btree_mem_node_dump(bt->root, f, 0);
342 }
344 ////////////////////////////////////////////////////////////////////////////////
345 void
346 btree_mem_node_dump(struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth)
347 {
349 if (0 == node->nkeys)
350 {
351 if (NULL != node->child[0])
352 btree_mem_node_dump(node->child[0], f, depth+1);
353 return;
354 }
356 for (int i = 0; i < node->nkeys; i++)
357 {
358 if (! node->leaf)
359 btree_mem_node_dump(node->child[i], f, depth+1);
360 f(depth, node->key[i], node->data[i]);
361 }
362 if (! node->leaf)
363 btree_mem_node_dump(node->child[node->nkeys], f, depth+1);
364 }
366 ////////////////////////////////////////////////////////////////////////////////
367 void *
368 btree_mem_remove(struct btree_mem * bt, void * key, void ** key_value, void ** data_value)
369 {
370 if ( (NULL == bt) || (NULL == key) || (NULL == key_value) || (NULL == data_value) )
371 return NULL;
373 DEBUG_BLOCK( printf("Removing %d\n", *((int *) key)); );
374 void * data = btree_mem_node_remove(bt, bt->root, key, key_value, data_value);
375 if (NULL == data)
376 return NULL;
378 if ( (0 == bt->root->nkeys) && (bt->root->child[0]) )
379 {
380 DEBUG_BLOCK( printf("Freeing the root node\n"); );
381 struct btree_mem_node * n = bt->root;
382 bt->root = bt->root->child[0];
383 btree_mem_node_delete(n);
384 }
385 return data;
386 }
388 ////////////////////////////////////////////////////////////////////////////////
389 // Assumption: node has bt->mindeg keys or is the root.
391 void *
392 btree_mem_node_remove(struct btree_mem * bt, struct btree_mem_node * node, void * key, void ** key_value, void ** data_value)
393 {
395 DEBUG_BLOCK( btree_mem_node_print_keys(node); );
397 int i = node->nkeys - 1;
398 while ( (i >= 0) && (bt->cmp(key, node->key[i]) < 0) )
399 i--;
401 if ( (i >= 0) && (bt->cmp(key, node->key[i]) == 0) )
402 {
403 // key is in node at key[i]
404 *key_value = node->key[i];
405 *data_value = node->data[i];
407 if (node->leaf)
408 {
409 // case 1
410 DEBUG_BLOCK( printf("remove: case 1\n"); );
411 btree_mem_node_remove_key(node, i);
412 return *data_value;
413 }
414 else
415 {
416 struct btree_mem_node * prev = node->child[i];
417 struct btree_mem_node * next = node->child[i+1];
419 // if (child prev that precedes key in node has at least bt->mindeg keys) ||
420 // (child next that follows key in node has at least bt->mindeg keys)
421 if ( (prev->nkeys >= bt->mindeg) || (next->nkeys >= bt->mindeg) )
422 {
423 struct btree_mem_node * kp;
424 if (prev->nkeys >= bt->mindeg)
425 {
426 // case 2a
427 DEBUG_BLOCK( printf("remove: case 2a\n"); );
428 kp = btree_mem_node_find_prev(prev);
429 node->key[i] = kp->key[kp->nkeys-1];
430 node->data[i] = kp->data[kp->nkeys];
431 void *kv, *dv;
432 node->data[i] = btree_mem_node_remove(bt, prev, kp->key[kp->nkeys-1], kv, dv);
433 }
434 else
435 {
436 // case 2b
437 DEBUG_BLOCK( printf("remove: case 2b\n"); );
438 kp = btree_mem_node_find_next(next);
439 node->key[i] = kp->key[0];
440 node->data[i] = kp->data[0];
441 void *kv, *dv;
442 node->data[i] = btree_mem_node_remove(bt, next, kp->key[0], kv, dv);
443 }
444 node->key[i] = kp;
445 return *data_value;
446 }
447 else
448 {
449 // case 2c
450 DEBUG_BLOCK( printf("remove: case 2c\n"); );
451 btree_mem_node_add_key(bt, prev, node->key[i], node->data[i]);
452 btree_mem_node_remove_key(node, i);
453 for (int j = i+1; j <= node->nkeys+1; j++)
454 node->child[j] = node->child[j+1];
455 btree_mem_node_merge(prev, next);
456 btree_mem_node_delete(next);
457 void *kv, *dv;
458 btree_mem_node_remove(bt, prev, key, &kv, &dv);
459 return *data_value;
460 }
461 }
462 }
463 else
464 {
465 if (node->leaf)
466 {
467 DEBUG_BLOCK( printf("remove: key not found\n"); );
468 return NULL;
469 }
471 // key is in child[i+1], if anywhere
472 struct btree_mem_node * c = node->child[i+1];
474 DEBUG_BLOCK( printf("remove: key should be in node %p\n", c); );
475 if (c->nkeys == bt->mindeg - 1)
476 {
477 struct btree_mem_node * prev = NULL;
478 if (i >= 0)
479 prev = node->child[i];
481 struct btree_mem_node * next = NULL;
482 if (i+2 <= node->nkeys)
483 next = node->child[i+2];
485 DEBUG_BLOCK( printf("remove: prev %p next %p\n", prev, next); );
487 if ( (prev) && (prev->nkeys >= bt->mindeg) )
488 {
489 // case 3a (with prev sibling)
490 // Move node->key[i] into c->key[0] shifting over existing keys
491 // Move prev->key[prev->nkeys-1] into node->key[i]
492 // if (prev->leaf)
493 // move prev->child[prev->nkeys] into c->child[0] shifting over existing children
494 // prev->nkeys--;
496 DEBUG_BLOCK( printf("remove: case 3a.1\n"); );
498 btree_mem_node_add_key(bt, c, node->key[i-1], node->data[i-1]);
500 node->key[i-1] = prev->key[prev->nkeys-1];
501 node->data[i-1] = prev->data[prev->nkeys-1];
503 if (! prev->leaf)
504 {
505 for (int j = c->nkeys; j >= 0; j--)
506 c->child[j+1] = c->child[j];
507 c->child[0] = prev->child[prev->nkeys];
508 }
510 prev->nkeys--;
511 }
512 else if ( (next) && (next->nkeys >= bt->mindeg) )
513 {
514 // case 3a (with next sibling)
515 // Symmetric with the above
517 DEBUG_BLOCK( printf("remove: case 3a.2\n"); );
518 btree_mem_node_add_key(bt, c, node->key[i+1], node->data[i+1]);
520 node->key[i+1] = next->key[0];
521 node->data[i+1] = next->data[0];
522 for (int j = 0; j < next->nkeys - 1; j++)
523 {
524 next->key[j] = next->key[j+1];
525 next->data[j] = next->data[j+1];
526 }
528 if (! next->leaf)
529 {
530 c->child[c->nkeys] = next->child[0];
531 for (int j = 0; j < next->nkeys; j++)
532 next->child[j] = next->child[j+1];
533 }
535 next->nkeys--;
537 }
538 else
539 {
540 // case 3b
541 DEBUG_BLOCK( printf("remove: case 3b\n"); );
542 if (next)
543 btree_mem_node_merge(c, next);
544 else
545 btree_mem_node_merge(prev, c);
547 btree_mem_node_add_key(bt, (next ? c : prev), node->key[i+1], node->data[i+1]);
549 btree_mem_node_remove_key(node, i+1);
551 if (next)
552 {
553 for (int j = i+2; j <= node->nkeys; j++)
554 node->child[j] = node->child[j+1];
555 btree_mem_node_delete(next);
556 }
557 else
558 {
559 for (int j = i; j <= node->nkeys; j++)
560 node->child[j] = node->child[j+1];
561 btree_mem_node_delete(c);
562 c = prev;
563 }
564 }
565 }
566 return btree_mem_node_remove(bt, c, key, key_value, data_value);
567 }
568 }
570 ////////////////////////////////////////////////////////////////////////////////
571 // Assumption: node is not full
572 // Assumption: key is not already in node
573 void
574 btree_mem_node_add_key(struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data)
575 {
576 int i = node->nkeys;
577 while ( (i > 0) && (bt->cmp(key, node->key[i-1]) < 0) )
578 i--;
580 for (int j = node->nkeys - 1; j >= i; j--)
581 {
582 node->key[j+1] = node->key[j];
583 node->data[j+1] = node->data[j];
584 }
586 node->key[i] = key;
587 node->data[i] = data;
588 node->nkeys++;
589 }
591 ////////////////////////////////////////////////////////////////////////////////
592 // Assumption: node has at least mindeg keys.
593 // Does not remove any children.
594 void *
595 btree_mem_node_remove_key(struct btree_mem_node * node, int i)
596 {
597 void * data = node->key[i];
598 for (int j = i; j < node->nkeys-1; j++)
599 {
600 node->key[j] = node->key[j+1];
601 node->data[j] = node->data[j+1];
602 }
603 node->nkeys--;
604 return data;
605 }
607 ////////////////////////////////////////////////////////////////////////////////
608 // Return the node of the lowest valued key of the subtree rooted at node
609 struct btree_mem_node *
610 btree_mem_node_find_prev(struct btree_mem_node * node)
611 {
612 if (0 == node->nkeys)
613 return NULL;
615 if (node->leaf)
616 return node;
618 return btree_mem_node_find_prev(node->child[0]);
619 }
621 ////////////////////////////////////////////////////////////////////////////////
622 // Return the node of the highest valued key of the subtree rooted at node
623 struct btree_mem_node *
624 btree_mem_node_find_next(struct btree_mem_node * node)
625 {
626 if (0 == node->nkeys)
627 return NULL;
629 if (node->leaf)
630 return node->key[node->nkeys-1];
632 return btree_mem_node_find_next(node->child[node->nkeys]);
633 }
635 /////////////////////////////////////////////////////////////////////////////////
636 // move all keys/data/children from node b into node a
637 // Assumption: all keys in b come after the keys in a
638 void
639 btree_mem_node_merge(struct btree_mem_node * a, struct btree_mem_node * b)
640 {
641 int i = a->nkeys;
642 int j = 0;
643 for (; j < b->nkeys; i++, j++)
644 {
645 a->key[i] = b->key[j];
646 a->data[i] = b->data[j];
647 }
649 if (! a->leaf)
650 {
651 for (i = a->nkeys + 1 , j = 0; j <= b->nkeys; i++, j++)
652 a->child[i] = b->child[j];
653 }
655 a->nkeys += b->nkeys;
656 b->nkeys = 0;
657 }
659 /////////////////////////////////////////////////////////////////////////////////
660 #ifdef DEBUG
661 void
662 btree_mem_node_print_keys(struct btree_mem_node * node)
663 {
664 printf("node: %p nkeys %10d keys: ", node, node->nkeys);
665 for (int i = 0; i < node->nkeys; i++)
666 printf("%d ", *((int *) node->key[i]));
667 printf("\n");
668 }
669 #endif