view src/btree_mem.c @ 20:66aecb6a0057

making progress
author Eris Caffee <discordia@eldalin.com>
date Thu, 18 Oct 2012 20:08:51 -0500
parents 0bf94d0ed0b0
children ce502d2caaea
line source
1 /*
2 This is a fairly straightforward implementation of a B-Tree in memory modeled after
3 Cormen, et al., Introduction to Algorithms, Chapter 18
4 */
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <stdbool.h>
10 #include "btree_mem.h"
12 struct btree_mem_node
13 {
14 int nkeys;
15 void ** key;
16 void ** data;
17 struct btree_mem_node ** child;
18 bool leaf;
19 };
21 struct btree_mem
22 {
23 int mindeg;
24 struct btree_mem_node * root;
25 int (*cmp)(void *, void *);
26 };
28 struct btree_mem_node * btree_mem_node_new (struct btree_mem * bt);
29 void btree_mem_node_delete (struct btree_mem_node * node);
31 struct btree_mem_node * btree_mem_node_find (struct btree_mem * bt, struct btree_mem_node * node, void * key, int * i);
33 void * btree_mem_node_insert (struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data);
34 void * btree_mem_node_insert_nonfull (struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data);
36 struct btree_mem_node * btree_mem_split_child (struct btree_mem * bt, struct btree_mem_node * x, int i);
38 void btree_mem_node_walk_inorder (struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth);
39 void btree_mem_node_dump (struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth);
41 void * btree_mem_node_remove (struct btree_mem * bt, struct btree_mem_node * node, void * key,
42 void ** key_value, void ** data_value);
44 void btree_mem_node_add_key (struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data);
45 void * btree_mem_node_remove_key (struct btree_mem_node * node, int i);
46 void * btree_mem_node_first_key (struct btree_mem_node * node);
47 void * btree_mem_node_last_key (struct btree_mem_node * node);
48 void btree_mem_node_merge (struct btree_mem_node * a, struct btree_mem_node * b);
50 #ifdef DEBUG
51 #define DEBUG_BLOCK(x) x
53 void btree_mem_nonde_print_keys(struct btree_mem_node * node);
55 #else
56 #define DEBUG_BLOCK(x)
57 #endif
59 void printit(int h, void * k, void * d)
60 {
61 for (int i = 0; i < h; i++)
62 printf("\t");
63 printf("%d %s\n", *((int*)k), (char *) d);
64 }
67 ////////////////////////////////////////////////////////////////////////////////
68 struct btree_mem *
69 btree_mem_new(int min_degree, int (*cmp)(void *, void *))
70 {
71 struct btree_mem * bt = malloc(sizeof(struct btree_mem));
72 if (NULL == bt)
73 return NULL;
75 bt->mindeg = min_degree;
76 bt->cmp = cmp;
78 bt->root = btree_mem_node_new(bt);
79 if (NULL == bt->root)
80 {
81 free(bt);
82 return NULL;
83 }
85 bt->root->leaf = true;
86 return bt;
87 }
89 ////////////////////////////////////////////////////////////////////////////////
90 // IMPORTANT NOTE: All keys and data must be removed first or else memory
91 // will leak.
92 void
93 btree_mem_delete(struct btree_mem * bt)
94 {
95 // TODO: walk down the tree and delete all nodes.
96 free(bt);
97 }
99 ////////////////////////////////////////////////////////////////////////////////
100 struct btree_mem_node *
101 btree_mem_node_new(struct btree_mem * bt)
102 {
103 struct btree_mem_node * node = malloc(sizeof(struct btree_mem_node));
104 if (NULL == node)
105 return NULL;
107 int maxchild = 2*bt->mindeg;
109 node->key = malloc( (maxchild - 1) * sizeof(void *));
110 if (NULL == node->key)
111 {
112 free(node);
113 return NULL;
114 }
116 node->data = malloc( (maxchild - 1) * sizeof(void *));
117 if (NULL == node->data)
118 {
119 free(node->key);
120 free(node);
121 return NULL;
122 }
124 node->child = malloc( maxchild * sizeof(struct btree_mem_node *));
125 if (NULL == node->data)
126 {
127 free(node->data);
128 free(node->key);
129 free(node);
130 return NULL;
131 }
133 for (int i = 0; i < maxchild - 1; i++)
134 {
135 node->key[i] = NULL;
136 node->data[i] = NULL;
137 node->child[i] = NULL;
138 }
139 node->child[maxchild-1] = NULL;
141 node->nkeys = 0;
142 node->leaf = false;
144 return node;
145 }
147 ////////////////////////////////////////////////////////////////////////////////
148 // IMPORTANT NOTE: All keys and data must be removed first or else memory
149 // will leak.
150 void
151 btree_mem_node_delete(struct btree_mem_node * node)
152 {
153 free(node->child);
154 free(node->data);
155 free(node->key);
156 free(node);
157 }
159 ////////////////////////////////////////////////////////////////////////////////
160 void *
161 btree_mem_find(struct btree_mem * bt, void * key)
162 {
163 int i;
164 struct btree_mem_node * node = btree_mem_node_find(bt, bt->root, key, &i);
165 if (NULL == node)
166 return NULL;
167 return node->data[i];
168 }
170 ////////////////////////////////////////////////////////////////////////////////
171 struct btree_mem_node *
172 btree_mem_node_find(struct btree_mem * bt, struct btree_mem_node * node, void * key, int * i)
173 {
174 *i = 0;
175 while (( *i < node->nkeys) && (bt->cmp(key, node->key[*i]) > 0) )
176 (*i)++;
177 if ( (*i < node->nkeys) && (bt->cmp(key, node->key[*i]) == 0) )
178 return node;
179 if (node->leaf)
180 return NULL;
181 else
182 return btree_mem_node_find(bt, node->child[*i], key, i);
183 }
185 ////////////////////////////////////////////////////////////////////////////////
186 void *
187 btree_mem_insert(struct btree_mem * bt, void * key, void * data)
188 {
189 DEBUG_BLOCK( printf("Inserting key %d data >%s<\n", *((int *) key), (char *) data); )
190 struct btree_mem_node * s = bt->root;
192 // If the root node is full, split it here.
193 if ((2 * bt->mindeg - 1) == s->nkeys)
194 {
195 s = btree_mem_node_new(bt);
196 if (NULL == s)
197 return NULL;
198 s->child[0] = bt->root;
199 bt->root = s;
200 btree_mem_split_child(bt, s, 0);
201 }
202 return btree_mem_node_insert_nonfull(bt, s, key, data);
203 }
205 ////////////////////////////////////////////////////////////////////////////////
206 void *
207 btree_mem_node_insert_nonfull(struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data)
208 {
209 int i = node->nkeys - 1;
210 if (node->leaf)
211 {
212 //DEBUG_BLOCK( printf("node->nkeys: %d\n", node->nkeys); )
213 while ( (i >= 0) && (bt->cmp(key, node->key[i]) < 0) )
214 {
215 node->key[i+1] = node->key[i];
216 node->data[i+1] = node->data[i];
217 i--;
218 }
219 node->key[i+1] = key;
220 node->data[i+1] = data;
221 node->nkeys++;
222 return key;
223 }
224 else
225 {
226 while ( (i >= 0) && (bt->cmp(key, node->key[i]) < 0) )
227 i--;
228 i++;
230 if (node->child[i]->nkeys == (2 * bt->mindeg - 1))
231 {
232 if (NULL == btree_mem_split_child(bt, node, i))
233 return NULL;
234 if (bt->cmp(key, node->key[i]) > 0)
235 i++;
236 }
237 return btree_mem_node_insert_nonfull(bt, node->child[i], key, data);
238 }
239 }
241 ////////////////////////////////////////////////////////////////////////////////
242 // x - a non-full node
243 // i - The index of a full node y in x's child list.
244 //
245 // Split y in half, adding the newly created node z as a new child of x.
246 // z will be placed after y in the child list.
248 struct btree_mem_node *
249 btree_mem_split_child(struct btree_mem * bt, struct btree_mem_node * x, int i)
250 {
252 struct btree_mem_node * y = x->child[i];
254 // Allocate new node.
255 struct btree_mem_node * z = btree_mem_node_new(bt);
256 if (NULL == z)
257 return NULL;
259 // Load z with the second half of y's data;
260 z->leaf = y->leaf;
261 z->nkeys = bt->mindeg - 1;
262 for (int j = 0; j < bt->mindeg - 1; j++)
263 {
264 z->key[j] = y->key[j + bt->mindeg];
265 z->data[j] = y->data[j + bt->mindeg];
266 }
267 if (! y->leaf)
268 {
269 for (int j = 0; j < bt->mindeg; j++)
270 z->child[j] = y->child[j + bt->mindeg];
271 }
273 // Update y's nkeys.
274 y->nkeys = bt->mindeg - 1;
276 // Make a hole in x's child list for the new node z.
277 for (int j = x->nkeys; j >= i + 1; j--)
278 x->child[j+1] = x->child[j];
279 x->child[i+1] = z;
281 // Make a hole in x's key list for the new node z.
282 for (int j = x->nkeys - 1; j >= i; j--)
283 {
284 x->key[j+1] = x->key[j];
285 x->data[j+1] = x->data[j];
286 }
287 x->key[i] = y->key[bt->mindeg-1];
288 x->data[i] = y->data[bt->mindeg-1];
290 x->nkeys += 1;
292 return z;
293 }
295 ////////////////////////////////////////////////////////////////////////////////
296 void
297 btree_mem_walk_inorder(struct btree_mem * bt, void (*f)(int, void const * const, void *))
298 {
299 btree_mem_node_walk_inorder(bt->root, f, 0);
300 }
302 ////////////////////////////////////////////////////////////////////////////////
303 void
304 btree_mem_node_walk_inorder(struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth)
305 {
307 if (0 == node->nkeys)
308 {
309 if (NULL != node->child[0])
310 btree_mem_node_walk_inorder(node->child[0], f, depth+1);
311 return;
312 }
314 for (int i = 0; i < node->nkeys; i++)
315 {
316 if (! node->leaf)
317 btree_mem_node_walk_inorder(node->child[i], f, depth+1);
318 f(depth, node->key[i], node->data[i]);
319 }
320 if (! node->leaf)
321 btree_mem_node_walk_inorder(node->child[node->nkeys], f, depth+1);
322 }
324 ////////////////////////////////////////////////////////////////////////////////
325 void
326 btree_mem_dump(struct btree_mem * bt, void (*f)(int, void const * const, void *))
327 {
328 btree_mem_node_dump(bt->root, f, 0);
329 }
331 ////////////////////////////////////////////////////////////////////////////////
332 void
333 btree_mem_node_dump(struct btree_mem_node * node, void (*f)(int, void const * const, void *), int depth)
334 {
336 if (0 == node->nkeys)
337 {
338 if (NULL != node->child[0])
339 btree_mem_node_dump(node->child[0], f, depth+1);
340 return;
341 }
343 for (int i = 0; i < node->nkeys; i++)
344 {
345 if (! node->leaf)
346 btree_mem_node_dump(node->child[i], f, depth+1);
347 printf("node: %p nkeys %10d \t", node, node->nkeys);
348 f(depth, node->key[i], node->data[i]);
349 }
350 if (! node->leaf)
351 btree_mem_node_dump(node->child[node->nkeys], f, depth+1);
352 }
354 ////////////////////////////////////////////////////////////////////////////////
355 void *
356 btree_mem_remove(struct btree_mem * bt, void * key, void ** key_value, void ** data_value)
357 {
358 if ( (NULL == bt) || (NULL == key) || (NULL == key_value) || (NULL == data_value) )
359 return NULL;
361 DEBUG_BLOCK( printf("Removing %d\n", *((int *) key)); )
362 void * data = btree_mem_node_remove(bt, bt->root, key, key_value, data_value);
363 if (NULL == data)
364 return NULL;
366 if (0 == bt->root->nkeys)
367 {
368 DEBUG_BLOCK( printf("Freeing the root node\n"); )
369 struct btree_mem_node * n = bt->root;
370 bt->root = bt->root->child[0];
371 free(n);
372 }
373 return data;
374 }
376 ////////////////////////////////////////////////////////////////////////////////
377 // Assumption: node has bt->mindeg keys or is the root.
379 void *
380 btree_mem_node_remove(struct btree_mem * bt, struct btree_mem_node * node, void * key, void ** key_value, void ** data_value)
381 {
383 DEBUG_BLOCK( btree_mem_node_print_keys(node); )
385 int i = node->nkeys - 1;
386 while ( (i >= 0) && (bt->cmp(key, node->key[i]) < 0) )
387 i--;
389 if ( (i >= 0) && (bt->cmp(key, node->key[i]) == 0) )
390 {
391 // key is in node at key[i]
392 *key_value = node->key[i];
393 *data_value = node->data[i];
394 // printf("Will return key_value %p (%d) data_value %p (%s)\n", *key_value, *((int*)(*key_value)), (char*)(*data_value));
395 if (node->leaf)
396 {
397 // case 1
398 DEBUG_BLOCK( printf("remove: case 1\n"); )
399 btree_mem_node_remove_key(node, i);
400 return *data_value;
401 }
402 else
403 {
404 struct btree_mem_node * prev = node->child[i];
405 struct btree_mem_node * next = node->child[i+1];
407 // if (child prev that precedes key in node has at least bt->mindeg keys) ||
408 // (child next that follows key in node has at least bt->mindeg keys)
409 if ( (prev->nkeys >= bt->mindeg) || (next->nkeys >= bt->mindeg) )
410 {
411 void * kp;
412 if (prev->nkeys >= bt->mindeg)
413 {
414 // case 2a
415 DEBUG_BLOCK( printf("remove: case 2a\n"); )
416 kp = btree_mem_node_last_key(prev);
417 // printf("remove: kp %d\n", *((int*)kp));
418 node->data[i] = btree_mem_node_remove(bt, prev, kp, key_value, data_value);
419 }
420 else
421 {
422 // case 2b
423 DEBUG_BLOCK( printf("remove: case 2b\n"); )
424 kp = btree_mem_node_first_key(next);
425 // printf("remove: kp %d\n", *((int*)kp));
426 node->data[i] = btree_mem_node_remove(bt, next, kp, key_value, data_value);
427 }
428 // printf("i %d\n", i);
429 // printf("remove: kp %d\n", *((int*)kp));
430 node->key[i] = kp;
431 // printf("=====\n"); btree_mem_node_dump(node, printit, 0); printf("=====\n");
432 // printf("returning %p\n", *data_value);
433 return *data_value;
434 }
435 else
436 {
437 // case 2c
438 DEBUG_BLOCK( printf("remove: case 2c\n"); )
439 btree_mem_node_add_key(bt, prev, node->key[i], node->data[i]);
440 btree_mem_node_remove_key(node, i);
441 btree_mem_node_merge(prev, next);
442 return *data_value;
443 }
444 }
445 }
446 else
447 {
448 if (node->leaf)
449 {
450 DEBUG_BLOCK( printf("remove: key not found\n"); )
451 return NULL;
452 }
454 // key is in child[i+1], if anywhere
455 struct btree_mem_node * c = node->child[i+1];
457 DEBUG_BLOCK( printf("remove: key should be in node %p\n", c); )
458 if (c->nkeys == bt->mindeg - 1)
459 {
460 struct btree_mem_node * prev = NULL;
461 if (i >= 0)
462 prev = node->child[i];
464 struct btree_mem_node * next = NULL;
465 if (i+2 <= node->nkeys)
466 next = node->child[i+2];
468 DEBUG_BLOCK( printf("remove: prev %p next %p\n", prev, next); )
470 if ( (prev) && (prev->nkeys >= bt->mindeg) )
471 {
472 // case 3a (with prev sibling)
473 // Move node->key[i] into c->key[0] shifting over existing keys
474 // Move prev->key[prev->nkeys-1] into node->key[i]
475 // if (prev->leaf)
476 // move prev->child[prev->nkeys] into c->child[0] shifting over existing children
477 // prev->nkeys--;
479 DEBUG_BLOCK( printf("remove: case 3a.1\n"); )
481 btree_mem_node_add_key(bt, c, node->key[i-1], node->data[i-1]);
483 node->key[i-1] = prev->key[prev->nkeys-1];
484 node->data[i-1] = prev->data[prev->nkeys-1];
486 if (prev->leaf)
487 {
488 for (int j = c->nkeys; j >= 0; j--)
489 c->child[j+1] = c->child[j];
490 c->child[0] = prev->child[prev->nkeys];
491 }
493 prev->nkeys--;
494 }
495 else if ( (next) && (next->nkeys >= bt->mindeg) )
496 {
497 // case 3a (with next sibling)
498 // Symmetric with the above
500 DEBUG_BLOCK( printf("remove: case 3a.2\n"); )
501 btree_mem_node_add_key(bt, c, node->key[i+1], node->data[i+1]);
503 // printf("=====\n"); btree_mem_node_dump(node, printit, 0); printf("=====\n");
505 node->key[i+1] = next->key[0];
506 node->data[i+1] = next->data[0];
507 for (int j = 0; j < next->nkeys - 1; j++)
508 {
509 next->key[j] = next->key[j+1];
510 next->data[j] = next->data[j+1];
511 }
513 if (next->leaf)
514 {
515 c->child[c->nkeys] = next->child[0];
516 for (int j = 0; j < next->nkeys; j++)
517 next->child[j] = next->child[j+1];
518 }
520 next->nkeys--;
522 }
523 else
524 {
525 // case 3b
526 DEBUG_BLOCK( printf("remove: case 3b\n"); )
527 // printf("=====\n"); btree_mem_node_dump(node, printit, 0); printf("=====\n");
528 // printf("node %p i %d\n", node, i); fflush(stdout);
529 // printf("adding %d to %p\n", *((int*)node->key[i+1]), (next ? next : prev)); fflush(stdout);
530 btree_mem_node_add_key(bt, (next ? next : prev), node->key[i+1], node->data[i+1]);
531 // printf("=====\n"); btree_mem_node_dump(node, printit, 0); printf("=====\n");
533 if (next)
534 {
535 // printf("here\n");
536 btree_mem_node_merge(c, next);
537 }
538 else
539 {
540 // printf("there\n");
541 btree_mem_node_merge(prev, c);
542 }
544 // printf("=====\n"); btree_mem_node_dump(node, printit, 0); printf("=====\n");
546 btree_mem_node_remove_key(node, i+1);
548 if (next)
549 {
550 for (int j = i+2; j <= node->nkeys+1; j++)
551 node->child[j] = node->child[j+1];
552 }
553 else
554 {
555 for (int j = i; j <= node->nkeys+1; j++)
556 node->child[j] = node->child[j+1];
557 }
559 // printf("=====\n"); btree_mem_node_dump(node, printit, 0); printf("=====\n");
560 }
561 }
562 return btree_mem_node_remove(bt, c, key, key_value, data_value);
563 }
564 }
566 ////////////////////////////////////////////////////////////////////////////////
567 // Assumption: node is not full
568 // Assumption: key is not already in node
569 void
570 btree_mem_node_add_key(struct btree_mem * bt, struct btree_mem_node * node, void * key, void * data)
571 {
572 int i = node->nkeys;
573 while ( (i > 0) && (bt->cmp(key, node->key[i-1]) < 0) )
574 i--;
576 for (int j = node->nkeys - 1; j >= i; j--)
577 {
578 node->key[j+1] = node->key[j];
579 node->data[j+1] = node->data[j];
580 }
582 node->key[i] = key;
583 node->data[i] = data;
584 node->nkeys++;
585 }
587 ////////////////////////////////////////////////////////////////////////////////
588 // Assumption: node has at least mindeg keys.
589 // Does not remove any children.
590 void *
591 btree_mem_node_remove_key(struct btree_mem_node * node, int i)
592 {
593 void * data = node->key[i];
594 for (int j = i; j < node->nkeys; j++)
595 {
596 node->key[j] = node->key[j+1];
597 node->data[j] = node->data[j+1];
598 }
599 node->nkeys--;
600 return data;
601 }
603 ////////////////////////////////////////////////////////////////////////////////
604 // Return the lowest valued key of the subtree rooted at node
605 void *
606 btree_mem_node_first_key(struct btree_mem_node * node)
607 {
608 if (0 == node->nkeys)
609 return NULL;
611 if (node->leaf)
612 return node->key[0];
614 return btree_mem_node_last_key(node->child[0]);
615 }
617 ////////////////////////////////////////////////////////////////////////////////
618 // Return the highest valued key of the subtree rooted at node
619 void *
620 btree_mem_node_last_key(struct btree_mem_node * node)
621 {
622 if (0 == node->nkeys)
623 return NULL;
625 if (node->leaf)
626 return node->key[node->nkeys-1];
628 return btree_mem_node_last_key(node->child[node->nkeys]);
629 }
631 /////////////////////////////////////////////////////////////////////////////////
632 // move all keys/data from node b into node a
633 // Assumption: all keys in b come after the keys in a
634 void
635 btree_mem_node_merge(struct btree_mem_node * a, struct btree_mem_node * b)
636 {
638 DEBUG_BLOCK( printf("merging %p with %p\n", a, b); )
639 int i = a->nkeys;
640 int j = 0;
641 for (; j < b->nkeys; i++, j++)
642 {
643 a->key[i] = b->key[j];
644 a->data[i] = b->data[j];
645 }
646 a->nkeys += b->nkeys;
647 b->nkeys = 0;
648 }
650 /////////////////////////////////////////////////////////////////////////////////
651 #ifdef DEBUG
652 void
653 btree_mem_node_print_keys(struct btree_mem_node * node)
654 {
655 printf("node: %p nkeys %10d keys: ", node, node->nkeys);
656 for (int i = 0; i < node->nkeys; i++)
657 printf("%d ", *((int *) node->key[i]));
658 printf("\n");
659 }
660 #endif