tinySQL  0.1
A self-contained database management system
bpTree.h
1 // /**
2 // * @file bpTree.h
3 // * @author CHEN Guofei (simonsatzju@gmail.com)
4 // * @brief A Bplus tree designed for 2022 MiniSQL project
5 // * @version 0.1
6 // * @date 2022-05-21
7 // *
8 // * @copyright Copyright (c) 2022
9 // *
10 // * Type declarations:
11 // * There 3 types in a Bp_node: key, value and ptr
12 // * <Key> is the corresponding attribute value to build index on.
13 // * <Value> only occurs in leaf nodes. They are aligned with key,
14 // * and points to the record in a table.
15 // * <ptr> Is the pointer to Bp_nodes. Users should never change them.
16 // *
17 // * For leaf nodes, there is only one element in ptr_field, which points
18 // * to next leaf node. It's designed for range search.
19 // *
20 // */
21 // #ifndef BPTREE_H
22 // #define BPTREE_H
23 
24 // #include <vector>
25 // #include <iostream>
26 // #include <cassert>
27 
28 // #define KEY_VALUE_T_DECLARE template<typename key_t, typename value_t>
29 // #define BP_NODE_T Bp_node<key_t, value_t>
30 // #define BP_TREE_T Bp_tree<key_t, value_t>
31 
32 // /**
33 // * @brief In memory b-plus tree node.
34 // *
35 // */
36 // KEY_VALUE_T_DECLARE
37 // struct Bp_node{
38 // bool isLeaf = false;
39 
40 // std::vector<key_t> key_field; ///< the field for keys
41 // std::vector<value_t> value_field; ///< the field for values
42 // std::vector<Bp_node*> ptr_field; ///< the field for pointers
43 
44 // BP_NODE_T* parent = nullptr;
45 
46 // /**
47 // * Find the position of a key in the node.
48 // * @param key
49 // * @return The index of key in key_field; -1 if failed
50 // */
51 // size_t FindPos_at_Node (key_t key) const{
52 // for (size_t i = 0; i < key_field.size(); ++i){
53 // if (key_field.at(i) == key){
54 // return i;
55 // }
56 // }
57 // return -1; // Convert to unsigned int max.
58 // }
59 // };
60 
61 // KEY_VALUE_T_DECLARE
62 // class Bp_tree{
63 // public:
64 // Bp_tree(size_t _degree = 4):degree(_degree){};
65 
66 // ~Bp_tree(){
67 // RecurseDelete(root);
68 // }
69 
70 // /**
71 // * @brief Basic insertion to the tree.
72 // *
73 // * @param key The key for the val.
74 // * @param val The val (or normally so-called "pointer") to the record.
75 // * @return true Insertion success
76 // * @return false Insertion fail
77 // */
78 // bool Insert(key_t key, value_t val);
79 
80 // /**
81 // * @brief Basic deletion from the tree.
82 // *
83 // * Note that normally it is not used by users.
84 // * Users should call more general functions, like update val, etc.
85 // *
86 // * @param key The key for a certain record.
87 // * @return true Deletion success.
88 // * @return false Insertion success.
89 // */
90 // bool Delete(key_t key);
91 
92 // /**
93 // * @brief Find the values by key (normally, we may say "pointers" for "values")
94 // *
95 // * @param key The search key.
96 // * @param result [out] The search result. Push them back in the result.
97 // * @return true Search success
98 // * @return false
99 // */
100 // bool FindValue(key_t key, value_t& result);
101 
102 // /**
103 // * @brief Find the value(pointer) based on the given lower and upper key
104 // *
105 // * @param lower_key
106 // * @param upper_key
107 // * @param result [out]
108 // * @return true Search success
109 // * @return false Search failure
110 // */
111 // bool FindRange(key_t lower_key, key_t upper_key, std::vector<value_t>& result);
112 
113 // /**
114 // *
115 // * @param former_key
116 // * @param new_key
117 // * @return
118 // */
119 // bool UpdateIndex(key_t former_key, key_t new_key);
120 
121 // /**
122 // * @brief Print the tree. For debug purpose
123 // *
124 // */
125 // void PrintTree(){
126 // PrintTreeRecurse(root, 0);
127 // }
128 
129 // private:
130 // size_t degree;
131 // BP_NODE_T* root = nullptr;
132 
133 
134 // /**
135 // * @brief Find the leaf node where the key-value stays
136 // *
137 // * @param key Search key
138 // * @return BP_NODE_T* Result
139 // */
140 // BP_NODE_T* FindNode(key_t key);
141 
142 // /**
143 // * @brief Simply insert in a leaf node.
144 // *
145 // * @param key
146 // * @param val
147 // * @param leaf [in] The leaf node to insert the value
148 // * @return true insertion success
149 // * @return false insertion failure
150 // */
151 // bool Insert_in_Leaf(key_t key, value_t val, BP_NODE_T* leaf);
152 
153 // /**
154 // * @brief Insert in a parent. May split. Recursive call.
155 // *
156 // * @param key the smallest search key in split (the right one when splitting)
157 // * @param prev_leaf left split node, whose pointer is in its parent->ptr_field
158 // * @param split right split node. The new node.
159 // * @return true Insert success
160 // * @return false Insert failure
161 // */
162 // bool Insert_in_Parent(key_t key, BP_NODE_T* prev_leaf, BP_NODE_T* split);
163 
164 // /**
165 // * @brief Delete in the parent based on child pointer.
166 // *
167 // * @param to_delete_child The child to be removed
168 // * @return true Delete success
169 // */
170 // bool Delete_in_Parent(BP_NODE_T* to_delete_child);
171 
172 // void RecurseDelete(BP_NODE_T* root){
173 // if (!root->isLeaf){
174 // for (BP_NODE_T* child : root->ptr_field)
175 // RecurseDelete(child);
176 // }
177 // delete root;
178 // }
179 
180 // void PrintTreeRecurse(BP_NODE_T* root, int depth){ //Recursive print (debug)
181 // assert(root != nullptr);
182 
183 // std::cout << "Depth:" << depth <<" Keys: \n";
184 // for (auto item : root->key_field){
185 // std::cout << item << ',' ;
186 // }
187 // std::cout << std::endl;
188 
189 // if(!root->isLeaf){
190 // for (BP_NODE_T* bp_n_ptr : root->ptr_field){
191 // PrintTreeRecurse(bp_n_ptr, depth + 1);
192 // }
193 // }
194 // }
195 // };
196 
197 // KEY_VALUE_T_DECLARE
198 // BP_NODE_T* BP_TREE_T::FindNode(key_t key){
199 // BP_NODE_T* C = root;
200 // assert(C->key_field.size() > 0);
201 
202 // while(!C->isLeaf){
203 // size_t i = 0;
204 // while(i < C->key_field.size() && C->key_field.at(i) < key){
205 // i += 1;
206 // }
207 
208 // // 2 cases : either way, i corresponds to the index of C->ptr_field
209 // if (i == C->key_field.size()){
210 // C = C->ptr_field.at(i);
211 // }
212 // else if (key == C->key_field.at(i)){
213 // C = C->ptr_field.at(i+1);
214 // }
215 // else{
216 // C = C->ptr_field.at(i);
217 // }
218 // }
219 
220 // return C;
221 // }
222 
223 // KEY_VALUE_T_DECLARE
224 // bool BP_TREE_T::Insert_in_Parent(key_t key, BP_NODE_T* prev_leaf, BP_NODE_T* split){
225 // if (prev_leaf == root){
226 // root = new BP_NODE_T;
227 
228 // root->key_field.push_back(key);
229 // root->ptr_field.push_back(prev_leaf);
230 // root->ptr_field.push_back(split);
231 
232 // prev_leaf->parent = root;
233 // split->parent = root;
234 // return true;
235 // }
236 // else{
237 // BP_NODE_T* parent = prev_leaf->parent;
238 // if (parent->ptr_field.size() < degree){ //Enough space for the split node
239 // size_t i;
240 // for (i = 0; i < parent->ptr_field.size(); ++i){ //TODO: May change it to bisearch
241 // if(parent->ptr_field.at(i) == prev_leaf){
242 // // Insertion happens "before" the given iterator
243 // parent->ptr_field.insert(parent->ptr_field.begin() + i + 1, split);
244 // parent->key_field.insert(parent->key_field.begin() + i, key);
245 // split->parent = parent;
246 // break;
247 // }
248 // }
249 // assert(i < parent->ptr_field.size());
250 // return true;
251 // }
252 // else{ //Not enough space. Split parent.
253 // BP_NODE_T* split_parent = new BP_NODE_T;
254 // /// Set type.
255 // split_parent->isLeaf = false;
256 
257 // /// Insert the current key and pointer temporarily
258 // size_t i;
259 // for (i = 0; i < parent->ptr_field.size(); ++i){ //TODO: May change it to bisearch
260 // if(parent->ptr_field.at(i) == prev_leaf){
261 // parent->ptr_field.insert(parent->ptr_field.begin() + i + 1, split);
262 // split->parent = parent;
263 // parent->key_field.insert(parent->key_field.begin() + i, key);
264 // break;
265 // }
266 // }
267 
268 // // Move keys and ptrs.
269 // i = (degree + 1) / 2;
270 // key_t right_smallest_key = parent->key_field.at(i - 1);
271 
272 // // Abandon a key here (which is the key for their parent),
273 // // so specially run a loop here.
274 // split_parent->ptr_field.push_back(parent->ptr_field.at(i));
275 // split_parent->ptr_field.back()->parent = split_parent;
276 // parent->ptr_field.erase(parent->ptr_field.begin() + i);
277 // parent->key_field.erase(parent->key_field.begin() + i - 1);
278 
279 // while(i < parent->ptr_field.size()){
280 // split_parent->ptr_field.push_back(parent->ptr_field.at(i));
281 // split_parent->ptr_field.back()->parent = split_parent;
282 // split_parent->key_field.push_back(parent->key_field.at(i - 1));
283 
284 // parent->ptr_field.erase(parent->ptr_field.begin() + i);
285 // parent->key_field.erase(parent->key_field.begin() + i - 1);
286 // }
287 
288 // return Insert_in_Parent(right_smallest_key, parent, split_parent);
289 // }
290 // }
291 // }
292 
293 // KEY_VALUE_T_DECLARE
294 // bool BP_TREE_T::Insert_in_Leaf(key_t key, value_t val, BP_NODE_T* leaf){
295 // assert(leaf->isLeaf);
296 
297 // size_t i = 0;
298 // while(i < leaf->key_field.size() && leaf->key_field.at(i) < key){
299 // ++i;
300 // }
301 
302 // leaf->key_field.insert(leaf->key_field.begin() + i, key);
303 // leaf->value_field.insert(leaf->value_field.begin() + i, val);
304 
305 // return true;
306 // }
307 
308 // KEY_VALUE_T_DECLARE
309 // bool BP_TREE_T::Insert(key_t key, value_t val){
310 // if (root == nullptr){
311 // root = new BP_NODE_T;
312 // root->isLeaf = true;
313 // root->key_field.push_back(key);
314 // root->value_field.push_back(val);
315 // return true;
316 // }
317 // else{
318 // BP_NODE_T* leaf = FindNode(key);
319 // if (leaf == nullptr){
320 // return false;
321 // }
322 
323 // if (leaf->key_field.size() < degree - 1){ // Still have space
324 // Insert_in_Leaf(key, val, leaf);
325 // }
326 // else{
327 // assert(leaf->key_field.size() == degree - 1);
328 
329 // BP_NODE_T* split = new BP_NODE_T;
330 
331 // /// Set type
332 // split->isLeaf = true;
333 
334 // /// Insert in leaf first
335 // Insert_in_Leaf(key, val, leaf);
336 
337 // /// Copy values and keys
338 // size_t i = degree / 2; // The position right after the (degree/2)th element
339 
340 // while(i < leaf->key_field.size()){
341 // split->key_field.push_back(leaf->key_field.at(i));
342 // split->value_field.push_back(leaf->value_field.at(i));
343 
344 // leaf->key_field.erase(leaf->key_field.begin() + i);
345 // leaf->value_field.erase(leaf->value_field.begin() + i);
346 // }
347 
348 // /// Set pointer for next leaf node (For range search)
349 // if (leaf->ptr_field.empty()){
350 // leaf->ptr_field.push_back(split);
351 // }
352 // else{
353 // split->ptr_field.push_back(leaf->ptr_field.front());
354 // leaf->ptr_field.front() = split;
355 // }
356 
357 // /// Set parent
358 // split->parent = leaf->parent;
359 
360 // Insert_in_Parent(split->key_field.front(), leaf, split);
361 // }
362 // return true;
363 // }
364 // }
365 
366 // KEY_VALUE_T_DECLARE
367 // bool BP_TREE_T::Delete_in_Parent(BP_NODE_T* to_delete_child){
368 // BP_NODE_T* parent = to_delete_child->parent;
369 
370 // // Delete the pointer to the child from its parent, and its "separation key"
371 // // Caller need to make sure that the abandoned child is the latter one in merging
372 // for (size_t i = 0; i < parent->ptr_field.size(); ++i){
373 // if (parent->ptr_field.at(i) == to_delete_child){
374 // parent->ptr_field.erase(parent->ptr_field.begin() + i);
375 // parent->key_field.erase(parent->key_field.begin() + i - 1);
376 // break;
377 // }
378 // }
379 
380 // if (parent == root){
381 // if (parent->ptr_field.size() == 1){
382 // root = parent->ptr_field.front();
383 // delete parent;
384 // return true;
385 // }
386 // }
387 // else if (parent->key_field.size() < (degree + 1) / 2){ // (int)(degree + 1)/2 is equivalent to degree/2 's upper integer
388 // BP_NODE_T* sibling;
389 // BP_NODE_T* grand_parent = parent->parent;
390 // key_t sep_key_in_grandparent;
391 // size_t i;
392 // bool is_predecessor;
393 // for (i = 0; i < grand_parent->ptr_field.size(); ++i){
394 // if (grand_parent->ptr_field.at(i) == parent) break;
395 // }
396 
397 // assert(i < grand_parent->ptr_field.size());
398 
399 // if (i < grand_parent->ptr_field.size() - 1){ // Not the last child
400 // sibling = grand_parent->ptr_field.at(i + 1);
401 // sep_key_in_grandparent = grand_parent->key_field.at(i);
402 // is_predecessor = true;
403 // }
404 // else{
405 // sibling = grand_parent->ptr_field.at(i - 1);
406 // sep_key_in_grandparent = grand_parent->key_field.at(i - 1);
407 // is_predecessor = false;
408 // }
409 
410 // // At most n - 1 keys in non-leaves
411 // if (parent->key_field.size() + sibling->key_field.size() < degree){ // Merge leaf and sibling
412 // if (is_predecessor) { // Parent is the predecessor
413 // auto temp = parent;
414 // parent = sibling;
415 // sibling = temp;
416 // }
417 // // Move the content in leaf to sibling, and set the child->parent pointer
418 // sibling->key_field.push_back(sep_key_in_grandparent);
419 // sibling->ptr_field.push_back(parent->ptr_field.front());
420 // sibling->ptr_field.back()->parent = sibling;
421 // for (size_t j = 0; j < parent->key_field.size(); ++j){
422 // sibling->key_field.push_back(parent->key_field.at(j));
423 // sibling->ptr_field.push_back(parent->ptr_field.at(j + 1));
424 // sibling->ptr_field.back()->parent = sibling;
425 // }
426 
427 // Delete_in_Parent(parent);
428 
429 // delete parent;
430 
431 // return true;
432 // }
433 // else{ // Redistribute the keys and pointers
434 // if (is_predecessor){
435 // parent->key_field.push_back(sep_key_in_grandparent);
436 // parent->ptr_field.push_back(sibling->ptr_field.front());
437 // parent->ptr_field.back()->parent = parent;
438 
439 // grand_parent->key_field.at(i) = sibling->key_field.front();
440 
441 // sibling->key_field.erase(sibling->key_field.begin());
442 // sibling->ptr_field.erase(sibling->ptr_field.begin());
443 // }
444 // else{
445 // parent->key_field.insert(parent->key_field.begin(), sep_key_in_grandparent);
446 // parent->ptr_field.insert(parent->ptr_field.begin(), sibling->ptr_field.back());
447 // parent->ptr_field.front()->parent = parent;
448 
449 // grand_parent->key_field.at(i - 1) = sibling->key_field.back();
450 
451 // sibling->key_field.pop_back();
452 // sibling->ptr_field.pop_back();
453 // }
454 
455 // return true;
456 // }
457 // }
458 // }
459 
460 // KEY_VALUE_T_DECLARE
461 // bool BP_TREE_T::Delete(key_t key) {
462 // BP_NODE_T* leaf = FindNode(key);
463 
464 // // Delete the key and value from leaf
465 // for (size_t i = 0; i < leaf->key_field.size(); ++i){
466 // if (leaf->key_field.at(i) == key){
467 // leaf->key_field.erase(leaf->key_field.begin() + i);
468 // leaf->value_field.erase(leaf->value_field.begin() + i);
469 // break;
470 // }
471 // }
472 
473 // if (leaf == root) return true;
474 // // Violate capacity constraint for leaf and non-root node
475 // else if (leaf->key_field.size() < degree / 2){ // (int)degree/2 is equivalent to (degree - 1)/2's upper integer
476 // BP_NODE_T* sibling;
477 // BP_NODE_T* parent = leaf->parent;
478 // key_t sep_key_in_parent;
479 // size_t i;
480 // bool is_predecessor;
481 // for (i = 0; i < parent->ptr_field.size(); ++i){
482 // if (parent->ptr_field.at(i) == leaf) break;
483 // }
484 // assert(i < parent->ptr_field.size());
485 // if (i < parent->ptr_field.size() - 1){ // Not the last element
486 // sibling = parent->ptr_field.at(i + 1);
487 // is_predecessor = true;
488 // sep_key_in_parent = parent->key_field.at(i);
489 // }
490 // else {
491 // sibling = parent->ptr_field.at(i - 1);
492 // is_predecessor = false;
493 // sep_key_in_parent = parent->key_field.at(i - 1);
494 // }
495 
496 // // At most n - 1 keys in leaves.
497 // if (sibling->key_field.size() + leaf->key_field.size() < degree){ // Merge leaf and sibling
498 // if (is_predecessor){ // leaf is the predecessor of sibling
499 // // Exchange the variables for convenience here
500 // auto temp = leaf;
501 // leaf = sibling;
502 // sibling = temp;
503 // }
504 
505 // // Move the content in leaf to sibling
506 // if (!leaf->ptr_field.empty()){
507 // sibling->ptr_field.front() = leaf->ptr_field.front();
508 // }
509 // for (size_t j = 0; j < leaf->key_field.size(); ++j){
510 // sibling->key_field.push_back(leaf->key_field.at(j));
511 // sibling->value_field.push_back(leaf->value_field.at(j));
512 // }
513 
514 // Delete_in_Parent(leaf);
515 
516 // delete leaf;
517 // }
518 // else{ // Redistribution between two leaves
519 // if (is_predecessor){ // leaf is the predecessor of sibling
520 // leaf->key_field.push_back(sibling->key_field.front());
521 // leaf->value_field.push_back(sibling->value_field.front());
522 
523 // sibling->key_field.erase(sibling->key_field.begin());
524 // sibling->value_field.erase(sibling->value_field.begin());
525 
526 // parent->key_field.at(i) = sibling->key_field.front(); // The separation key
527 // }
528 // else{ // leaf is the successor of sibling
529 // leaf->key_field.insert(leaf->key_field.begin(), sibling->key_field.back());
530 // leaf->value_field.insert(leaf->value_field.begin(), sibling->value_field.back());
531 
532 // sibling->key_field.pop_back();
533 // sibling->value_field.pop_back();
534 
535 // parent->key_field.at(i - 1) = leaf->key_field.front(); // The separation key
536 // }
537 // }
538 // return true;
539 // }
540 // }
541 
542 // KEY_VALUE_T_DECLARE
543 // bool BP_TREE_T::FindValue(key_t key, value_t& result) {
544 // BP_NODE_T& node = *(FindNode(key));
545 // size_t id = node.FindPos_at_Node(key);
546 // if (id != -1){
547 // result = node.value_field.at(id);
548 // return true;
549 // }
550 // else return false;
551 // }
552 
553 // KEY_VALUE_T_DECLARE
554 // bool BP_TREE_T::FindRange(key_t lower_key, key_t upper_key, std::vector<value_t>& result) {
555 // const BP_NODE_T* lower_node = FindNode(lower_key);
556 // const BP_NODE_T* upper_node = FindNode(upper_key);
557 
558 // size_t start_id = lower_node->FindPos_at_Node(lower_key);
559 // size_t end_id = upper_node->FindPos_at_Node(upper_key);
560 
561 // if (start_id == -1 || end_id == -1){
562 // return false;
563 // }
564 // else{
565 // if (lower_node == upper_node){
566 // for (size_t i = start_id; i <= end_id; ++i){
567 // result.push_back(lower_node->value_field.at(i));
568 // }
569 // }
570 // else{
571 // for (size_t i = start_id; i < lower_node->value_field.size(); ++i){
572 // result.push_back(lower_node->value_field.at(i));
573 // }
574 // const BP_NODE_T* next = lower_node->ptr_field.front();
575 // while(next != upper_node){
576 // for (auto value : next->value_field){
577 // result.push_back(value);
578 // }
579 // next = next->ptr_field.front();
580 // }
581 // for (size_t i = 0; i <= end_id; ++i) {
582 // result.push_back(upper_node->value_field.at(i));
583 // }
584 // }
585 // return true;
586 // }
587 // }
588 
589 // #endif