tinySQL
0.1
A self-contained database management system
src
include
index
bpTree.h
1
// /**
2
// * @file bpTree.h
3
// * @author CHEN Guofei (simonsatzju@gmail.com)
4
// * @brief A Bplus tree designed for 2022 MiniSQL project
5
// * @version 0.1
6
// * @date 2022-05-21
7
// *
8
// * @copyright Copyright (c) 2022
9
// *
10
// * Type declarations:
11
// * There 3 types in a Bp_node: key, value and ptr
12
// * <Key> is the corresponding attribute value to build index on.
13
// * <Value> only occurs in leaf nodes. They are aligned with key,
14
// * and points to the record in a table.
15
// * <ptr> Is the pointer to Bp_nodes. Users should never change them.
16
// *
17
// * For leaf nodes, there is only one element in ptr_field, which points
18
// * to next leaf node. It's designed for range search.
19
// *
20
// */
21
// #ifndef BPTREE_H
22
// #define BPTREE_H
23
24
// #include <vector>
25
// #include <iostream>
26
// #include <cassert>
27
28
// #define KEY_VALUE_T_DECLARE template<typename key_t, typename value_t>
29
// #define BP_NODE_T Bp_node<key_t, value_t>
30
// #define BP_TREE_T Bp_tree<key_t, value_t>
31
32
// /**
33
// * @brief In memory b-plus tree node.
34
// *
35
// */
36
// KEY_VALUE_T_DECLARE
37
// struct Bp_node{
38
// bool isLeaf = false;
39
40
// std::vector<key_t> key_field; ///< the field for keys
41
// std::vector<value_t> value_field; ///< the field for values
42
// std::vector<Bp_node*> ptr_field; ///< the field for pointers
43
44
// BP_NODE_T* parent = nullptr;
45
46
// /**
47
// * Find the position of a key in the node.
48
// * @param key
49
// * @return The index of key in key_field; -1 if failed
50
// */
51
// size_t FindPos_at_Node (key_t key) const{
52
// for (size_t i = 0; i < key_field.size(); ++i){
53
// if (key_field.at(i) == key){
54
// return i;
55
// }
56
// }
57
// return -1; // Convert to unsigned int max.
58
// }
59
// };
60
61
// KEY_VALUE_T_DECLARE
62
// class Bp_tree{
63
// public:
64
// Bp_tree(size_t _degree = 4):degree(_degree){};
65
66
// ~Bp_tree(){
67
// RecurseDelete(root);
68
// }
69
70
// /**
71
// * @brief Basic insertion to the tree.
72
// *
73
// * @param key The key for the val.
74
// * @param val The val (or normally so-called "pointer") to the record.
75
// * @return true Insertion success
76
// * @return false Insertion fail
77
// */
78
// bool Insert(key_t key, value_t val);
79
80
// /**
81
// * @brief Basic deletion from the tree.
82
// *
83
// * Note that normally it is not used by users.
84
// * Users should call more general functions, like update val, etc.
85
// *
86
// * @param key The key for a certain record.
87
// * @return true Deletion success.
88
// * @return false Insertion success.
89
// */
90
// bool Delete(key_t key);
91
92
// /**
93
// * @brief Find the values by key (normally, we may say "pointers" for "values")
94
// *
95
// * @param key The search key.
96
// * @param result [out] The search result. Push them back in the result.
97
// * @return true Search success
98
// * @return false
99
// */
100
// bool FindValue(key_t key, value_t& result);
101
102
// /**
103
// * @brief Find the value(pointer) based on the given lower and upper key
104
// *
105
// * @param lower_key
106
// * @param upper_key
107
// * @param result [out]
108
// * @return true Search success
109
// * @return false Search failure
110
// */
111
// bool FindRange(key_t lower_key, key_t upper_key, std::vector<value_t>& result);
112
113
// /**
114
// *
115
// * @param former_key
116
// * @param new_key
117
// * @return
118
// */
119
// bool UpdateIndex(key_t former_key, key_t new_key);
120
121
// /**
122
// * @brief Print the tree. For debug purpose
123
// *
124
// */
125
// void PrintTree(){
126
// PrintTreeRecurse(root, 0);
127
// }
128
129
// private:
130
// size_t degree;
131
// BP_NODE_T* root = nullptr;
132
133
134
// /**
135
// * @brief Find the leaf node where the key-value stays
136
// *
137
// * @param key Search key
138
// * @return BP_NODE_T* Result
139
// */
140
// BP_NODE_T* FindNode(key_t key);
141
142
// /**
143
// * @brief Simply insert in a leaf node.
144
// *
145
// * @param key
146
// * @param val
147
// * @param leaf [in] The leaf node to insert the value
148
// * @return true insertion success
149
// * @return false insertion failure
150
// */
151
// bool Insert_in_Leaf(key_t key, value_t val, BP_NODE_T* leaf);
152
153
// /**
154
// * @brief Insert in a parent. May split. Recursive call.
155
// *
156
// * @param key the smallest search key in split (the right one when splitting)
157
// * @param prev_leaf left split node, whose pointer is in its parent->ptr_field
158
// * @param split right split node. The new node.
159
// * @return true Insert success
160
// * @return false Insert failure
161
// */
162
// bool Insert_in_Parent(key_t key, BP_NODE_T* prev_leaf, BP_NODE_T* split);
163
164
// /**
165
// * @brief Delete in the parent based on child pointer.
166
// *
167
// * @param to_delete_child The child to be removed
168
// * @return true Delete success
169
// */
170
// bool Delete_in_Parent(BP_NODE_T* to_delete_child);
171
172
// void RecurseDelete(BP_NODE_T* root){
173
// if (!root->isLeaf){
174
// for (BP_NODE_T* child : root->ptr_field)
175
// RecurseDelete(child);
176
// }
177
// delete root;
178
// }
179
180
// void PrintTreeRecurse(BP_NODE_T* root, int depth){ //Recursive print (debug)
181
// assert(root != nullptr);
182
183
// std::cout << "Depth:" << depth <<" Keys: \n";
184
// for (auto item : root->key_field){
185
// std::cout << item << ',' ;
186
// }
187
// std::cout << std::endl;
188
189
// if(!root->isLeaf){
190
// for (BP_NODE_T* bp_n_ptr : root->ptr_field){
191
// PrintTreeRecurse(bp_n_ptr, depth + 1);
192
// }
193
// }
194
// }
195
// };
196
197
// KEY_VALUE_T_DECLARE
198
// BP_NODE_T* BP_TREE_T::FindNode(key_t key){
199
// BP_NODE_T* C = root;
200
// assert(C->key_field.size() > 0);
201
202
// while(!C->isLeaf){
203
// size_t i = 0;
204
// while(i < C->key_field.size() && C->key_field.at(i) < key){
205
// i += 1;
206
// }
207
208
// // 2 cases : either way, i corresponds to the index of C->ptr_field
209
// if (i == C->key_field.size()){
210
// C = C->ptr_field.at(i);
211
// }
212
// else if (key == C->key_field.at(i)){
213
// C = C->ptr_field.at(i+1);
214
// }
215
// else{
216
// C = C->ptr_field.at(i);
217
// }
218
// }
219
220
// return C;
221
// }
222
223
// KEY_VALUE_T_DECLARE
224
// bool BP_TREE_T::Insert_in_Parent(key_t key, BP_NODE_T* prev_leaf, BP_NODE_T* split){
225
// if (prev_leaf == root){
226
// root = new BP_NODE_T;
227
228
// root->key_field.push_back(key);
229
// root->ptr_field.push_back(prev_leaf);
230
// root->ptr_field.push_back(split);
231
232
// prev_leaf->parent = root;
233
// split->parent = root;
234
// return true;
235
// }
236
// else{
237
// BP_NODE_T* parent = prev_leaf->parent;
238
// if (parent->ptr_field.size() < degree){ //Enough space for the split node
239
// size_t i;
240
// for (i = 0; i < parent->ptr_field.size(); ++i){ //TODO: May change it to bisearch
241
// if(parent->ptr_field.at(i) == prev_leaf){
242
// // Insertion happens "before" the given iterator
243
// parent->ptr_field.insert(parent->ptr_field.begin() + i + 1, split);
244
// parent->key_field.insert(parent->key_field.begin() + i, key);
245
// split->parent = parent;
246
// break;
247
// }
248
// }
249
// assert(i < parent->ptr_field.size());
250
// return true;
251
// }
252
// else{ //Not enough space. Split parent.
253
// BP_NODE_T* split_parent = new BP_NODE_T;
254
// /// Set type.
255
// split_parent->isLeaf = false;
256
257
// /// Insert the current key and pointer temporarily
258
// size_t i;
259
// for (i = 0; i < parent->ptr_field.size(); ++i){ //TODO: May change it to bisearch
260
// if(parent->ptr_field.at(i) == prev_leaf){
261
// parent->ptr_field.insert(parent->ptr_field.begin() + i + 1, split);
262
// split->parent = parent;
263
// parent->key_field.insert(parent->key_field.begin() + i, key);
264
// break;
265
// }
266
// }
267
268
// // Move keys and ptrs.
269
// i = (degree + 1) / 2;
270
// key_t right_smallest_key = parent->key_field.at(i - 1);
271
272
// // Abandon a key here (which is the key for their parent),
273
// // so specially run a loop here.
274
// split_parent->ptr_field.push_back(parent->ptr_field.at(i));
275
// split_parent->ptr_field.back()->parent = split_parent;
276
// parent->ptr_field.erase(parent->ptr_field.begin() + i);
277
// parent->key_field.erase(parent->key_field.begin() + i - 1);
278
279
// while(i < parent->ptr_field.size()){
280
// split_parent->ptr_field.push_back(parent->ptr_field.at(i));
281
// split_parent->ptr_field.back()->parent = split_parent;
282
// split_parent->key_field.push_back(parent->key_field.at(i - 1));
283
284
// parent->ptr_field.erase(parent->ptr_field.begin() + i);
285
// parent->key_field.erase(parent->key_field.begin() + i - 1);
286
// }
287
288
// return Insert_in_Parent(right_smallest_key, parent, split_parent);
289
// }
290
// }
291
// }
292
293
// KEY_VALUE_T_DECLARE
294
// bool BP_TREE_T::Insert_in_Leaf(key_t key, value_t val, BP_NODE_T* leaf){
295
// assert(leaf->isLeaf);
296
297
// size_t i = 0;
298
// while(i < leaf->key_field.size() && leaf->key_field.at(i) < key){
299
// ++i;
300
// }
301
302
// leaf->key_field.insert(leaf->key_field.begin() + i, key);
303
// leaf->value_field.insert(leaf->value_field.begin() + i, val);
304
305
// return true;
306
// }
307
308
// KEY_VALUE_T_DECLARE
309
// bool BP_TREE_T::Insert(key_t key, value_t val){
310
// if (root == nullptr){
311
// root = new BP_NODE_T;
312
// root->isLeaf = true;
313
// root->key_field.push_back(key);
314
// root->value_field.push_back(val);
315
// return true;
316
// }
317
// else{
318
// BP_NODE_T* leaf = FindNode(key);
319
// if (leaf == nullptr){
320
// return false;
321
// }
322
323
// if (leaf->key_field.size() < degree - 1){ // Still have space
324
// Insert_in_Leaf(key, val, leaf);
325
// }
326
// else{
327
// assert(leaf->key_field.size() == degree - 1);
328
329
// BP_NODE_T* split = new BP_NODE_T;
330
331
// /// Set type
332
// split->isLeaf = true;
333
334
// /// Insert in leaf first
335
// Insert_in_Leaf(key, val, leaf);
336
337
// /// Copy values and keys
338
// size_t i = degree / 2; // The position right after the (degree/2)th element
339
340
// while(i < leaf->key_field.size()){
341
// split->key_field.push_back(leaf->key_field.at(i));
342
// split->value_field.push_back(leaf->value_field.at(i));
343
344
// leaf->key_field.erase(leaf->key_field.begin() + i);
345
// leaf->value_field.erase(leaf->value_field.begin() + i);
346
// }
347
348
// /// Set pointer for next leaf node (For range search)
349
// if (leaf->ptr_field.empty()){
350
// leaf->ptr_field.push_back(split);
351
// }
352
// else{
353
// split->ptr_field.push_back(leaf->ptr_field.front());
354
// leaf->ptr_field.front() = split;
355
// }
356
357
// /// Set parent
358
// split->parent = leaf->parent;
359
360
// Insert_in_Parent(split->key_field.front(), leaf, split);
361
// }
362
// return true;
363
// }
364
// }
365
366
// KEY_VALUE_T_DECLARE
367
// bool BP_TREE_T::Delete_in_Parent(BP_NODE_T* to_delete_child){
368
// BP_NODE_T* parent = to_delete_child->parent;
369
370
// // Delete the pointer to the child from its parent, and its "separation key"
371
// // Caller need to make sure that the abandoned child is the latter one in merging
372
// for (size_t i = 0; i < parent->ptr_field.size(); ++i){
373
// if (parent->ptr_field.at(i) == to_delete_child){
374
// parent->ptr_field.erase(parent->ptr_field.begin() + i);
375
// parent->key_field.erase(parent->key_field.begin() + i - 1);
376
// break;
377
// }
378
// }
379
380
// if (parent == root){
381
// if (parent->ptr_field.size() == 1){
382
// root = parent->ptr_field.front();
383
// delete parent;
384
// return true;
385
// }
386
// }
387
// else if (parent->key_field.size() < (degree + 1) / 2){ // (int)(degree + 1)/2 is equivalent to degree/2 's upper integer
388
// BP_NODE_T* sibling;
389
// BP_NODE_T* grand_parent = parent->parent;
390
// key_t sep_key_in_grandparent;
391
// size_t i;
392
// bool is_predecessor;
393
// for (i = 0; i < grand_parent->ptr_field.size(); ++i){
394
// if (grand_parent->ptr_field.at(i) == parent) break;
395
// }
396
397
// assert(i < grand_parent->ptr_field.size());
398
399
// if (i < grand_parent->ptr_field.size() - 1){ // Not the last child
400
// sibling = grand_parent->ptr_field.at(i + 1);
401
// sep_key_in_grandparent = grand_parent->key_field.at(i);
402
// is_predecessor = true;
403
// }
404
// else{
405
// sibling = grand_parent->ptr_field.at(i - 1);
406
// sep_key_in_grandparent = grand_parent->key_field.at(i - 1);
407
// is_predecessor = false;
408
// }
409
410
// // At most n - 1 keys in non-leaves
411
// if (parent->key_field.size() + sibling->key_field.size() < degree){ // Merge leaf and sibling
412
// if (is_predecessor) { // Parent is the predecessor
413
// auto temp = parent;
414
// parent = sibling;
415
// sibling = temp;
416
// }
417
// // Move the content in leaf to sibling, and set the child->parent pointer
418
// sibling->key_field.push_back(sep_key_in_grandparent);
419
// sibling->ptr_field.push_back(parent->ptr_field.front());
420
// sibling->ptr_field.back()->parent = sibling;
421
// for (size_t j = 0; j < parent->key_field.size(); ++j){
422
// sibling->key_field.push_back(parent->key_field.at(j));
423
// sibling->ptr_field.push_back(parent->ptr_field.at(j + 1));
424
// sibling->ptr_field.back()->parent = sibling;
425
// }
426
427
// Delete_in_Parent(parent);
428
429
// delete parent;
430
431
// return true;
432
// }
433
// else{ // Redistribute the keys and pointers
434
// if (is_predecessor){
435
// parent->key_field.push_back(sep_key_in_grandparent);
436
// parent->ptr_field.push_back(sibling->ptr_field.front());
437
// parent->ptr_field.back()->parent = parent;
438
439
// grand_parent->key_field.at(i) = sibling->key_field.front();
440
441
// sibling->key_field.erase(sibling->key_field.begin());
442
// sibling->ptr_field.erase(sibling->ptr_field.begin());
443
// }
444
// else{
445
// parent->key_field.insert(parent->key_field.begin(), sep_key_in_grandparent);
446
// parent->ptr_field.insert(parent->ptr_field.begin(), sibling->ptr_field.back());
447
// parent->ptr_field.front()->parent = parent;
448
449
// grand_parent->key_field.at(i - 1) = sibling->key_field.back();
450
451
// sibling->key_field.pop_back();
452
// sibling->ptr_field.pop_back();
453
// }
454
455
// return true;
456
// }
457
// }
458
// }
459
460
// KEY_VALUE_T_DECLARE
461
// bool BP_TREE_T::Delete(key_t key) {
462
// BP_NODE_T* leaf = FindNode(key);
463
464
// // Delete the key and value from leaf
465
// for (size_t i = 0; i < leaf->key_field.size(); ++i){
466
// if (leaf->key_field.at(i) == key){
467
// leaf->key_field.erase(leaf->key_field.begin() + i);
468
// leaf->value_field.erase(leaf->value_field.begin() + i);
469
// break;
470
// }
471
// }
472
473
// if (leaf == root) return true;
474
// // Violate capacity constraint for leaf and non-root node
475
// else if (leaf->key_field.size() < degree / 2){ // (int)degree/2 is equivalent to (degree - 1)/2's upper integer
476
// BP_NODE_T* sibling;
477
// BP_NODE_T* parent = leaf->parent;
478
// key_t sep_key_in_parent;
479
// size_t i;
480
// bool is_predecessor;
481
// for (i = 0; i < parent->ptr_field.size(); ++i){
482
// if (parent->ptr_field.at(i) == leaf) break;
483
// }
484
// assert(i < parent->ptr_field.size());
485
// if (i < parent->ptr_field.size() - 1){ // Not the last element
486
// sibling = parent->ptr_field.at(i + 1);
487
// is_predecessor = true;
488
// sep_key_in_parent = parent->key_field.at(i);
489
// }
490
// else {
491
// sibling = parent->ptr_field.at(i - 1);
492
// is_predecessor = false;
493
// sep_key_in_parent = parent->key_field.at(i - 1);
494
// }
495
496
// // At most n - 1 keys in leaves.
497
// if (sibling->key_field.size() + leaf->key_field.size() < degree){ // Merge leaf and sibling
498
// if (is_predecessor){ // leaf is the predecessor of sibling
499
// // Exchange the variables for convenience here
500
// auto temp = leaf;
501
// leaf = sibling;
502
// sibling = temp;
503
// }
504
505
// // Move the content in leaf to sibling
506
// if (!leaf->ptr_field.empty()){
507
// sibling->ptr_field.front() = leaf->ptr_field.front();
508
// }
509
// for (size_t j = 0; j < leaf->key_field.size(); ++j){
510
// sibling->key_field.push_back(leaf->key_field.at(j));
511
// sibling->value_field.push_back(leaf->value_field.at(j));
512
// }
513
514
// Delete_in_Parent(leaf);
515
516
// delete leaf;
517
// }
518
// else{ // Redistribution between two leaves
519
// if (is_predecessor){ // leaf is the predecessor of sibling
520
// leaf->key_field.push_back(sibling->key_field.front());
521
// leaf->value_field.push_back(sibling->value_field.front());
522
523
// sibling->key_field.erase(sibling->key_field.begin());
524
// sibling->value_field.erase(sibling->value_field.begin());
525
526
// parent->key_field.at(i) = sibling->key_field.front(); // The separation key
527
// }
528
// else{ // leaf is the successor of sibling
529
// leaf->key_field.insert(leaf->key_field.begin(), sibling->key_field.back());
530
// leaf->value_field.insert(leaf->value_field.begin(), sibling->value_field.back());
531
532
// sibling->key_field.pop_back();
533
// sibling->value_field.pop_back();
534
535
// parent->key_field.at(i - 1) = leaf->key_field.front(); // The separation key
536
// }
537
// }
538
// return true;
539
// }
540
// }
541
542
// KEY_VALUE_T_DECLARE
543
// bool BP_TREE_T::FindValue(key_t key, value_t& result) {
544
// BP_NODE_T& node = *(FindNode(key));
545
// size_t id = node.FindPos_at_Node(key);
546
// if (id != -1){
547
// result = node.value_field.at(id);
548
// return true;
549
// }
550
// else return false;
551
// }
552
553
// KEY_VALUE_T_DECLARE
554
// bool BP_TREE_T::FindRange(key_t lower_key, key_t upper_key, std::vector<value_t>& result) {
555
// const BP_NODE_T* lower_node = FindNode(lower_key);
556
// const BP_NODE_T* upper_node = FindNode(upper_key);
557
558
// size_t start_id = lower_node->FindPos_at_Node(lower_key);
559
// size_t end_id = upper_node->FindPos_at_Node(upper_key);
560
561
// if (start_id == -1 || end_id == -1){
562
// return false;
563
// }
564
// else{
565
// if (lower_node == upper_node){
566
// for (size_t i = start_id; i <= end_id; ++i){
567
// result.push_back(lower_node->value_field.at(i));
568
// }
569
// }
570
// else{
571
// for (size_t i = start_id; i < lower_node->value_field.size(); ++i){
572
// result.push_back(lower_node->value_field.at(i));
573
// }
574
// const BP_NODE_T* next = lower_node->ptr_field.front();
575
// while(next != upper_node){
576
// for (auto value : next->value_field){
577
// result.push_back(value);
578
// }
579
// next = next->ptr_field.front();
580
// }
581
// for (size_t i = 0; i <= end_id; ++i) {
582
// result.push_back(upper_node->value_field.at(i));
583
// }
584
// }
585
// return true;
586
// }
587
// }
588
589
// #endif
Generated by
1.8.17