From 33657122f9500ea8ef7159e76ae640aac530d3fe Mon Sep 17 00:00:00 2001
From: Bartuccio Antoine
Date: Tue, 30 Jul 2019 17:26:36 +0200
Subject: [PATCH] btree: add basic in memory b-tree with integers

---
 .drone.yml          |  13 ++
 btree/btree.go      | 505 ++++++++++++++++++++++++++++++++++++++++++++
 btree/btree_test.go | 123 +++++++++++
 main.go             |  36 ++++
 4 files changed, 677 insertions(+)
 create mode 100644 .drone.yml
 create mode 100644 btree/btree.go
 create mode 100644 btree/btree_test.go
 create mode 100644 main.go

diff --git a/.drone.yml b/.drone.yml
new file mode 100644
index 0000000..6ec37f8
--- /dev/null
+++ b/.drone.yml
@@ -0,0 +1,13 @@
+pipeline:
+  build:
+    image: golang:1.12
+    group: build
+    commands:
+      - go get -v -d ./...
+      - go build .
+  test:
+    image: golang:1.12
+    group: test
+    commands:
+      - go get -v -d ./...
+      - go test -v ./...
diff --git a/btree/btree.go b/btree/btree.go
new file mode 100644
index 0000000..7dd7ab5
--- /dev/null
+++ b/btree/btree.go
@@ -0,0 +1,505 @@
+package btree
+
+import (
+	"fmt"
+)
+
+// Tree is the tree itself
+type Tree struct {
+	root *Node // Pointer to the Node root
+	t    int   // Minimum degree
+}
+
+// Node is a Node of a Btree
+type Node struct {
+	numberOfKeys int // The number of keys really stored
+	t            int // The value of t depends upon disk block size
+	isLeaf       bool
+	keys         []int
+	children     []*Node
+}
+
+// Constructors
+
+// NewBtree creates a new btree
+func NewBtree(t int) *Tree {
+
+	return &Tree{
+		root: nil,
+		t:    t,
+	}
+}
+
+// newNode allocates an empty node of minimum degree t with room for the
+// maximum 2t-1 keys and 2t children.
+func newNode(t int, isLeaf bool) *Node {
+
+	return &Node{
+		numberOfKeys: 0,
+		t:            t,
+		isLeaf:       isLeaf,
+		keys:         make([]int, 2*t-1),
+		children:     make([]*Node, 2*t),
+	}
+}
+
+// Tree methods
+
+// Traverse the tree
+func (t *Tree) Traverse() {
+
+	if t.root != nil {
+		t.root.traverse()
+	}
+}
+
+// Search k in the tree
+func (t *Tree) Search(k int) *Node {
+
+	if t.root == nil {
+		return nil
+	}
+
+	return t.root.search(k)
+}
+
+// Remove k in the tree
+func (t *Tree) Remove(k int) error {
+	if t.root == nil {
+		return fmt.Errorf("The tree is empty")
+	}
+
+	err := t.root.remove(k)
+
+	// If the root node has 0 keys, make its first child the new root
+	// If it has no child, set root as nil
+	if t.root.numberOfKeys == 0 {
+		if t.root.isLeaf {
+			t.root = nil
+			return err
+		}
+		t.root = t.root.children[0]
+	}
+
+	return err
+}
+
+// Insert k in the tree
+func (t *Tree) Insert(k int) {
+	// If the tree is empty
+	if t.root == nil {
+		t.root = newNode(t.t, true)
+		t.root.keys[0] = k
+		t.root.numberOfKeys = 1
+		return
+	}
+
+	// If the tree is not empty
+	if !t.root.isFull() {
+		// If root is not full, insert in non full root
+		t.root.insertNonFull(k)
+		return
+	}
+
+	// If the root is full, then the tree grows in height
+	s := newNode(t.t, false)
+
+	// Make the old root as a child of the new root
+	s.children[0] = t.root
+
+	// Split the old root and move 1 key to the new root
+	s.splitChild(0, t.root)
+
+	// The new root has two children now.
+	// We decide which of the two children is going to have the new key
+	i := 0
+	if s.keys[0] < k {
+		i++
+	}
+	s.children[i].insertNonFull(k)
+
+	// Change root
+	t.root = s
+}
+
+// Node methods
+
+// traverse all nodes in a subtree rooted with this node
+func (n *Node) traverse() {
+
+	// There are n entries and n+1 children, traverse through n keys and n first children
+	for i := 0; i < n.numberOfKeys; i++ {
+		// If this is not a leaf, then traverse the subtree before printing the key
+		if !n.isLeaf {
+			n.children[i].traverse()
+		}
+		fmt.Printf(" %d", n.keys[i])
+	}
+
+	// Print the subtree rooted with the last child
+	if !n.isLeaf {
+		n.children[n.numberOfKeys].traverse()
+	}
+}
+
+// search k in the subtree rooted with this node
+func (n *Node) search(k int) *Node {
+
+	// Find the first entry greater than or equal to k
+	i := 0
+	for i < n.numberOfKeys && k > n.keys[i] {
+		i++
+	}
+
+	// Slots at or past numberOfKeys are stale or out of range: only match a live key
+	if i < n.numberOfKeys && n.keys[i] == k {
+		return n
+	}
+
+	// If the key is not found here and this is a leaf node
+	if n.isLeaf {
+		return nil
+	}
+
+	// Go to the appropriate child
+	return n.children[i].search(k)
+}
+
+// isFull reports whether the node already holds the maximum 2t-1 keys.
+func (n *Node) isFull() bool {
+	return n.numberOfKeys == 2*n.t-1
+}
+
+// insertNonFull inserts k in the subtree rooted with this node.
+// The node must not be full when this method is called.
+func (n *Node) insertNonFull(k int) {
+
+	// Initialize the index as the index of the rightmost element
+	i := n.numberOfKeys - 1
+
+	// If this is a leaf node
+	if n.isLeaf {
+		// Finds the location of the new key to be inserted
+		// Moves all greater keys to one place ahead
+		for i >= 0 && n.keys[i] > k {
+			n.keys[i+1] = n.keys[i]
+			i--
+		}
+
+		// Insert the new key at the found location
+		n.keys[i+1] = k
+		n.numberOfKeys++
+		return
+	}
+
+	// If this is not a leaf
+	// Finds the child which is going to have the new key
+	for i >= 0 && n.keys[i] > k {
+		i--
+	}
+
+	// Check if the found child is full
+	if n.children[i+1].isFull() {
+		// If the child is full, then split it
+		n.splitChild(i+1, n.children[i+1])
+
+		// After the split, the middle key of children[i+1] goes up and
+		// children[i+1] is split into two
+		// See which of those two is going to have the new key
+		if n.keys[i+1] < k {
+			i++
+		}
+	}
+
+	n.children[i+1].insertNonFull(k)
+}
+
+// splitChild splits the full child y, located at children[i], in two nodes
+// and moves its middle key up into this node, which must not be full.
+func (n *Node) splitChild(i int, y *Node) {
+
+	// Create a new node that will store (t-1) keys of y
+	z := newNode(y.t, y.isLeaf)
+	z.numberOfKeys = n.t - 1
+
+	// TODO: make optimisations
+	// Copy the last (t-1) keys of y to z
+	for j := 0; j < n.t-1; j++ {
+		z.keys[j] = y.keys[j+n.t]
+	}
+
+	// Copy the last t children of y to z
+	if !y.isLeaf {
+		for j := 0; j < n.t; j++ {
+			z.children[j] = y.children[j+n.t]
+		}
+	}
+
+	// Reduce the number of keys in y
+	y.numberOfKeys = n.t - 1
+
+	// Since this node is going to have a new child, create space for it
+	for j := n.numberOfKeys; j >= i+1; j-- {
+		n.children[j+1] = n.children[j]
+	}
+
+	// Link the new child to this node
+	n.children[i+1] = z
+
+	// A key of y will move to this node
+	// Find the location of the new key and move all greater keys ahead
+	for j := n.numberOfKeys - 1; j >= i; j-- {
+		n.keys[j+1] = n.keys[j]
+	}
+
+	// Copy the middle key of y to this node
+	n.keys[i] = y.keys[n.t-1]
+
+	// Increment the count of keys in this node
+	n.numberOfKeys++
+}
+
+// findKey returns the index of the first key that is greater than or equal to k
+func (n *Node) findKey(k int) int {
+
+	index := 0
+	for index < n.numberOfKeys && n.keys[index] < k {
+		index++
+	}
+	return index
+}
+
+// remove the key k from the sub-tree rooted with this node
+func (n *Node) remove(k int) error {
+
+	index := n.findKey(k)
+
+	// The key to be removed is in this node
+	if index < n.numberOfKeys && n.keys[index] == k {
+		if n.isLeaf {
+			return n.removeFromLeaf(index)
+		}
+		return n.removeFromNonLeaf(index)
+	}
+
+	// If this is a leaf, the key is not in the tree
+	if n.isLeaf {
+		return fmt.Errorf("The key %d does not exist in the tree", k)
+	}
+
+	isInLastChild := false
+	if index == n.numberOfKeys {
+		isInLastChild = true
+	}
+
+	// If the child where the key should be has less than t keys, we fill it
+	if n.children[index].numberOfKeys < n.t {
+		n.fill(index)
+	}
+
+	// If the last child has been merged, it must have been merged with the previous
+	// child and so we recurse on the (index-1)th child.
+	if isInLastChild && index > n.numberOfKeys {
+		return n.children[index-1].remove(k)
+	}
+
+	// We recurse on the (index)th child which now has at least t keys
+	return n.children[index].remove(k)
+}
+
+// removeFromLeaf removes the index-th key from this node which is a leaf node
+func (n *Node) removeFromLeaf(index int) error {
+
+	// Move all the keys after the index-th position one place backward
+	for i := index + 1; i < n.numberOfKeys; i++ {
+		n.keys[i-1] = n.keys[i]
+	}
+
+	n.numberOfKeys--
+	return nil
+}
+
+// removeFromNonLeaf removes the index-th key from this node which is not a leaf node
+func (n *Node) removeFromNonLeaf(index int) error {
+
+	k := n.keys[index]
+
+	// If the child that precedes k has at least t keys,
+	// find the predecessor of k in the subtree and replace k with it
+	// Recursively delete the predecessor in the child
+	if n.children[index].numberOfKeys >= n.t {
+		pred := n.getPred(index)
+		n.keys[index] = pred
+		return n.children[index].remove(pred)
+	}
+
+	// If the child has less than t keys, examine children[index+1]
+	// If it has at least t keys, find the successor of k in this subtree
+	// Replace k by its successor and recursively delete the successor in the subtree
+	if n.children[index+1].numberOfKeys >= n.t {
+		succ := n.getSucc(index)
+		n.keys[index] = succ
+		return n.children[index+1].remove(succ)
+	}
+
+	// Merge k and all of children[index+1] into children[index]
+	// Free children[index+1] and recursively delete k from children[index]
+	n.merge(index)
+	return n.children[index].remove(k)
+}
+
+// getPred returns the predecessor of keys[index]
+func (n *Node) getPred(index int) int {
+
+	// Keep moving to the rightmost node until we reach a leaf
+	current := n.children[index]
+	for !current.isLeaf {
+		current = current.children[current.numberOfKeys]
+	}
+
+	// Return the last key of the leaf
+	return current.keys[current.numberOfKeys-1]
+}
+
+// getSucc returns the successor of keys[index]
+func (n *Node) getSucc(index int) int {
+
+	// Keep moving to the leftmost node starting from children[index+1] until we reach a leaf
+	current := n.children[index+1]
+	for !current.isLeaf {
+		current = current.children[0]
+	}
+
+	// Return the first key of the leaf
+	return current.keys[0]
+}
+
+// fill child children[index] which has less than t keys
+func (n *Node) fill(index int) {
+
+	// If the previous child has more than t-1 keys, borrow a key from that child
+	if index != 0 && n.children[index-1].numberOfKeys >= n.t {
+		n.borrowFromPrev(index)
+		return
+	}
+
+	// If the next child has more than t-1 keys, borrow a key from that child
+	if index != n.numberOfKeys && n.children[index+1].numberOfKeys >= n.t {
+		n.borrowFromNext(index)
+		return
+	}
+
+	// Merge children[index] with its next sibling
+	if index != n.numberOfKeys {
+		n.merge(index)
+		return
+	}
+
+	// If this is the last child, merge with the previous sibling
+	n.merge(index - 1)
+}
+
+// borrowFromPrev takes a key from children[index-1] and inserts it in children[index]
+func (n *Node) borrowFromPrev(index int) {
+
+	child := n.children[index]
+	sibling := n.children[index-1]
+
+	// Moves all keys in children[index] one step ahead
+	for i := child.numberOfKeys - 1; i >= 0; i-- {
+		child.keys[i+1] = child.keys[i]
+	}
+
+	// If the child is not a leaf, move all its child pointers one step ahead
+	if !child.isLeaf {
+		for i := child.numberOfKeys; i >= 0; i-- {
+			child.children[i+1] = child.children[i]
+		}
+	}
+
+	// Sets child's first key equal to keys[index-1] from the current node
+	child.keys[0] = n.keys[index-1]
+
+	// Moves sibling's last child as children[index]'s first child
+	if !child.isLeaf {
+		child.children[0] = sibling.children[sibling.numberOfKeys]
+	}
+
+	// Moves the last key from the sibling up to the parent
+	n.keys[index-1] = sibling.keys[sibling.numberOfKeys-1]
+
+	child.numberOfKeys++
+	sibling.numberOfKeys--
+}
+
+// borrowFromNext takes a key from children[index+1] and inserts it in children[index]
+func (n *Node) borrowFromNext(index int) {
+
+	child := n.children[index]
+	sibling := n.children[index+1]
+
+	// keys[index] is inserted as the last key in children[index]
+	child.keys[child.numberOfKeys] = n.keys[index]
+
+	// Sibling's first child is inserted as the last child into children[index]
+	if !child.isLeaf {
+		child.children[child.numberOfKeys+1] = sibling.children[0]
+	}
+
+	// The first key from sibling is inserted into keys[index]
+	n.keys[index] = sibling.keys[0]
+
+	// TODO: optimize loops here
+	// Moving all keys in sibling one step behind
+	for i := 1; i < sibling.numberOfKeys; i++ {
+		sibling.keys[i-1] = sibling.keys[i]
+	}
+
+	// Moving the child pointers one step behind
+	if !sibling.isLeaf {
+		for i := 1; i <= sibling.numberOfKeys; i++ {
+			sibling.children[i-1] = sibling.children[i]
+		}
+	}
+
+	child.numberOfKeys++
+	sibling.numberOfKeys--
+}
+
+// merge children[index] with children[index+1]
+func (n *Node) merge(index int) {
+
+	child := n.children[index]
+	sibling := n.children[index+1]
+
+	// Pulls a key from the current node and inserts it into the (t-1)th position
+	child.keys[n.t-1] = n.keys[index]
+
+	// Copies the keys from children[index+1] to children[index] at the end
+	for i := 0; i < sibling.numberOfKeys; i++ {
+		child.keys[i+n.t] = sibling.keys[i]
+	}
+
+	// Copies the child pointers from children[index+1] to children[index]
+	if !child.isLeaf {
+		for i := 0; i <= sibling.numberOfKeys; i++ {
+			child.children[i+n.t] = sibling.children[i]
+		}
+	}
+
+	// Moves all keys after index in the current node one step before
+	// to fill the gap created by moving keys[index] to children[index]
+	for i := index + 1; i < n.numberOfKeys; i++ {
+		n.keys[i-1] = n.keys[i]
+	}
+
+	// Moves the child pointers after (index+1) in the current node one step before
+	// After this shift sibling is no longer reachable through a live child slot
+	for i := index + 2; i <= n.numberOfKeys; i++ {
+		n.children[i-1] = n.children[i]
+	}
+
+	// Updates the key count of child and the current node
+	child.numberOfKeys += sibling.numberOfKeys + 1
+	n.numberOfKeys--
+}
diff --git a/btree/btree_test.go b/btree/btree_test.go
new file mode 100644
index 0000000..7a5dac6
--- /dev/null
+++ b/btree/btree_test.go
@@ -0,0 +1,123 @@
+package btree
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestTree_Search(t *testing.T) {
+	tree := NewBtree(3)
+	tree.Insert(6)
+
+	k := 6
+	if tree.Search(k) == nil {
+		t.Errorf("Not present %d", k)
+	}
+
+	k = 15
+	if tree.Search(k) != nil {
+		t.Errorf("Present %d", k)
+	}
+}
+
+func TestTree_Remove(t *testing.T) {
+	tree := NewBtree(3)
+	toInsert := []int{10, 20, 5, 6, 12, 30, 7, 17}
+
+	for _, k := range toInsert {
+		tree.Insert(k)
+	}
+
+	tree.Remove(toInsert[0])
+	for _, k := range toInsert[1:] {
+		if tree.Search(k) == nil {
+			t.Errorf("Not present %d", k)
+		}
+	}
+
+	if tree.Search(toInsert[0]) != nil {
+		t.Errorf("Present %d", toInsert[0])
+	}
+
+}
+
+func TestTree_Insert(t *testing.T) {
+	tree := NewBtree(3)
+	toInsert := []int{10, 20, 5, 6, 12, 30, 7, 17}
+
+	// Test before insertion
+	for _, k := range toInsert {
+		if tree.Search(k) != nil {
+			t.Errorf("Present %d", k)
+		}
+	}
+
+	for _, k := range toInsert {
+		tree.Insert(k)
+	}
+
+	// Test after insertion
+	for _, k := range toInsert {
+		if tree.Search(k) == nil {
+			t.Errorf("Not present %d", k)
+		}
+	}
+}
+
+/*
+func TestTree_Traverse(t *testing.T) {
+	tree := NewBtree(3)
+
+	toInsert := []int{1, 3, 7, 10, 11, 13, 14, 15, 18, 16, 19, 24, 25, 26, 21, 4, 5, 20, 22, 2, 17, 12, 6}
+	for _, k := range toInsert {
+		tree.Insert(k)
+	}
+
+	tree.Traverse()
+	fmt.Println("\n 1 2 3 4 5 6 7 10 11 12 13 14 15 16 17 18 19 20 21 22 24 25 26")
+}
+*/
+
+func ExampleTree_Traverse() {
+	tree := NewBtree(3)
+
+	toInsert := []int{1, 3, 7, 10, 11, 13, 14, 15, 18, 16, 19, 24, 25, 26, 21, 4, 5, 20, 22, 2, 17, 12, 6}
+	for _, k := range toInsert {
+		tree.Insert(k)
+	}
+
+	tree.Traverse()
+	fmt.Println("")
+
+	tree.Remove(6)
+	tree.Traverse()
+	fmt.Println("")
+
+	tree.Remove(13)
+	tree.Traverse()
+	fmt.Println("")
+
+	tree.Remove(7)
+	tree.Traverse()
+	fmt.Println("")
+
+	tree.Remove(4)
+	tree.Traverse()
+	fmt.Println("")
+
+	tree.Remove(2)
+	tree.Traverse()
+	fmt.Println("")
+
+	tree.Remove(16)
+	tree.Traverse()
+
+	// Output:
+	// 1 2 3 4 5 6 7 10 11 12 13 14 15 16 17 18 19 20 21 22 24 25 26
+	// 1 2 3 4 5 7 10 11 12 13 14 15 16 17 18 19 20 21 22 24 25 26
+	// 1 2 3 4 5 7 10 11 12 14 15 16 17 18 19 20 21 22 24 25 26
+	// 1 2 3 4 5 10 11 12 14 15 16 17 18 19 20 21 22 24 25 26
+	// 1 2 3 5 10 11 12 14 15 16 17 18 19 20 21 22 24 25 26
+	// 1 3 5 10 11 12 14 15 16 17 18 19 20 21 22 24 25 26
+	// 1 3 5 10 11 12 14 15 17 18 19 20 21 22 24 25 26
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..3596aff
--- /dev/null
+++ b/main.go
@@ -0,0 +1,36 @@
+package main
+
+import (
+	"fmt"
+
+	"git.klmp200.net/klmp200/kvs/btree"
+)
+
+func main() {
+	t := btree.NewBtree(3)
+	t.Insert(10)
+	t.Insert(20)
+	t.Insert(5)
+	t.Insert(6)
+	t.Insert(12)
+	t.Insert(30)
+	t.Insert(7)
+	t.Insert(17)
+
+	fmt.Print("Traversal of the constructed tree is")
+	t.Traverse()
+
+	k := 6
+	if t.Search(k) != nil {
+		fmt.Print("\nPresent")
+	} else {
+		fmt.Print("\nNot Present")
+	}
+
+	k = 15
+	if t.Search(k) != nil {
+		fmt.Print("\nPresent")
+	} else {
+		fmt.Print("\nNot Present")
+	}
+}