The Binary Search Tree (BSTree) data structure
Agenda
- Binary Trees & Binary Search Trees: definitions
- Linked tree structure and Manual construction
- Recursive binary search tree functions
Binary Tree: def
- A binary tree is a structure that is either empty, or consists of a root node containing a value and references to a left and right sub-tree, which are themselves binary trees.
Naming nodes:
- The single node in a binary tree without a parent is the root node of the tree
- We say that a given node is the parent of its left and right child nodes; nodes with the same parent are called siblings
- If a node has no children we call it a leaf node; otherwise, we call it an internal node
Binary tree metrics (note: alternative defs are sometimes used!):
- The depth of a node is the number of nodes from the root of the tree to that node (inclusive)
- The height of a node is the number of nodes on the longest path from that node down to a leaf (inclusive)
Categorizing binary trees:
- A complete binary tree is one where all but the last level are filled, and in the last level leaves are as far to the left as possible
- A perfect binary tree is one where all internal nodes have 2 children, and all leaves have the same depth
- A balanced binary tree is … ?
Binary Search Tree (BSTree): def
- A binary search tree is a binary tree where the value contained in
every node is:
- greater than all values in its left subtree, and
- less than all values in its right subtree
Linked tree structure and Manual construction:
class Node:
    """A binary-tree node holding a value and links to two optional subtrees."""

    def __init__(self, val, left=None, right=None):
        self.val, self.left, self.right = val, left, right

    def __repr__(self):
        """Render the subtree rooted here, one value per line, in pre-order.

        Each value is prefixed with three dashes per level below this node,
        and every line (including the last one) ends with a newline.
        """
        pieces = []

        def collect(node, level):
            # An empty subtree contributes nothing to the output.
            if node:
                pieces.append("---" * level + str(node.val) + "\n")
                collect(node.left, level + 1)
                collect(node.right, level + 1)

        collect(self, 0)
        return "".join(pieces)
Recursive bstree functions
def tmin(t):
    """Return the smallest value in the binary search tree rooted at `t`.

    Relies on the BST ordering: the minimum is found by following left
    links until there are none left.

    Returns None when `t` is empty (None), matching what `tmax` does for an
    empty tree.
    """
    if not t:
        return None
    node = t
    while node.left:
        node = node.left
    return node.val
import sys
def max_with_none(*nums):
    """Return the largest argument that is not None.

    None arguments are ignored; if every argument is None (or no arguments
    are given) the result is None.
    """
    # Test against None explicitly: a truthiness test would treat falsy
    # values such as 0 as "missing" and produce a wrong maximum.
    present = [n for n in nums if n is not None]
    return max(present) if present else None
def tmax(t: Node):
    """Combine the root value of `t` with the maxima of both subtrees.

    Every node is visited, so no BST ordering is assumed. The three
    candidates are combined with `max_with_none`; an empty tree gives None.
    """
    if t:
        left_max = tmax(t.left)
        right_max = tmax(t.right)
        return max_with_none(t.val, left_max, right_max)
    return None
def tmaxbs(t: "Node"):
    """Return the largest value in the binary search tree rooted at `t`.

    Uses the BST ordering: the maximum is the right-most node, so only the
    chain of right links is followed instead of visiting every node.

    Returns None when `t` is empty (None).
    """
    if not t:
        return None
    node = t
    while node.right:
        node = node.right
    return node.val
def find(t, x):
    """Search the binary search tree rooted at `t` for the value `x`.

    Descends left when the current value is greater than `x` and right when
    it is smaller. Returns True on a match and False when an empty subtree
    is reached.
    """
    node = t
    while node:
        if node.val == x:
            return True
        if node.val > x:
            node = node.left
        elif node.val < x:
            node = node.right
        else:
            # Neither equal, greater nor smaller: no branch applies.
            return None
    return False
import builtins
# Bind the module-level name `max` to the built-in function.
# NOTE(review): presumably this undoes a shadowing of `max` elsewhere in the session — confirm.
max = builtins.max
def height(t):
    """Return the number of nodes on the longest root-to-leaf path of `t`.

    An empty tree has height 0; a single node has height 1.
    """
    if t:
        return 1 + max(height(t.left), height(t.right))
    return 0
- tree traversal (depth-first traversal)
- pre-order traversal
- post-order traversal
- in-order traversal
def traversal_pre(t):
    """Print the values of `t` in pre-order: node, left subtree, right subtree."""
    if not t:
        return
    print(t.val)
    traversal_pre(t.left)
    traversal_pre(t.right)
def traversal_post(t):
    """Print the values of `t` in post-order: left subtree, right subtree, node."""
    if not t:
        return
    traversal_post(t.left)
    traversal_post(t.right)
    print(t.val)
def traversal_in(t):
    """Print the values of `t` in in-order: left subtree, node, right subtree."""
    if not t:
        return
    traversal_in(t.left)
    print(t.val)
    traversal_in(t.right)
def mymap(t, f):
    """Call `f` on every value of the tree rooted at `t`, in pre-order.

    `f` is invoked for its side effects only; its return value is ignored.
    An empty tree (None) is accepted and results in no calls, in line with
    the `traversal_*` functions.
    """
    if not t:
        return
    f(t.val)
    mymap(t.left, f)
    mymap(t.right, f)
# Build a three-node tree by hand: root 3 with left child 1 and right child 5.
myt = Node(3, Node(1), Node(5))
# Evaluating the name displays the tree through Node.__repr__.
myt
3
---1
---5
# A taller tree: root 3, left leaf 2, right child 6 with the leaves 4 and 8.
myothert = Node(3, Node(2), Node(6, Node(4), Node(8)))
myothert
3
---2
---6
------4
------8
# A four-level tree: the node 5 has no left child, only the right subtree 15 -> (10, 20).
yetanothert = Node(4, Node(2, Node(1), Node(3)),
Node(5, None, Node(15, Node(10), Node(20))))
yetanothert
4
---2
------1
------3
---5
------15
---------10
---------20
def print_tree_stats(t):
    """Print the height and maximum value of `t`, followed by the tree itself."""
    summary = f"height: {height(t)} max_value: {tmax(t)}:\n{t}"
    print(summary)
# Report height, maximum value and layout for each of the hand-built trees.
print_tree_stats(myt)
print_tree_stats(myothert)
print_tree_stats(yetanothert)
height: 2 max_value: 5:
3
---1
---5

height: 3 max_value: 8:
3
---2
---6
------4
------8

height: 4 max_value: 20:
4
---2
------1
------3
---5
------15
---------10
---------20

- pre-order traversal
def traversal_somehow(f):
    """Apply the traversal function `f` to each sample tree, printing a label first.

    The trees are the module-level `myt`, `myothert` and `yetanothert`.
    """
    labelled_trees = (
        ("myt:\n", myt),
        ("myothert:\n", myothert),
        ("yetanothert:\n", yetanothert),
    )
    for label, tree in labelled_trees:
        print(label)
        f(tree)
class summer:
    """Running-sum accumulator whose `acc()` closure can be handed to a tree map."""

    def __init__(self):
        self.sm = 0  # running total of everything fed to the accumulator

    def acc(self):
        """Return a one-argument function that adds its argument to the total."""
        def insum(x):
            self.sm += x
        return insum

    def get_sum(self):
        """Return the total accumulated so far."""
        return self.sm
# Sum every value of `myt` by mapping the accumulator closure over the tree.
x = summer()
mymap(myt, x.acc())
print(f"sum is: {x.sm}")
# Map an anonymous function instead: print each value plus one, in the order mymap visits them.
mymap(myt, lambda x: print(x+1))
sum is: 9
4
2
6
# Look up the three values stored in `myt` (3, 5, 1) and one that is absent (2).
print(f"""find 3: {find(myt, 3)}
find 5: {find(myt, 5)}
find 1: {find(myt, 1)}
find 2: {find(myt, 2)}""")
find 3: True
find 5: True
find 1: True
find 2: False
BSTree API and implementation
- API
- Search
- Addition
- Removal
- Iteration / Traversal
API & Implementation
class BSTree:
    """A binary search tree of unique, mutually comparable values.

    Supports membership tests (`val in tree`), removal (`del tree[val]`),
    `len()`, ascending iteration and a level-by-level pretty printer.
    """

    class Node:
        """A tree node: a value plus optional left and right child nodes."""

        def __init__(self, val, left=None, right=None):
            self.val = val
            self.left = left
            self.right = right

        @staticmethod
        def max_with_none(*nums):
            """Return the largest argument that is not None (None if there is none)."""
            # Compare against None explicitly so that falsy values such as 0
            # count as real candidates instead of being treated as missing.
            present = [n for n in nums if n is not None]
            return max(present) if present else None

        def tmax(self):
            """Return the largest value in the subtree rooted at this node."""
            # BST ordering: the maximum sits at the end of the right spine.
            node = self
            while node.right:
                node = node.right
            return node.val

        def __repr__(self):
            # Wrap this node in a temporary tree to reuse the pretty printer.
            b = BSTree()
            b.root = self
            return b.to_pretty_string()

        def __str__(self):
            return self.__repr__()

    def __init__(self):
        self.size = 0     # number of values currently stored
        self.root = None  # root node, or None while the tree is empty

    def add(self, val):
        """Adds `val` to this tree while maintaining BSTree properties.

        Raises AssertionError if `val` is already in the tree.
        """
        assert val not in self, "value is already in the tree"
        new_node = BSTree.Node(val)
        if self.root is None:
            self.root = new_node
        else:
            node = self.root
            while True:
                if node.val > val:
                    if node.left is None:
                        node.left = new_node
                        break
                    node = node.left
                elif node.val < val:
                    if node.right is None:
                        node.right = new_node
                        break
                    node = node.right
                else:
                    # Only reachable when assertions are disabled: the value
                    # is already stored, so leave the tree and size untouched.
                    return
        self.size += 1

    def __contains__(self, val):
        """Returns `True` if val is in this tree and `False` otherwise."""
        node = self.root
        while node is not None:
            if node.val == val:
                return True
            node = node.left if node.val > val else node.right
        return False

    def __delitem__(self, val):
        """Removes `val` from this tree while maintaining BSTree properties.

        Raises AssertionError if `val` is not in the tree.
        """
        assert val in self, "value is not in the tree"

        def rec_del(parent, isleft, t, val):
            if t.val > val:
                rec_del(t, True, t.left, val)
            elif t.val < val:
                rec_del(t, False, t.right, val)
            elif t.left and t.right:
                # Two children: take over the largest value of the left
                # subtree, then remove that value from the left subtree.
                replaceval = t.left.tmax()
                t.val = replaceval
                rec_del(t, True, t.left, replaceval)
            elif t.left:
                # Only a left child: this node takes over the child's contents.
                t.val, t.left, t.right = t.left.val, t.left.left, t.left.right
            elif t.right:
                # Only a right child: this node takes over the child's contents.
                t.val, t.left, t.right = t.right.val, t.right.left, t.right.right
            elif parent is None:
                # A leaf without a parent is the root: the tree becomes empty.
                self.root = None
            elif isleft:
                parent.left = None
            else:
                parent.right = None

        rec_del(None, None, self.root, val)
        self.size -= 1

    def __iter__(self):
        """Returns an iterator over all the values in the tree, in ascending order."""
        def rec_iter(r):
            if r:
                yield from rec_iter(r.left)
                yield r.val
                yield from rec_iter(r.right)

        yield from rec_iter(self.root)

    def __len__(self):
        return self.size

    def to_pretty_string(self, width=64):
        """Return the tree as text, one line per level.

        Level `k` is split into cells of `width // 2**k` characters, each
        holding a centred value or `-` for a missing node. Every level up to
        the tree's height is padded with placeholders so positions line up.
        An empty tree is rendered as a single centred `-`.
        """
        height = self.height()
        rows = []
        level = 0
        level_nodes = [self.root]
        while True:
            cell_width = width // 2**level
            rows.append(''.join(
                '{val:^{width}}'.format(val=n.val if n else '-', width=cell_width)
                for n in level_nodes))
            if level >= height - 1:
                break
            # Expand to the next level; a missing node yields two placeholders.
            next_nodes = []
            for n in level_nodes:
                if n:
                    next_nodes.extend((n.left, n.right))
                else:
                    next_nodes.extend((None, None))
            level_nodes = next_nodes
            level += 1
        return '\n'.join(rows)

    def pprint(self, width=64):
        """Attempts to pretty-print this tree's contents."""
        print(self.to_pretty_string(width))

    def height(self):
        """Returns the height of the longest branch of the tree.

        Height is counted in nodes: 0 for an empty tree, 1 for a lone root.
        """
        def height_rec(t):
            if not t:
                return 0
            return 1 + max(height_rec(t.left), height_rec(t.right))

        return height_rec(self.root)
# Assemble a small tree by hand (root 5, children 2 and 10), bypassing add().
t = BSTree()
t.root = BSTree.Node(5,
left=BSTree.Node(2),
right=BSTree.Node(10))
# size is maintained by add(), so it has to be set manually here.
t.size = 3
t.pprint()
5
2 10
# Height is counted in nodes: the root plus one level of children gives 2.
print(t.height())
2
Example API Usage
- let us use the implementation of the API
Search
# Same hand-built tree as before (root 5, children 2 and 10).
t = BSTree()
t.root = BSTree.Node(5,
left=BSTree.Node(2),
right=BSTree.Node(10))
t.size = 3
# The `in` operator dispatches to BSTree.__contains__.
5 in t
True
Add
import random
# Insert the values 0..4 in random order; the resulting shape depends on the shuffle.
t = BSTree()
vals = list(range(5))
random.shuffle(vals)
for x in vals:
    t.add(x)
t.pprint()
1
0 2
-
-
- 3
-
-
-
- - - - - 4
-
Removal
# Insertion order 10, 5, 15, 2, 17 gives root 10 with left 5 -> (2, -) and right 15 -> (-, 17).
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
t.pprint()
10
5 15
2 - - 17
# Case 1: 2 is a leaf, so it is simply unlinked from its parent 5.
del t[2]
t.pprint()
10
5 15
-
-
- 17
-
# Rebuild the same five-value tree: root 10, left 5 -> (2, -), right 15 -> (-, 17).
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
t.pprint()
10
5 15
2 - - 17
# Case 2: 5 has only a left child (2), which takes its place.
del t[5]
t.pprint()
10
2 15
-
-
- 17
-
# Rebuild the same five-value tree: root 10, left 5 -> (2, -), right 15 -> (-, 17).
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
t.pprint()
10
5 15
2 - - 17
# Case 3: 15 has only a right child (17), which takes its place.
del t[15]
t.pprint()
10
5 17
2 - - -
# Rebuild the same five-value tree: root 10, left 5 -> (2, -), right 15 -> (-, 17).
t = BSTree()
for x in [10, 5, 15, 2, 17]:
    t.add(x)
t.pprint()
10
5 15
2 - - 17
# Case 4: the root 10 has two children; it is replaced by the largest value
# of its left subtree (5), and that value is then removed from the left subtree.
del t[10]
t.pprint()
5
2 15
-
-
- 17
-
# A larger, five-level tree: the longest branch is 10 -> 5 -> 7 -> 9 -> 8.
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.add(x)
t.pprint()
10
5 15
2 7 12 18
1 - - 9 - - - -
-
-
-
-
-
- 8 - - - - - - - - -
-
-
-
-
# 15 has two children (12 and 18): it takes over 12, the largest value of its
# left subtree, and the leaf 12 is removed.
del t[15]
t.pprint()
10
5 12
2 7 - 18
1 - - 9 - - - -
-
-
-
-
-
- 8 - - - - - - - - -
-
-
-
-
# Rebuild the ten-value tree; the longest branch is 10 -> 5 -> 7 -> 9 -> 8.
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.add(x)
t.pprint()
10
5 15
2 7 12 18
1 - - 9 - - - -
-
-
-
-
-
- 8 - - - - - - - - -
-
-
-
-
# 5 has two children (2 and 7): it takes over 2, the largest value of its left
# subtree; the old node 2 had only a left child (1), which moves up.
del t[5]
t.pprint()
10
2 15
1 7 12 18
-
-
- 9 - - - -
-
-
-
-
-
-
- 8 - - - - - - - - -
-
-
-
-
# Rebuild the ten-value tree; the longest branch is 10 -> 5 -> 7 -> 9 -> 8.
t = BSTree()
for x in [10, 5, 2, 7, 9, 8, 1, 15, 12, 18]:
    t.add(x)
t.pprint()
10
5 15
2 7 12 18
1 - - 9 - - - -
-
-
-
-
-
- 8 - - - - - - - - -
-
-
-
-
# Removing the root: 10 takes over 9, the largest value of its left subtree;
# the old node 9 had only a left child (8), which moves up into its place.
del t[10]
t.pprint()
9
5 15
2 7 12 18
1 - - 8 - - - -
Iteration / Traversal
import random
# Insert 0..19 in random order, then iterate: BSTree.__iter__ yields the
# values in ascending order regardless of the insertion order.
t = BSTree()
vals = list(range(20))
random.shuffle(vals)
for x in vals:
    t.add(x)
for x in t:
    print(x)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19