Linked Structures
Agenda
- Motives
- Objectives
- Mechanisms
- Linked Data Structures
0. Import arraylists for demonstration
The ArrayList
data structure
- Array API:
  - create an array of size `n`
  - access the element at position `i`
  - set the element at position `i`
  - get the length of the array: `len(array)`
- create array of size
class MyArray:
    """Fixed-capacity array exposing only the primitive array operations.

    The capacity is chosen at construction time and never changes; every
    slot initially holds ``None``.
    """

    def __init__(self, n):
        """Allocate storage for exactly ``n`` elements."""
        self.data = [None] * n
        self.len = n

    def _check(self, idx):
        """Assert that ``idx`` is an int that lies below the capacity."""
        assert isinstance(idx, int) and idx < self.len

    def __getitem__(self, idx):
        """Return the element stored in slot ``idx`` (``x = self[idx]``)."""
        self._check(idx)
        return self.data[idx]

    def __setitem__(self, idx, value):
        """Store ``value`` in slot ``idx`` (``self[idx] = value``)."""
        self._check(idx)
        self.data[idx] = value

    def __len__(self):
        """Return the fixed capacity (``len(self)``)."""
        return self.len

    def __repr__(self):
        """Render all slots as ``[a,b,c]`` to support inspection."""
        return "[" + ",".join(map(str, self.data)) + "]"
class MyActualArrayList:
    """Growable list built on top of the fixed-size ``MyArray``.

    ``self.data`` is the backing array (its length is the capacity) and
    ``self.len`` is the number of slots that currently hold list elements.
    """

    def __init__(self, n=10):
        """Create an empty list whose backing array has ``n`` slots."""
        self.data = MyArray(n)
        self.len = 0

    def _check_index(self, idx):
        """Assert that ``idx`` addresses a slot that is in use.

        Negative indices are rejected: the backing array would resolve them
        against its capacity rather than against ``self.len`` and thereby
        touch unused slots.
        """
        assert isinstance(idx, int) and 0 <= idx < self.len

    def append(self, value):  # amortized O(1); O(n) only when growing
        """Add ``value`` after the current last element."""
        capacity = len(self.data)
        if capacity <= self.len:
            # Double the capacity; start at 1 so that a list created with
            # capacity 0 can still grow (2 * 0 would stay 0).
            grown = MyArray(2 * capacity if capacity else 1)
            for i in range(self.len):
                grown[i] = self.data[i]
            self.data = grown
        self.data[self.len] = value
        self.len += 1

    def __getitem__(self, idx):  # O(1)
        """Implements `x = self[idx]`"""
        self._check_index(idx)
        return self.data[idx]

    def __setitem__(self, idx, value):  # O(1)
        """Implements `self[idx] = x`"""
        self._check_index(idx)
        self.data[idx] = value

    def __delitem__(self, idx):  # O(n - idx): later elements shift left
        """Implements `del self[idx]`"""
        self._check_index(idx)
        for i in range(idx + 1, self.len):
            self.data[i - 1] = self.data[i]
        self.len -= 1
        # Clear the vacated slot so it does not keep a stale reference.
        self.data[self.len] = None

    def __len__(self):
        """Implements `len(self)`"""
        return self.len

    def __repr__(self):
        """Supports inspection: shows only the slots that are in use."""
        return repr(self.data.data[:self.len])
1. Motives
- deleting the first element of an array list takes linear time
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from timeit import timeit
def time_array_front_insert_delete(n):
    """Time 1000 rounds of inserting at, then deleting from, the front of a list.

    The measured list is created once in the setup and initially holds the
    integers ``0 .. n-1``. Returns the total time in seconds as reported by
    ``timeit``.
    """
    statement = 'lst.insert(0, None) ; del lst[0]'
    setup = f'lst = list(range({n}))'
    return timeit(statement, setup, number=1000)
ns = np.linspace(100, 10000, 50)
plt.plot(ns, [time_array_front_insert_delete(int(n)) for n in ns], 'ro')
plt
<module 'matplotlib.pyplot' from '/Users/lord_pretzel/.pyenv/versions/3.9.0/lib/python3.9/site-packages/matplotlib/pyplot.py'>
- however, as discussed, deleting from the end is \(O(1)\) both in our `MyActualArrayList` implementation and in the built-in Python list
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from timeit import timeit
def time_array_end_delete(n):
    """Time repeated append-then-delete at the end of a built-in list.

    The measured list is created once in the setup with the integers
    ``0 .. n-1``. Returns the total time in seconds as reported by ``timeit``.
    """
    # Index -1: after the append the list holds n + 1 elements, so index
    # n - 1 would be the second-to-last element rather than the last one.
    return timeit('lst.append(None) ; del lst[-1]',
                  'lst = list(range({}))'.format(n),
                  # Repeat often enough that the linear-time list creation in
                  # the setup does not dominate the measurement.
                  number=100000)
ns = np.linspace(100, 10000, 50)
plt.plot(ns, [time_array_end_delete(int(n)) for n in ns], 'ro')
plt
<module 'matplotlib.pyplot' from '/Users/lord_pretzel/.pyenv/versions/3.9.0/lib/python3.9/site-packages/matplotlib/pyplot.py'>
from timeit import timeit
# create the list upfront to not measure time of append
max = 10000
mylst = MyActualArrayList()
for i in range(0,max):
mylst.append(1)
# report repeated insertion / deletion at the end
def time_our_array_list_end_delete(n):
    """Time repeated append-then-delete at the end of a ``MyActualArrayList``.

    Uses the module-level ``mylst`` and resets its ``len`` attribute to ``n``
    in the setup, so ``mylst`` must have been filled beforehand.
    Returns the total time in seconds as reported by ``timeit``.
    """
    # Index n: after the append the list holds n + 1 elements, so the last
    # element sits at index n (index n - 1 would be the second-to-last).
    return timeit('lst.append(None); del lst[{}]'.format(n),
                  'from __main__ import MyActualArrayList; '
                  'lst = mylst; '
                  'lst.len = {}'.format(n),
                  globals=globals(),
                  number=10000)
ns = np.linspace(100, max, 50)
plt.plot(ns, [time_our_array_list_end_delete(int(n)) for n in ns], 'ro')
plt
<module 'matplotlib.pyplot' from '/Users/lord_pretzel/.pyenv/versions/3.9.0/lib/python3.9/site-packages/matplotlib/pyplot.py'>
- note that this is constant time!
Runtime of concatenating lists
# consider:
def concatenate(arr1, arr2):
    """Concatenates the contents of arr1 and arr2 as efficiently (time-wise)
    as possible, so that the resulting structure can be used to index all
    combined elements (arr1's followed by arr2's).

    Option 1: append the elements of arr2 to arr1 one at a time. arr1 is
    modified in place and returned; the work is one append per element of arr2.
    """
    for item in arr2:
        arr1.append(item)
    return arr1
def concatenate2(arr1, arr2):
    """Option 2: extend arr1 in place with all elements of arr2.

    Delegates to ``arr1.extend`` and returns the (modified) arr1.
    """
    arr1.extend(arr2)
    return arr1
def concatenate3(arr1, arr2):
    """Option 3: return ``arr1 + arr2``.

    The result is whatever the ``+`` operator of arr1 produces; the function
    itself does not modify either argument.
    """
    combined = arr1 + arr2
    return combined
- each of these options takes time linear in the length of the input lists
%matplotlib inline import matplotlib.pyplot as plt import numpy as np from timeit import timeit def time_array_front_insert_delete(n): return timeit('for x in lst2: lst1.append(x)', 'lst1 = list(range({})); lst2 = list(range({}))'.format(n,n), number=1000) ns = np.linspace(100, 10000, 50) plt.plot(ns, [time_array_front_insert_delete(int(n)) for n in ns], 'ro') plt
<module 'matplotlib.pyplot' from '/Users/lord_pretzel/.pyenv/versions/3.9.0/lib/python3.9/site-packages/matplotlib/pyplot.py'>
2. Objectives
We would like a new data storage mechanism for constructing data structures that:
- does not require monolithic, contiguous memory allocation
- allows individual elements to be flexibly and efficiently reorganized
- e.g., maybe even concatenation could be efficient
- and preserves the ability to locate (e.g., via position) and iterate over elements
3. Mechanisms
3.1. Two-Element Lists
# data items
i1 = 'lions'
i2 = 'tigers'
i3 = 'bears'
i4 = 'oh, my'
[i1, i2, i3, i4]
['lions', 'tigers', 'bears', 'oh, my']
# creating individual "links"
l4 = [i4,None]
l3 = [i3,l4]
l2 = [i2,l3]
l1 = [i1,l2]
l1
['lions', ['tigers', ['bears', ['oh, my', None]]]]
# link-ing them together
# iteration
def printlist(head):
    """Print the value of every cell, one per line, starting at ``head``.

    Each cell is a two-element list ``[value, next_cell]``; the last cell has
    ``None`` as its ``next_cell``. Passing ``None`` prints nothing.
    """
    cur = head
    # Identity test against None: the cells are lists, so `!=` would go
    # through list comparison instead of simply checking for the end marker.
    while cur is not None:
        print(cur[0])
        cur = cur[1]
printlist(l1)
lions tigers bears oh, my
# prepending
i0 = 'walruses'
l0 = [i0,l1]
head = l0
printlist(head)
walruses lions tigers bears oh, my
def prepend_list(head, el):  # O(1)
    """Return a new cell ``[el, head]`` that becomes the head of the list.

    The existing cells are not copied or modified; the new cell simply links
    to the old head.
    """
    return [el, head]
newhead = prepend_list(l1, 'walruses')
printlist(newhead)
walruses lions tigers bears oh, my
# index access
def get_linked_list(head, pos):  # O(pos)
    """Return the value stored at position ``pos`` of a two-element-list chain.

    Args:
        head: first cell ``[value, next_cell]`` of the list, or ``None`` for
            an empty list.
        pos: zero-based position of the wanted element.

    Raises:
        IndexError: if ``pos`` is negative or not smaller than the number of
            cells (this includes every position of an empty list).
    """
    if pos < 0:
        # A negative position would otherwise skip the loop and silently
        # return the first element.
        raise IndexError(pos)
    cur = head
    for _ in range(pos):
        if cur is None:
            break
        cur = cur[1]
    if cur is None:
        # Walked off the end, or the list was empty to begin with.
        raise IndexError(pos)
    return cur[0]
get_linked_list(newhead,3)
'bears'
# insertion
def insert_linked_list(head, pos, el):  # O(pos)
    """Insert ``el`` directly *after* the cell at position ``pos``.

    The new element therefore ends up at position ``pos + 1``; the list is
    modified in place and nothing is returned. Because the new cell is always
    linked behind an existing one, this cannot insert in front of ``head``.

    Raises:
        IndexError: if the walk to position ``pos`` runs off the end.
    """
    cell = head
    for _ in range(pos):
        cell = cell[1]
        if not cell:
            raise IndexError()
    # Splice the new cell in between `cell` and its former successor.
    cell[1] = [el, cell[1]]
i2_5 = 'elephants'
for i in range(0,4):
insert_linked_list(l0,2,i2_5) # did run it 4 times
printlist(head)
walruses lions tigers elephants elephants elephants elephants bears oh, my
# find cell
def get_cell_at_pos(head, pos):
    """Return the cell (not the value) reached after ``pos`` steps from ``head``.

    No bounds checking is done: for ``pos <= 0`` the head itself is returned,
    and when ``pos`` equals the number of cells the result is ``None``.
    """
    cell = head
    for _ in range(pos):
        cell = cell[1]
    return cell
# deletion
def delete_linked_list(head, pos):  # O(pos)
    """Remove the cell at position ``pos`` and return the head of the result.

    For ``pos > 0`` the list is modified in place and ``head`` itself is
    returned, so callers that ignore the return value keep working. Deleting
    position 0 cannot be done in place (the caller holds the reference to the
    first cell), so in that case the second cell is returned as the new head
    and the caller has to use the return value.

    Raises:
        IndexError: if ``pos`` is negative or beyond the last cell.
    """
    if pos < 0 or head is None:
        raise IndexError(pos)
    if pos == 0:
        return head[1]
    # Walk to the cell just before the one that is to be removed.
    prev = head
    for _ in range(pos - 1):
        prev = prev[1]
        if prev is None:
            raise IndexError(pos)
    victim = prev[1]
    if victim is None:
        raise IndexError(pos)
    prev[1] = victim[1]  # unlink the victim
    return head
delete_linked_list(head,3)
printlist(head)
walruses lions tigers elephants elephants elephants bears oh, my
delete_linked_list(head,3)
delete_linked_list(head,3)
delete_linked_list(head,3)
printlist(head)
walruses lions tigers bears oh, my
3.2. “Link” objects
class Link:
    """One cell of a singly linked chain: a value plus a reference to the next cell."""

    def __init__(self, val, next=None):
        """Store ``val`` and the following cell (``None`` marks the end)."""
        self.val = val
        self.next = next

    def __str__(self):
        """Same text as ``repr(self)``."""
        return repr(self)

    def __repr__(self):
        """Render the whole chain as nested ``Link(value,Link(...))`` text.

        The chain is walked with a loop instead of recursing through every
        ``next`` reference, so long chains do not hit the recursion limit.
        """
        pieces = []
        node = self
        while isinstance(node, Link):
            # repr() rather than val.__repr__(): the direct dunder call
            # fails when the stored value is itself a class.
            pieces.append(f"Link({node.val!r},")
            node = node.next
        # Whatever ends the chain: a falsy terminator prints as None.
        pieces.append(repr(node) if node else "None")
        pieces.append(")" * (len(pieces) - 1))
        return "".join(pieces)
# manually constructing a list
head = Link('lions',Link('bear',Link('fleas',None)))
print(head)
Link('lions',Link('bear',Link('fleas',None)))
# prepending
def prepend(l, val):
    """Return a new ``Link`` holding ``val`` whose successor is the chain ``l``.

    ``l`` may be ``None`` (empty chain); it is not modified.
    """
    new_head = Link(val, l)
    return new_head
l = None
for x in range(10):
l = prepend(l, x)
l
Link(9,Link(8,Link(7,Link(6,Link(5,Link(4,Link(3,Link(2,Link(1,Link(0,None))))))))))
# iterator
def link_iterator(l):
    """Yield the value of every link in the chain starting at ``l``.

    ``l`` may be ``None``, in which case nothing is yielded.
    """
    cur = l
    # Test the current link, not its successor: checking `cur.next` would
    # stop before yielding the last value and fail on an empty chain.
    while cur is not None:
        yield cur.val
        cur = cur.next
for x in link_iterator(l):
print(x)
9 8 7 6 5 4 3 2 1
# iteration based on a recursive pattern
def link_iterator_rec(l):
    """Yield all values of the chain ``l`` using a recursive pattern.

    The value of the first link is yielded, then the values of the rest of
    the chain are delegated to a recursive call. A falsy ``l`` ends the
    recursion.
    """
    if not l:
        return
    yield l.val
    yield from link_iterator_rec(l.next)
for x in link_iterator_rec(l):
print(x)
9 8 7 6 5 4 3 2 1 0
4. Linked Data Structures
4.1 Linked List
class LinkedList:
    """Singly linked list that tracks its head, its tail and its length.

    ``head`` and ``tail`` reference the first and last ``Link`` (both are
    ``None`` for an empty list) and ``len`` caches the number of elements.
    Every mutating method keeps all three consistent.
    """

    class Link:
        """One cell: a value plus a reference to the next cell (or ``None``)."""

        def __init__(self, val, next=None):
            self.val = val
            self.next = next

    def __init__(self):
        """Create an empty list."""
        self.head = None
        self.tail = None
        self.len = 0

    def __len__(self):  # O(1): the length is cached
        """Implements ``len(self)``."""
        return self.len

    def normalize_index(self, i):
        """Translate a possibly negative index into a position ``0 .. len-1``.

        Negative indices count from the back of the list.

        Raises:
            IndexError: if ``i`` is outside ``-len(self) .. len(self) - 1``.
        """
        size = len(self)
        if not -size <= i < size:
            raise IndexError("LinkedList index out of range")
        return i + size if i < 0 else i

    def find_link(self, pos):  # O(n)
        """Return the ``Link`` at non-negative position ``pos``.

        Raises:
            IndexError: if ``pos`` is not in ``0 .. len(self) - 1``.
        """
        if not 0 <= pos < len(self):
            raise IndexError("LinkedList index out of range")
        cur = self.head
        for _ in range(pos):
            cur = cur.next
            if cur is None:
                # Only reachable if the cached length and the chain disagree.
                raise IndexError("LinkedList index out of range")
        return cur

    def __getitem__(self, index):  # O(n)
        """Implements ``x = self[index]``; negative indices are supported."""
        return self.find_link(self.normalize_index(index)).val

    def __setitem__(self, index, val):  # O(n)
        """Implements ``self[index] = val``; negative indices are supported."""
        self.find_link(self.normalize_index(index)).val = val

    def prepend(self, val):  # O(1)
        """Add ``val`` in front of the first element."""
        was_empty = self.head is None
        self.head = self.Link(val, self.head)
        if was_empty:
            # The only element is both the first and the last one.
            self.tail = self.head
        self.len += 1

    def append(self, val):  # O(1) thanks to the tail reference
        """Add ``val`` behind the last element."""
        cell = self.Link(val)
        if self.head is None:
            self.head = cell
        else:
            self.tail.next = cell
        self.tail = cell
        self.len += 1

    def insert(self, pos, val):  # O(n)
        """Insert ``val`` so that it ends up at position ``pos``.

        ``pos`` may range from ``-len(self)`` to ``len(self)`` inclusive;
        ``pos == len(self)`` appends at the end, which also makes inserting
        into an empty list possible. Negative positions count from the back.

        Raises:
            IndexError: if ``pos`` is outside that range.
        """
        size = len(self)
        if not -size <= pos <= size:
            raise IndexError("LinkedList index out of range")
        npos = pos + size if pos < 0 else pos
        if npos == 0:
            self.prepend(val)
        elif npos == size:
            self.append(val)
        else:
            before = self.find_link(npos - 1)  # call to find_link is O(n)
            before.next = self.Link(val, before.next)
            self.len += 1

    def __delitem__(self, pos):  # O(n)
        """Implements ``del self[pos]``; negative indices are supported."""
        npos = self.normalize_index(pos)
        if npos == 0:
            self.head = self.head.next
            if self.head is None:
                # The list became empty.
                self.tail = None
        else:
            before = self.find_link(npos - 1)  # call to find_link is O(n)
            removed = before.next
            before.next = removed.next
            if removed is self.tail:
                self.tail = before
        self.len -= 1

    def __iter__(self):
        """Yield the stored values from first to last."""
        cur = self.head
        while cur:
            yield cur.val
            cur = cur.next

    def concat(self, other):  # O(1)
        """Attach the elements of ``other`` behind the elements of this list.

        The links of ``other`` are shared, not copied, so ``other`` should
        not be used independently afterwards.

        Raises:
            ValueError: if ``other`` is this very list (linking a list to
                itself would create a cycle).
        """
        if other is self:
            raise ValueError("cannot concat a LinkedList with itself")
        if other.head is None:
            return  # nothing to attach; keep our own tail
        if self.head is None:
            self.head = other.head
        else:
            self.tail.next = other.head
        self.tail = other.tail
        self.len += len(other)

    def reverse(self):  # O(n)
        """Return a new list with the elements in reverse order.

        For example ``[1,2,3] -> [3,2,1]``. This list is left unchanged.
        """
        result = type(self)()
        for val in self:
            # Prepending while walking forward reverses the order.
            result.prepend(val)
        return result

    def reserve(self):  # O(n)
        """Return the reversed list; kept under its original (misspelled) name."""
        return self.reverse()

    def __repr__(self):
        """Supports inspection: ``[a, b, c]``."""
        return '[' + ', '.join(str(x) for x in self) + ']'
l = LinkedList()
for x in range(10):
l.prepend(x)
l
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
l2 = LinkedList()
for x in range(10):
l2.prepend(x)
del l2[0]
del l2[3]
l2
[8, 7, 6, 4, 3, 2, 1, 0]
4.2 Binary Tree
class BinaryLink:
    """Node of a binary tree: a value plus optional left and right children."""

    def __init__(self, val, left=None, right=None):
        """Store the value and both child references (``None`` = no child)."""
        self.val, self.left, self.right = val, left, right
# manual construction of a "tree" representing the expression ((5+3)*(8-4))
t = BinaryLink('*')
t.left = BinaryLink('+')
t.left.left = BinaryLink('5')
t.left.right = BinaryLink('3')
t.right = BinaryLink('-')
t.right.left = BinaryLink('8')
t.right.right = BinaryLink('4')
def print_expr_tree(t):
    """Print the expression tree ``t`` in infix notation without a newline.

    A node whose value is not made of digits is wrapped in parentheses, with
    its left subtree printed before the value and its right subtree after it.
    A falsy ``t`` prints nothing.
    """
    if not t:
        return
    parenthesize = not t.val.isdigit()
    if parenthesize:
        print('(', end='')
    print_expr_tree(t.left)
    print(t.val, end='')
    print_expr_tree(t.right)
    if parenthesize:
        print(')', end='')
print_expr_tree(t)
((5+3)*(8-4))
4.3 N-ary Tree
class NaryLink:
    """Tree node with a value and a fixed number of child slots."""

    def __init__(self, val, n=2):
        """Store ``val`` and create ``n`` child slots, all initially ``None``."""
        self.val = val
        self.children = [None] * n

    def __getitem__(self, idx):
        """Return the child in slot ``idx`` (``node[idx]``)."""
        return self.children[idx]

    def __setitem__(self, idx, val):
        """Place ``val`` in child slot ``idx`` (``node[idx] = val``)."""
        self.children[idx] = val

    def __iter__(self):
        """Yield every child slot in order, including the empty ones."""
        yield from self.children
root = NaryLink('Kingdoms', n=5)
root[0] = NaryLink('Animalia', n=35)
root[1] = NaryLink('Plantae', n=12)
root[2] = NaryLink('Fungi', n=7)
root[3] = NaryLink('Protista', n=5)
root[4] = NaryLink('Monera', n=5)
root[2][0] = NaryLink('Chytridiomycota')
root[2][1] = NaryLink('Blastocladiomycota')
root[2][2] = NaryLink('Glomeromycota')
root[2][3] = NaryLink('Ascomycota')
root[2][4] = NaryLink('Basidiomycota')
root[2][5] = NaryLink('Microsporidia')
root[2][6] = NaryLink('Neocallimastigomycota')
root[0][0] = NaryLink('Mamalia')
root[0][1] = NaryLink('Fish')
def tree_iter(root):
    """Yield the values of the tree rooted at ``root``, parent before children.

    Children are obtained by iterating over the node; falsy children (such
    as empty ``None`` slots) are skipped.
    """
    if not root:
        return
    yield root.val
    for child in root:
        yield from tree_iter(child)
def treepp(root, depth=0):
    """Pretty-print the tree rooted at ``root``, one value per line.

    Every line is indented with ``depth`` tab characters; children are
    printed one level deeper than their parent. Falsy nodes print nothing.
    """
    if not root:
        return
    indent = '\t' * depth
    print(indent + root.val)
    for child in root:
        treepp(child, depth + 1)
treepp(root)
Kingdoms Animalia Mamalia Fish Plantae Fungi Chytridiomycota Blastocladiomycota Glomeromycota Ascomycota Basidiomycota Microsporidia Neocallimastigomycota Protista Monera