CS331 - Datastructures and Algorithms

Version 1

Course webpage for CS331

Priority Queues

Agenda

  1. Motives
  2. Naive implementation
  3. Heaps
    • Mechanics
    • Implementation
    • Run-time Analysis
  4. Heapsort

1. Motives

  • a priority queue stores numbers (or, more generally, items that have a comparable value)
  • add(item) - insert item into the queue
  • pop_max() - return and remove item with maximum value
  • max() - return item with maximum value

2. Naive implementation

class PriorityQueue:
    """Naive priority queue backed by a list that is kept in ascending order.

    The largest element is always the last list entry, so ``max`` and
    ``pop_max`` only touch the end of the list, while ``add`` re-sorts the
    whole list on every insertion.
    """

    def __init__(self):
        # Ascending list of all stored items.
        self.data = []

    def add(self, x):
        """Insert ``x`` and restore ascending order by re-sorting."""
        self.data.append(x)
        self.data = sorted(self.data)

    def max(self):
        """Return the largest item without removing it."""
        return self.data[-1]

    def pop_max(self):
        """Remove and return the largest item."""
        largest = self.data[-1]
        self.data.pop()
        return largest

    def __bool__(self):
        """A queue is truthy exactly when it holds at least one item."""
        return bool(self.data)

    def __len__(self):
        """Number of stored items."""
        return len(self.data)

    def __repr__(self):
        """Show the underlying sorted list."""
        return f"{self.data!r}"
# Demo: fill a fresh queue with 10 random integers drawn from range(100).
pq = PriorityQueue()
import random
for _ in range(10):
    pq.add(random.randrange(100))
# Bare expression: in a notebook cell this displays the queue via __repr__.
pq
[9, 14, 33, 34, 42, 47, 56, 70, 88, 97]
# Drain the queue; the loop stops once __bool__ reports it is empty.
# Each pop_max() removes the current maximum, so values print largest first.
while pq:
    print(pq.pop_max())

97 88 70 56 47 42 34 33 14 9

3. Heaps

Mechanics

  • see notes

Implementation

class Heap:
    """Binary max-heap stored in a flat list.

    The node at index ``i`` has its children at ``2*i + 1`` and ``2*i + 2``
    and its parent at ``(i - 1) // 2``. The heap property maintained by
    ``add`` and ``pop_max`` is that no child is larger than its parent, so
    the maximum is always at index 0.
    """

    def __init__(self):
        # Level-order (breadth-first) layout of the heap's tree.
        self.data = []

    @staticmethod
    def parent(n):
        """Return the index of the parent of node ``n``."""
        return (n - 1) // 2

    @staticmethod
    def left_child(n):
        """Return the index of the left child of node ``n``."""
        return 2 * n + 1

    @staticmethod
    def right_child(n):
        """Return the index of the right child of node ``n``."""
        return 2 * n + 2

    def pos_exists(self, n):
        """Return True if index ``n`` lies before the end of the heap."""
        return n < len(self)

    def switch_node(self, parent, child):
        """Swap the values stored at indices ``parent`` and ``child``."""
        self.data[parent], self.data[child] = self.data[child], self.data[parent]

    def trickle_down(self, n):
        """Move the value at index ``n`` down until no child is larger.

        At each step the value is swapped with its larger child (the right
        child wins ties) if that child is greater than the value itself.
        """
        lc = Heap.left_child(n)
        rc = Heap.right_child(n)
        curval = self.data[n]
        if not self.pos_exists(lc):
            # No left child means no children at all: n is a leaf.
            return
        # Choose the larger child; a missing right child leaves only the left.
        target = lc
        if self.pos_exists(rc) and not self.data[lc] > self.data[rc]:
            target = rc
        if self.data[target] > curval:
            self.switch_node(n, target)
            self.trickle_down(target)

    def trickle_up(self, n):
        """Move the value at index ``n`` up while it is larger than its parent."""
        if n > 0:
            p = Heap.parent(n)
            if self.data[p] < self.data[n]:
                self.switch_node(p, n)
                self.trickle_up(p)

    def add(self, x):
        """Insert ``x``: append it as the last leaf, then trickle it up."""
        self.data.append(x)
        self.trickle_up(len(self.data) - 1)

    def max(self):
        """Return the largest item without removing it.

        Raises:
            IndexError: if the heap is empty.
        """
        return self.data[0]

    def pop_max(self):
        """Remove and return the largest item.

        The last leaf is moved to the root and trickled down to restore the
        heap property.

        Raises:
            IndexError: if the heap is empty.
        """
        m = self.data[0]
        self.data[0] = self.data[-1]
        del self.data[-1]
        if len(self.data) > 0:
            self.trickle_down(0)
        return m

    def check_heap(self, pos=0):
        """Return True if the subtree rooted at ``pos`` satisfies the heap property.

        Every node in the subtree is compared with its children, and a
        violation anywhere below ``pos`` makes the result False. A position
        past the end of the heap (including any position in an empty heap)
        is an empty subtree and counts as valid.
        """
        if not self.pos_exists(pos):
            return True
        v = self.data[pos]
        for child in (Heap.left_child(pos), Heap.right_child(pos)):
            if self.pos_exists(child):
                if v < self.data[child]:
                    return False
                # The recursive verdict must be propagated, not discarded.
                if not self.check_heap(child):
                    return False
        return True

    def __bool__(self):
        """A heap is truthy exactly when it holds at least one item."""
        return len(self.data) > 0

    def __len__(self):
        """Number of stored items."""
        return len(self.data)

    def __repr__(self):
        """Show the underlying list in level order."""
        return repr(self.data)
import random
# Build 10 independent heaps, each from 10 random integers in range(100).
# NOTE(review): the heaps are built but never inspected here; presumably a
# validity check (e.g. h.check_heap(0)) was intended - confirm.
for _ in range(10):
    h = Heap()
    for _ in range(10):
        h.add(random.randrange(100))
  • inserting all elements of a list and then popping them one by one returns the elements in descending order (largest first)
# Fill a heap with 10 random integers, then pop until it is empty.
h = Heap()
for _ in range(10):
    h.add(random.randrange(100))
# Each pop_max() removes the current maximum, so values print largest first.
while h:
    print(h.pop_max())

88 81 73 71 59 36 22 20 6 0

  • we can use this to sort a list:
# Sort a list by pushing everything onto a heap and popping it back off.
h = Heap()
l = [4,2,7,16,254,43,34,23]
for i in l:
    h.add(i)
# Pre-sized result list; it is filled by index from the back.
s = [None] * len(l)
# i runs over -1, -2, ..., -len(l): the largest value lands in the last slot.
for i in range(-1, -1 * len(l) - 1, -1):
    s[i] = h.pop_max()
# Bare expression: in a notebook cell this displays the sorted list.
s
[2, 4, 7, 16, 23, 34, 43, 254]

Run-time Analysis

  • max - O(1)
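  • add - O(log n) (a new element trickles up at most the height of the tree; a complete binary tree with n nodes has height ⌊log₂ n⌋)
  • pop_max - O(log n) (the moved element trickles down at most the height of the tree, which is ⌊log₂ n⌋)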

4. Heapsort

  • use a heap for sorting as discussed above
def heapsort(iterable):
    """Return a new ascending list with the items of ``iterable``.

    All items are pushed onto a ``Heap`` and then popped largest-first into
    a pre-sized list that is filled from its last slot down to its first.
    """
    heap = Heap()
    for item in iterable:
        heap.add(item)
    count = len(heap)
    result = [None] * count
    # Walk the slots back to front so each popped maximum lands at the end
    # of the still-unfilled region.
    for slot in reversed(range(count)):
        result[slot] = heap.pop_max()
    return result
import random

def pairs(iterable):
    """Yield overlapping consecutive pairs ``(x0, x1), (x1, x2), ...``.

    An iterable with fewer than two items yields nothing. Exceptions raised
    by the underlying iterable propagate to the caller instead of being
    silently swallowed.
    """
    it = iter(iterable)
    try:
        a = next(it)
    except StopIteration:
        # Empty input: end cleanly. A StopIteration escaping a generator
        # body would be turned into RuntimeError (PEP 479).
        return
    for b in it:
        yield a, b
        a = b

# Sanity check: heapsort 1000 random floats, then verify that every adjacent
# pair of the result is in non-decreasing order.
lst = heapsort(random.random() for _ in range(1000))
print(all((a <= b) for a, b in pairs(lst)))

True

import timeit
def time_heapsort(n, number=1000):
    """Return the total time in seconds for ``number`` heapsort runs on ``n`` floats.

    The input is materialised once, as a list, in the timeit setup. It must
    not be a generator: a generator would be exhausted by the first timed
    call, and every later repetition would sort an empty input.

    Args:
        n: number of random floats in the list being sorted.
        number: how many times the sort is repeated (timeit's ``number``).
            Every repetition now sorts the full list, so lower this for
            large ``n`` if the measurement takes too long.
    """
    return timeit.timeit('heapsort(rlst)',
                         'from __main__ import heapsort; '
                         'import random; '
                         'rlst = [random.random() for _ in range({})]'.format(n),
                         number=number)
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# 50 evenly spaced integer input sizes from 100 to 10000.
ns = np.linspace(100, 10000, 50, dtype=np.int_)
# One timing per input size, drawn as red '+' markers.
plt.plot(ns, [time_heapsort(n) for n in ns], 'r+')
plt.show()
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# Same measurement as before, now with a reference curve drawn on top.
ns = np.linspace(100, 10000, 50, dtype=np.int_)
plt.plot(ns, [time_heapsort(n) for n in ns], 'r+')
# Blue line: n * log2(n) scaled by the constant 0.01/10000.
# NOTE(review): the scale factor is presumably hand-tuned to overlay the
# measured timings - confirm after any change to time_heapsort.
plt.plot(ns, ns*np.log2(ns)*0.01/10000, 'b') # O(n log n) plot
plt.show()

heapsort in place

  • build the heap bottom up:
    • the last element is a heap
    • until the whole array is a heap:
      • expand the heap by one element to the left
      • this element is the new root and we use trickle_down (sift_down) to restore the heap property
    • then, until the heap is empty, remove the root of the heap (pop_max) and place it one element beyond the end of the current heap
def swap(lst,l,r):
    """Exchange the elements of ``lst`` at indices ``l`` and ``r`` in place."""
    right_value = lst[r]
    left_value = lst[l]
    lst[l] = right_value
    lst[r] = left_value

def heapsort_inplace(lst):
    """Sort ``lst`` in place in ascending order, printing a trace of each step.

    The list is first turned into a max-heap. The heap then shrinks from the
    right: the root (the current maximum) is swapped into slot ``i``, and the
    new root is sifted down within the remaining heap ``lst[0:i]``.
    """
    heapify(lst)
    print(f"\nfinal heap: {lst}\n")
    for i in reversed(range(len(lst))):
        # Slot i receives the maximum of the heap that occupied lst[0:i+1].
        swap(lst, i, 0)
        # The heap now ends at index i - 1; restore its order from the root.
        sift_down(lst, 0, i - 1)
        print(f"pop and insert at {i}: heap: {lst[0:i]} sorted suffix {lst[i:len(lst)]}")


def heapify(lst):
    """Rearrange ``lst`` in place into a max-heap, printing a trace of each step.

    Works bottom-up: starting at the last index and moving left, each element
    becomes the root of a sub-heap and is sifted down into place.
    """
    last = len(lst) - 1
    for i in reversed(range(len(lst))):
        sift_down(lst, i, last)
        print(f"heapified {i} to {len(lst) - 1}: {lst[0:i]} * {lst[i:len(lst)]}")

def sift_down(lst,start,end):
    """Sift ``lst[start]`` down inside the heap region ending at ``end``.

    ``end`` is the inclusive index of the last heap element. The value is
    repeatedly exchanged with its largest child until neither child that
    lies inside the region is larger.
    """
    node = start
    left = Heap.left_child(node)

    while left <= end:
        right = left + 1
        largest = node

        # Compare with the left child first, then let the right child beat
        # whichever of the two is currently the largest.
        if lst[largest] < lst[left]:
            largest = left
        if right <= end and lst[largest] < lst[right]:
            largest = right
        if largest == node:
            # Neither child is larger: the heap property holds here.
            return
        lst[node], lst[largest] = lst[largest], lst[node]
        node = largest
        left = Heap.left_child(node)
# Demo: sort a small list in place; heapsort_inplace prints its own trace.
l = [5,123,54,1,23,4,5123,99,123,432,555]
heapsort_inplace(l)
print(f"\nsorted results: {l}")

heapified 10 to 10: [5, 123, 54, 1, 23, 4, 5123, 99, 123, 432] * [555] heapified 9 to 10: [5, 123, 54, 1, 23, 4, 5123, 99, 123] * [432, 555] heapified 8 to 10: [5, 123, 54, 1, 23, 4, 5123, 99] * [123, 432, 555] heapified 7 to 10: [5, 123, 54, 1, 23, 4, 5123] * [99, 123, 432, 555] heapified 6 to 10: [5, 123, 54, 1, 23, 4] * [5123, 99, 123, 432, 555] heapified 5 to 10: [5, 123, 54, 1, 23] * [4, 5123, 99, 123, 432, 555] heapified 4 to 10: [5, 123, 54, 1] * [555, 4, 5123, 99, 123, 432, 23] heapified 3 to 10: [5, 123, 54] * [123, 555, 4, 5123, 99, 1, 432, 23] heapified 2 to 10: [5, 123] * [5123, 123, 555, 4, 54, 99, 1, 432, 23] heapified 1 to 10: [5] * [555, 5123, 123, 432, 4, 54, 99, 1, 123, 23] heapified 0 to 10: [] * [5123, 555, 54, 123, 432, 4, 5, 99, 1, 123, 23]

final heap: [5123, 555, 54, 123, 432, 4, 5, 99, 1, 123, 23]

pop and insert at 10: heap: [555, 432, 54, 123, 123, 4, 5, 99, 1, 23] sorted suffix [5123] pop and insert at 9: heap: [432, 123, 54, 99, 123, 4, 5, 23, 1] sorted suffix [555, 5123] pop and insert at 8: heap: [123, 123, 54, 99, 1, 4, 5, 23] sorted suffix [432, 555, 5123] pop and insert at 7: heap: [123, 99, 54, 23, 1, 4, 5] sorted suffix [123, 432, 555, 5123] pop and insert at 6: heap: [99, 23, 54, 5, 1, 4] sorted suffix [123, 123, 432, 555, 5123] pop and insert at 5: heap: [54, 23, 4, 5, 1] sorted suffix [99, 123, 123, 432, 555, 5123] pop and insert at 4: heap: [23, 5, 4, 1] sorted suffix [54, 99, 123, 123, 432, 555, 5123] pop and insert at 3: heap: [5, 1, 4] sorted suffix [23, 54, 99, 123, 123, 432, 555, 5123] pop and insert at 2: heap: [4, 1] sorted suffix [5, 23, 54, 99, 123, 123, 432, 555, 5123] pop and insert at 1: heap: [1] sorted suffix [4, 5, 23, 54, 99, 123, 123, 432, 555, 5123] pop and insert at 0: heap: [] sorted suffix [1, 4, 5, 23, 54, 99, 123, 123, 432, 555, 5123]

sorted results: [1, 4, 5, 23, 54, 99, 123, 123, 432, 555, 5123]

Last updated on Wednesday, April 14, 2021
Published on Monday, March 22, 2021
 Edit on GitHub