Language Features I

Start in JupyterHub

list/set/dict comprehensions

# see BAS2
def find_numbers_in_interval(numbers, lower, upper):
    out = []
    for number in numbers:
        if lower <= number <= upper:
            out.append(number)
    return out
def find_numbers_in_interval(numbers, lower, upper):
    return [n for n in numbers if lower <= n <= upper]
# see BAS2
def clip(numbers, limit):
    out = []
    for n in numbers:
        if n > limit:
            out.append(limit)
        else:
            out.append(n)
    return out
def clip(numbers, limit):
    return [limit if n > limit else n for n in numbers]
def clip(numbers, limit):
    return [min(n, limit) for n in numbers]
def batches(elements, batch_size):
    return [elements[ofst:ofst+batch_size] for ofst in range(0, len(elements), batch_size)]
# see BAS2
def remove_units(values):
    out = []
    # this is difficult to read
    for i in range(len(values)):
        if values[i][1] == "m":
            out.append(values[i][0] * 1000)
        elif values[i][1] == "cm":
            out.append(values[i][0] * 10)
        else:
            out.append(values[i][0])
    return out
# using list comprehensions still does not make it readable
def remove_units(values):
    return [value * 1000 if unit == "m" else value * 10 if unit == "cm" else value for value, unit in values]
# use a dict for factor lookup! ("separation of concerns")
CONVERSION_FACTORS = {
    "m":  1000,
    "cm":   10
}

# note that units other than "m" and "cm" default to a factor of 1
def remove_units(values):
    return [CONVERSION_FACTORS.get(unit, 1) * value for value, unit in values]
# this version raises an exception for unknown units
CONVERSION_FACTORS = {
    "m":  1000,
    "cm":   10,
    "mm":    1
}

def remove_units(values):
    return [CONVERSION_FACTORS[unit] * value for value, unit in values]
[i + j for i in range(5) for j in range(5)]
[0, 1, 2, 3, 4, 1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 3, 4, 5, 6, 7, 4, 5, 6, 7, 8]
# set comprehensions filter duplicates, but do not retain element order
{i + j for i in range(5) for j in range(5)}
{0, 1, 2, 3, 4, 5, 6, 7, 8}

Exhibit: Why Coding Conventions Matter

The following code neglects all style conventions. See if you can figure out what this function is supposed to do!

def this_could_have_been_a_good_name(b,a):
    i,res=0,[]
    while i<len(b):
        for e in a:
            if e     >=0: res += [b [i]**e]
        i+=1
    return res

This function can be cleaned up significantly by using more appropriate variable names and by rewriting the loops as a list comprehension.

I recommend to take a look at black too.

def cross_powers_with_nonnegative_exponent(bases, exponents):
    return [b ** e for b in bases for e in exponents if e >= 0]

For better reusablility, move the filter operation to a different function.

def cross_powers(bases, exponents):
    return [b ** e for b in bases for e in exponents]

def cross_powers_with_nonnegative_exponent(bases, exponents):
    nonnegative_exponents = [e for e in exponents if e >= 0]
    return cross_powers(bases, nonnegative_exponents)

Exceptions and Assertions

# OPTION 1: No check, treat upper < lower as an empty interval
def find_numbers_in_interval(numbers, lower, upper):
    return [n for n in numbers if lower <= n <= upper]
# OPTION 2: Explicit check
def find_numbers_in_interval(numbers, lower, upper):
    if upper < lower:
        raise ValueError("Lower bound cannot be larger than upper bound!")
    return [n for n in numbers if lower <= n <= upper]
# OPTION 3: Assume user is familiar with the interface, check only if debug mode is not disabled
def find_numbers_in_interval(numbers, lower, upper):
    assert lower <= upper, "Lower bound cannot be larger than upper bound!"
    return [n for n in numbers if lower <= n <= upper]

Use assert whenever you can. assert will not be evaluated in optimized mode, so there is no performance penalty.

Also see https://en.wikipedia.org/wiki/Currying

Assignment LF1

  1. Einheiten entfernen (schwer) Schreiben Sie eine Funktion, die eine Liste aus 2-Tupeln aus einer Zahl und einem String nimmt. Die Zahl bezeichnet eine Temperatur und der String ist entweder “C”, “F”, oder “K” und stellt die Einheit der Zahl dar. Erzeugen Sie daraus eine Liste, die alle Temperaturen in Kelvin enthält.

# see LF1
def remove_units(values):
    out = []
    for value, unit in values:
        if unit == "C":
            out.append(value + 273.15)
        elif unit == "F":
            res = (value + 459.67) * 5 / 9
            out.append(res)
        else:
            out.append(value)
    return out
# conversion factors do not work this time, but conversion functions work!
CONVERTERS = {
    "C": lambda val: val + 273.15,
    "F": lambda val: (val + 459.67) * 5 / 9,
    "K": lambda val: val
}

def remove_units(values):
    return [CONVERTERS[unit](value) for value, unit in values]

Binary Heap with Comparison Function

Can we turn the MinHeap into a MaxHeap? Can we generalize to arbitrary comparison functions similar to what max is doing?

# simplest solution: wrap every entry and invert comparison
class InvertedCompareWrapper:
    def __init__(self, wrapped):
        self.wrapped = wrapped
    
    def __lt__(self, other):
        return other.wrapped < self.wrapped
    
    def __repr__(self):
        return f"InvertedCompareWrapper({self.wrapped!r})"
class MinHeap:
    # never use mutable default parameters such as lists
    def __init__(self, iterable=None):
        self._buffer = []
        if iterable is None:
            return
        for elem in iterable:
            self.add(elem)
            
    # def _sift_up(self, idx):
    #     if idx == 0:
    #         return
    #     parent = (idx - 1) // 2
    #     if self._buffer[parent] < self._buffer[idx]:
    #         return
    #     self._swap(idx, parent)
    #     self._sift_up(parent)
    
    # _sift_up und _sift_down are implementation details
    # and not part of the public interface
    # this is why we prefix them with "_"
    def _sift_up(self, idx):
        # prefer this over a recursive solution because of stack overflows
        while idx > 0:
            parent = (idx - 1) // 2
            # this requires only __lt__ to be defined!
            if self._buffer[parent] < self._buffer[idx]:
                break
            self._swap(idx, parent)
            idx = parent
            
    # def _sift_down(self, idx):
    #     left_child = 2 * idx + 1
    #     if left_child >= len(self._buffer):
    #         return
    #     smaller_child = left_child
    #     right_child = left_child + 1
    #     if right_child < len(self._buffer):
    #         if self._buffer[right_child] < self._buffer[left_child]:
    #             smaller_child = right_child
    #     if self._buffer[smaller_child] < self._buffer[idx]:
    #         self._swap(smaller_child, idx)
    #         self._sift_down(smaller_child)
    
    def _sift_down(self, idx):
        while True:
            left_child = idx * 2 + 1
            if left_child >= len(self._buffer):
                break
            smaller_child = left_child
            right_child = left_child + 1
            if right_child < len(self._buffer):
                if self._buffer[right_child] < self._buffer[left_child]:
                    smaller_child = right_child
            if self._buffer[smaller_child] < self._buffer[idx]:
                self._swap(idx, smaller_child)
            else:
                break

    # this is used multiple times in different places, so use a separate function
    def _swap(self, i, j):
        self._buffer[i], self._buffer[j] = self._buffer[j], self._buffer[i]
    
    def add(self, value):
        self._buffer.append(value)
        last_index = len(self._buffer) - 1
        self._sift_up(last_index)
    
    def extract_min(self):
        if not self._buffer:
            raise IndexError("Extract from empty heap")
        minimum = self._buffer[0]
        self._swap(0, -1)
        self._buffer.pop()
        self._sift_down(0)
        return minimum
    
    def __len__(self):
        return len(self._buffer)
    
    def __bool__(self):
        return bool(self._buffer)
    
    # for an implementation of __eq__ und __ne__
    # see the standard library chapter
    
    # heaps are not indexable, so do not implement this
    #def __getitem__(self, key):
    #    return NotImplemented
    
    # solution to assignment
    def __str__(self):
        return repr(self)
    
    # solution to assignment
    def __repr__(self):
        # return f"<MinHeap with {len(self)} entries>"
        # this is only possible if MinHeap provides an appropriate constructor
        return f"MinHeap({self._buffer!r})"
h = MinHeap()
h.add(2)
h.add(1)
h.extract_min()
1
h = MinHeap()
h.add(InvertedCompareWrapper(1))
h.add(InvertedCompareWrapper(2))
h.extract_min().wrapped
2

Another alternative solution would be to add a reversed boolean flag. However, the mess of branches in every single comparison would be unbearable! Instead, use an object-oriented approach.

from collections import Counter

class Heap:
    def __init__(self, iterable=None):
        self._buffer = []
        if iterable is None:
            return
        for elem in iterable:
            self.add(elem)
            
    def _is_smaller(self, lhs, rhs):
        return lhs < rhs
    
    def _sift_up(self, idx):
        while idx > 0:
            parent = (idx - 1) // 2
            if not self._is_smaller(self._buffer[idx], self._buffer[parent]):
                break
            self._swap(idx, parent)
            idx = parent
    
    def _sift_down(self, idx):
        while True:
            left_child = idx * 2 + 1
            # do not use _is_smaller here since we are comparing indices
            if left_child >= len(self._buffer):
                break
            smaller_child = left_child
            right_child = left_child + 1
            # do not use _is_smaller here since we are comparing indices
            if right_child < len(self._buffer):
                if self._is_smaller(self._buffer[right_child], self._buffer[left_child]):
                    smaller_child = right_child
            if self._is_smaller(self._buffer[smaller_child], self._buffer[idx]):
                self._swap(idx, smaller_child)
            else:
                break
                
    # NO CHANGES BELOW THIS LINE
    # THIS IS A CONSEQUENCE OF SEPARATING CONCERNS EARLY ON

    def _swap(self, i, j):
        self._buffer[i], self._buffer[j] = self._buffer[j], self._buffer[i]
    
    def add(self, value):
        self._buffer.append(value)
        last_index = len(self._buffer) - 1
        self._sift_up(last_index)
    
    def extract_min(self):
        if not self._buffer:
            raise IndexError("Extract from empty heap")
        minimum = self._buffer[0]
        self._swap(0, -1)
        self._buffer.pop()
        self._sift_down(0)
        return minimum
    
    def __len__(self):
        return len(self._buffer)
    
    def __bool__(self):
        return bool(self._buffer)

    def __eq__(self, other):
        return Counter(self._buffer) == Counter(other._buffer)
    
    def __ne__(self, other):
        return not self.__eq__(other)
    
    def __str__(self):
        return repr(self)
    
    def __repr__(self):
        return f"MinHeap({self._buffer!r})"

This is the only instance where inheritance is used in this course. Python uses duck-typing, so there is less demand for class hierarchies than in traditional object-oriented languages. We use inheritance exclusively for replacing methods in subclasses.

User-defined comparison can also be implemented by passing a comparison function to the constructor. This makes implementing __repr__ very difficult since functions cannot be converted to their original code representation.

# From Object Oriented Programming

import operator

from collections import Counter

class Heap:
    # you can also use a lambda function here
    # is_smaller=lambda lhs, rhs: lhs < rhs
    def __init__(self, iterable=None, *, is_smaller=operator.lt):
        self._buffer = []
        self._is_smaller = is_smaller
        if iterable is None:
            return
        for elem in iterable:
            self.add(elem)
    
    # NO CHANGES BELOW THIS LINE
    
    def _sift_up(self, idx):
        while idx > 0:
            parent = (idx - 1) // 2
            if not self._is_smaller(self._buffer[idx], self._buffer[parent]):
                break
            self._swap(idx, parent)
            idx = parent
    
    def _sift_down(self, idx):
        while True:
            left_child = idx * 2 + 1
            if left_child >= len(self._buffer):
                break
            smaller_child = left_child
            right_child = left_child + 1
            # wieso wird das nicht ersetzt
            if right_child < len(self._buffer):
                if self._is_smaller(self._buffer[right_child], self._buffer[left_child]):
                    smaller_child = right_child
            if self._is_smaller(self._buffer[smaller_child], self._buffer[idx]):
                self._swap(idx, smaller_child)
            else:
                break
                
    def _swap(self, i, j):
        self._buffer[i], self._buffer[j] = self._buffer[j], self._buffer[i]
    
    def add(self, value):
        self._buffer.append(value)
        last_index = len(self._buffer) - 1
        self._sift_up(last_index)
    
    def extract_min(self):
        if not self._buffer:
            raise IndexError("Extract from empty heap")
        minimum = self._buffer[0]
        self._swap(0, -1)
        # minimum = self._buffer.pop()
        self._buffer.pop()
        self._sift_down(0)
        return minimum
    
    def __len__(self):
        return len(self._buffer)
    
    def __bool__(self):
        return bool(self._buffer)
    
    # two heaps are equal if they contain the same elements the same number of times
    # see collections section
    def __eq__(self, other):
        return Counter(self._buffer) == Counter(other._buffer)
    
    def __ne__(self, other):
        return not self.__eq__(other)
    
    def __str__(self):
        return repr(self)
    
    # cannot represent self._compare as string
    def __repr__(self):
        return f"<binary min-heap with user-defined comparator and entries {self._buffer}>"

Assignment LF2

  1. Heap mit Key-Funktion Schreiben Sie eine Variante des Heaps, der eine Key-Funktion nimmt, um den zu vergleichenden Wert zu bestimmen, ähnlich wie max.

class KeyedMinHeap(MinHeap):
    def __init__(self, iterable=None, *, key=lambda x: x):
        super().__init__(iterable, is_smaller=lambda x, y: key(x) < key(y))

Closures

Python functions can capture surrounding variables when they are defined and access these variables later during execution.

See https://en.wikipedia.org/wiki/Universal_hashing for details on the type of hash function we are using here.

import random

# generate hash function using a function factory
# "a" and "b" are captured by the inner function
def random_universal_hash(p, m):
    assert p >= m
    a = random.randrange(1, p)
    b = random.randrange(0, p)
    def hasher(x):
        return (a * x + b) % p % m
    return hasher
hashers = [random_universal_hash(43, 20) for _ in range(20)]
[h(5) for h in hashers]
[11, 14, 2, 11, 7, 1, 7, 5, 11, 13, 1, 14, 2, 2, 2, 12, 17, 4, 5, 14]
# hash functions do not show a and b in their string representation
hashers
[<function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>,
 <function __main__.random_universal_hash.<locals>.hasher(x)>]
# closures can even modify the captured variables
def make_counter():
    count = 0
    def counter():
        nonlocal count
        temp = count
        count += 1
        return temp
    return counter
counter = make_counter()
second_counter = make_counter()
counter()
0
second_counter()
0

Duck Typing

import random

from collections import namedtuple


# define a class to fix the string representation problem
# note that namedtuple also defines comparison operators - even if it makes no sense here
class UniversalHashFunction(namedtuple('UniversalHashFunction', ['a', 'b', 'p', 'm'])):
    def __str__(self):
        return f"({self.a} * x + {self.b}) % {self.p} % {self.m}"
    
    # this makes any instance of UniversalHashFunction behave like a function
    def __call__(self, x):
        return (self.a * x + self.b) % self.p % self.m


def random_universal_hash(p, m):
    assert p >= m
    a = random.randrange(1, p)
    b = random.randrange(0, p)
    return UniversalHashFunction(a, b, p, m)
hashers = [random_universal_hash(43, 20) for _ in range(20)]
# nice programmer-friendly representation
hashers
[UniversalHashFunction(a=9, b=14, p=43, m=20),
 UniversalHashFunction(a=38, b=38, p=43, m=20),
 UniversalHashFunction(a=4, b=34, p=43, m=20),
 UniversalHashFunction(a=28, b=28, p=43, m=20),
 UniversalHashFunction(a=20, b=16, p=43, m=20),
 UniversalHashFunction(a=40, b=24, p=43, m=20),
 UniversalHashFunction(a=23, b=14, p=43, m=20),
 UniversalHashFunction(a=35, b=39, p=43, m=20),
 UniversalHashFunction(a=42, b=0, p=43, m=20),
 UniversalHashFunction(a=35, b=39, p=43, m=20),
 UniversalHashFunction(a=42, b=21, p=43, m=20),
 UniversalHashFunction(a=34, b=29, p=43, m=20),
 UniversalHashFunction(a=10, b=30, p=43, m=20),
 UniversalHashFunction(a=22, b=42, p=43, m=20),
 UniversalHashFunction(a=10, b=5, p=43, m=20),
 UniversalHashFunction(a=17, b=0, p=43, m=20),
 UniversalHashFunction(a=40, b=27, p=43, m=20),
 UniversalHashFunction(a=31, b=6, p=43, m=20),
 UniversalHashFunction(a=34, b=3, p=43, m=20),
 UniversalHashFunction(a=26, b=5, p=43, m=20)]
# nice human-readable representation
print(hashers[0])
(9 * x + 14) % 43 % 20
[h(5) for h in hashers]
[16, 13, 11, 19, 10, 9, 0, 2, 18, 2, 16, 7, 17, 3, 12, 2, 12, 12, 1, 6]
strings = [
    "gorilla",
    "mouse",
    "leopard",
    "zebra",
    "fox"
]

# strings support comparison the same way numbers do
find_numbers_in_interval(strings, "g", "l")
['gorilla']

Assignment LF3 🧑‍🏫

  1. n closest🧑‍🏫 Schreiben Sie eine Funktion, die eine Zahl x, eine Zahl n und eine Liste von Zahlen nimmt und aus der Liste n Zahlen heraussucht, die betragsmäßig am nähesten an x liegen. nclosest(x=4.5, n=3, candidates=[2, 3, 4, 6, 8]) -> [3, 4, 6]

def nclosest(x, n, candidates):
    # this would have been more elegant if we used a key function
    compare = lambda lhs, rhs: abs(lhs - x) < abs(rhs - x)
    heap = Heap(candidates, is_smaller=compare)
    return [heap.extract_min() for _ in range(n)]
import heapq

def nclosest(x, n, candidates):
    # use the standard library whenever possible
    return heapq.nsmallest(n, candidates, key=lambda c: abs(c - x))
nclosest(x=4.5, n=3, candidates=[2, 3, 4, 6, 8])
[4, 3, 6]

Bonus Assignment LF4 🧑‍🏫

  1. BoundedHeap🧑‍🏫 Modifizieren Sie eine der hier vorgestellen Heap-Varianten so, dass maximal k Elemente auf dem Heap liegen können. Die Zahl k kann im Konstruktor übergeben werden.

# From Object Oriented Programming

import operator

from collections import Counter

class Heap:
    # you can also use a lambda function here
    # is_smaller=lambda lhs, rhs: lhs < rhs
    def __init__(self, iterable=None, *, is_smaller=operator.lt, k=None):
        self._buffer = []
        self._is_smaller = is_smaller
        self._maximum_number_of_elements = k
        if iterable is None:
            return
        for elem in iterable:
            self.add(elem)
    
    # NO CHANGES BELOW THIS LINE
    
    def _sift_up(self, idx):
        while idx > 0:
            parent = (idx - 1) // 2
            if not self._is_smaller(self._buffer[idx], self._buffer[parent]):
                break
            self._swap(idx, parent)
            idx = parent
    
    def _sift_down(self, idx):
        while True:
            left_child = idx * 2 + 1
            if left_child >= len(self._buffer):
                break
            smaller_child = left_child
            right_child = left_child + 1
            # wieso wird das nicht ersetzt
            if right_child < len(self._buffer):
                if self._is_smaller(self._buffer[right_child], self._buffer[left_child]):
                    smaller_child = right_child
            if self._is_smaller(self._buffer[smaller_child], self._buffer[idx]):
                self._swap(idx, smaller_child)
            else:
                break
                
    def _swap(self, i, j):
        self._buffer[i], self._buffer[j] = self._buffer[j], self._buffer[i]
    
    def add(self, value):
        if self._maximum_number_of_elements and self._maximum_number_of_elements <= len(self._buffer):
            raise ValueError("Out of Memory")
        self._buffer.append(value)
        last_index = len(self._buffer) - 1
        self._sift_up(last_index)
    
    def extract_min(self):
        if not self._buffer:
            raise IndexError("Extract from empty heap")
        minimum = self._buffer[0]
        self._swap(0, -1)
        # minimum = self._buffer.pop()
        self._buffer.pop()
        self._sift_down(0)
        return minimum
    
    def __len__(self):
        return len(self._buffer)
    
    def __bool__(self):
        return bool(self._buffer)
    
    # two heaps are equal if they contain the same elements the same number of times
    # see collections section
    def __eq__(self, other):
        return Counter(self._buffer) == Counter(other._buffer)
    
    def __ne__(self, other):
        return not self.__eq__(other)
    
    def __str__(self):
        return repr(self)
    
    # cannot represent self._compare as string
    def __repr__(self):
        return f"<binary min-heap with at most {self._maximum_number_of_elements} elements and user-defined comparator and entries {self._buffer}>"