# File: a6_include.py
# NOTE: reconstructed from a collapsed unified diff -- the "+" line markers
# were dropped and the line breaks restored so the module parses as Python.

# Course: CS261 - Data Structures
# Assignment: 6
# Description: Provided data structures necessary to complete the assignment.
#              Please look through this file carefully to see what methods
#              are available and how they're implemented.
#              Don't modify the contents of this file.

from typing import Optional


# -------------- Used by both HashMaps (SC & OA) -------------- #

class DynamicArrayException(Exception):
    """Raised by DynamicArray when an index is outside the valid range."""


class DynamicArray:
    """
    Class implementing a Dynamic Array
    Supported methods are:
    append, pop, swap, get_at_index, set_at_index, length
    """

    def __init__(self, arr: Optional[list] = None) -> None:
        """
        Initialize new dynamic array using a list.

        A shallow copy of arr is stored, so later changes to the caller's
        list do not affect the array. None or an empty list gives an
        empty array.
        """
        self._data = arr.copy() if arr else []

    def __iter__(self):
        """
        Disable iterator capability for DynamicArray class
        This means loops and aggregate functions like
        those shown below won't work:

        da = DynamicArray()
        for value in da:        # will not work
        min(da)                 # will not work
        max(da)                 # will not work
        sort(da)                # will not work
        """
        # Returning None makes iter(da) raise TypeError. Without this method
        # Python would fall back to __getitem__-based iteration.
        return None

    def __str__(self) -> str:
        """Override string method to provide more readable output."""
        return str(self._data)

    def _check_index(self, index: int) -> None:
        """Raise DynamicArrayException unless 0 <= index < length()."""
        size = self.length()
        if index < 0 or index >= size:
            raise DynamicArrayException(
                f"index {index} is out of range for array of length {size}"
            )

    def append(self, value: object) -> None:
        """Add new element at the end of the array."""
        self._data.append(value)

    def pop(self):
        """
        Remove element from end of the array and return it.

        Raises IndexError (from the underlying list) if the array is empty.
        """
        return self._data.pop()

    def swap(self, i: int, j: int) -> None:
        """
        Swap two elements in array given their indices.

        Indices go straight to the underlying list, so they are not
        range-checked the way get_at_index / set_at_index are.
        """
        self._data[i], self._data[j] = self._data[j], self._data[i]

    def get_at_index(self, index: int):
        """
        Return value of element at a given index.

        Raises DynamicArrayException if index is negative or >= length().
        """
        self._check_index(index)
        return self._data[index]

    def __getitem__(self, index: int):
        """Return value of element at a given index using [] syntax."""
        return self.get_at_index(index)

    def set_at_index(self, index: int, value: object) -> None:
        """
        Set value of element at a given index.

        Raises DynamicArrayException if index is negative or >= length().
        """
        self._check_index(index)
        self._data[index] = value

    def __setitem__(self, index: int, value: object) -> None:
        """Set value of element at a given index using [] syntax."""
        self.set_at_index(index, value)

    def length(self) -> int:
        """Return length of array."""
        return len(self._data)


def hash_function_1(key: str) -> int:
    """
    Sample Hash function #1 to be used with HashMap implementation

    Returns the sum of the code points of the characters in key, so keys
    that are permutations of each other hash to the same value.
    """
    return sum(ord(letter) for letter in key)


def hash_function_2(key: str) -> int:
    """
    Sample Hash function #2 to be used with HashMap implementation

    Returns the sum of each character's code point multiplied by its
    1-based position in key.
    """
    return sum(
        position * ord(letter) for position, letter in enumerate(key, start=1)
    )


# --------- For use in Separate Chaining (SC) HashMap --------- #

class SLNode:
    """
    Singly Linked List node for use in a hash map
    """

    def __init__(self, key: str, value: object,
                 next: Optional["SLNode"] = None) -> None:
        """Initialize node given a key and value (and optional successor)."""
        self.key = key
        self.value = value
        self.next = next

    def __str__(self) -> str:
        """Override string method to provide more readable output."""
        return '(' + str(self.key) + ': ' + str(self.value) + ')'


class LinkedListIterator:
    """
    Separate iterator class for LinkedList
    """

    def __init__(self, current_node: Optional[SLNode]) -> None:
        """Initialize the iterator with a node."""
        self._node = current_node

    def __iter__(self) -> "LinkedListIterator":
        """Return the iterator."""
        return self

    def __next__(self) -> SLNode:
        """Obtain next node and advance iterator."""
        if not self._node:
            raise StopIteration

        current_node = self._node
        self._node = self._node.next
        return current_node


class LinkedList:
    """
    Class implementing a Singly Linked List
    Supported methods are: insert, remove, contains, length, iterator
    """

    def __init__(self) -> None:
        """
        Initialize new linked list;
        doesn't use a sentinel and keeps track of its size in a variable.
        """
        self._head = None
        self._size = 0

    def __str__(self) -> str:
        """Override string method to provide more readable output."""
        if not self._head:
            return "SLL []"

        content = str(self._head)
        node = self._head.next
        while node:
            content += ' -> ' + str(node)
            node = node.next
        return 'SLL [' + content + ']'

    def __iter__(self) -> LinkedListIterator:
        """Return an iterator over the nodes, starting at the head."""
        return LinkedListIterator(self._head)

    def insert(self, key: str, value: object) -> None:
        """
        Insert new node at front of the list.

        No duplicate check is done here; callers that need unique keys
        should call contains() first.
        """
        self._head = SLNode(key, value, self._head)
        self._size += 1

    def remove(self, key: str) -> bool:
        """
        Remove first node with matching key.
        Return True if removal was successful, False otherwise.
        """
        previous, node = None, self._head
        while node:
            if node.key == key:
                if previous:
                    previous.next = node.next
                else:
                    # the match is the head, so the list now starts after it
                    self._head = node.next
                self._size -= 1
                return True

            previous, node = node, node.next
        return False

    def contains(self, key: str) -> Optional[SLNode]:
        """Return node with matching key, or None if no match"""
        node = self._head
        while node:
            if node.key == key:
                return node
            node = node.next
        return None

    def length(self) -> int:
        """Return the length of the list."""
        return self._size


# ---------- For use in Open Addressing (OA) HashMap ---------- #

class HashEntry:
    """Key/value slot for an open-addressing table, with a tombstone flag."""

    def __init__(self, key: str, value: object) -> None:
        """Initialize an entry for use in a hash map."""
        self.key = key
        self.value = value
        self.is_tombstone = False

    def __str__(self) -> str:
        """Override string method to provide more readable output."""
        return f"K: {self.key} V: {self.value} TS: {self.is_tombstone}"


# Patch header for the next file in the original diff, kept verbatim:
# diff --git a/hash_map_oa.py b/hash_map_oa.py new file mode 100644 index 0000000..56f49cc --- /dev/null +++
# File: hash_map_oa.py
# NOTE: reconstructed from a collapsed unified diff -- the "+" line markers
# were dropped and the line breaks restored so the module parses as Python.

# Name: Andrew Scott
# OSU Email: scottand@oregonstate.edu
# Course: CS261 - Data Structures
# Assignment: 6
# Due Date: 2022-06-03
# Description: HashMap implementation using Open Addressing with Quadratic
#              Probing


from a6_include import DynamicArray, HashEntry, hash_function_1, hash_function_2


class HashMap:
    def __init__(self, capacity: int, function) -> None:
        """
        Initialize new HashMap that uses
        quadratic probing for collision resolution
        DO NOT CHANGE THIS METHOD IN ANY WAY
        """
        self._buckets = DynamicArray()
        for _ in range(capacity):
            self._buckets.append(None)

        self._capacity = capacity
        self._hash_function = function
        self._size = 0

    def __str__(self) -> str:
        """
        Override string method to provide more readable output
        DO NOT CHANGE THIS METHOD IN ANY WAY
        """
        out = ""
        for i in range(self._buckets.length()):
            out += str(i) + ": " + str(self._buckets[i]) + "\n"
        return out

    def get_size(self) -> int:
        """
        Return size of map
        DO NOT CHANGE THIS METHOD IN ANY WAY
        """
        return self._size

    def get_capacity(self) -> int:
        """
        Return capacity of map
        DO NOT CHANGE THIS METHOD IN ANY WAY
        """
        return self._capacity

    # ------------------------------------------------------------------ #

    def _probe(self, key: str):
        """
        Walk the quadratic probe sequence for key.

        Returns a pair (match, free):
          match - index of the live entry holding key, or None if absent
          free  - index of the first reusable slot seen (tombstone or
                  never-used), or None if the sequence has none
        """
        capacity = self._capacity
        if capacity == 0:
            return None, None

        home = self._hash_function(key) % capacity
        free = None
        # (home + j*j) % capacity repeats with period `capacity`, so this
        # many steps visits every slot the sequence can ever reach.
        for step in range(capacity):
            index = (home + step * step) % capacity
            entry = self._buckets[index]
            if entry is None:
                # a never-used slot ends the chain: key cannot lie beyond it
                return None, index if free is None else free
            if entry.is_tombstone:
                # reusable, but key may still live further along
                if free is None:
                    free = index
            elif entry.key == key:
                return index, free
        return None, free

    def put(self, key: str, value: object) -> None:
        """
        Store value under key, replacing the value if key is already present.

        If the load factor is >= 0.5 the table is doubled before the pair
        is placed. Collisions are resolved with quadratic probing, and
        tombstone slots are reused.
        """
        if self.table_load() >= 0.5:
            self.resize_table(self._capacity * 2)

        match, free = self._probe(key)
        if match is not None:
            self._buckets[match].value = value
            return

        # The probe sequence may not reach every slot when capacity is not
        # prime (and capacity may be 0), so grow until a slot is reachable.
        while free is None:
            self.resize_table(max(1, self._capacity * 2))
            _, free = self._probe(key)

        self._buckets[free] = HashEntry(key, value)
        self._size += 1

    def table_load(self) -> float:
        """
        Return the load factor: live entries divided by capacity.
        A table with zero capacity reports 0.0.
        """
        if self._capacity == 0:
            return 0.0
        return self._size / self._capacity

    def empty_buckets(self) -> int:
        """
        Return the number of slots holding no live entry.
        Tombstones count as empty, since put() may reuse them.
        """
        return self._capacity - self._size

    def resize_table(self, new_capacity: int) -> None:
        """
        Rebuild the table with new_capacity slots and rehash live entries.

        Does nothing if new_capacity is below 1 or below the current number
        of entries. Tombstones are dropped. Entries are re-inserted with
        put(), so the table may end up larger than new_capacity if the load
        factor reaches 0.5 during the rehash.
        """
        if new_capacity < 1 or new_capacity < self._size:
            return

        old_buckets = self._buckets
        self._buckets = DynamicArray()
        for _ in range(new_capacity):
            self._buckets.append(None)
        self._capacity = new_capacity
        self._size = 0

        for i in range(old_buckets.length()):
            entry = old_buckets[i]
            if entry is not None and not entry.is_tombstone:
                self.put(entry.key, entry.value)

    def get(self, key: str) -> object:
        """Return the value stored under key, or None if key is absent."""
        match, _ = self._probe(key)
        if match is None:
            return None
        return self._buckets[match].value

    def contains_key(self, key: str) -> bool:
        """Return True if key is in the map, False otherwise."""
        match, _ = self._probe(key)
        return match is not None

    def remove(self, key: str) -> None:
        """
        Remove key and its value; do nothing if key is absent.

        The slot is marked as a tombstone rather than cleared, so probe
        chains running through it stay intact.
        """
        match, _ = self._probe(key)
        if match is not None:
            self._buckets[match].is_tombstone = True
            self._size -= 1

    def clear(self) -> None:
        """Remove every entry; the capacity is left unchanged."""
        self._buckets = DynamicArray()
        for _ in range(self._capacity):
            self._buckets.append(None)
        self._size = 0

    def get_keys(self) -> DynamicArray:
        """Return a DynamicArray of all live keys, in slot order."""
        keys = DynamicArray()
        for i in range(self._buckets.length()):
            entry = self._buckets[i]
            if entry is not None and not entry.is_tombstone:
                keys.append(entry.key)
        return keys


# ------------------- BASIC TESTING ---------------------------------------- #

if __name__ == "__main__":

    print("\nPDF - put example 1")
    print("-------------------")
    m = HashMap(50, hash_function_1)
    for i in range(150):
        m.put("str" + str(i), i * 100)
        if i % 25 == 24:
            print(m.empty_buckets(), m.table_load(), m.get_size(), m.get_capacity())

    print("\nPDF - put example 2")
    print("-------------------")
    m = HashMap(40, hash_function_2)
    for i in range(50):
        m.put("str" + str(i // 3), i * 100)
        if i % 10 == 9:
            print(m.empty_buckets(), m.table_load(), m.get_size(), m.get_capacity())

    print("\nPDF - table_load example 1")
    print("--------------------------")
    m = HashMap(100, hash_function_1)
    print(m.table_load())
    m.put("key1", 10)
    print(m.table_load())
    m.put("key2", 20)
    print(m.table_load())
    m.put("key1", 30)
    print(m.table_load())

    print("\nPDF - table_load example 2")
    print("--------------------------")
    m = HashMap(50, hash_function_1)
    for i in range(50):
        m.put("key" + str(i), i * 100)
        if i % 10 == 0:
            print(m.table_load(), m.get_size(), m.get_capacity())

    print("\nPDF - empty_buckets example 1")
    print("-----------------------------")
    m = HashMap(100, hash_function_1)
    print(m.empty_buckets(), m.get_size(), m.get_capacity())
    m.put("key1", 10)
    print(m.empty_buckets(), m.get_size(), m.get_capacity())
    m.put("key2", 20)
    print(m.empty_buckets(), m.get_size(), m.get_capacity())
    m.put("key1", 30)
    print(m.empty_buckets(), m.get_size(), m.get_capacity())
    m.put("key4", 40)
    print(m.empty_buckets(), m.get_size(), m.get_capacity())

    print("\nPDF - empty_buckets example 2")
    print("-----------------------------")
    m = HashMap(50, hash_function_1)
    for i in range(150):
        m.put("key" + str(i), i * 100)
        if i % 30 == 0:
            print(m.empty_buckets(), m.get_size(), m.get_capacity())

    print("\nPDF - resize example 1")
    print("----------------------")
    m = HashMap(20, hash_function_1)
    m.put("key1", 10)
    print(m.get_size(), m.get_capacity(), m.get("key1"), m.contains_key("key1"))
    m.resize_table(30)
    print(m.get_size(), m.get_capacity(), m.get("key1"), m.contains_key("key1"))

    print("\nPDF - resize example 2")
    print("----------------------")
    m = HashMap(75, hash_function_2)
    keys = [i for i in range(1, 1000, 13)]
    for key in keys:
        m.put(str(key), key * 42)
    print(m.get_size(), m.get_capacity())

    for capacity in range(111, 1000, 117):
        m.resize_table(capacity)

        if m.table_load() >= 0.5:
            print(
                "Check that capacity gets updated during resize(); "
                "don't wait until the next put()"
            )

        m.put("some key", "some value")
        result = m.contains_key("some key")
        m.remove("some key")

        for key in keys:
            # all inserted keys must be present
            result &= m.contains_key(str(key))
            # NOT inserted keys must be absent
            result &= not m.contains_key(str(key + 1))
        print(
            capacity, result, m.get_size(), m.get_capacity(), round(m.table_load(), 2)
        )

    print("\nPDF - get example 1")
    print("-------------------")
    m = HashMap(30, hash_function_1)
    print(m.get("key"))
    m.put("key1", 10)
    print(m.get("key1"))

    print("\nPDF - get example 2")
    print("-------------------")
    m = HashMap(150, hash_function_2)
    for i in range(200, 300, 7):
        m.put(str(i), i * 10)
    print(m.get_size(), m.get_capacity())
    for i in range(200, 300, 21):
        print(i, m.get(str(i)), m.get(str(i)) == i * 10)
        print(i + 1, m.get(str(i + 1)), m.get(str(i + 1)) == (i + 1) * 10)

    print("\nPDF - contains_key example 1")
    print("----------------------------")
    m = HashMap(10, hash_function_1)
    print(m.contains_key("key1"))
    m.put("key1", 10)
    m.put("key2", 20)
    m.put("key3", 30)
    print(m.contains_key("key1"))
    print(m.contains_key("key4"))
    print(m.contains_key("key2"))
    print(m.contains_key("key3"))
    m.remove("key3")
    print(m.contains_key("key3"))

    print("\nPDF - contains_key example 2")
    print("----------------------------")
    m = HashMap(75, hash_function_2)
    keys = [i for i in range(1, 1000, 20)]
    for key in keys:
        m.put(str(key), key * 42)
    print(m.get_size(), m.get_capacity())
    result = True
    for key in keys:
        # all inserted keys must be present
        result &= m.contains_key(str(key))
        # NOT inserted keys must be absent
        result &= not m.contains_key(str(key + 1))
    print(result)

    print("\nPDF - remove example 1")
    print("----------------------")
    m = HashMap(50, hash_function_1)
    print(m.get("key1"))
    m.put("key1", 10)
    print(m.get("key1"))
    m.remove("key1")
    print(m.get("key1"))
    m.remove("key4")

    print("\nPDF - clear example 1")
    print("---------------------")
    m = HashMap(100, hash_function_1)
    print(m.get_size(), m.get_capacity())
    m.put("key1", 10)
    m.put("key2", 20)
    m.put("key1", 30)
    print(m.get_size(), m.get_capacity())
    m.clear()
    print(m.get_size(), m.get_capacity())

    print("\nPDF - clear example 2")
    print("---------------------")
    m = HashMap(50, hash_function_1)
    print(m.get_size(), m.get_capacity())
    m.put("key1", 10)
    print(m.get_size(), m.get_capacity())
    m.put("key2", 20)
    print(m.get_size(), m.get_capacity())
    m.resize_table(100)
    print(m.get_size(), m.get_capacity())
    m.clear()
    print(m.get_size(), m.get_capacity())

    print("\nPDF - get_keys example 1")
    print("------------------------")
    m = HashMap(10, hash_function_2)
    for i in range(100, 200, 10):
        m.put(str(i), str(i * 10))
    print(m.get_keys())

    m.resize_table(1)
    print(m.get_keys())

    m.put("200", "2000")
    m.remove("100")
    m.resize_table(2)
    print(m.get_keys())


# Patch header for the next file in the original diff, kept verbatim:
# diff --git a/hash_map_sc.py
# File: hash_map_sc.py
# NOTE: reconstructed from a collapsed unified diff -- the "+" line markers
# were dropped and the line breaks restored so the module parses as Python.

# Name: Andrew Scott
# OSU Email: scottand@oregonstate.edu
# Course: CS261 - Data Structures
# Assignment: 6
# Due Date: 2022-06-03
# Description: HashMap implementation using Separate Chaining


from a6_include import DynamicArray, LinkedList, hash_function_1, hash_function_2


class HashMap:
    def __init__(self, capacity: int, function) -> None:
        """
        Initialize new HashMap that uses
        separate chaining for collision resolution
        DO NOT CHANGE THIS METHOD IN ANY WAY
        """
        self._buckets = DynamicArray()
        for _ in range(capacity):
            self._buckets.append(LinkedList())

        self._capacity = capacity
        self._hash_function = function
        self._size = 0

    def __str__(self) -> str:
        """
        Override string method to provide more readable output
        DO NOT CHANGE THIS METHOD IN ANY WAY
        """
        out = ""
        for i in range(self._buckets.length()):
            out += str(i) + ": " + str(self._buckets[i]) + "\n"
        return out

    def get_size(self) -> int:
        """
        Return size of map
        DO NOT CHANGE THIS METHOD IN ANY WAY
        """
        return self._size

    def get_capacity(self) -> int:
        """
        Return capacity of map
        DO NOT CHANGE THIS METHOD IN ANY WAY
        """
        return self._capacity

    # ------------------------------------------------------------------ #

    def _chain_for(self, key: str):
        """
        Return the LinkedList bucket that key hashes to,
        or None when the table has no buckets at all.
        """
        if self._capacity == 0:
            return None
        return self._buckets[self._hash_function(key) % self._capacity]

    def put(self, key: str, value: object) -> None:
        """
        Store value under key, replacing the value if key is already present.

        New pairs go at the front of the key's chain. The table is never
        resized here, except that a zero-capacity table is first given one
        bucket so there is somewhere to store the pair.
        """
        if self._capacity == 0:
            self.resize_table(1)

        chain = self._chain_for(key)
        node = chain.contains(key)
        if node is not None:
            node.value = value
            return

        chain.insert(key, value)
        self._size += 1

    def empty_buckets(self) -> int:
        """Return the number of buckets whose chain has no nodes."""
        empty = 0
        for i in range(self._buckets.length()):
            if self._buckets[i].length() == 0:
                empty += 1
        return empty

    def table_load(self) -> float:
        """
        Return the load factor: stored pairs divided by number of buckets.
        A table with zero capacity reports 0.0.
        """
        if self._capacity == 0:
            return 0.0
        return self._size / self._capacity

    def clear(self) -> None:
        """Remove every pair; the capacity is left unchanged."""
        self._buckets = DynamicArray()
        for _ in range(self._capacity):
            self._buckets.append(LinkedList())
        self._size = 0

    def resize_table(self, new_capacity: int) -> None:
        """
        Rebuild the table with new_capacity buckets and rehash every pair.
        Does nothing if new_capacity is below 1.
        """
        if new_capacity < 1:
            return

        old_buckets = self._buckets
        self._buckets = DynamicArray()
        for _ in range(new_capacity):
            self._buckets.append(LinkedList())
        self._capacity = new_capacity

        # Keys are already unique, so each node can be inserted directly
        # into its new chain; the size does not change.
        for i in range(old_buckets.length()):
            for node in old_buckets[i]:
                self._chain_for(node.key).insert(node.key, node.value)

    def get(self, key: str) -> object:
        """Return the value stored under key, or None if key is absent."""
        chain = self._chain_for(key)
        if chain is None:
            return None
        node = chain.contains(key)
        return None if node is None else node.value

    def contains_key(self, key: str) -> bool:
        """Return True if key is in the map, False otherwise."""
        chain = self._chain_for(key)
        return chain is not None and chain.contains(key) is not None

    def remove(self, key: str) -> None:
        """Remove key and its value; do nothing if key is absent."""
        chain = self._chain_for(key)
        if chain is not None and chain.remove(key):
            self._size -= 1

    def get_keys(self) -> DynamicArray:
        """Return a DynamicArray of all keys, bucket by bucket."""
        keys = DynamicArray()
        for i in range(self._buckets.length()):
            for node in self._buckets[i]:
                keys.append(node.key)
        return keys


def find_mode(da: DynamicArray) -> (DynamicArray, int):
    """
    Find the most frequent value(s) in da.

    da must hold strings, because the values are used as keys with
    hash_function_1. Returns (modes, frequency): modes is a DynamicArray
    of every value that occurs frequency times, in order of first
    appearance. An empty da gives an empty array and frequency 0.
    """
    # max(1, ...) keeps at least one bucket when da has fewer than 3 items.
    counts = HashMap(max(1, da.length() // 3), hash_function_1)

    highest = 0
    for i in range(da.length()):
        value = da[i]
        seen = counts.get(value)
        tally = 1 if seen is None else seen + 1
        counts.put(value, tally)
        if tally > highest:
            highest = tally

    modes = DynamicArray()
    for i in range(da.length()):
        value = da[i]
        if counts.get(value) == highest:
            modes.append(value)
            # zero the count so later repeats of value are not added again
            counts.put(value, 0)
    return modes, highest


# ------------------- BASIC TESTING ---------------------------------------- #

if __name__ == "__main__":

    print("\nPDF - put example 1")
    print("-------------------")
    m = HashMap(50, hash_function_1)
    for i in range(150):
        m.put("str" + str(i), i * 100)
        if i % 25 == 24:
            print(m.empty_buckets(), m.table_load(), m.get_size(), m.get_capacity())

    print("\nPDF - put example 2")
    print("-------------------")
    m = HashMap(40, hash_function_2)
    for i in range(50):
        m.put("str" + str(i // 3), i * 100)
        if i % 10 == 9:
            print(m.empty_buckets(), m.table_load(), m.get_size(), m.get_capacity())

    print("\nPDF - empty_buckets example 1")
    print("-----------------------------")
    m = HashMap(100, hash_function_1)
    print(m.empty_buckets(), m.get_size(), m.get_capacity())
    m.put("key1", 10)
    print(m.empty_buckets(), m.get_size(), m.get_capacity())
    m.put("key2", 20)
    print(m.empty_buckets(), m.get_size(), m.get_capacity())
    m.put("key1", 30)
    print(m.empty_buckets(), m.get_size(), m.get_capacity())
    m.put("key4", 40)
    print(m.empty_buckets(), m.get_size(), m.get_capacity())

    print("\nPDF - empty_buckets example 2")
    print("-----------------------------")
    m = HashMap(50, hash_function_1)
    for i in range(150):
        m.put("key" + str(i), i * 100)
        if i % 30 == 0:
            print(m.empty_buckets(), m.get_size(), m.get_capacity())

    print("\nPDF - table_load example 1")
    print("--------------------------")
    m = HashMap(100, hash_function_1)
    print(m.table_load())
    m.put("key1", 10)
    print(m.table_load())
    m.put("key2", 20)
    print(m.table_load())
    m.put("key1", 30)
    print(m.table_load())

    print("\nPDF - table_load example 2")
    print("--------------------------")
    m = HashMap(50, hash_function_1)
    for i in range(50):
        m.put("key" + str(i), i * 100)
        if i % 10 == 0:
            print(m.table_load(), m.get_size(), m.get_capacity())

    print("\nPDF - clear example 1")
    print("---------------------")
    m = HashMap(100, hash_function_1)
    print(m.get_size(), m.get_capacity())
    m.put("key1", 10)
    m.put("key2", 20)
    m.put("key1", 30)
    print(m.get_size(), m.get_capacity())
    m.clear()
    print(m.get_size(), m.get_capacity())

    print("\nPDF - clear example 2")
    print("---------------------")
    m = HashMap(50, hash_function_1)
    print(m.get_size(), m.get_capacity())
    m.put("key1", 10)
    print(m.get_size(), m.get_capacity())
    m.put("key2", 20)
    print(m.get_size(), m.get_capacity())
    m.resize_table(100)
    print(m.get_size(), m.get_capacity())
    m.clear()
    print(m.get_size(), m.get_capacity())

    print("\nPDF - resize example 1")
    print("----------------------")
    m = HashMap(20, hash_function_1)
    m.put("key1", 10)
    print(m.get_size(), m.get_capacity(), m.get("key1"), m.contains_key("key1"))
    m.resize_table(30)
    print(m.get_size(), m.get_capacity(), m.get("key1"), m.contains_key("key1"))

    print("\nPDF - resize example 2")
    print("----------------------")
    m = HashMap(75, hash_function_2)
    keys = [i for i in range(1, 1000, 13)]
    for key in keys:
        m.put(str(key), key * 42)
    print(m.get_size(), m.get_capacity())

    for capacity in range(111, 1000, 117):
        m.resize_table(capacity)

        m.put("some key", "some value")
        result = m.contains_key("some key")
        m.remove("some key")

        for key in keys:
            # all inserted keys must be present
            result &= m.contains_key(str(key))
            # NOT inserted keys must be absent
            result &= not m.contains_key(str(key + 1))
        print(
            capacity, result, m.get_size(), m.get_capacity(), round(m.table_load(), 2)
        )

    print("\nPDF - get example 1")
    print("-------------------")
    m = HashMap(30, hash_function_1)
    print(m.get("key"))
    m.put("key1", 10)
    print(m.get("key1"))

    print("\nPDF - get example 2")
    print("-------------------")
    m = HashMap(150, hash_function_2)
    for i in range(200, 300, 7):
        m.put(str(i), i * 10)
    print(m.get_size(), m.get_capacity())
    for i in range(200, 300, 21):
        print(i, m.get(str(i)), m.get(str(i)) == i * 10)
        print(i + 1, m.get(str(i + 1)), m.get(str(i + 1)) == (i + 1) * 10)

    print("\nPDF - contains_key example 1")
    print("----------------------------")
    m = HashMap(10, hash_function_1)
    print(m.contains_key("key1"))
    m.put("key1", 10)
    m.put("key2", 20)
    m.put("key3", 30)
    print(m.contains_key("key1"))
    print(m.contains_key("key4"))
    print(m.contains_key("key2"))
    print(m.contains_key("key3"))
    m.remove("key3")
    print(m.contains_key("key3"))

    print("\nPDF - contains_key example 2")
    print("----------------------------")
    m = HashMap(75, hash_function_2)
    keys = [i for i in range(1, 1000, 20)]
    for key in keys:
        m.put(str(key), key * 42)
    print(m.get_size(), m.get_capacity())
    result = True
    for key in keys:
        # all inserted keys must be present
        result &= m.contains_key(str(key))
        # NOT inserted keys must be absent
        result &= not m.contains_key(str(key + 1))
    print(result)

    print("\nPDF - remove example 1")
    print("----------------------")
    m = HashMap(50, hash_function_1)
    print(m.get("key1"))
    m.put("key1", 10)
    print(m.get("key1"))
    m.remove("key1")
    print(m.get("key1"))
    m.remove("key4")

    print("\nPDF - get_keys example 1")
    print("------------------------")
    m = HashMap(10, hash_function_2)
    for i in range(100, 200, 10):
        m.put(str(i), str(i * 10))
    print(m.get_keys())

    m.resize_table(1)
    print(m.get_keys())

    m.put("200", "2000")
    m.remove("100")
    m.resize_table(2)
    print(m.get_keys())

    print("\nPDF - find_mode example 1")
    print("-----------------------------")
    da = DynamicArray(["apple", "apple", "grape", "melon", "melon", "peach"])
    map = HashMap(da.length() // 3, hash_function_1)
    mode, frequency = find_mode(da)
    print(f"Input: {da}\nMode: {mode}, Frequency: {frequency}")

    print("\nPDF - find_mode example 2")
    print("-----------------------------")
    test_cases = (
        [
            "Arch",
            "Manjaro",
            "Manjaro",
            "Mint",
            "Mint",
            "Mint",
            "Ubuntu",
            "Ubuntu",
            "Ubuntu",
            "Ubuntu",
        ],
        ["one", "two", "three", "four", "five"],
        ["2", "4", "2", "6", "8", "4", "1", "3", "4", "5", "7", "3", "3", "2"],
    )

    for case in test_cases:
        da = DynamicArray(case)
        map = HashMap(da.length() // 3, hash_function_2)
        mode, frequency = find_mode(da)
        print(f"Input: {da}\nMode: {mode}, Frequency: {frequency}\n")