Basic Data Structures
Stack
class StackError(Exception):
def __init__(self, message):
super().__init__(message)
class Stack:
def __init__(self):
self.items = []
def isEmpty(self):
return self.items == []
def push(self, item):
self.items.append(item)
def pop(self):
return self.items.pop()
def peek(self):
if self.isEmpty():
raise StackError(f"peek cannot deal with empty stack.")
else:
return self.items[-1]
def size(self):
return len(self.items)
Queue
class Queue:
def __init__(self):
self.items = []
def isEmpty(self):
return self.items == []
def enqueue(self, item):
self.items.append(item)
def dequeue(self):
return self.items.pop(0)
def size(self):
return len(self.items)
Notice: enqueue() is \(O(1)\), but dequeue() is \(O(n)\) because pop(0) has to shift every remaining element.
If I want both operations to be \(O(1)\), then use collections.deque:
from collections import deque
queue = deque()
queue.append('item1')
queue.append('item2')
first_item = queue.popleft()
print(first_item) # item1
print(queue) # deque(['item2'])
Deque
class Deque:
def __init__(self):
self.items = []
def isEmpty(self):
return self.items == []
def addFront(self, item):
self.items.insert(0, item)
def addRear(self, item):
self.items.append(item)
def removeFront(self):
return self.items.pop(0)
def removeRear(self):
return self.items.pop()
def size(self):
return len(self.items)
Notice: with a Python list as the underlying storage, addRear() and removeRear() are \(O(1)\), while addFront() and removeFront() are \(O(n)\), because insert(0, item) and pop(0) shift every element.
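As with the queue, collections.deque supports \(O(1)\) operations at both ends; a minimal sketch:
from collections import deque

d = deque()
d.append('rear')       # O(1) add at the rear
d.appendleft('front')  # O(1) add at the front
print(d.pop())         # rear
print(d.popleft())     # front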
Binary tree
Construct a binary tree.
class BinaryTree:
def __init__(self, rootObj):
self.key = rootObj # String
self.left = None # BinaryTree
self.right = None # BinaryTree
self.parent = None # BinaryTree
def insert_left(self, newNode):
if self.left is None:
node = BinaryTree(newNode)
self.left = node
node.parent = self
else:
node = BinaryTree(newNode)
node.left = self.left
self.left.parent = node
self.left = node
node.parent = self
    def insert_right(self, newNode):
        if self.right is None:
            node = BinaryTree(newNode)
            self.right = node
            node.parent = self
        else:
            node = BinaryTree(newNode)
            node.right = self.right  # the old right subtree becomes the new node's right child
            self.right.parent = node
            self.right = node
            node.parent = self
Pre-order traversal, In-order traversal, Post-order traversal.
def traverse_tree(node, traverse_type, key_list=None):
if key_list is None:
key_list = []
if node is None:
return
if traverse_type == 'pre': # pre-order traversal
key_list.append(node.key)
traverse_tree(node.left, traverse_type, key_list)
traverse_tree(node.right, traverse_type, key_list)
elif traverse_type == 'in': # in-order traversal
traverse_tree(node.left, traverse_type, key_list)
key_list.append(node.key)
traverse_tree(node.right, traverse_type, key_list)
elif traverse_type == 'post': # post-order traversal
traverse_tree(node.left, traverse_type, key_list)
traverse_tree(node.right, traverse_type, key_list)
key_list.append(node.key)
return key_list
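A quick usage sketch of the two pieces above (the tree shape is chosen arbitrarily for illustration):
tree = BinaryTree('A')
tree.insert_left('B')
tree.insert_right('C')
tree.left.insert_left('D')
tree.left.insert_right('E')
print(traverse_tree(tree, 'pre'))   # ['A', 'B', 'D', 'E', 'C']
print(traverse_tree(tree, 'in'))    # ['D', 'B', 'E', 'A', 'C']
print(traverse_tree(tree, 'post'))  # ['D', 'E', 'B', 'C', 'A']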
Binary heap
This is a min-heap: each node's key is no larger than its children's, so values never decrease from the root down to the leaves.
class BinaryHeap:
def __init__(self):
self.heapList = [0]
self.size = 0
def perc_up(self, i):
while i // 2 > 0:
if self.heapList[i] < self.heapList[i//2]:
self.heapList[i], self.heapList[i//2] = self.heapList[i//2], self.heapList[i]
i = i // 2
else:
break
def perc_down(self, i): # i is the index of heapList
while i * 2 <= self.size:
if i * 2 + 1 > self.size:
target_index = i * 2
else:
if self.heapList[i * 2] < self.heapList[i * 2 + 1]:
target_index = i * 2
else:
target_index = i * 2 + 1
if self.heapList[i] > self.heapList[target_index]:
self.heapList[i], self.heapList[target_index] = self.heapList[target_index], self.heapList[i]
i = target_index
else:
break
def insert(self, item):
self.heapList.append(item)
self.size += 1
self.perc_up(self.size)
def build_heap(self, alist):
self.heapList = [0] + alist
self.size = len(alist)
i = self.size // 2
while i > 0: # heapList[1] = alist[0]
self.perc_down(i)
i -= 1
    def pop_min(self):
        min_value = self.heapList[1]
        last = self.heapList.pop()  # remove the last element
        self.size -= 1
        if self.size > 0:           # if anything is left, move the last element to the root
            self.heapList[1] = last
            self.perc_down(1)
        return min_value
Create a binary heap (build_heap())
\(n\) is the array’s length including the initial \(0\).
Time complexity for building a binary heap (worst case): \(O(n)\) \[ \begin{aligned} &2^0\cdot(\log_2^n-1)+2^1\cdot(\log_2^n-2)+\cdots+2^{\log_2^n-2}\cdot\big(\log_2^n-(\log_2^n-1)\big)\\ &=\log_2^n\cdot(2^0+2^1+\cdots+2^{\log_2^n-2})-\big(1\cdot2^0+2\cdot2^1+3\cdot2^2+\cdots+(\log_2^n-1)\cdot2^{\log_2^n-2}\big)\\ &=n-\log_2^n-1 \end{aligned} \]
myHeap = BinaryHeap()
myHeap.build_heap([10, 9, 7, 8, 3, 4, 1, 7, 1, 1, 2])
Retrieve the min value (pop_min())
Time complexity for popping all values: \(O(n\log_2^n)\)
print(myHeap.pop_min(), myHeap.pop_min(), myHeap.pop_min(), myHeap.pop_min())
## 1 1 1 2
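If you don't want to hand-roll the class, the standard library's heapq module provides the same min-heap operations on a plain list:
import heapq

nums = [10, 9, 7, 8, 3, 4, 1, 7, 1, 1, 2]
heapq.heapify(nums)  # build a min-heap in place, O(n)
print(heapq.heappop(nums), heapq.heappop(nums), heapq.heappop(nums), heapq.heappop(nums))
## 1 1 1 2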
Graph-Prim
Prim's algorithm finds a minimum spanning tree of a weighted undirected graph.
Input: starting node and adjacency (distance) matrix.
Output: the nodes in the order they join the spanning tree.
# Prim Algorithm
M = float('inf')
nodeSet = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
distanceMatrix = [[0, 28, M, M, M, 10, M],
[28, 0, 16, M, M, M, 14],
[M, 16, 0, 12, M, M, M],
[M, M, 12, 0, 22, M, 18],
[M, M, M, 22, 0, 25, 24],
[10, M, M, M, 25, 0, M],
[M, 14, M, 18, 24, M, 0]]
def prim(nodeSet: list, distanceMatrix: list, initNode: str) -> list:
    # Convert node labels to indices
    nodeSetIndex = list(range(len(nodeSet)))
    initNodeIndex = nodeSet.index(initNode)
    setU = [initNodeIndex]  # nodes already in the spanning tree
    setV = {}               # candidate node -> current shortest distance to the tree
    for nodeIndex in nodeSetIndex:
        if nodeIndex != initNodeIndex:
            setV[nodeIndex] = distanceMatrix[initNodeIndex][nodeIndex]
    while len(setU) < len(nodeSet):
        closestNodeIndex = min(setV, key=setV.get)  # candidate closest to the tree
        setU.append(closestNodeIndex)
        setV.pop(closestNodeIndex)
        for nodeIndex in setV:  # relax the remaining candidates
            if distanceMatrix[closestNodeIndex][nodeIndex] < setV[nodeIndex]:
                setV[nodeIndex] = distanceMatrix[closestNodeIndex][nodeIndex]
    return [nodeSet[nodeIndex] for nodeIndex in setU]  # convert indices back to node labels
print(prim(nodeSet, distanceMatrix, 'A'))
## ['A', 'F', 'E', 'D', 'C', 'B', 'G']
Graph-Dijkstra
Dijkstra's algorithm finds the shortest paths from a starting node to every other node in a graph.
Input: starting node and adjacency (distance) matrix.
Output: an array of shortest-path lengths to every other node, and an array recording each node's predecessor on its shortest path.
# Dijkstra Algorithm
M = float('inf')
nodeSet = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
distanceMatrix = [[0, 28, M, M, M, 10, M],
[28, 0, 16, M, M, M, 14],
[M, 12, 0, 12, M, M, M],
[M, M, 16, 0, 22, M, 18],
[M, M, M, 22, 0, 25, 24],
[10, M, M, M, 25, 0, M],
[M, 14, M, 18, 24, M, 0]]
def dijkstra(nodeSet: list, distanceMatrix: list, initNode: str) -> tuple:
    # Convert node labels to indices
    nodeSetIndex = list(range(len(nodeSet)))
    initNodeIndex = nodeSet.index(initNode)
    # Initialize the necessary data structures
    setU = [initNodeIndex]
    setV = {}  # nodeIndex: (distance from initNodeIndex to nodeIndex)
    for nodeIndex in nodeSetIndex:
        if nodeIndex != initNodeIndex:
            setV[nodeIndex] = distanceMatrix[initNodeIndex][nodeIndex]
    distance, path = [0] * len(nodeSetIndex), [None] * len(nodeSetIndex)
    for index, dist in enumerate(distanceMatrix[initNodeIndex]):
        if dist < M and dist != 0:
            distance[index] = dist
            path[index] = initNodeIndex
    while len(setU) < len(nodeSetIndex):
        closestNodeIndex = min(setV, key=setV.get)  # index of the closest candidate node
        closestDistance = setV[closestNodeIndex]    # its distance from the start node
        if closestDistance >= M:
            break
        distance[closestNodeIndex] = closestDistance
        setU.append(closestNodeIndex)
        setV.pop(closestNodeIndex)
        # Update setV and record predecessors
        for nodeIndex in setV:
            distViaClosestNode = closestDistance + distanceMatrix[closestNodeIndex][nodeIndex]
            if distViaClosestNode < setV[nodeIndex]:
                # Going through the closest node is shorter than the recorded distance
                setV[nodeIndex] = distViaClosestNode
                path[nodeIndex] = closestNodeIndex
    return distance, [nodeSet[nodeIndex] if nodeIndex is not None else None for nodeIndex in path]  # indices back to node labels
distance, path = dijkstra(nodeSet, distanceMatrix, 'A')
print(distance, path)
## [0, 28, 44, 56, 35, 10, 42] [None, 'A', 'B', 'C', 'F', 'A', 'B']
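The path list above only records each node's predecessor. A small helper (reconstruct_path is my own name, not part of the algorithm) can expand it into the full route from the start node:
def reconstruct_path(nodeSet, path, initNode, targetNode):
    # Walk backwards through the predecessor list returned by dijkstra()
    route = [targetNode]
    current = targetNode
    while current != initNode:
        predecessor = path[nodeSet.index(current)]
        if predecessor is None:  # the node is unreachable (or is the start node itself)
            break
        route.append(predecessor)
        current = predecessor
    return list(reversed(route))

print(reconstruct_path(nodeSet, path, 'A', 'D'))
## ['A', 'B', 'C', 'D']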
Search And Sort
Bubble sort
\(n\): length of array
Time complexity: \(O(n^2)\)
Each exchange of two elements takes three assignment operations (using a temporary variable).
def bubble_sort(nums):
sortcompleted = False
i = 0
while i < len(nums) - 1 and not sortcompleted:
sortcompleted = True
for j in range(0, len(nums)-1-i):
if nums[j] > nums[j+1]:
nums[j], nums[j+1] = nums[j+1], nums[j]
sortcompleted = False
i += 1
return nums
Insertion sort
\(n\): length of array
Time complexity: \(O(n^2)\)
It shifts elements instead of exchanging them, so it performs fewer assignment operations and is faster than bubble sort.
def insertion_sort(nums):
    for i in range(len(nums) - 1):
        j = i
        temp = nums[j+1]         # the element to insert
        while j >= 0 and temp < nums[j]:
            nums[j+1] = nums[j]  # shift the larger element one position to the right
            j -= 1
        nums[j+1] = temp
    return nums
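A quick sanity check for the two sorting functions above:
print(bubble_sort([5, 2, 9, 1, 5, 6]))     # [1, 2, 5, 5, 6, 9]
print(insertion_sort([5, 2, 9, 1, 5, 6]))  # [1, 2, 5, 5, 6, 9]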
Merge sort
Pass sliced arrays into the function
There are \(\log_2^n\) levels of binary splitting. At each level, slicing the arrays takes \(n\) operations and merging takes \(n\) operations.
Time complexity: \(O\big((n+n)\log_2^n\big)=O(n\log_2^n)\)
Space complexity: \(O(n+\frac{n}{2^1}+\frac{n}{2^2}+\cdots+1)=O(2n-1)=O(n)\)
def merge(left_array, right_array):
merged_array = []
i, j = 0, 0
while i < len(left_array) and j < len(right_array):
if left_array[i] < right_array[j]:
merged_array.append(left_array[i])
i += 1
else:
merged_array.append(right_array[j])
j += 1
if i < len(left_array):
merged_array.extend(left_array[i:])
elif j < len(right_array):
merged_array.extend(right_array[j:])
return merged_array
def merge_sort(nums):
if len(nums) == 1:
return nums
left_array = merge_sort(nums[:len(nums) // 2])
right_array = merge_sort(nums[len(nums) // 2:])
sorted_merged_array = merge(left_array, right_array)
return sorted_merged_array
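A quick run of this version for reference:
print(merge_sort([4, 1, 6, 0, 9, 1, 1, 1, -1]))
## [-1, 0, 1, 1, 1, 1, 4, 6, 9]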
Pass array indices into the function
Time complexity: \(O(n\log_2^n)\); passing indices removes the \(n\) slicing operations at each level.
Space complexity: \(O(n+2\log_2^n)=O(n)\), since each recursive call passes only two indices.
def merge(left_array, right_array):
merged_array = []
i, j = 0, 0
while i < len(left_array) and j < len(right_array):
if left_array[i] < right_array[j]:
merged_array.append(left_array[i])
i += 1
else:
merged_array.append(right_array[j])
j += 1
if i < len(left_array):
merged_array.extend(left_array[i:])
elif j < len(right_array):
merged_array.extend(right_array[j:])
return merged_array
def merge_sort(nums, begin, end):
if end - begin == 0:
return [nums[begin]]
left_array = merge_sort(nums, begin, (begin+end) // 2)
right_array = merge_sort(nums, (begin+end) // 2 + 1, end)
sorted_merged_array = merge(left_array, right_array)
return sorted_merged_array
nums = [4, 1, 6, 0, 9, 1, 1, 1, -1]
sorted_result = merge_sort(nums, 0, len(nums)-1)
Basic Problems
Binary search for ordered sequence
no duplication
Binary search for ordered sequence without duplicate elements.
def binary_search(nums, target):
"""
:nums: ordered number sequence without duplicate elements
:target: target number
:return: target number index or -1
"""
length_nums = len(nums)
begin, end = 0, length_nums-1
while begin <= end:
mid = (end + begin) // 2
if target == nums[mid]:
return mid # find it
elif target == nums[begin]:
return begin # find it
elif target == nums[end]:
return end # find it
elif nums[begin] < target < nums[mid]:
end = mid - 1
continue
elif nums[mid] < target < nums[end]:
begin = mid + 1
continue
        else:  # target is not in nums
return -1
return -1
nums = [1, 2, 4, 5, 6, 7]
nums = [3]
target_index = binary_search(nums, 2)
print(target_index)
\(n\): length of array
Time complexity: \(O(\log_2^n)\)
Space complexity: \(O(1)\)
no duplication (recursion)
Binary search for ordered sequence without duplicate elements.
def binary_search(nums, target):
if len(nums) == 0:
return False
else:
mid = len(nums) // 2
if nums[mid] == target:
return True
elif nums[mid] > target:
return binary_search(nums[:mid], target)
else:
return binary_search(nums[mid+1:], target)
\(n\): length of array
Time complexity: \(O(\log_2^n)\)
Worst-case space complexity: \(O(n+\frac{n}{2}+\frac{n}{4}+\cdots+1)=O(n)\)
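To avoid the \(O(n)\) cost of slicing, the recursion can pass index bounds instead of sliced lists, which brings the auxiliary space down to the \(O(\log_2^n)\) recursion depth. A minimal sketch (binary_search_rec and the lo/hi parameters are my own names):
def binary_search_rec(nums, target, lo=0, hi=None):
    # Recursive binary search over the index range [lo, hi]; no slicing
    if hi is None:
        hi = len(nums) - 1
    if lo > hi:
        return False
    mid = (lo + hi) // 2
    if nums[mid] == target:
        return True
    elif nums[mid] > target:
        return binary_search_rec(nums, target, lo, mid - 1)
    else:
        return binary_search_rec(nums, target, mid + 1, hi)

print(binary_search_rec([1, 2, 4, 5, 6, 7], 2))  # True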
duplication
Binary search for ordered sequence with duplicate elements.
\(n\): length of array
\(k\): the maximum length of a run (cluster) of equal elements
- If \(n\) is enormous and \(k\) is a constant: use binary search to find one index of the target and then scan outward around the cluster (a quick check follows the code below).
\[O(\log_2^n+k)=O(\log_2^n)\]
def binary_search(nums, target):
"""
:nums: ordered number sequence without duplicate elements
:target: target number
:return: target number index or -1
"""
length_nums = len(nums)
begin, end = 0, length_nums-1
while begin <= end:
mid = (end + begin) // 2
        if target == nums[mid]:
            left, right = mid, mid
            while left >= 0 and nums[left] == target:
                left -= 1
            while right < len(nums) and nums[right] == target:
                right += 1
            return left + 1, right - 1
        elif target == nums[begin]:
            left, right = begin, begin
            while right < len(nums) and nums[right] == target:
                right += 1
            return left, right - 1
        elif target == nums[end]:
            left, right = end, end
            while left >= 0 and nums[left] == target:
                left -= 1
            return left + 1, right
elif nums[begin] < target < nums[mid]:
end = mid - 1
continue
elif nums[mid] < target < nums[end]:
begin = mid + 1
continue
        else:  # target is not in nums
return -1
return -1
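A quick check of this version (it returns the inclusive index range of the cluster, or -1 when the target is absent):
nums = [1, 2, 2, 2, 3, 4]
print(binary_search(nums, 2))  # (1, 3)
print(binary_search(nums, 5))  # -1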
- If \(n\) and \(k\) are both enormous: use binary search to find one index of the target and then use binary search once more (or twice more) to find the two ends of the cluster.
\[O(\log_2^n+\log_2^k)\]
def find_left_right_index(begin, mid, end, target):
"""
    Property 1: all occurrences of the target lie between nums[begin] and nums[end]
"""
left_index, right_index = None, None
if nums[mid] == target:
left, right = begin, mid
while left <= right:
m = (left + right) // 2
if nums[m] < target:
left = m + 1
else:
if m-1 == -1 or nums[m-1] != target:
left_index = m
break
else:
right = m - 1
left, right = mid, end
while left <= right:
m = (left + right) // 2
if nums[m] > target:
right = m - 1
else:
if m+1 == len(nums) or nums[m+1] != target:
right_index = m
break
else:
left = m + 1
return left_index, right_index
elif nums[begin] == target:
left, right = begin, mid
while left <= right:
m = (left + right) // 2
if nums[m] > target:
right = m - 1
else:
if m+1 == len(nums) or nums[m+1] != target:
right_index = m
break
else:
left = m + 1
return begin, right_index
elif nums[end] == target:
left, right = mid, end
while left <= right:
m = (left + right) // 2
if nums[m] < target:
left = m + 1
else:
if m-1 == -1 or nums[m-1] != target:
left_index = m
break
else:
right = m - 1
return left_index, end
def binary_search(nums, target):
"""
:nums: ordered number sequence without duplicate elements
:target: target number
:return: target number index or -1
"""
length_nums = len(nums)
begin, end = 0, length_nums-1
while begin <= end:
mid = (end + begin) // 2
if target == nums[mid]:
return find_left_right_index(begin, mid, end, target)
elif target == nums[begin]:
return find_left_right_index(begin, mid, end, target)
elif target == nums[end]:
return find_left_right_index(begin, mid, end, target)
elif nums[begin] < target < nums[mid]:
end = mid - 1
continue
elif nums[mid] < target < nums[end]:
begin = mid + 1
continue
        else:  # target is not in nums
return -1
return -1
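Usage of this version; note that find_left_right_index reads the global variable nums, so nums must be defined before binary_search is called:
nums = [1, 2, 2, 2, 3, 4]
print(binary_search(nums, 2))
## (1, 3)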
Find the only duplicate or the only unique number
Find the only duplicate number
The slow pointer moves one step at a time; let \(s_1\) be the distance it travels.
The fast pointer moves two steps at a time; let \(s_2\) be the distance it travels, so \(s_2=2s_1\).
Let \(s\) be the distance from the starting point to the beginning of the loop, \(L\) the length of the loop, and \(l\) the distance from the beginning of the loop to the meeting point, with \(K\in N\).
The two pointers start from the beginning at the same time and eventually meet at some point inside the loop. Therefore,
\[s_1=s+K\cdot L+l\]
\[\frac{s_2-s_1}{L}=\frac{s_1}{L}=K+\frac{s+l}{L}\in N^*\]
so \(s+l\) is a multiple of \(L\). Now send one pointer back to the starting point while the other stays at the meeting point, and move both one step at a time; after \(s\) steps they are bound to meet at the beginning of the loop.
Task:
There are \(n+1\) elements, and each element's value is one of the consecutive integers from \(1\) to \(n\). Exactly one value appears more than once. Find this duplicated value.
Treating each value as an index turns the array into a linked list; since one value is duplicated, the list must contain a cycle.
def findDuplicate(nums):
    # Slow pointer p1 and fast pointer p2 start at the beginning;
    # p1 moves one step and p2 moves two steps at a time until they meet.
    # p1 and p2 are indices into the list.
    p1, p2 = 0, 0
    while True:
        p1 = nums[p1]
        p2 = nums[nums[p2]]
        if p1 == p2:
            break
    # Send the slow pointer p1 back to the start; then both pointers move one
    # step at a time until they meet again at the beginning of the loop.
    # The index of that meeting point is the duplicated value.
    p1 = 0
    while True:
        p1 = nums[p1]
        p2 = nums[p2]
        if p1 == p2:
            return p1
print(findDuplicate([2, 4, 3, 1, 3]))
## 3
Find the only unique number
XOR: comparing two binary numbers bit by bit, a result bit is 0 where the corresponding bits are equal and 1 where they differ.
Properties of XOR:
\(a \oplus a=0\)
\(a \oplus 0=a\)
it satisfies the commutative law (and is associative)
Therefore: \[a \oplus b \oplus c \oplus a \oplus c=(a \oplus a) \oplus (c \oplus c) \oplus b=b\]
Task:
Given an array in which exactly one element appears an odd number of times and every other element appears an even number of times, find the element with the odd number of occurrences.
def findUnique(nums):
if len(nums) == 1:
return nums[0]
else:
target_num = nums[0]
for i in range(1, len(nums)):
target_num ^= nums[i]
return target_num
nums = [1, 3, 5, 7, 3, 5, 0, 0, 1]
print(findUnique(nums))
## 7
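The same XOR fold can also be written with the standard library:
from functools import reduce
from operator import xor

print(reduce(xor, [1, 3, 5, 7, 3, 5, 0, 0, 1]))
## 7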
DP
0/1 knapsack problem
The storage complexity of this method is \(O(NM)\), where \(N\) is the number of items and \(M\) is the knapsack capacity.
def knapsack(values, weights, capacity):
    n = len(values)
    # Initialize the DP table of size (n+1) x (capacity+1) with zeros
    dp = [[0 for _ in range(capacity + 1)] for _ in range(n + 1)]
    # Fill the DP table
    for i in range(1, n + 1):
        for w in range(1, capacity + 1):
            # If item i is heavier than the current capacity w, it cannot be taken
            if weights[i-1] > w:
                dp[i][w] = dp[i-1][w]
            else:
                # Take the better of skipping the item or putting it into the knapsack
                dp[i][w] = max(dp[i-1][w], dp[i-1][w-weights[i-1]] + values[i-1])
    # The last cell holds the maximum total value that fits into the knapsack
    return dp[n][capacity]
# Example
weights = [2, 3, 6, 5]
values = [6, 3, 5, 4]
capacity = 9
print(knapsack(values, weights, capacity))
## 11
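The full DP table also lets you recover which items were chosen: walk backwards from dp[n][capacity] and, whenever the value changes between row i-1 and row i, item i-1 must have been taken. A sketch under the same inputs (knapsack_with_items is my own helper name):
def knapsack_with_items(values, weights, capacity):
    n = len(values)
    dp = [[0] * (capacity + 1) for _ in range(n + 1)]
    for i in range(1, n + 1):
        for w in range(1, capacity + 1):
            if weights[i-1] > w:
                dp[i][w] = dp[i-1][w]
            else:
                dp[i][w] = max(dp[i-1][w], dp[i-1][w-weights[i-1]] + values[i-1])
    # Backtrack: if the value changed when item i-1 became available, it was taken
    chosen, w = [], capacity
    for i in range(n, 0, -1):
        if dp[i][w] != dp[i-1][w]:
            chosen.append(i - 1)
            w -= weights[i-1]
    return dp[n][capacity], list(reversed(chosen))

print(knapsack_with_items(values, weights, capacity))
## (11, [0, 2])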
0/1 knapsack problem with lower storage complexity
The storage complexity of this optimized method is \(O(M)\), where \(M\) is the knapsack capacity.
def knapsack(values, weights, capacity):
    N = len(values)
    dp = [0] * (capacity + 1)
    for i in range(N):
        # Iterate capacities from high to low so that dp[j - weights[i]] still
        # refers to the previous item's row (each item is used at most once)
        for j in range(capacity, 0, -1):
            if weights[i] > j:
                # The item does not fit; it will not fit for smaller j either
                break
            else:
                # The item fits: take the better of skipping it or putting it in
                dp[j] = max(dp[j - weights[i]] + values[i], dp[j])
    return dp[-1]
# Example
weights = [2, 3, 6, 5]
values = [6, 3, 5, 4]
capacity = 9
print(knapsack(values, weights, capacity))
## 11