0%

Top K out of N numbers

Prerequisite

  1. What is a binary-heap
  2. It takes O(N) to heapify an array contains N integer

N*Log(K)

1
2
3
4
5
6
7
def get_k_smallest(k: int, arr: List[int]):
heap = []
for num in arr:
heapq.heappush(heap, -num) # we push -num as we want to build a max-heap
if len(heap) > k:
heapq.heappop(heap)
return [-num for num in heap]

K*Log(N)

1
2
3
def get_k_smallest(k: int, arr: List[int]):
heapq.heapify(arr)
return [heapq.heappop(arr) for i in range(k)]

K*Log(K)

1
2
3
4
5
6
7
8
9
10
def get_k_smallest(k: int, arr: List[int]):
heapq.heapify(arr)
heap = [(arr[0], 0)]
ret = []
for _ in range(k):
num, idx = heapq.heappop(heap)
ret.append(num)
heapq.heappush(heap, (arr[idx*2+1], idx*2+1))
heapq.heappush(heap, (arr[idx*2+2], idx*2+2))
return ret

Test

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import heapq
import timeit


def timer(function):
def new_function(*args, **kw):
start_time = timeit.default_timer()
ret = function(*args, **kw)
elapsed = timeit.default_timer() - start_time
print('Function "{name}" took {time} seconds to complete.'.format(name=function.__name__, time=elapsed))
return ret
return new_function

@timer
def addition():
total = 0
for i in range(0,1000000):
total += i
return total



@timer
def get_k_smallest1(k: int, arr: List[int]):
heap = []
for num in arr:
heapq.heappush(heap, -num) # we push -num as we want to build a max-heap
if len(heap) > k:
heapq.heappop(heap)
return [-num for num in heap]


@timer
def get_k_smallest2(k: int, arr: List[int]):
heapq.heapify(arr)
return [heapq.heappop(arr) for i in range(k)]


@timer
def get_k_smallest3(k: int, arr: List[int]):
heapq.heapify(arr)
heap = [(arr[0], 0)]
ret = []
for _ in range(k):
num, idx = heapq.heappop(heap)
ret.append(num)
heapq.heappush(heap, (arr[idx*2+1], idx*2+1))
heapq.heappush(heap, (arr[idx*2+2], idx*2+2))
return ret

k = 5
arr = [randrange(0, 100000000) for i in range(10000000)]

ans = set(sorted(arr)[:k])
assert ans == set(get_k_smallest1(k, list(arr)))
assert ans == set(get_k_smallest2(k, list(arr)))
assert ans == set(get_k_smallest3(k, list(arr)))

'''
ยป /usr/local/opt/python@3.8/bin/python3.8 /Users/yongcao/Program/tmp/py.py yongs-MBP
Function "get_k_smallest1" took 3.168419736999999 seconds to complete.
Function "get_k_smallest2" took 0.3814774750000005 seconds to complete.
Function "get_k_smallest3" took 0.37742361599999974 seconds to complete.
'''