-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReservoirSampling.py
36 lines (25 loc) · 1.04 KB
/
ReservoirSampling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import random
class ReservoirSampling(object):
# choose K elements from a unknown length of data stream,
# where each element has a k/N probability to enter the selected reservoir
# N is the length of the current data stream.
# Assume N >> k
def __init__(self, K):
self.reservoir = []
self.sizeK = K
self.N = 0
# a new element coming from the data stream
# this new element has k/n probability entering the reservoir
def get_next_element(self, newEle):
# add first k elements into the reservoir
if len(self.reservoir) < self.sizeK:
self.reservoir.append(newEle)
else:
# for every new element,put this element into the reservoir with prob = k/N
# choose a integer from [0,N-1]
j = random.randint(0,self.N-1)
if j < self.sizeK:
self.reservoir[j] = newEle
self.N += 1