Skip to content

Commit

Permalink
Rotating bloom (#44)
Browse files Browse the repository at this point in the history
  • Loading branch information
barrust authored Nov 12, 2018
1 parent 022ef22 commit d313ebc
Show file tree
Hide file tree
Showing 8 changed files with 178 additions and 23 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# PyProbables Changelog

### Version 0.2.6
* Bloom Filters:
* Addition of a Rotating Bloom Filter

### Version 0.2.5
* Bloom Filters:
* Addition of an Expanding Bloom Filter

### Version 0.2.0
* Use __slots__

Expand Down
7 changes: 7 additions & 0 deletions docs/source/code.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ ExpandingBloomFilter
.. autoclass:: probables.ExpandingBloomFilter
:members:

RotatingBloomFilter
+++++++++++++++++++++++++++++++

.. autoclass:: probables.RotatingBloomFilter
:members:
:inherited-members:

CountingBloomFilter
+++++++++++++++++++++++++++++++

Expand Down
11 changes: 11 additions & 0 deletions docs/source/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,17 @@ determine the number of elements that will be added.
At this time, it is not possible to import or export an **Expanding Bloom
Filter** but that is a planned feature.

Rotating Bloom Filter
"""""""""""""""""""""""""""""""""""""""""""""""

The **Rotating Bloom Filter** is a specialized version of the standard
Bloom Filter that rolls earlier entries off of the filter as they become
stale. The popping of the queue can be done either programmatically or
automatically.

At this time, it is not possible to import or export a **Rotating Bloom
Filter** but that is a planned feature.


Counting Bloom Filter
"""""""""""""""""""""""""""""""""""""""""""""""
Expand Down
6 changes: 3 additions & 3 deletions probables/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
''' pyprobables module '''
from __future__ import (unicode_literals, absolute_import, print_function)
from . blooms import (BloomFilter, BloomFilterOnDisk, CountingBloomFilter,
ExpandingBloomFilter)
ExpandingBloomFilter, RotatingBloomFilter)
from . countminsketch import (CountMinSketch, HeavyHitters, StreamThreshold,
CountMeanSketch, CountMeanMinSketch)
from . cuckoo import (CuckooFilter, CountingCuckooFilter)
Expand All @@ -12,7 +12,7 @@
__maintainer__ = 'Tyler Barrus'
__email__ = '[email protected]'
__license__ = 'MIT'
__version__ = '0.2.5'
__version__ = '0.2.6'
__credits__ = []
__url__ = 'https://github.com/barrust/pyprobables'
__bugtrack_url__ = 'https://github.com/barrust/pyprobables/issues'
Expand All @@ -22,4 +22,4 @@
'HeavyHitters', 'StreamThreshold', 'CuckooFilter',
'CountingCuckooFilter', 'InitializationError', 'NotSupportedError',
'ProbablesBaseException', 'CuckooFilterFullError',
'ExpandingBloomFilter']
'ExpandingBloomFilter', 'RotatingBloomFilter']
4 changes: 2 additions & 2 deletions probables/blooms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from . bloom import (BloomFilter, BloomFilterOnDisk)
from . countingbloom import (CountingBloomFilter)
from . expandingbloom import (ExpandingBloomFilter)
from . expandingbloom import (ExpandingBloomFilter, RotatingBloomFilter)

__all__ = ['BloomFilter', 'BloomFilterOnDisk', 'CountingBloomFilter',
'ExpandingBloomFilter']
'ExpandingBloomFilter', 'RotatingBloomFilter']
112 changes: 99 additions & 13 deletions probables/blooms/expandingbloom.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ class ExpandingBloomFilter(object):
At this point, the expanding Bloom Filter does not support \
`export` or `import` '''

__slots__ = ['_blooms', '__fpr', '__est_elements', '__hash_func',
'__added_elements']

def __init__(self, est_elements=None, false_positive_rate=None,
hash_function=None):
''' initialize '''
Expand Down Expand Up @@ -62,16 +65,6 @@ def elements_added(self):
''' int: The total number of elements added '''
return self.__added_elements

def __add_bloom_filter(self):
''' build a new bloom and add it on! '''
blm = BloomFilter(self.__est_elements, self.__fpr, self.__hash_func)
self._blooms.append(blm)

def __check_for_growth(self):
''' detereming if the bloom filter should automatically grow '''
if self._blooms[-1].elements_added >= self.__est_elements:
self.__add_bloom_filter()

def check(self, key):
''' Check to see if the key is in the Bloom Filter
Expand Down Expand Up @@ -103,8 +96,8 @@ def add(self, key, force=False):
Args:
key (str): The element to be inserted
force (bool): `True` will force it to be inserted, even if it \
likely has been inserted before \
`False` will only insert if not found in the Bloom Filter '''
likely has been inserted before `False` will \
only insert if not found in the Bloom Filter '''
hashes = self._blooms[0].hashes(key)
self.add_alt(hashes, force)

Expand All @@ -115,8 +108,101 @@ def add_alt(self, hashes, force=False):
hashes (list): A list of integers representing the key to insert
force (bool): `True` will force it to be inserted, even if \
it likely has been inserted before \
`False` will only insert if not found in the Bloom Filter '''
`False` will only insert if not found in the \
Bloom Filter '''
self.__added_elements += 1
if force or not self.check_alt(hashes):
self.__check_for_growth()
self._blooms[-1].add_alt(hashes)

def __add_bloom_filter(self):
    ''' create a fresh Bloom Filter from the stored estimated elements,
        false positive rate and hash function, and append it to the end
        of the list of Bloom Filters '''
    self._blooms.append(BloomFilter(est_elements=self.__est_elements,
                                    false_positive_rate=self.__fpr,
                                    hash_function=self.__hash_func))

def __check_for_growth(self):
    ''' determine if the Bloom Filter should automatically grow; a new
        Bloom Filter is appended once the newest one has had at least the
        estimated number of elements added to it '''
    newest = self._blooms[-1]
    if newest.elements_added < self.__est_elements:
        # the newest Bloom Filter still has room; nothing to do
        return
    self.__add_bloom_filter()


class RotatingBloomFilter(ExpandingBloomFilter):
    ''' Simple Rotating Bloom Filter implementation that allows for the "older"
        elements added to be removed, in chunks. As the queue fills up, those
        elements inserted earlier will be bulk removed. This also provides the
        user with the opportunity to force the removal (see `pop`) instead of
        waiting for it to happen automatically.

        Args:
            est_elements (int): The number of estimated elements to be added \
            to each Bloom Filter in the queue
            false_positive_rate (float): The desired false positive rate
            max_queue_size (int): The maximum number of Bloom Filters kept \
            in the queue. Total capacity is based on \
            `max_queue_size * est_elements`
            hash_function (function): Hashing strategy function to use \
            `hf(key, number)`
    '''

    # `_blooms` and the element counter already have slots on
    # ExpandingBloomFilter; redeclaring an inherited slot only hides the
    # base class descriptor and wastes space, so just the names introduced
    # here are listed. The double-underscore names are mangled with this
    # class' name and are therefore distinct from the parent's private slots.
    __slots__ = ['__fpr', '__est_elements', '__hash_func', '_queue_size']

    def __init__(self, est_elements=None, false_positive_rate=None,
                 max_queue_size=10, hash_function=None):
        ''' initialize '''
        super(RotatingBloomFilter,
              self).__init__(est_elements=est_elements,
                             false_positive_rate=false_positive_rate,
                             hash_function=hash_function)
        # private copies of the settings, used by this class' own
        # `__add_bloom_filter` when a replacement filter has to be built
        self.__fpr = false_positive_rate
        self.__est_elements = est_elements
        self.__hash_func = hash_function
        self._queue_size = max_queue_size

    @property
    def max_queue_size(self):
        ''' int: The maximum size for the queue '''
        return self._queue_size

    @property
    def current_queue_size(self):
        ''' int: The current size of the queue '''
        return len(self._blooms)

    def add_alt(self, hashes, force=False):
        ''' Add the element represented by hashes into the Bloom Filter

            Args:
                hashes (list): A list of integers representing the key to \
                insert
                force (bool): `True` will force it to be inserted, even if \
                it likely has been inserted before \
                `False` will only insert if not found in the \
                Bloom Filter '''
        # The counter behind the inherited `elements_added` property is a
        # private (name-mangled) attribute of ExpandingBloomFilter. Writing
        # `self.__added_elements` here would update a different attribute
        # and leave `elements_added` stuck at its initial value.
        self._ExpandingBloomFilter__added_elements += 1
        if force or not self.check_alt(hashes):
            self.__rotate_bloom_filter()
            self._blooms[-1].add_alt(hashes)

    def pop(self):
        ''' Pop the oldest Bloom Filter off of the queue, discarding every
            element that was stored in it. If it was the only Bloom Filter
            in the queue, a fresh, empty one takes its place so that the
            filter remains usable. '''
        self.__rotate_bloom_filter(force=True)

    def __rotate_bloom_filter(self, force=False):
        ''' handle determining if/when the Bloom Filter queue needs to be
            rotated

            Args:
                force (bool): `True` drops the oldest Bloom Filter right \
                away; `False` only rotates once the newest Bloom Filter \
                has reached its estimated number of elements '''
        if force:
            self._blooms.pop(0)
            if not self._blooms:
                # never leave the queue empty: `add` and `check` need at
                # least one Bloom Filter to work with
                self.__add_bloom_filter()
            return

        newest = self._blooms[-1]
        # `<` (rather than `!=`) keeps rotating even if the count ever
        # overshoots the estimate, matching the parent's `>=` growth check
        if newest.elements_added < newest.estimated_elements:
            return
        if self.current_queue_size >= self._queue_size:
            # the queue is full: roll the oldest Bloom Filter off first
            self._blooms.pop(0)
        self.__add_bloom_filter()

    def __add_bloom_filter(self):
        ''' build a new bloom and add it on! '''
        blm = BloomFilter(est_elements=self.__est_elements,
                          false_positive_rate=self.__fpr,
                          hash_function=self.__hash_func)
        self._blooms.append(blm)
8 changes: 4 additions & 4 deletions tests/cuckoo_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,23 +114,23 @@ def test_cuckoo_filter_fing_size(self):
''' test bad fingerprint size < 1 '''
def runner():
''' runner '''
cko = CuckooFilter(capacity=100, bucket_size=2, finger_size=0)
CuckooFilter(capacity=100, bucket_size=2, finger_size=0)

self.assertRaises(ValueError, runner)

def test_cuckoo_filter_fing_size_2(self):
''' test bad fingerprint size > 4 '''
def runner():
''' runner '''
cko = CuckooFilter(capacity=100, bucket_size=2, finger_size=5)
CuckooFilter(capacity=100, bucket_size=2, finger_size=5)

self.assertRaises(ValueError, runner)

def test_cuckoo_filter_fing_size_3(self):
''' test valid fingerprint size '''
try:
cko = CuckooFilter(capacity=100, bucket_size=2, finger_size=1)
except:
CuckooFilter(capacity=100, bucket_size=2, finger_size=1)
except ValueError:
self.assertEqual(True, False)
self.assertEqual(True, True)

Expand Down
45 changes: 44 additions & 1 deletion tests/expandingbloom_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
''' Unittest class '''
from __future__ import (unicode_literals, absolute_import, print_function)
import unittest
from probables import (ExpandingBloomFilter)
from probables import (ExpandingBloomFilter, RotatingBloomFilter)

class TestExpandingBloomFilter(unittest.TestCase):

Expand Down Expand Up @@ -54,3 +54,46 @@ def test_ebf_contains(self):
self.assertEqual('this is another test' in blm, True)
self.assertEqual('this is yet another test' in blm, False)
self.assertEqual('this is not another test' in blm, False)


class TestRotatingBloomFilter(unittest.TestCase):
    ''' Unit tests for the RotatingBloomFilter '''

    def test_rbf_init(self):
        ''' a newly built rotating bloom filter reports no expansions and
            the requested maximum queue size '''
        rbf = RotatingBloomFilter(est_elements=10, false_positive_rate=0.05,
                                  max_queue_size=10)
        self.assertEqual(rbf.expansions, 0)
        self.assertEqual(rbf.max_queue_size, 10)

    def test_rfb_rotate(self):
        ''' the queue grows one bloom filter per batch of ten forced adds
            and, once it holds five, the first bloom filter rolls off '''
        rbf = RotatingBloomFilter(est_elements=10, false_positive_rate=0.05,
                                  max_queue_size=5)

        def add_batch(start):
            ''' force-add the ten numbers beginning at `start` '''
            for num in range(start, start + 10):
                rbf.add('{}'.format(num), force=True)

        self.assertEqual(rbf.expansions, 0)
        rbf.add('test')
        self.assertEqual(rbf.expansions, 0)

        add_batch(0)
        self.assertEqual(rbf.expansions, 1)
        self.assertEqual(rbf.current_queue_size, 2)
        self.assertEqual(rbf.check('test'), True)

        # while the queue is below its maximum, 'test' must remain findable
        for start, queue_size in ((10, 3), (20, 4), (30, 5)):
            add_batch(start)
            self.assertEqual(rbf.check('test'), True)
            self.assertEqual(rbf.current_queue_size, queue_size)

        add_batch(40)
        self.assertEqual(rbf.check('test'), False)  # it should roll off
        self.assertEqual(rbf.current_queue_size, 5)

0 comments on commit d313ebc

Please sign in to comment.