Skip to content

Commit

Permalink
Ensure basename is always a bytestring
Browse files Browse the repository at this point in the history
  • Loading branch information
Ed Holland committed Jun 6, 2019
1 parent 8b8c46d commit 6451757
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions datasketch/lsh.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@ def _integration(f, a, b):
# For when no scipy installed
integrate = _integration


def _ensure_bytestring(bytes_or_str):
if isinstance(bytes_or_str, str):
return bytes_or_str.encode('utf-8')
elif isinstance(bytes_or_str, bytes):
return bytes_or_str
else:
raise ValueError("basename must be either bytes or string type")

def _false_positive_probability(threshold, b, r):
_probability = lambda s : 1 - (1 - s**float(r))**float(b)
a, err = integrate(_probability, 0.0, threshold)
Expand Down Expand Up @@ -115,12 +122,12 @@ def __init__(self, threshold=0.9, num_perm=128, weights=(0.5, 0.5),

self.prepickle = storage_config['type'] == 'redis' if prepickle is None else prepickle

basename = storage_config.get('basename', _random_name(11))
basename = _ensure_bytestring(storage_config.get('basename', _random_name(11)))
self.hashtables = [
unordered_storage(storage_config, name=b''.join([bytes(basename, 'utf-8'), b'_bucket_', bytes([i])]))
unordered_storage(storage_config, name=b''.join([basename, b'_bucket_', bytes([i])]))
for i in range(self.b)]
self.hashranges = [(i*self.r, (i+1)*self.r) for i in range(self.b)]
self.keys = ordered_storage(storage_config, name=b''.join([bytes(basename, 'utf-8'), b'_keys']))
self.keys = ordered_storage(storage_config, name=b''.join([basename, b'_keys']))

@property
def buffer_size(self):
Expand Down

0 comments on commit 6451757

Please sign in to comment.