Skip to content

Commit

Permalink
HyperLogLog: add warning for when estimate is close to error correction threshold (#92)
Browse files Browse the repository at this point in the history

HyperLogLog: add warning for when estimate is close to error correction threshold
  • Loading branch information
KPJoshi authored and ekzhu committed May 31, 2019
1 parent cd90e35 commit 8a27de9
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion datasketch/hyperloglog.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import struct, copy
import numpy as np
import warnings
try:
from .hyperloglog_const import _thresholds, _raw_estimate, _bias
except ImportError:
Expand Down Expand Up @@ -133,7 +134,11 @@ def count(self):
# Use HyperLogLog estimation function
e = self.alpha * float(self.m ** 2) / np.sum(2.0**(-self.reg))
# Small range correction
if e <= (5.0 / 2.0) * self.m:
small_range_threshold = (5.0 / 2.0) * self.m
if abs(e-small_range_threshold)/small_range_threshold < 0.15:
warnings.warn(("Warning: estimate is close to error correction threshold. "
+"Output may not satisfy HyperLogLog accuracy guarantee."))
if e <= small_range_threshold:
num_zero = self.m - np.count_nonzero(self.reg)
return self._linearcounting(num_zero)
# Normal range, no correction
Expand Down

0 comments on commit 8a27de9

Please sign in to comment.