diff --git a/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java b/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java
index 89639c289..068b03e55 100644
--- a/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java
+++ b/src/main/java/com/clearspring/analytics/stream/cardinality/RegisterSet.java
@@ -57,6 +57,7 @@ public static int getSizeForCount(int count) {
     }
 
     public void set(int position, int value) {
+        value &= 0x1f; // keep the low 5 bits so an oversized value cannot clobber adjacent registers
         int bucketPos = position / LOG2_BITS_PER_WORD;
         int shift = REGISTER_SIZE * (position - (bucketPos * LOG2_BITS_PER_WORD));
         this.M[bucketPos] = (this.M[bucketPos] & ~(0x1f << shift)) | (value << shift);
@@ -69,6 +70,7 @@ public int get(int position) {
     }
 
     public boolean updateIfGreater(int position, int value) {
+        value &= 0x1f; // keep the low 5 bits so the value always fits the 0x1f register mask
         int bucket = position / LOG2_BITS_PER_WORD;
         int shift = REGISTER_SIZE * (position - (bucket * LOG2_BITS_PER_WORD));
         int mask = 0x1f << shift;
diff --git a/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java b/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java
index a374b088a..b778fd902 100644
--- a/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java
+++ b/src/main/java/com/clearspring/analytics/stream/membership/BloomFilter.java
@@ -178,6 +178,31 @@ public static BloomFilter deserialize(byte[] bytes) {
         return filter;
     }
 
+    /**
+     * Estimates how many distinct elements have been added to this filter.
+     * <p>
+     * Based on the article "cardinality estimation for dynamic bloomfilter": with m buckets,
+     * k hash functions and X non-empty buckets, n is approximately
+     * ln(1 - X/m) / (k * ln(1 - 1/m)).
+     *
+     * @return the rounded estimate; 0 when no bucket is set, and Long.MAX_VALUE when every
+     *         bucket is set, because the estimator diverges for a saturated filter
+     */
+    public long getCardinality() {
+        int hashCount = this.getHashCount();
+        int totalBuckets = this.buckets();
+        int usedBuckets = totalBuckets - this.emptyBuckets();
+        if (usedBuckets <= 0) {
+            return 0;
+        }
+        if (usedBuckets >= totalBuckets) {
+            return Long.MAX_VALUE;
+        }
+        // log1p(-x) stays accurate when x is tiny, unlike log(1 - x) for large filters.
+        double fillTerm = Math.log1p(-(double) usedBuckets / totalBuckets);
+        double perBucketTerm = Math.log1p(-1.0 / totalBuckets);
+        return Math.round(fillTerm / perBucketTerm / hashCount);
+    }
 }
 
 class BloomFilterSerializer implements ICompactSerializer {
diff --git a/src/test/java/com/clearspring/analytics/stream/cardinality/RegisterSetTest.java b/src/test/java/com/clearspring/analytics/stream/cardinality/RegisterSetTest.java
index ee3c996b2..86fd4cee8 100644
--- a/src/test/java/com/clearspring/analytics/stream/cardinality/RegisterSetTest.java
+++ b/src/test/java/com/clearspring/analytics/stream/cardinality/RegisterSetTest.java
@@ -28,7 +28,15 @@ public class RegisterSetTest {
     public void testGetAndSet() throws Exception {
         RegisterSet rs = new RegisterSet((int) Math.pow(2, 4));
         rs.set(0, 11);
+        rs.set(1, 11);
+        rs.set(2, 11);
         assertEquals(11, rs.get(0));
+        assertEquals(11, rs.get(1));
+        assertEquals(11, rs.get(2));
+        rs.set(0, 0xFF); // wider than one register: must be masked, not spill over
+        assertEquals(0x1F, rs.get(0));
+        assertEquals(11, rs.get(1));
+        assertEquals(11, rs.get(2));
     }
 
     @Test
diff --git a/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java b/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java
index 178d53c6a..09a75ef29 100644
--- a/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java
+++ b/src/test/java/com/clearspring/analytics/stream/membership/BloomFilterTest.java
@@ -27,6 +27,7 @@
 import java.util.Set;
 import java.util.UUID;
 
+import com.clearspring.analytics.TestUtils;
 import com.clearspring.analytics.stream.membership.KeyGenerator.RandomStringGenerator;
 
 import org.junit.Before;
@@ -56,7 +57,18 @@ public BloomFilterTest() {
     public void clear() {
         bf.clear();
     }
 
+    @Test
+    public void testCardinality() {
+        bf.clear(); // start from an empty filter regardless of fixture ordering
+        org.junit.Assert.assertEquals(0L, bf.getCardinality());
+        Random r = new Random(7799L); // fixed seed keeps the inserted keys reproducible
+        for (int i = 0; i < 7799; i++) {
+            bf.add(Integer.toHexString(r.nextInt()));
+        }
+        org.junit.Assert.assertTrue(bf.getCardinality() > 0);
+    }
+
     @Test
     public void testOne() {
         bf.add("a");