Skip to content

Commit

Permalink
Feature: Introducing a Cuckoo Filter for local snapshots (iotaledger#…
Browse files Browse the repository at this point in the history
…1100)

* Feat: Introducing the CuckooFilter for local snapshots

* Refactor: changed indentation
  • Loading branch information
Hans Moog authored and GalRogozinski committed Mar 26, 2019
1 parent d4f2a50 commit 32c3d06
Show file tree
Hide file tree
Showing 4 changed files with 1,019 additions and 0 deletions.
108 changes: 108 additions & 0 deletions src/main/java/com/iota/iri/utils/BitSetUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package com.iota.iri.utils;

import java.util.BitSet;

/**
 * This class offers utility methods to transform BitSets into different data types.
 *
 * All conversions share the same bit order: bit {@code i} of the {@link BitSet} corresponds to the bit selected by the
 * mask {@code 0x80 >>> (i % 8)} in the byte at index {@code i / 8}, which means that the most significant bit of every
 * byte comes first.
 */
public class BitSetUtils {
    /**
     * Mask selecting the most significant bit of a byte, which is the first bit that gets copied for every byte.
     */
    private static final int HIGHEST_BIT_MASK = 0x80;

    /**
     * This method converts a byte array to a {@link BitSet} of the given size ({@code sizeOfBitSet}) by copying the
     * bits of every byte into the {@link BitSet} in reverse order (starting with the given {@code startOffset}).
     *
     * It first checks if the parameters are valid and if the byte array is big enough to provide enough bits for the
     * provided parameters and then starts the copying process.
     *
     * @param byteArray byte array that shall be converted
     * @param startOffset the amount of bytes to skip at the start
     * @param sizeOfBitSet the desired amount of bits in the resulting {@link BitSet}
     * @return the {@link BitSet} containing the extracted bytes
     * @throws IllegalArgumentException if {@code startOffset} or {@code sizeOfBitSet} is negative or if the byte array
     *                                  does not contain {@code sizeOfBitSet} bits after the {@code startOffset}
     */
    public static BitSet convertByteArrayToBitSet(byte[] byteArray, int startOffset, int sizeOfBitSet) {
        if (sizeOfBitSet < 0) {
            throw new IllegalArgumentException("the size of the BitSet must not be negative: " + sizeOfBitSet);
        }
        if (countAvailableBits(byteArray, startOffset) < sizeOfBitSet) {
            throw new IllegalArgumentException("the byte[] is too small to create a BitSet of length " + sizeOfBitSet);
        }

        BitSet result = new BitSet(sizeOfBitSet);
        for (int i = 0; i < sizeOfBitSet; i++) {
            // walk through every byte from its most significant to its least significant bit
            int mask = HIGHEST_BIT_MASK >>> (i % Byte.SIZE);
            if ((byteArray[startOffset + i / Byte.SIZE] & mask) != 0) {
                result.set(i);
            }
        }

        return result;
    }

    /**
     * Does the same as {@link #convertByteArrayToBitSet(byte[], int, int)} but defaults to copy all remaining bytes
     * following the {@code startOffset}.
     *
     * @param byteArray byte array that shall be converted
     * @param startOffset the amount of bytes to skip at the start
     * @return the {@link BitSet} containing the extracted bytes
     * @throws IllegalArgumentException if {@code startOffset} is negative or bigger than the length of the byte array
     *                                  or if the remaining bytes hold more than {@link Integer#MAX_VALUE} bits
     */
    public static BitSet convertByteArrayToBitSet(byte[] byteArray, int startOffset) {
        long remainingBits = countAvailableBits(byteArray, startOffset);
        if (remainingBits < 0) {
            throw new IllegalArgumentException("the start offset " + startOffset
                    + " exceeds the length of the byte[] (" + byteArray.length + ")");
        }
        if (remainingBits > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("the byte[] is too big to be converted into a single BitSet");
        }

        return convertByteArrayToBitSet(byteArray, startOffset, (int) remainingBits);
    }

    /**
     * Does the same as {@link #convertByteArrayToBitSet(byte[], int, int)} but defaults to a {@code startOffset} of 0
     * and the full length for {@code sizeOfBitSet} resulting in converting the full byte array.
     *
     * @param byteArray byte array that shall be converted
     * @return the {@link BitSet} containing the bytes of the byte array
     * @throws IllegalArgumentException if the byte array holds more than {@link Integer#MAX_VALUE} bits
     */
    public static BitSet convertByteArrayToBitSet(byte[] byteArray) {
        return convertByteArrayToBitSet(byteArray, 0);
    }

    /**
     * Converts a {@link BitSet} into a byte array by copying the bits in groups of 8 into the resulting bytes of the
     * array.
     *
     * The size of the resulting array is derived from {@link BitSet#length()} (the index of the highest set bit plus
     * one), rounded up to full bytes. Unset bits following the highest set bit are therefore only represented as far
     * as they are needed to fill up the last byte, and an empty {@link BitSet} results in an empty array.
     *
     * Every set bit gets written to the position that {@link #convertByteArrayToBitSet(byte[], int, int)} reads it
     * from (most significant bit of every byte first). All other bits of the array stay zero.
     *
     * @param bitSet the {@link BitSet} that shall be converted.
     * @return the byte array containing the bits of the {@link BitSet} in groups of 8
     */
    public static byte[] convertBitSetToByteArray(BitSet bitSet) {
        // round up to full bytes (the unsigned shift keeps the result correct even if adding 7 overflows the int)
        byte[] result = new byte[(bitSet.length() + Byte.SIZE - 1) >>> 3];

        // only visit the bits that are set - the array is already initialized with zeros
        for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
            result[i / Byte.SIZE] |= HIGHEST_BIT_MASK >>> (i % Byte.SIZE);

            // i + 1 would overflow and make nextSetBit throw
            if (i == Integer.MAX_VALUE) {
                break;
            }
        }

        return result;
    }

    /**
     * Calculates the amount of bits that the byte array offers after skipping {@code startOffset} bytes.
     *
     * The calculation is done with {@code long} values because the amount of bits of a big byte array does not fit
     * into an {@code int}.
     *
     * @param byteArray byte array that shall be examined
     * @param startOffset the amount of bytes to skip at the start
     * @return the amount of bits following the {@code startOffset} (negative if the offset exceeds the array length)
     * @throws IllegalArgumentException if {@code startOffset} is negative
     */
    private static long countAvailableBits(byte[] byteArray, int startOffset) {
        if (startOffset < 0) {
            throw new IllegalArgumentException("the start offset must not be negative: " + startOffset);
        }

        return ((long) byteArray.length - startOffset) * Byte.SIZE;
    }
}
86 changes: 86 additions & 0 deletions src/main/java/com/iota/iri/utils/datastructure/CuckooFilter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package com.iota.iri.utils.datastructure;

/**
* The Cuckoo Filter is a probabilistic data structure that supports fast set membership testing.
*
* It is very similar to a bloom filter in that they both are very fast and space efficient. Both the bloom filter and
* cuckoo filter also report false positives on set membership.
*
* Cuckoo filters are a relatively new data structure, described in a paper in 2014 by Fan, Andersen, Kaminsky, and
* Mitzenmacher (https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf). They improve upon the design of the bloom
* filter by offering deletion, limited counting, and a bounded false positive probability, while still maintaining a
* similar space complexity.
*
* They use cuckoo hashing to resolve collisions and are essentially a compact cuckoo hash table.
*
* Every operation is offered for {@link String} and for {@code byte[]} items. This interface does not define how a
* {@link String} item relates to a {@code byte[]} item (e.g. which charset is used), so the two variants should not be
* mixed for the same element unless the implementation documents that they are interchangeable.
*/
public interface CuckooFilter {
/**
* Adds a new element to the filter that then can be queried with {@link #contains(String)}.
*
* NOTE(review): this contract lists both a {@code false} return value and an {@link IndexOutOfBoundsException} for a
* filter that is too full. Which of the two a caller has to expect is decided by the implementation - confirm there.
*
* @param item element that shall be stored in the filter
* @return true if the insertion was successful (if the filter is too full this can return false)
* @throws IndexOutOfBoundsException if we try to add an element to an already too full filter
*/
boolean add(String item) throws IndexOutOfBoundsException;

/**
* Adds a new element to the filter that then can be queried with {@link #contains(byte[])}.
*
* NOTE(review): this contract lists both a {@code false} return value and an {@link IndexOutOfBoundsException} for a
* filter that is too full. Which of the two a caller has to expect is decided by the implementation - confirm there.
*
* @param item element that shall be stored in the filter
* @return true if the insertion was successful (if the filter is too full this can return false)
* @throws IndexOutOfBoundsException if we try to add an element to an already too full filter
*/
boolean add(byte[] item) throws IndexOutOfBoundsException;

/**
* Queries for the existence of an element in the filter.
*
* NOTE(review): the false positive rate of ~3% is a property of the concrete implementation and its parameters and
* cannot be guaranteed by this interface - confirm against the implementation that is used.
*
* @param item element that shall be checked
* @return true if it is "probably" in the filter (~3% false positives) or false if it is "definitely" not in there
*/
boolean contains(String item);

/**
* Queries for the existence of an element in the filter.
*
* NOTE(review): the false positive rate of ~3% is a property of the concrete implementation and its parameters and
* cannot be guaranteed by this interface - confirm against the implementation that is used.
*
* @param item element that shall be checked
* @return true if it is "probably" in the filter (~3% false positives) or false if it is "definitely" not in there
*/
boolean contains(byte[] item);

/**
* Deletes an element from the filter.
*
* @param item element that shall be deleted from filter
* @return true if something was deleted matching the element or false otherwise
*/
boolean delete(String item);

/**
* Deletes an element from the filter.
*
* @param item element that shall be deleted from filter
* @return true if something was deleted matching the element or false otherwise
*/
boolean delete(byte[] item);

/**
* This method returns the actual capacity of the filter.
*
* Since the capacity has to be a power of two and we want to reach a load factor of less than 0.955, the actual
* capacity is bigger than the amount of items that was requested when the filter was created.
*
* NOTE(review): the power of two and the load factor of 0.955 describe the expected implementation; this interface
* declares no constructor, so the exact sizing rules have to be confirmed in the implementing class.
*
* @return the actual capacity of the filter
*/
int getCapacity();

/**
* This method returns the amount of elements that are stored in the filter.
*
* Since a cuckoo filter can have collisions the size is not necessarily identical with the amount of items that we
* added.
*
* @return the amount of stored items
*/
int size();
}
Loading

0 comments on commit 32c3d06

Please sign in to comment.