forked from iotaledger/iri
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature: Introducing a Cuckoo Filter for local snapshots (iotaledger#1100)
* Feat: Introducing the CuckooFilter for local snapshots
* Refactor: changed indentation
- Loading branch information
1 parent
d4f2a50
commit 32c3d06
Showing
4 changed files
with
1,019 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
package com.iota.iri.utils; | ||
|
||
import java.util.BitSet; | ||
|
||
/**
 * This class offers utility methods to transform BitSets into different data types.
 *
 * All conversions use the same bit order: bit index {@code 0} of the {@link BitSet} corresponds to the most
 * significant bit of the first byte, bit index {@code 7} to its least significant bit, bit index {@code 8} to the
 * most significant bit of the second byte and so on.
 */
public class BitSetUtils {
    /**
     * This method converts a byte array to a {@link BitSet} of the given size ({@code sizeOfBitSet}) by copying the
     * bits of every byte into the {@link BitSet}, most significant bit first (starting with the byte at the given
     * {@code startOffset}).
     *
     * It first checks if the byte array is big enough to provide enough bits for the provided parameters and then
     * starts the copying process.
     *
     * @param byteArray byte array that shall be converted
     * @param startOffset the amount of bytes to skip at the start
     * @param sizeOfBitSet the desired amount of bits in the resulting {@link BitSet}
     * @return the {@link BitSet} containing the extracted bits
     * @throws IllegalArgumentException if {@code startOffset} or {@code sizeOfBitSet} is negative, if
     *                                  {@code startOffset} lies behind the end of the array or if the array does not
     *                                  hold {@code sizeOfBitSet} bits after the {@code startOffset}
     */
    public static BitSet convertByteArrayToBitSet(byte[] byteArray, int startOffset, int sizeOfBitSet) {
        if(sizeOfBitSet < 0) {
            throw new IllegalArgumentException("the length of the BitSet must not be negative: " + sizeOfBitSet);
        }
        if(availableBits(byteArray, startOffset) < sizeOfBitSet) {
            throw new IllegalArgumentException("the byte[] is too small to create a BitSet of length " + sizeOfBitSet);
        }

        BitSet result = new BitSet(sizeOfBitSet);
        for(int i = 0; i < sizeOfBitSet; i++) {
            // bit i lives in byte (i / 8) behind the offset, counted from the most significant bit downwards
            int bitMask = 0x80 >>> (i % Byte.SIZE);
            if((byteArray[startOffset + i / Byte.SIZE] & bitMask) != 0) {
                result.set(i);
            }
        }

        return result;
    }

    /**
     * Does the same as {@link #convertByteArrayToBitSet(byte[], int, int)} but defaults to copy all remaining bytes
     * following the {@code startOffset}.
     *
     * @param byteArray byte array that shall be converted
     * @param startOffset the amount of bytes to skip at the start
     * @return the {@link BitSet} containing the extracted bits
     * @throws IllegalArgumentException if {@code startOffset} is negative or lies behind the end of the array or if
     *                                  the remaining bytes hold more bits than an {@code int} can count
     */
    public static BitSet convertByteArrayToBitSet(byte[] byteArray, int startOffset) {
        long remainingBits = availableBits(byteArray, startOffset);
        if(remainingBits > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("the byte[] is too big to be converted into a single BitSet");
        }

        return convertByteArrayToBitSet(byteArray, startOffset, (int) remainingBits);
    }

    /**
     * Does the same as {@link #convertByteArrayToBitSet(byte[], int, int)} but defaults to a {@code startOffset} of 0
     * and the full length for {@code sizeOfBitSet} resulting in converting the full byte array.
     *
     * @param byteArray byte array that shall be converted
     * @return the {@link BitSet} containing the bits of the byte array
     */
    public static BitSet convertByteArrayToBitSet(byte[] byteArray) {
        return convertByteArrayToBitSet(byteArray, 0);
    }

    /**
     * Converts a {@link BitSet} into a byte array by copying the bits in groups of 8 into the resulting bytes of the
     * array, most significant bit first.
     *
     * The size of the result is derived from {@link BitSet#length()} (index of the highest set bit plus one), rounded
     * up to full bytes. Unset bits behind the highest set bit are therefore not represented: the unused low bits of
     * the last byte are zero and the array can be shorter than a byte array the {@link BitSet} was created from.
     *
     * @param bitSet the {@link BitSet} that shall be converted.
     * @return the byte array containing the bits of the {@link BitSet} in groups of 8
     */
    public static byte[] convertBitSetToByteArray(BitSet bitSet) {
        // round up to full bytes; the unsigned shift keeps the result correct even if "length + 7" overflows
        byte[] result = new byte[(bitSet.length() + (Byte.SIZE - 1)) >>> 3];

        // the array starts out zeroed, so only the set bits have to be written
        for(int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
            result[i / Byte.SIZE] |= 0x80 >>> (i % Byte.SIZE);
        }

        return result;
    }

    /**
     * Validates the {@code startOffset} and calculates how many bits the array offers behind it.
     *
     * The calculation is done with {@code long} values so that it cannot overflow for very large arrays.
     *
     * @param byteArray byte array that shall be examined
     * @param startOffset the amount of bytes to skip at the start
     * @return the amount of bits that follow the {@code startOffset}
     * @throws IllegalArgumentException if {@code startOffset} is negative or lies behind the end of the array
     */
    private static long availableBits(byte[] byteArray, int startOffset) {
        if(startOffset < 0 || startOffset > byteArray.length) {
            throw new IllegalArgumentException(
                    "the startOffset " + startOffset + " is outside of the byte[] of length " + byteArray.length);
        }

        return (byteArray.length - (long) startOffset) * Byte.SIZE;
    }
}
86 changes: 86 additions & 0 deletions
86
src/main/java/com/iota/iri/utils/datastructure/CuckooFilter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
package com.iota.iri.utils.datastructure; | ||
|
||
/**
 * The Cuckoo Filter is a probabilistic data structure that supports fast set membership testing.
 *
 * It is very similar to a bloom filter in that they both are very fast and space efficient. Both the bloom filter and
 * cuckoo filter also report false positives on set membership.
 *
 * Cuckoo filters are a relatively new data structure, described in a paper in 2014 by Fan, Andersen, Kaminsky, and
 * Mitzenmacher (https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf). They improve upon the design of the bloom
 * filter by offering deletion, limited counting, and a bounded false positive probability, while still maintaining a
 * similar space complexity.
 *
 * They use cuckoo hashing to resolve collisions and are essentially a compact cuckoo hash table.
 */
public interface CuckooFilter {
    /**
     * Adds a new element to the filter that then can be queried with {@link #contains(String)}.
     *
     * NOTE(review): this contract names two outcomes for a filter that is too full (returning {@code false} and
     * throwing); confirm against the implementation which one applies in which situation.
     *
     * @param item element that shall be stored in the filter
     * @return true if the insertion was successful (if the filter is too full this can return false)
     * @throws IndexOutOfBoundsException if we try to add an element to an already too full filter
     */
    boolean add(String item) throws IndexOutOfBoundsException;

    /**
     * Adds a new element to the filter that then can be queried with {@link #contains(byte[])}.
     *
     * NOTE(review): this contract names two outcomes for a filter that is too full (returning {@code false} and
     * throwing); confirm against the implementation which one applies in which situation.
     *
     * @param item element that shall be stored in the filter
     * @return true if the insertion was successful (if the filter is too full this can return false)
     * @throws IndexOutOfBoundsException if we try to add an element to an already too full filter
     */
    boolean add(byte[] item) throws IndexOutOfBoundsException;

    /**
     * Queries for the existence of an element in the filter.
     *
     * @param item element that shall be checked
     * @return true if it is "probably" in the filter (~3% false positives) or false if it is "definitely" not in there
     */
    boolean contains(String item);

    /**
     * Queries for the existence of an element in the filter.
     *
     * @param item element that shall be checked
     * @return true if it is "probably" in the filter (~3% false positives) or false if it is "definitely" not in there
     */
    boolean contains(byte[] item);

    /**
     * Deletes an element from the filter.
     *
     * @param item element that shall be deleted from the filter
     * @return true if something was deleted matching the element or false otherwise
     */
    boolean delete(String item);

    /**
     * Deletes an element from the filter.
     *
     * @param item element that shall be deleted from the filter
     * @return true if something was deleted matching the element or false otherwise
     */
    boolean delete(byte[] item);

    /**
     * This method returns the actual capacity of the filter.
     *
     * Since the capacity has to be a power of two and we want to reach a load factor of less than 0.955, the actual
     * capacity is bigger than the amount of items we passed into the constructor.
     *
     * @return the actual capacity of the filter
     */
    int getCapacity();

    /**
     * This method returns the amount of elements that are stored in the filter.
     *
     * Since a cuckoo filter can have collisions, the size is not necessarily identical to the amount of items that
     * we added.
     *
     * @return the amount of stored items
     */
    int size();
}
Oops, something went wrong.