From 744a81af25f7a2f8159216a2cc874ea509073c93 Mon Sep 17 00:00:00 2001
From: chengfu
Date: Mon, 11 Jan 2021 13:55:50 +0800
Subject: [PATCH] support Ser && Deser

---
Reviewer notes (below the "---" separator, so not part of the commit message):

* Recovered text. This patch was rebuilt from a copy in which line breaks
  were collapsed and every <...> span was missing. Indentation, blank
  context lines, #include targets, template arguments and cast target
  types are reconstructed and must be confirmed against the original
  commit. Known guesses: the header names in all #include lines, the
  CuckooFilter<...> arguments in example/test1.cc, the static_cast target
  in TwoIndependentMultiplyShift::operator(), and which side of the
  Makefile "test:" line carries the trailing blank. The author address
  on the From: line could not be recovered.
* Changed relative to the recovered text (line counts are unchanged, so
  hunk headers and the diffstat still hold; the "index" blob ids no
  longer describe the post-image):
  - CuckooFilter::Serialize logged "Write from file"; it now logs
    "Write to file".
  - SimpleTabulation::Deserialize logged "Read table_ to file"; it now
    logs "Read table_ from file".
  - Every Serialize/Deserialize takes std::ostream& / std::istream&
    instead of std::ofstream& / std::ifstream&. Callers that pass file
    streams still compile; in-memory streams now work too.
* Not changed, but worth a follow-up:
  - No Serialize/Deserialize checks the stream state after write/read.
  - example/test1.cc overwrites num_inserted with 1000000 right after
    counting the successful Add() calls, so the Contain() loop may probe
    keys that were never inserted.
  - The data is written as raw object bytes (victim_, hash state, bucket
    array) and Deserialize sizes its reads from the receiving object, so
    a file is only meaningful to a filter built with the same template
    arguments and capacity -- TODO confirm whether a header/version
    field is wanted.

 Makefile           |  6 +++-
 example/test1.cc   | 75 ++++++++++++++++++++++++++++++++++++++++++++++
 src/cuckoofilter.h | 17 +++++++++++
 src/hashutil.h     | 48 +++++++++++++++++++++++++++++
 src/packedtable.h  | 13 ++++++++
 src/singletable.h  | 15 ++++++++++
 6 files changed, 173 insertions(+), 1 deletion(-)
 create mode 100644 example/test1.cc

diff --git a/Makefile b/Makefile
index 9da574b..bf20ead 100644
--- a/Makefile
+++ b/Makefile
@@ -23,9 +23,13 @@ all: $(TEST)
 
 clean:
 	rm -f $(TEST) */*.o
 
-test: example/test.o $(LIBOBJECTS) 
+test: example/test.o $(LIBOBJECTS)
 	$(CC) example/test.o $(LIBOBJECTS) $(LDFLAGS) -o $@
 
+test1: example/test1.o $(LIBOBJECTS)
+	$(CC) example/test1.o $(LIBOBJECTS) $(LDFLAGS) -o $@
+
+
 %.o: %.cc ${HEADERS} Makefile
 	$(CC) $(CFLAGS) $< -o $@
diff --git a/example/test1.cc b/example/test1.cc
new file mode 100644
index 0000000..8ca59f1
--- /dev/null
+++ b/example/test1.cc
@@ -0,0 +1,75 @@
+#include "cuckoofilter.h"
+
+#include <assert.h>
+#include <math.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+using cuckoofilter::CuckooFilter;
+
+template <typename T>
+void fpr(T& t, size_t total_items) {
+  // Check non-existing items, a few false positives expected
+  size_t total_queries = 0;
+  size_t false_queries = 0;
+  for (size_t i = total_items; i < 2 * total_items; i++) {
+    if (t.Contain(i) == cuckoofilter::Ok) {
+      false_queries++;
+    }
+    total_queries++;
+  }
+
+  // Output the measured false positive rate
+  std::cout << "false positive rate is "
+            << 100.0 * false_queries / total_queries << "%\n";
+}
+
+int main(int argc, char **argv) {
+  size_t total_items = 1000000;
+
+  // Create a cuckoo filter where each item is of type size_t and
+  // use 12 bits for each item:
+  //    CuckooFilter<size_t, 12> filter(total_items);
+  // To enable semi-sorting, define the storage of cuckoo filter to be
+  // PackedTable, accepting keys of size_t type and making 13 bits
+  // for each key:
+  CuckooFilter<size_t, 13, cuckoofilter::PackedTable> filter(total_items);
+  // CuckooFilter<size_t, 12> filter(total_items);
+  using FilterType = decltype(filter);
+
+  // Insert items to this cuckoo filter
+  size_t num_inserted = 0;
+  for (size_t i = 0; i < total_items; i++, num_inserted++) {
+    if (filter.Add(i) != cuckoofilter::Ok) {
+      break;
+    }
+  }
+
+  std::cout << "actual num_inserted: " << num_inserted << std::endl;
+  num_inserted = 1000000;
+  std::cout << "num_inserted: " << num_inserted << std::endl;
+
+  // Check if previously inserted items are in the filter, expected
+  // true for all items
+  for (size_t i = 0; i < num_inserted; i++) {
+    if(filter.Contain(i) != cuckoofilter::Ok) {
+      std::cout << "I: " << i << " not ok" << std::endl;
+      break;
+    }
+  }
+
+  std::ofstream os("filter.meta", std::ios_base::binary);
+  filter.Serialize(os);
+  os.close();
+  fpr(filter, total_items);
+
+  FilterType filter1(total_items);
+  std::ifstream handler("filter.meta", std::ios_base::binary);
+  filter1.Deserialize(handler);
+  handler.close();
+  fpr(filter1, total_items);
+
+  return 0;
+}
diff --git a/src/cuckoofilter.h b/src/cuckoofilter.h
index bfcfa18..ffc60dc 100644
--- a/src/cuckoofilter.h
+++ b/src/cuckoofilter.h
@@ -99,6 +99,23 @@ class CuckooFilter {
 
   ~CuckooFilter() { delete table_; }
 
+  void Serialize(std::ostream& handler) {
+    uint64_t bytes = sizeof(VictimCache);
+    std::cout << "Write to file: with size: " << bytes << std::endl;
+    handler.write(reinterpret_cast<const char*>(&victim_), bytes);
+    table_->Serialize(handler);
+    hasher_.Serialize(handler);
+  }
+
+  void Deserialize(std::istream& handler) {
+    char* buffer = reinterpret_cast<char*>(&victim_);
+    uint64_t length = sizeof(VictimCache);
+    std::cout << "Read from file: with size: " << length << std::endl;
+    handler.read(buffer, length);
+    table_->Deserialize(handler);
+    hasher_.Deserialize(handler);
+  }
+
   // Add an item to the filter.
   Status Add(const ItemType &item);
 
diff --git a/src/hashutil.h b/src/hashutil.h
index ac903d6..7e68050 100644
--- a/src/hashutil.h
+++ b/src/hashutil.h
@@ -4,6 +4,8 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <sys/types.h>
+#include <fstream>
+#include <iostream>
 
 #include <string>
 
@@ -64,6 +66,27 @@ class TwoIndependentMultiplyShift {
   uint64_t operator()(uint64_t key) const {
     return (add_ + multiply_ * static_cast<decltype(multiply_)>(key)) >> 64;
   }
+
+  void Serialize(std::ostream& handler) {
+    uint64_t bytes = sizeof(multiply_);
+    handler.write(reinterpret_cast<const char*>(&multiply_), bytes);
+    std::cout << "Write multiply_ to file: total bytes: " << bytes << std::endl;
+    bytes = sizeof(add_);
+    handler.write(reinterpret_cast<const char*>(&add_), bytes);
+    std::cout << "Write add_ to file: total bytes: " << bytes << std::endl;
+  }
+
+  void Deserialize(std::istream& handler) {
+    char* buffer = reinterpret_cast<char*>(&multiply_);
+    uint64_t length = sizeof(multiply_);
+    std::cout << "Read multiply_ from file: with size: " << length << std::endl;
+    handler.read(buffer, length);
+    buffer = reinterpret_cast<char*>(&add_);
+    length = sizeof(add_);
+    std::cout << "Read add_ from file: with size: " << length << std::endl;
+    handler.read(buffer, length);
+  }
+
 };
 
 // See Patrascu and Thorup's "The Power of Simple Tabulation Hashing"
@@ -87,6 +110,31 @@ class SimpleTabulation {
     }
     return result;
   }
+  void Serialize(std::ostream& handler) {
+    int row = sizeof(uint64_t);
+    int col = (1 << CHAR_BIT);
+    uint64_t bytes = sizeof(uint64_t);
+    uint64_t total_bytes = row * col * bytes;
+    for (int i = 0; i < row; ++i) {
+      for (int j = 0; j < col; ++j) {
+        handler.write(reinterpret_cast<const char*>(&tables_[i][j]), bytes);
+      }
+    }
+    std::cout << "Write table_ to file: total bytes: " << total_bytes << std::endl;
+  }
+
+  void Deserialize(std::istream& handler) {
+    int row = sizeof(uint64_t);
+    int col = (1 << CHAR_BIT);
+    uint64_t bytes = sizeof(uint64_t);
+    uint64_t total_bytes = row * col * bytes;
+    for (int i = 0; i < row; ++i) {
+      for (int j = 0; j < col; ++j) {
+        handler.read(reinterpret_cast<char*>(&tables_[i][j]), bytes);
+      }
+    }
+    std::cout << "Read table_ from file: total bytes: " << total_bytes << std::endl;
+  }
 };
 
 }
diff --git a/src/packedtable.h b/src/packedtable.h
index 5b84473..2e70663 100644
--- a/src/packedtable.h
+++ b/src/packedtable.h
@@ -1,6 +1,8 @@
 #ifndef CUCKOO_FILTER_PACKED_TABLE_H_
 #define CUCKOO_FILTER_PACKED_TABLE_H_
 
+#include <fstream>
+#include <iostream>
 #include <sstream>
 #include <utility>
 
@@ -426,6 +428,17 @@ class PackedTable {
     return false;
   }
 
+  void Serialize(std::ostream& handler) {
+    std::cout << "Write to file: "<< "total bytes: " << len_ << std::endl;
+    handler.write(buckets_, len_);
+  }
+
+  void Deserialize(std::istream& handler) {
+    std::cout << "Read from file: with size: " << len_ << std::endl;
+    handler.read(buckets_, len_);
+  }
+
+
   // inline size_t NumTagsInBucket(const size_t i) {
   //   size_t num = 0;
   //   for (size_t j = 0; j < tags_per_bucket; j++ ){
diff --git a/src/singletable.h b/src/singletable.h
index 8fd40b3..0a16fb3 100644
--- a/src/singletable.h
+++ b/src/singletable.h
@@ -3,6 +3,8 @@
 
 #include <assert.h>
 
+#include <fstream>
+#include <iostream>
 #include <sstream>
 
 #include "bitsutil.h"
@@ -53,6 +55,19 @@ class SingleTable {
     return kTagsPerBucket * num_buckets_;
   }
 
+  void Serialize(std::ostream& handler) {
+    uint64_t bytes = kBytesPerBucket * (num_buckets_ + kPaddingBuckets);
+    std::cout << "Write to file: "<< "total bytes: " << bytes << std::endl;
+    handler.write(reinterpret_cast<const char*>(buckets_), bytes);
+  }
+
+  void Deserialize(std::istream& handler) {
+    char* buffer = reinterpret_cast<char*>(buckets_);
+    uint64_t length = kBytesPerBucket * (num_buckets_ + kPaddingBuckets);
+    std::cout << "Read from file: with size: " << length << std::endl;
+    handler.read(buffer, length);
+  }
+
   std::string Info() const {
     std::stringstream ss;
     ss << "SingleHashtable with tag size: " << bits_per_tag << " bits \n";