diff --git a/README.md b/README.md index c54ab0f..f5d314a 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Spatial Bloom Filters have been first proposed for use in location-privacy appli The libSBF-cpp repository contains the C++ implementation of the SBF data structure. The SBF class is provided, as well as various methods for managing the filter: - once the filter is constructed, the user can insert elements into it through the `Insert` method. The `Check` method, on the contrary, is used to verify weather an element belongs to one of the mapped sets. -- methods `SetAreaFpp`, `GetFilterSparsity`, `GetFilterFpp`, `GetAreaEmersion` and `GetAreaFlotation` allow to compute and return several probabilistic properties of the constructed filter. +- methods `SetAreaFpp`, `GetFilterSparsity`, `GetFilterFpp` and `GetAreaEmersion` allow to compute and return several probabilistic properties of the constructed filter. - finally, two methods are provided to print out the filter: `PrintFilter` prints the filter and related statistics to the standard output whereas `SaveToDisk` writes the filter onto a CSV file. For more details on the implementation, and how to use the library please refer to the [homepage](http://sbf.csr.unibo.it/ "SBF project homepage") of the project. 
diff --git a/sbf.cpp b/sbf.cpp index 1e0ae45..dca879a 100644 --- a/sbf.cpp +++ b/sbf.cpp @@ -265,6 +265,7 @@ void SBF::PrintFilter(int mode) printf("Number of cells: %d\n",this->cells); printf("Size in Bytes: %d\n",this->size); printf("Filter sparsity: %.5f\n",this->GetFilterSparsity()); + printf("Filter a-priori fpp: %.5f\n", this->GetFilterAPrioriFpp()); printf("Filter fpp: %.5f\n",this->GetFilterFpp()); printf("Number of mapped elements: %d\n",this->members); printf("Number of hash collisions: %d\n",this->collisions); @@ -299,10 +300,9 @@ void SBF::PrintFilter(int mode) printf("\n"); } - printf("\nEmersion and Fpp:\n"); + printf("\nEmersion, Fpp, Isep:\n"); for(int j = 1; j < this->AREA_number+1; j++){ - if(this->GetAreaFlotation(j)) printf("Area %d: emersion %.5f, flotation safe, fpp %.5f",j,this->GetAreaEmersion(j),this->AREA_fpp[j]); - else printf("Area %d: emersion %.5f, flotation unsafe, fpp %.5f",j,this->GetAreaEmersion(j),this->AREA_fpp[j]); + printf("Area %d: emersion %.5f, a-priori fpp %.5f, fpp %.5f, a-priori isep %.5f",j,this->GetAreaEmersion(j),this->AREA_a_priori_fpp[j],this->AREA_fpp[j],this->AREA_a_priori_isep[j]); printf("\n"); } printf("\n"); @@ -333,11 +333,13 @@ void SBF::SaveToDisk(std::string path, int mode) myfile << "members" << ";" << this->members << std::endl; myfile << "collisions" << ";" << this->collisions << std::endl; myfile << "sparsity" << ";" << this->GetFilterSparsity() << std::endl; + myfile << "a-priori fpp" << ";" << this->GetFilterAPrioriFpp() << std::endl; myfile << "fpp" << ";" << this->GetFilterFpp() << std::endl; // area-related parameters: - // area,members,self-collisions,cells,emersion,flotation,fpp + // area,members,self-collisions,cells,emersion,a-priori fpp,fpp,a-priori isep + myfile << "area" << ";" << "members" << ";" << "self-collisions" << ";" << "cells" << ";" << "emersion" << ";" << "a-priori fpp" << ";" << "fpp" << ";" << "a-priori isep" << std::endl; for(int j = 1; j < this->AREA_number+1; j++){ - myfile 
<< j << ";" << this->AREA_members[j] << ";" << this->AREA_self_collisions[j] << ";" << this->AREA_cells[j] << ";" << this->GetAreaEmersion(j) << ";" << this->GetAreaFlotation(j) << ";" << this->AREA_fpp[j] << std::endl; + myfile << j << ";" << this->AREA_members[j] << ";" << this->AREA_self_collisions[j] << ";" << this->AREA_cells[j] << ";" << this->GetAreaEmersion(j) << ";" << this->AREA_a_priori_fpp[j] << ";" << this->AREA_fpp[j] << ";" << this->AREA_a_priori_isep[j] << std::endl; } } @@ -486,10 +488,61 @@ int SBF::Check(char *string, int size) } +// Computes a-priori area-specific inter-set error probability (a_priori_isep) +void SBF::SetAPrioriAreaIsep() +{ + double p1; + int nfill; + + + for (int i = this->AREA_number; i>0; i--) { + nfill = 0; + + for (int j = i+1; j <= this->AREA_number; j++) { + nfill += this->AREA_members[j]; + } + + p1 = (double)(1 - 1 / (double)this->cells); + p1 = (double)(1 - (double)pow(p1, this->HASH_number*nfill)); + p1 = (double)pow(p1, this->HASH_number); + + this->AREA_a_priori_isep[i] = (float)p1; + + } +} + + +// Computes a-priori area-specific false positives probability (a_priori_fpp) +void SBF::SetAPrioriAreaFpp() +{ + double p; + int c; + + for (int i = this->AREA_number; i>0; i--) { + c = 0; + + for (int j = i; j <= this->AREA_number; j++) { + c += this->AREA_members[j]; + } + + p = (double)(1 - 1 / (double)this->cells); + p = (double)(1 - (double)pow(p, this->HASH_number*c)); + p = (double)pow(p, this->HASH_number); + + this->AREA_a_priori_fpp[i] = (float)p; + + for (int j = i; j<this->AREA_number; j++) { + this->AREA_a_priori_fpp[i] -= this->AREA_a_priori_fpp[j + 1]; + } + if (AREA_a_priori_fpp[i]<0) AREA_a_priori_fpp[i] = 0; + } +} + + // Computes a-posteriori area-specific false positives probability (fpp) void SBF::SetAreaFpp() { - float p; + double p; int c; for(int i = this->AREA_number; i>0; i--){ @@ -499,8 +552,10 @@ void SBF::SetAreaFpp() c += this->AREA_cells[j]; } - p = (float)c/(float)this->cells; - this->AREA_fpp[i] 
= (float)pow(p,this->HASH_number); + p = (double)c/(double)this->cells; + p = (double)pow(p,this->HASH_number); + + this->AREA_fpp[i] = (float)p; for(int j=i; j<this->AREA_number; j++){ this->AREA_fpp[i] -= this->AREA_fpp[j+1]; @@ -510,6 +565,13 @@ void SBF::SetAreaFpp() } +// Returns the number of inserted elements for the input area +int SBF::GetAreaMembers(int area) +{ + return this->AREA_members[area]; +} + + // Returns the sparsity of the entire SBF float SBF::GetFilterSparsity() { @@ -524,21 +586,35 @@ float SBF::GetFilterSparsity() } +// Returns the a-priori false positive probability over the entire filter +// (i.e. not area-specific) +float SBF::GetFilterAPrioriFpp() +{ + double p; + + p = (double)(1 - 1 / (double)this->cells); + p = (double)(1 - (double)pow(p, this->HASH_number*this->members)); + p = (double)pow(p, this->HASH_number); + + return (float)p; +} + + // Returns the a-posteriori false positive probability over the entire filter // (i.e. not area-specific) float SBF::GetFilterFpp() { - float p,fpp; + double p; int c = 0; // Counts non-zero cells for(int i = 1; i < this->AREA_number+1; i++){ c += this->AREA_cells[i]; } - p = (float)c/(float)this->cells; + p = (double)c/(double)this->cells; - fpp = (float)(pow(p,this->HASH_number)); + p = (double)(pow(p,this->HASH_number)); - return fpp; + return (float)p; } // Returns the emersion value for the input area @@ -555,17 +631,6 @@ float SBF::GetAreaEmersion(int area) } -// Returns the flotation value for the input area. 
TRUE if it is not possible -// for an element belonging to the input area to be recognized as belonging to a -// different area, FALSE if collisions may cause this to happen -bool SBF::GetAreaFlotation(int area) -{ - - if((this->AREA_members[area]==0) || (this->HASH_number==0)) return true; - else{ - return (this->AREA_members[area]*this->HASH_number) - this->AREA_self_collisions[area] - this->AREA_cells[area] < this->HASH_number; - } -} } //namespace sbf \ No newline at end of file diff --git a/sbf.h b/sbf.h index c012622..a30a7a0 100644 --- a/sbf.h +++ b/sbf.h @@ -70,7 +70,9 @@ namespace sbf { int *AREA_members; int *AREA_cells; int *AREA_self_collisions; + float *AREA_a_priori_fpp; float *AREA_fpp; + float *AREA_a_priori_isep; int BIG_end; // Private methods (commented in the sbf.cpp) @@ -176,6 +178,8 @@ namespace sbf { this->AREA_cells = new int[this->AREA_number + 1]; this->AREA_self_collisions = new int[this->AREA_number + 1]; this->AREA_fpp = new float[this->AREA_number + 1]; + this->AREA_a_priori_fpp = new float[this->AREA_number + 1]; + this->AREA_a_priori_isep = new float[this->AREA_number + 1]; // Parameter initializations this->members = 0; @@ -185,6 +189,8 @@ namespace sbf { this->AREA_cells[a] = 0; this->AREA_self_collisions[a] = 0; this->AREA_fpp[a] = -1; + this->AREA_a_priori_fpp[a] = -1; + this->AREA_a_priori_isep[a] = -1; } } @@ -197,6 +203,8 @@ namespace sbf { delete[] AREA_cells; delete[] AREA_self_collisions; delete[] AREA_fpp; + delete[] AREA_a_priori_fpp; + delete[] AREA_a_priori_isep; for (int j = 0; j<this->HASH_number; j++) { delete[] HASH_salt[j]; } @@ -209,11 +217,14 @@ namespace sbf { void SaveToDisk(std::string path, int mode); void Insert(char *string, int size, int area); int Check(char *string, int size); + int GetAreaMembers(int area); float GetFilterSparsity(); float GetFilterFpp(); + float GetFilterAPrioriFpp(); void SetAreaFpp(); + void SetAPrioriAreaFpp(); + void SetAPrioriAreaIsep(); float GetAreaEmersion(int area); - bool 
GetAreaFlotation(int area); }; } //namespace sbf diff --git a/test-app/test-app.cpp b/test-app/test-app.cpp index eb3b905..6ded5cd 100644 --- a/test-app/test-app.cpp +++ b/test-app/test-app.cpp @@ -37,9 +37,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. int main() { std::ifstream myfile; - std::string line, a, member; + std::string line, a, member, path; + std::ofstream rate_file; int len, line_count, area, area_check, n, narea, nver; - int well_recognised, false_positives, exchanged_elements; + int well_recognised, false_positives, iser; + int* area_iser; + int* area_fp; char* element; sbf::SBF* myFilter = NULL; @@ -221,7 +224,9 @@ int main() { } //calculates filter's probabilistic properties + myFilter->SetAPrioriAreaFpp(); myFilter->SetAreaFpp(); + myFilter->SetAPrioriAreaIsep(); //prints filter to the standard output or saves it to disk if (print_mode == 1) myFilter->PrintFilter(0); @@ -235,7 +240,11 @@ int main() { //operates a self check upon the filter (i.e. runs the Check method for each //of the already mapped elements) - well_recognised = 0, exchanged_elements = 0; + well_recognised = 0, iser = 0; + area_iser = new int[narea+1]; + for (int a = 0; a < narea + 1; a++) { + area_iser[a] = 0; + } myfile.open(construction_dataset.c_str()); if (myfile.is_open()) { @@ -254,13 +263,30 @@ int main() { if (area == area_check) well_recognised++; else { - exchanged_elements++; + iser++; + area_iser[area]++; } } - printf("Well recognised: %d\n", well_recognised); - printf("Elements assigned to a wrong set: %d\n", exchanged_elements); - printf("Exchange rate: %.5f\n", (float)exchanged_elements / (float)n); + printf("Elements assigned to the correct set: %d\n", well_recognised); + printf("Inter-set errors: %d\n", iser); + printf("Inter-set errors rate: %.5f\n", (float)iser / (float)n); + + + if (print_mode == 3 || print_mode == 4) { + path = "ise" + buf + ".csv"; + rate_file.open(path.c_str()); + rate_file.setf(std::ios_base::fixed, std::ios_base::floatfield); + 
rate_file.precision(5); + // area-related parameters: + // area,inter-set errors,inter-set error rate + rate_file << "area" << ";" << "errors" << ";" << "rate" << std::endl; + for (int j = 1; j < narea + 1; j++) { + rate_file << j << ";" << area_iser[j] << ";" << (float)area_iser[j] / (float)myFilter->GetAreaMembers(j) << std::endl; + } + rate_file.close(); + } + myfile.close(); } else { @@ -288,6 +314,10 @@ int main() { //operates a verification using non members dataset well_recognised = 0, false_positives = 0; + area_fp = new int[narea + 1]; + for (int a = 0; a < narea + 1; a++) { + area_fp[a] = 0; + } myfile.open(verification_dataset.c_str()); if (myfile.is_open()) { @@ -301,12 +331,31 @@ int main() { memcpy(element, line.c_str(), len); area = myFilter->Check(element, len); - if (area == 0)well_recognised++; - else false_positives++; + if (area == 0) well_recognised++; + else + { + false_positives++; + area_fp[area]++; + } } - printf("Well recognised: %d\n", well_recognised); + printf("True negatives: %d\n", well_recognised); printf("False positives: %d\n", false_positives); printf("False positives rate: %.5f\n", (float)false_positives / (float)nver); + + if (print_mode == 3 || print_mode == 4) { + path = "fp" + buf + ".csv"; + rate_file.open(path.c_str()); + rate_file.setf(std::ios_base::fixed, std::ios_base::floatfield); + rate_file.precision(5); + // area-related parameters: + // area,false positives,false positives rate + rate_file << "area" << ";" << "false positives" << ";" << "rate" << std::endl; + for (int j = 1; j < narea + 1; j++) { + rate_file << j << ";" << area_fp[j] << ";" << (float)area_fp[j] / (float)nver << std::endl; + } + rate_file.close(); + } + myfile.close(); } else {