Skip to content

Commit

Permalink
Probabilistic properties fixed
Browse files Browse the repository at this point in the history
Some probabilistic properties were refined. Added a-priori-fpp (for both
the entire filter and each subset), added a-priori-isep (inter-set
errors probability). Flotation feature was removed.
  • Loading branch information
calderonil committed May 31, 2017
1 parent 6708637 commit a26dc45
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 35 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Spatial Bloom Filters have been first proposed for use in location-privacy appli

The libSBF-cpp repository contains the C++ implementation of the SBF data structure. The SBF class is provided, as well as various methods for managing the filter:
- once the filter is constructed, the user can insert elements into it through the `Insert` method. The `Check` method, on the contrary, is used to verify whether an element belongs to one of the mapped sets.
- methods `SetAreaFpp`, `GetFilterSparsity`, `GetFilterFpp`, `GetAreaEmersion` and `GetAreaFlotation` allow to compute and return several probabilistic properties of the constructed filter.
- methods `SetAreaFpp`, `GetFilterSparsity`, `GetFilterFpp` and `GetAreaEmersion` allow the user to compute and return several probabilistic properties of the constructed filter.
- finally, two methods are provided to print out the filter: `PrintFilter` prints the filter and related statistics to the standard output whereas `SaveToDisk` writes the filter onto a CSV file.

For more details on the implementation, and how to use the library please refer to the [homepage](http://sbf.csr.unibo.it/ "SBF project homepage") of the project.
Expand Down
111 changes: 88 additions & 23 deletions sbf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ void SBF::PrintFilter(int mode)
printf("Number of cells: %d\n",this->cells);
printf("Size in Bytes: %d\n",this->size);
printf("Filter sparsity: %.5f\n",this->GetFilterSparsity());
printf("Filter a-priori fpp: %.5f\n", this->GetFilterAPrioriFpp());
printf("Filter fpp: %.5f\n",this->GetFilterFpp());
printf("Number of mapped elements: %d\n",this->members);
printf("Number of hash collisions: %d\n",this->collisions);
Expand Down Expand Up @@ -299,10 +300,9 @@ void SBF::PrintFilter(int mode)
printf("\n");
}

printf("\nEmersion and Fpp:\n");
printf("\nEmersion, Fpp, Isep:\n");
for(int j = 1; j < this->AREA_number+1; j++){
if(this->GetAreaFlotation(j)) printf("Area %d: emersion %.5f, flotation safe, fpp %.5f",j,this->GetAreaEmersion(j),this->AREA_fpp[j]);
else printf("Area %d: emersion %.5f, flotation unsafe, fpp %.5f",j,this->GetAreaEmersion(j),this->AREA_fpp[j]);
printf("Area %d: emersion %.5f, a-priori fpp %.5f, fpp %.5f, a-priori isep %.5f",j,this->GetAreaEmersion(j),this->AREA_a_priori_fpp[j],this->AREA_fpp[j],this->AREA_a_priori_isep[j]);
printf("\n");
}
printf("\n");
Expand Down Expand Up @@ -333,11 +333,13 @@ void SBF::SaveToDisk(std::string path, int mode)
myfile << "members" << ";" << this->members << std::endl;
myfile << "collisions" << ";" << this->collisions << std::endl;
myfile << "sparsity" << ";" << this->GetFilterSparsity() << std::endl;
myfile << "a-priori fpp" << ";" << this->GetFilterAPrioriFpp() << std::endl;
myfile << "fpp" << ";" << this->GetFilterFpp() << std::endl;
// area-related parameters:
// area,members,self-collisions,cells,emersion,flotation,fpp
// area,members,self-collisions,cells,emersion,a-priori fpp,fpp,a-priori isep
myfile << "area" << ";" << "members" << ";" << "self-collisions" << ";" << "cells" << ";" << "emersion" << ";" << "a-priori fpp" << ";" << "fpp" << ";" << "a-priori isep" << std::endl;
for(int j = 1; j < this->AREA_number+1; j++){
myfile << j << ";" << this->AREA_members[j] << ";" << this->AREA_self_collisions[j] << ";" << this->AREA_cells[j] << ";" << this->GetAreaEmersion(j) << ";" << this->GetAreaFlotation(j) << ";" << this->AREA_fpp[j] << std::endl;
myfile << j << ";" << this->AREA_members[j] << ";" << this->AREA_self_collisions[j] << ";" << this->AREA_cells[j] << ";" << this->GetAreaEmersion(j) << ";" << this->AREA_a_priori_fpp[j] << ";" << this->AREA_fpp[j] << ";" << this->AREA_a_priori_isep[j] << std::endl;
}

}
Expand Down Expand Up @@ -486,10 +488,61 @@ int SBF::Check(char *string, int size)
}


// Computes the a-priori area-specific inter-set error probability
// (a_priori_isep) and stores it in AREA_a_priori_isep[1..AREA_number].
//
// For area i the stored value is
//     (1 - (1 - 1/cells)^(HASH_number * nfill))^HASH_number
// where nfill = AREA_members[i+1] + ... + AREA_members[AREA_number], i.e. the
// number of elements inserted into the areas with an index greater than i.
// Index 0 of AREA_a_priori_isep is not written.
void SBF::SetAPrioriAreaIsep()
{
    // Loop-invariant base term (1 - 1/cells), computed once for all areas.
    const double base = 1.0 - 1.0 / static_cast<double>(this->cells);

    // Running total of AREA_members for the areas above the current one.
    // Areas are visited from the highest index down, so one accumulator
    // replaces re-summing AREA_members for every area. A 64-bit counter is
    // used so the total cannot overflow an int.
    long long nfill = 0;

    for (int i = this->AREA_number; i > 0; i--) {
        // The exponent is formed in floating point: the int product
        // HASH_number * nfill could overflow for large filters.
        const double exponent =
            static_cast<double>(this->HASH_number) * static_cast<double>(nfill);

        double p1 = 1.0 - pow(base, exponent);
        p1 = pow(p1, this->HASH_number);

        this->AREA_a_priori_isep[i] = static_cast<float>(p1);

        // Area i counts as "above" for the next (lower) area.
        nfill += this->AREA_members[i];
    }
}


// Computes the a-priori area-specific false positive probability
// (a_priori_fpp) and stores it in AREA_a_priori_fpp[1..AREA_number].
//
// For area i the value starts as
//     (1 - (1 - 1/cells)^(HASH_number * c))^HASH_number
// where c = AREA_members[i] + ... + AREA_members[AREA_number]. The values
// already computed for the areas with a greater index are then subtracted
// from it, and a negative result is clamped to 0.
// Index 0 of AREA_a_priori_fpp is not written.
void SBF::SetAPrioriAreaFpp()
{
    // Loop-invariant base term (1 - 1/cells), computed once for all areas.
    const double base = 1.0 - 1.0 / static_cast<double>(this->cells);

    // Running total of AREA_members for the current area and all areas above
    // it. Areas are visited from the highest index down, so one accumulator
    // replaces re-summing AREA_members for every area. A 64-bit counter is
    // used so the total cannot overflow an int.
    long long c = 0;

    for (int i = this->AREA_number; i > 0; i--) {
        c += this->AREA_members[i];

        // The exponent is formed in floating point: the int product
        // HASH_number * c could overflow for large filters.
        const double exponent =
            static_cast<double>(this->HASH_number) * static_cast<double>(c);

        double p = 1.0 - pow(base, exponent);
        p = pow(p, this->HASH_number);

        this->AREA_a_priori_fpp[i] = static_cast<float>(p);

        // Remove the share already attributed to the areas above i. The
        // subtraction is kept term by term, in float, so that rounding stays
        // the same as in the original computation.
        for (int j = i; j < this->AREA_number; j++) {
            this->AREA_a_priori_fpp[i] -= this->AREA_a_priori_fpp[j + 1];
        }
        if (this->AREA_a_priori_fpp[i] < 0) this->AREA_a_priori_fpp[i] = 0;
    }
}


// Computes a-posteriori area-specific false positives probability (fpp)
void SBF::SetAreaFpp()
{
float p;
double p;
int c;

for(int i = this->AREA_number; i>0; i--){
Expand All @@ -499,8 +552,10 @@ void SBF::SetAreaFpp()
c += this->AREA_cells[j];
}

p = (float)c/(float)this->cells;
this->AREA_fpp[i] = (float)pow(p,this->HASH_number);
p = (double)c/(double)this->cells;
p = (double)pow(p,this->HASH_number);

this->AREA_fpp[i] = (float)p;

for(int j=i; j<this->AREA_number; j++){
this->AREA_fpp[i] -= this->AREA_fpp[j+1];
Expand All @@ -510,6 +565,13 @@ void SBF::SetAreaFpp()
}


// Accessor for the per-area element counter: returns AREA_members[area].
// The area argument is used directly as an array index; no range check is
// performed here.
int SBF::GetAreaMembers(int area)
{
    const int inserted = this->AREA_members[area];
    return inserted;
}


// Returns the sparsity of the entire SBF
float SBF::GetFilterSparsity()
{
Expand All @@ -524,21 +586,35 @@ float SBF::GetFilterSparsity()
}


// Returns the a-priori false positive probability over the entire filter
// (i.e. not area-specific), computed as
//     (1 - (1 - 1/cells)^(HASH_number * members))^HASH_number
// The computation is carried out in double precision and the result is
// narrowed to float on return.
float SBF::GetFilterAPrioriFpp()
{
    const double base = 1.0 - 1.0 / static_cast<double>(this->cells);

    // The exponent is formed in floating point: the int product
    // HASH_number * members could overflow for large filters.
    const double exponent =
        static_cast<double>(this->HASH_number) * static_cast<double>(this->members);

    const double fill = 1.0 - pow(base, exponent);

    return static_cast<float>(pow(fill, this->HASH_number));
}


// Returns the a-posteriori false positive probability over the entire filter
// (i.e. not area-specific).
// The value is (c / cells)^HASH_number, where c is the sum of
// AREA_cells[1..AREA_number]; the result is returned as a float.
float SBF::GetFilterFpp()
{
float p,fpp;
double p;
int c = 0;
// Counts non-zero cells
// (sums the per-area cell counters; index 0 is not included)
for(int i = 1; i < this->AREA_number+1; i++){
c += this->AREA_cells[i];
}
// Fraction of the filter's cells accounted for by the areas
p = (float)c/(float)this->cells;
p = (double)c/(double)this->cells;

// Raise that fraction to the number of hash functions
fpp = (float)(pow(p,this->HASH_number));
p = (double)(pow(p,this->HASH_number));

return fpp;
return (float)p;
}

// Returns the emersion value for the input area
Expand All @@ -555,17 +631,6 @@ float SBF::GetAreaEmersion(int area)
}


// Flotation flag for the input area: TRUE when an element belonging to the
// area cannot be recognized as belonging to a different area, FALSE when
// collisions may cause such a misclassification.
// An area with no members, or a filter with no hash functions, is always
// reported as TRUE.
bool SBF::GetAreaFlotation(int area)
{
    const bool no_members = (this->AREA_members[area] == 0);
    const bool no_hashes = (this->HASH_number == 0);
    if (no_members || no_hashes) {
        return true;
    }

    // members * hashes, minus the area's self-collisions, minus the cells
    // counted for the area.
    const int balance = (this->AREA_members[area] * this->HASH_number)
                        - this->AREA_self_collisions[area]
                        - this->AREA_cells[area];

    return balance < this->HASH_number;
}


} //namespace sbf
13 changes: 12 additions & 1 deletion sbf.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ namespace sbf {
int *AREA_members;
int *AREA_cells;
int *AREA_self_collisions;
float *AREA_a_priori_fpp;
float *AREA_fpp;
float *AREA_a_priori_isep;
int BIG_end;

// Private methods (commented in the sbf.cpp)
Expand Down Expand Up @@ -176,6 +178,8 @@ namespace sbf {
this->AREA_cells = new int[this->AREA_number + 1];
this->AREA_self_collisions = new int[this->AREA_number + 1];
this->AREA_fpp = new float[this->AREA_number + 1];
this->AREA_a_priori_fpp = new float[this->AREA_number + 1];
this->AREA_a_priori_isep = new float[this->AREA_number + 1];

// Parameter initializations
this->members = 0;
Expand All @@ -185,6 +189,8 @@ namespace sbf {
this->AREA_cells[a] = 0;
this->AREA_self_collisions[a] = 0;
this->AREA_fpp[a] = -1;
this->AREA_a_priori_fpp[a] = -1;
this->AREA_a_priori_isep[a] = -1;
}
}

Expand All @@ -197,6 +203,8 @@ namespace sbf {
delete[] AREA_cells;
delete[] AREA_self_collisions;
delete[] AREA_fpp;
delete[] AREA_a_priori_fpp;
delete[] AREA_a_priori_isep;
for (int j = 0; j<this->HASH_number; j++) {
delete[] HASH_salt[j];
}
Expand All @@ -209,11 +217,14 @@ namespace sbf {
void SaveToDisk(std::string path, int mode);
void Insert(char *string, int size, int area);
int Check(char *string, int size);
int GetAreaMembers(int area);
float GetFilterSparsity();
float GetFilterFpp();
float GetFilterAPrioriFpp();
void SetAreaFpp();
void SetAPrioriAreaFpp();
void SetAPrioriAreaIsep();
float GetAreaEmersion(int area);
bool GetAreaFlotation(int area);
};

} //namespace sbf
Expand Down
69 changes: 59 additions & 10 deletions test-app/test-app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
int main() {

std::ifstream myfile;
std::string line, a, member;
std::string line, a, member, path;
std::ofstream rate_file;
int len, line_count, area, area_check, n, narea, nver;
int well_recognised, false_positives, exchanged_elements;
int well_recognised, false_positives, iser;
int* area_iser;
int* area_fp;
char* element;
sbf::SBF* myFilter = NULL;

Expand Down Expand Up @@ -221,7 +224,9 @@ int main() {
}

//calculates filter's probabilistic properties
myFilter->SetAPrioriAreaFpp();
myFilter->SetAreaFpp();
myFilter->SetAPrioriAreaIsep();

//prints filter to the standard output or saves it to disk
if (print_mode == 1) myFilter->PrintFilter(0);
Expand All @@ -235,7 +240,11 @@ int main() {

//operates a self check upon the filter (i.e. runs the Check method for each
//of the already mapped elements)
well_recognised = 0, exchanged_elements = 0;
well_recognised = 0, iser = 0;
area_iser = new int[narea+1];
for (int a = 0; a < narea + 1; a++) {
area_iser[a] = 0;
}
myfile.open(construction_dataset.c_str());

if (myfile.is_open()) {
Expand All @@ -254,13 +263,30 @@ int main() {

if (area == area_check) well_recognised++;
else {
exchanged_elements++;
iser++;
area_iser[area]++;
}

}
printf("Well recognised: %d\n", well_recognised);
printf("Elements assigned to a wrong set: %d\n", exchanged_elements);
printf("Exchange rate: %.5f\n", (float)exchanged_elements / (float)n);
printf("Elements assigned to the correct set: %d\n", well_recognised);
printf("Inter-set errors: %d\n", iser);
printf("Inter-set errors rate: %.5f\n", (float)iser / (float)n);


if (print_mode == 3 || print_mode == 4) {
path = "ise" + buf + ".csv";
rate_file.open(path.c_str());
rate_file.setf(std::ios_base::fixed, std::ios_base::floatfield);
rate_file.precision(5);
// area-related parameters:
// area,inter-set errors,inter-set error rate
rate_file << "area" << ";" << "errors" << ";" << "rate" << std::endl;
for (int j = 1; j < narea + 1; j++) {
rate_file << j << ";" << area_iser[j] << ";" << (float)area_iser[j] / (float)myFilter->GetAreaMembers(j) << std::endl;
}
rate_file.close();
}

myfile.close();
}
else {
Expand Down Expand Up @@ -288,6 +314,10 @@ int main() {

//operates a verification using non members dataset
well_recognised = 0, false_positives = 0;
area_fp = new int[narea + 1];
for (int a = 0; a < narea + 1; a++) {
area_fp[a] = 0;
}
myfile.open(verification_dataset.c_str());

if (myfile.is_open()) {
Expand All @@ -301,12 +331,31 @@ int main() {
memcpy(element, line.c_str(), len);
area = myFilter->Check(element, len);

if (area == 0)well_recognised++;
else false_positives++;
if (area == 0) well_recognised++;
else
{
false_positives++;
area_fp[area]++;
}
}
printf("Well recognised: %d\n", well_recognised);
printf("True negatives: %d\n", well_recognised);
printf("False positives: %d\n", false_positives);
printf("False positives rate: %.5f\n", (float)false_positives / (float)nver);

if (print_mode == 3 || print_mode == 4) {
path = "fp" + buf + ".csv";
rate_file.open(path.c_str());
rate_file.setf(std::ios_base::fixed, std::ios_base::floatfield);
rate_file.precision(5);
// area-related parameters:
// area,false positives,false positives rate
rate_file << "area" << ";" << "false positives" << ";" << "rate" << std::endl;
for (int j = 1; j < narea + 1; j++) {
rate_file << j << ";" << area_fp[j] << ";" << (float)area_fp[j] / (float)nver << std::endl;
}
rate_file.close();
}

myfile.close();
}
else {
Expand Down

0 comments on commit a26dc45

Please sign in to comment.