Skip to content

Commit

Permalink
Implement python bindings with basic example
Browse files Browse the repository at this point in the history
  • Loading branch information
nemakin authored and chernishev committed Sep 23, 2024
1 parent da9972d commit 988bbe6
Show file tree
Hide file tree
Showing 10 changed files with 88 additions and 25 deletions.
33 changes: 33 additions & 0 deletions examples/basic/verifying_pfd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import desbordante

ERROR = 0.2
PER_TUPLE = 'per_tuple'
PER_VALUE = 'per_value'
TABLE = 'examples/datasets/glitchy_sensor_2.csv'


# Print a human-readable summary of the verifier's latest run.
# Reads the error via get_error() and compares it with the module-level ERROR
# threshold to decide between the "holds" / "does not hold" messages; the
# violating clusters come from get_num_violating_clusters() and
# get_violating_clusters().
# NOTE(review): leading indentation was stripped in this view, so whether the
# cluster listing and the trailing blank print() sit inside the `else` branch
# cannot be confirmed from here -- check against the original file.
def print_results(pfd_verifier):
error = pfd_verifier.get_error()
if error <= ERROR:
print('PFD holds')
else:
print(f'PFD with error {ERROR} does not hold')
print(f'instead it holds with error {error}')
print(f'Clusters violating PFD ({pfd_verifier.get_num_violating_clusters()}):')
for cluster in pfd_verifier.get_violating_clusters():
print(cluster)
print()


# Load the sensor table into a pFD verifier.
# NOTE(review): the tuple looks like (path, separator, has_header) -- confirm
# against the load_data documentation.
algo = desbordante.pfd_verification.algorithms.PFDVerifier()
algo.load_data(table=(TABLE, ',', True))

# PFDVerifier registers only lhs_indices, rhs_indices and error_measure as
# execute options; it reports the measured error and leaves the comparison to
# the caller. The ERROR threshold is therefore applied in print_results()
# instead of being passed to execute().

# First run: column 1 -> column 2 under the per-value error measure.
algo.execute(lhs_indices=[1], rhs_indices=[2], error_measure=PER_VALUE)
print('Checking whether pFD [device_id] -> [data]')
print(f'with error {ERROR} and PerValue error measure holds:')
print_results(algo)

# Second run: same dependency, per-tuple error measure.
algo.execute(lhs_indices=[1], rhs_indices=[2], error_measure=PER_TUPLE)
print('Checking whether the same PFD holds for PerTuple error measure:')
print_results(algo)
11 changes: 11 additions & 0 deletions examples/datasets/glitchy_sensor_2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Id,DeviceId,Data
1,D-1,1001
2,D-1,1002
3,D-1,1003
4,D-1,1004
5,D-1,1005
6,D-1,1006
7,D-2,1000
8,D-3,1000
9,D-4,1000
10,D-5,1000
2 changes: 1 addition & 1 deletion src/core/algorithms/algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "algorithms/cfd/mining_algorithms.h"
#include "algorithms/dd/mining_algorithms.h"
#include "algorithms/fd/mining_algorithms.h"
#include "algorithms/fd/pfdtane/verification_algorithms.h"
#include "algorithms/fd/verification_algorithms.h"
#include "algorithms/gfd/verification_algorithms.h"
#include "algorithms/ind/mining_algorithms.h"
Expand All @@ -13,4 +14,3 @@
#include "algorithms/statistics/algorithms.h"
#include "algorithms/ucc/mining_algorithms.h"
#include "algorithms/ucc/verification_algorithms.h"
#include "algorithms/fd/pfdtane/verification_algorithms.h"
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#pragma once

#include "config/error/type.h"
#include "config/indices/type.h"
#include "config/error_measure/type.h"
#include "config/indices/type.h"
#include "model/table/column_layout_relation_data.h"
Expand All @@ -12,7 +11,6 @@ namespace algos {
class PFDStatsCalculator {
private:
std::shared_ptr<ColumnLayoutRelationData> relation_;
config::ErrorType max_fd_error_;
config::ErrorMeasureType error_measure_;

std::vector<model::PLI::Cluster> clusters_violating_pfd_;
Expand All @@ -21,19 +19,15 @@ class PFDStatsCalculator {

public:
explicit PFDStatsCalculator(std::shared_ptr<ColumnLayoutRelationData> relation,
config::ErrorMeasureType measure, config::ErrorType max_fd_error)
: relation_(std::move(relation)), max_fd_error_(max_fd_error), error_measure_(measure) {}
config::ErrorMeasureType measure)
: relation_(std::move(relation)), error_measure_(measure) {}

// Returns the calculator to its pre-computation state: zero error, zero
// violating rows and an empty list of violating clusters.
void ResetState() {
    error_ = 0;
    num_rows_violating_pfd_ = 0;
    clusters_violating_pfd_.clear();
}

// Reports whether the stored error_ does not exceed the max_fd_error_ threshold.
bool PFDHolds() const {
return error_ <= max_fd_error_;
}

// Number of clusters currently recorded in clusters_violating_pfd_.
size_t GetNumViolatingClusters() const {
    size_t const cluster_count = clusters_violating_pfd_.size();
    return cluster_count;
}
Expand Down
7 changes: 2 additions & 5 deletions src/core/algorithms/fd/pfdtane/pfd_verifier/pfd_verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "config/names.h"
#include "config/tabular_data/input_table/option.h"
#include "equal_nulls/option.h"
#include "error/option.h"
#include "error_measure/option.h"
#include "indices/option.h"

Expand All @@ -20,12 +19,11 @@ void PFDVerifier::RegisterOptions() {
RegisterOption(config::kLhsIndicesOpt(&lhs_indices_, get_schema_cols));
RegisterOption(config::kRhsIndicesOpt(&rhs_indices_, get_schema_cols));
RegisterOption(config::kErrorMeasureOpt(&error_measure_));
RegisterOption(config::kErrorOpt(&max_fd_error_));
}

void PFDVerifier::MakeExecuteOptsAvailable() {
using namespace config::names;
MakeOptionsAvailable({kLhsIndices, kRhsIndices, kErrorMeasure, kError});
MakeOptionsAvailable({kLhsIndices, kRhsIndices, kErrorMeasure});
}

void PFDVerifier::LoadDataInternal() {
Expand All @@ -37,8 +35,7 @@ void PFDVerifier::LoadDataInternal() {

unsigned long long PFDVerifier::ExecuteInternal() {
auto start_time = std::chrono::system_clock::now();
stats_calculator_ =
std::make_unique<PFDStatsCalculator>(relation_, error_measure_, max_fd_error_);
stats_calculator_ = std::make_unique<PFDStatsCalculator>(relation_, error_measure_);
VerifyPFD();
auto elapsed_milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now() - start_time);
Expand Down
6 changes: 0 additions & 6 deletions src/core/algorithms/fd/pfdtane/pfd_verifier/pfd_verifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ class PFDVerifier : public Algorithm {
config::IndicesType lhs_indices_;
config::IndicesType rhs_indices_;
config::EqNullsType is_null_equal_null_;
config::ErrorType max_fd_error_;
config::ErrorMeasureType error_measure_ = +ErrorMeasure::per_tuple;

std::shared_ptr<ColumnLayoutRelationData> relation_;
Expand All @@ -41,11 +40,6 @@ class PFDVerifier : public Algorithm {
std::shared_ptr<model::PLI const> CalculatePLI(config::IndicesType const& indices) const;

public:
// Forwards to PFDHolds() on stats_calculator_.
// The assert checks that stats_calculator_ is non-null before it is dereferenced.
bool PFDHolds() const {
assert(stats_calculator_);
return stats_calculator_->PFDHolds();
}

size_t GetNumViolatingClusters() const {
assert(stats_calculator_);
return stats_calculator_->GetNumViolatingClusters();
Expand Down
4 changes: 3 additions & 1 deletion src/python_bindings/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "nd/bind_nd.h"
#include "nd/bind_nd_verification.h"
#include "od/bind_od.h"
#include "pfd/bind_pfd_verification.h"
#include "statistics/bind_statistics.h"
#include "ucc/bind_ucc.h"
#include "ucc/bind_ucc_verification.h"
Expand All @@ -40,7 +41,8 @@ PYBIND11_MODULE(desbordante, module, pybind11::mod_gil_not_used()) {
for (auto bind_func :
{BindMainClasses, BindDataTypes, BindFd, BindCfd, BindAr, BindUcc, BindAc, BindOd, BindNd,
BindFdVerification, BindMfdVerification, BindUccVerification, BindStatistics, BindInd,
BindGfdVerification, BindSplit, BindDynamicFdVerification, BindNdVerification}) {
BindGfdVerification, BindSplit, BindDynamicFdVerification, BindNdVerification,
BindPfdVerification}) {
bind_func(module);
}
}
Expand Down
25 changes: 25 additions & 0 deletions src/python_bindings/pfd/bind_pfd_verification.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include "bind_pfd_verification.h"

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include "algorithms/fd/pfdtane/pfd_verifier/pfd_verifier.h"
#include "py_util/bind_primitive.h"

namespace {
namespace py = pybind11;
} // namespace

namespace python_bindings {
// Creates the `pfd_verification` submodule on `main_module` and registers
// algos::PFDVerifier in it, exposing the verifier's result accessors
// (violating cluster/row counts, the violating clusters and the error).
void BindPfdVerification(py::module_& main_module) {
    using algos::PFDVerifier;

    py::module_ pfd_module = main_module.def_submodule("pfd_verification");

    // Register the algorithm class first, then attach each accessor to it.
    auto verifier_class = BindPrimitiveNoBase<PFDVerifier>(pfd_module, "PFDVerifier");
    verifier_class.def("get_num_violating_clusters", &PFDVerifier::GetNumViolatingClusters);
    verifier_class.def("get_num_violating_rows", &PFDVerifier::GetNumViolatingRows);
    verifier_class.def("get_violating_clusters", &PFDVerifier::GetViolatingClusters);
    verifier_class.def("get_error", &PFDVerifier::GetError);

    main_module.attr("pfd_verification") = pfd_module;
}
} // namespace python_bindings
7 changes: 7 additions & 0 deletions src/python_bindings/pfd/bind_pfd_verification.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#pragma once

#include <pybind11/pybind11.h>

namespace python_bindings {
// Binding entry point for PFD verification; `main_module` is the pybind11
// module the bindings are attached to.
void BindPfdVerification(pybind11::module_& main_module);
} // namespace python_bindings
8 changes: 4 additions & 4 deletions src/tests/test_pfd_verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ struct PFDVerifyingParams {
size_t num_violating_clusters, size_t num_violating_rows,
std::vector<model::PLI::Cluster> clusters_violating_pfd,
CSVConfig const& csv_config)
: params({{onam::kCsvConfig, csv_config},
: params({
{onam::kCsvConfig, csv_config},
{onam::kEqualNulls, true},
{onam::kLhsIndices, std::move(lhs_indices)},
{onam::kRhsIndices, std::move(rhs_indices)},
{onam::kErrorMeasure, error_measure},
{onam::kError, error}}),
}),
expected_error(error),
num_violating_clusters(num_violating_clusters),
num_violating_rows(num_violating_rows),
Expand All @@ -44,7 +45,6 @@ TEST_P(TestPFDVerifying, DefaultTest) {
auto verifier = algos::CreateAndLoadAlgorithm<algos::PFDVerifier>(p.params);
double const eps = 0.0001;
verifier->Execute();
EXPECT_TRUE(verifier->PFDHolds());
EXPECT_NEAR(p.expected_error, verifier->GetError(), eps);
EXPECT_EQ(p.num_violating_clusters, verifier->GetNumViolatingClusters());
EXPECT_EQ(p.num_violating_rows, verifier->GetNumViolatingRows());
Expand All @@ -70,4 +70,4 @@ INSTANTIATE_TEST_SUITE_P(
PFDVerifyingParams({5}, {1}, +algos::ErrorMeasure::per_tuple, 0.0, 0, 0,
{}, kTestFD)));

} // namespace tests
} // namespace tests

0 comments on commit 988bbe6

Please sign in to comment.