Skip to content

Commit

Permalink
v1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
Dennis Rohde committed Feb 22, 2020
1 parent d71ddbf commit f907ab5
Show file tree
Hide file tree
Showing 22 changed files with 659 additions and 240 deletions.
24 changes: 19 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
project(Fred LANGUAGES CXX)
project(Fred LANGUAGES CXX C)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
Expand All @@ -12,15 +12,29 @@ include_directories(${CMAKE_SOURCE_DIR}/include)

find_package(PythonInterp REQUIRED)
find_package(PythonLibs REQUIRED)
find_package(Boost 1.63 COMPONENTS chrono ${BPY} ${BNPY} REQUIRED)
find_package(Boost 1.63 COMPONENTS system chrono ${BPY} ${BNPY} REQUIRED)
find_package(OpenMP REQUIRED)

add_definitions(-D_GLIBCXX_PARALLEL)

include_directories(${PYTHON_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS})
link_libraries(${PYTHON_LIBRARIES} ${Boost_LIBRARIES} OpenMP::OpenMP_CXX)
link_libraries(${PYTHON_LIBRARIES} ${Boost_LIBRARIES})

add_definitions(-D_GLIBCXX_PARALLEL)
if(OpenMP_CXX_FOUND)
link_libraries(OpenMP::OpenMP_CXX)
endif()
if(NOT TARGET OpenMP::OpenMP_CXX)
find_package(Threads REQUIRED)
add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE)
set_property(TARGET OpenMP::OpenMP_CXX
PROPERTY INTERFACE_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS})
set_property(TARGET OpenMP::OpenMP_CXX
PROPERTY INTERFACE_LINK_LIBRARIES ${OpenMP_CXX_FLAGS} Threads::Threads)

endif()
link_libraries(OpenMP::OpenMP_CXX)

PYTHON_ADD_MODULE(Fred
PYTHON_ADD_MODULE(backend
src/fred_python_wrapper.cpp
src/curve.cpp
src/point.cpp
Expand Down
13 changes: 6 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
pre:
sudo apt install libboost-all-dev
sudo apt-get install python3-setuptools
sudo apt-get install python3-numpy
sudo apt-get install python3-pandas
sudo apt-get install python-setuptools
sudo apt-get install python-numpy
sudo apt-get install python-pandas
sudo apt install -y libboost-all-dev
sudo apt-get install -y python3-setuptools
sudo apt-get install -y python3-numpy
sudo apt-get install -y python-setuptools
sudo apt-get install -y python-numpy
sudo apt-get install -y cmake

python3:
cd py && python3 ./setup.py install --user
Expand Down
28 changes: 15 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
# Fred ![alt text](https://raw.githubusercontent.com/derohde/Fred/master/logo/logo.png "Fred logo")
A fast, scalable and light-weight C++ Fréchet distance library, exposed to Python.

## Ingredients
## Ingredients C++ Backend
`import Fred.backend as fred`

- continuous Fréchet distance
- signature: `Fred.continuous_frechet(curve1, curve2, approximation_error)` default for approximation_error is 0.001
- returns: `Fred.Continuous_Frechet_Result` with members `value`, `time_bounds`: running-time for upper and lower bound, `number_searches`: number of free space diagrams built, `time_searches`: running-time for free spaces
- signature: `fred.continuous_frechet(curve1, curve2, approximation_error)` default for approximation_error is 0.001
- returns: `fred.Continuous_Frechet_Result` with members `value`, `time_bounds`: running-time for upper and lower bound, `number_searches`: number of free space diagrams built, `time_searches`: running-time for free spaces
- discrete Fréchet distance
- signature: `Fred.discrete_frechet(curve1, curve2)`
- returns: `Fred.Discrete_Frechet_Result` with members `value` and `time`
- signature: `fred.discrete_frechet(curve1, curve2)`
- returns: `fred.Discrete_Frechet_Result` with members `value` and `time`
- discrete k-center clustering (continuous Fréchet) [Without simplification; from **Approximating (k,l)-center clustering for curves**](https://dl.acm.org/doi/10.5555/3310435.3310616)
- signature: `Fred.discrete_kcenter(k, curves, approximation_error, with_assignment)` with parameters `approximation_error`: see continuous Fréchet, `with_assignment`: defaults to false; assigns curves to nearest centers if true
- returns: `Fred.Clustering_Result` with members `value`: objective value, `time`, `assignment`: empty if with_assignment=false
- signature: `fred.discrete_kcenter(k, curves, approximation_error, with_assignment)` with parameters `approximation_error`: see continuous Fréchet, `with_assignment`: defaults to false; assigns curves to nearest centers if true
- returns: `fred.Clustering_Result` with members `value`: objective value, `time`, `assignment`: empty if with_assignment=false
- discrete k-median clustering (continuous Fréchet) [Algorithm 6 in **Coresets for (k,l)-Clustering under the Fréchet distance**](https://arxiv.org/pdf/1901.01870.pdf)
- signature: `Fred.discrete_kmedian(k, curves, approximation_error, with_assignment)` with parameters `approximation_error`: see continuous Fréchet, `with_assignment`: defaults to false; assigns curves to nearest centers if true
- returns: `Fred.Clustering_Result` with members `value`: objective value, `time`, `assignment`: empty if with_assignment=false
- signature: `fred.discrete_kmedian(k, curves, approximation_error, with_assignment)` with parameters `approximation_error`: see continuous Fréchet, `with_assignment`: defaults to false; assigns curves to nearest centers if true
- returns: `fred.Clustering_Result` with members `value`: objective value, `time`, `assignment`: empty if with_assignment=false
- discrete one-median clustering (continuous Fréchet) via sampling [Section 3 in **Random Projections and Sampling Algorithms for Clustering of High Dimensional Polygonal Curves**](https://papers.nips.cc/paper/9443-random-projections-and-sampling-algorithms-for-clustering-of-high-dimensional-polygonal-curves)
- signature: `Fred.discrete_onemedian_sampling(curves, epsilon_sampling, approximation_error, with_assignment)` with parameters `approximation_error`: see continuous Fréchet, `epsilon_sampling`: (1+epsilon) approximation parameter, `with_assignment`: defaults to false; assigns curves to nearest centers if true
- returns: `Fred.Clustering_Result` with members `value`: objective value, `time`, `assignment`: empty if with_assignment=false
- signature: `fred.discrete_onemedian_sampling(curves, epsilon_sampling, approximation_error, with_assignment)` with parameters `approximation_error`: see continuous Fréchet, `epsilon_sampling`: (1+epsilon) approximation parameter, `with_assignment`: defaults to false; assigns curves to nearest centers if true
- returns: `fred.Clustering_Result` with members `value`: objective value, `time`, `assignment`: empty if with_assignment=false
- dimension reduction via Gaussian random projection [Section 2 in **Random Projections and Sampling Algorithms for Clustering of High Dimensional Polygonal Curves**](https://papers.nips.cc/paper/9443-random-projections-and-sampling-algorithms-for-clustering-of-high-dimensional-polygonal-curves)
- signature: `Fred.dimension_reduction(curves, epsilon, empirical_constant)` with parameters `epsilon`: (1+epsilon) approximation parameter, `empirical_constant`: use constant of empirical study (faster, but less accurate)
- returns: `Fred.Curves` collection of curves
- signature: `fred.dimension_reduction(curves, epsilon, empirical_constant)` with parameters `epsilon`: (1+epsilon) approximation parameter, `empirical_constant`: use constant of empirical study (faster, but less accurate)
- returns: `fred.Curves` collection of curves

## Installation
Get requirements under Ubuntu: `make pre`
Expand Down
64 changes: 35 additions & 29 deletions include/clustering.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,24 +54,25 @@ struct Clustering_Result {
};


inline void cheap_dist(const curve_size_t i, const curve_size_t j, const Curves &in, std::vector<std::vector<distance_t>> &distances, const distance_t eps) {
inline void cheap_dist(const curve_size_t i, const curve_size_t j, const Curves &in, std::vector<std::vector<distance_t>> &distances,
const distance_t eps, const bool round) {
if (distances[i][j] < 0) {
const auto dist = Frechet::Continuous::distance(in[i], in[j], eps);
const auto dist = Frechet::Continuous::distance(in[i], in[j], eps, round);
distances[j][i] = dist.value;
distances[i][j] = dist.value;
}
}

inline curve_size_t getNearestCenter(const curve_size_t i, const Curves &in, const Centers &centers,
std::vector<std::vector<distance_t>> &distances, const distance_t eps) {
inline curve_size_t nearest_center(const curve_size_t i, const Curves &in, const Centers &centers,
std::vector<std::vector<distance_t>> &distances, const distance_t eps, const bool round) {
const auto infty = std::numeric_limits<distance_t>::infinity();
// cost for curve is infinity
auto min_cost_elem = infty;
curve_size_t nearest = 0;

// except there is a center with smaller cost, then choose the one with smallest cost
for (curve_size_t j = 0; j < centers.size(); ++j) {
cheap_dist(i, centers[j], in, distances, eps);
cheap_dist(i, centers[j], in, distances, eps, round);
if (distances[i][centers[j]] < min_cost_elem) {
min_cost_elem = distances[i][centers[j]];
nearest = j;
Expand All @@ -80,14 +81,15 @@ inline curve_size_t getNearestCenter(const curve_size_t i, const Curves &in, con
return nearest;
}

inline auto curve_cost(const curve_size_t i, const Curves &in, const Centers &centers, std::vector<std::vector<distance_t>> &distances, const distance_t eps) {
inline auto curve_cost(const curve_size_t i, const Curves &in, const Centers &centers,
std::vector<std::vector<distance_t>> &distances, const distance_t eps, const bool round) {
const auto infty = std::numeric_limits<distance_t>::infinity();
// cost for curve is infinity
auto min_cost_elem = infty;

// except there is a center with smaller cost, then choose the one with smallest cost
for (curve_size_t j = 0; j < centers.size(); ++j) {
cheap_dist(i, centers[j], in, distances, eps);
cheap_dist(i, centers[j], in, distances, eps, round);
if (distances[i][centers[j]] < min_cost_elem) {
min_cost_elem = distances[i][centers[j]];
}
Expand All @@ -96,31 +98,34 @@ inline auto curve_cost(const curve_size_t i, const Curves &in, const Centers &ce
return min_cost_elem;
}

inline auto center_cost_sum(const Curves &in, const Centers &centers, std::vector<std::vector<distance_t>> &distances, const distance_t eps) {
double cost = 0.0;
inline auto center_cost_sum(const Curves &in, const Centers &centers,
std::vector<std::vector<distance_t>> &distances, const distance_t eps, const bool round) {
distance_t cost = 0.0;

// for all curves
for (curve_size_t i = 0; i < in.size(); ++i) {
const auto min_cost_elem = curve_cost(i, in, centers, distances, eps);
const auto min_cost_elem = curve_cost(i, in, centers, distances, eps, round);
cost += min_cost_elem;
}
return cost;
}

inline Cluster_Assignment getClusterAssignment(const Curves &in, const Centers &centers, std::vector<std::vector<distance_t>> &distances, const distance_t eps) {
inline Cluster_Assignment cluster_assignment(const Curves &in, const Centers &centers,
std::vector<std::vector<distance_t>> &distances, const distance_t eps, const bool round) {
Cluster_Assignment result;
const auto k = centers.size();

if (k == 0) return result;

for (curve_size_t i = 0; i < k; ++i) result.emplace(i, std::vector<curve_size_t>());

for (curve_size_t i = 0; i < in.size(); ++i) result[getNearestCenter(i, in, centers, distances, eps)].push_back(i);
for (curve_size_t i = 0; i < in.size(); ++i) result[nearest_center(i, in, centers, distances, eps, round)].push_back(i);

return result;
}

Clustering_Result gonzalez(const curve_size_t num_centers, const Curves &in, const distance_t eps, const bool arya = false, const bool with_assignment = false) {
Clustering_Result gonzalez(const curve_size_t num_centers, const Curves &in, const distance_t eps,
const bool round = true, const bool arya = false, const bool with_assignment = false) {
const auto start = boost::chrono::process_real_cpu_clock::now();
Clustering_Result result;

Expand Down Expand Up @@ -151,7 +156,7 @@ Clustering_Result gonzalez(const curve_size_t num_centers, const Curves &in, con
// all curves
for (curve_size_t j = 0; j < in.size(); ++j) {

auto curr_curve_cost = curve_cost(j, in, centers, distances, eps);
auto curr_curve_cost = curve_cost(j, in, centers, distances, eps, round);

if (curr_curve_cost > curr_maxdist) {
curr_maxdist = curr_curve_cost;
Expand All @@ -170,7 +175,7 @@ Clustering_Result gonzalez(const curve_size_t num_centers, const Curves &in, con

if (arya) {

auto cost = center_cost_sum(in, centers, distances, eps);
auto cost = center_cost_sum(in, centers, distances, eps, round);
auto approxcost = cost;
auto gamma = 1/(3 * num_centers * in.size());
auto found = false;
Expand All @@ -191,7 +196,7 @@ Clustering_Result gonzalez(const curve_size_t num_centers, const Curves &in, con
// swap
curr_centers[i] = j;
// new cost
auto curr_cost = center_cost_sum(in, curr_centers, distances, eps);
auto curr_cost = center_cost_sum(in, curr_centers, distances, eps, round);
// check if improvement is done
if (cost - gamma * approxcost > curr_cost) {
cost = curr_cost;
Expand All @@ -206,7 +211,7 @@ Clustering_Result gonzalez(const curve_size_t num_centers, const Curves &in, con
}

if (with_assignment) {
result.assignment = getClusterAssignment(in, centers, distances, eps);
result.assignment = cluster_assignment(in, centers, distances, eps, round);
}

auto end = boost::chrono::process_real_cpu_clock::now();
Expand All @@ -216,11 +221,12 @@ Clustering_Result gonzalez(const curve_size_t num_centers, const Curves &in, con
return result;
}

Clustering_Result arya(const curve_size_t num_centers, const Curves &in, const distance_t eps, const bool with_assignment = false) {
return gonzalez(num_centers, in, eps, true, with_assignment);
Clustering_Result arya(const curve_size_t num_centers, const Curves &in, const distance_t eps, const bool round = true, const bool with_assignment = false) {
return gonzalez(num_centers, in, eps, round, true, with_assignment);
}

Clustering_Result one_median_sampling(const double epsilon, const Curves &in, const distance_t eps, const bool with_assignment = false) {
Clustering_Result one_median_sampling(const double epsilon, const Curves &in, const distance_t eps,
const bool round = true, const bool with_assignment = false) {
const auto start = boost::chrono::process_real_cpu_clock::now();
Clustering_Result result;
Centers centers;
Expand All @@ -230,7 +236,7 @@ Clustering_Result one_median_sampling(const double epsilon, const Curves &in, co
const auto s = std::ceil(60);
const auto t = std::ceil(std::log(60)/(epsilon*epsilon));

Uniform_Random_Generator<double> ugen;
Random::Uniform_Random_Generator<double> ugen;

const auto candidates = ugen.get(s);
const auto witnesses = ugen.get(t);
Expand All @@ -244,12 +250,12 @@ Clustering_Result one_median_sampling(const double epsilon, const Curves &in, co
for (curve_size_t i = 0; i < candidates.size(); ++i) {

const curve_size_t candidate = std::floor(candidates[i] * n);
double objective = 0;
distance_t objective = 0;

for (curve_size_t j = 0; j < witnesses.size(); ++j) {
const curve_size_t witness = std::floor(witnesses[j] * n);

cheap_dist(candidate, witness, in, distances, eps);
cheap_dist(candidate, witness, in, distances, eps, round);
objective += distances[candidate][witness];
}

Expand All @@ -261,17 +267,17 @@ Clustering_Result one_median_sampling(const double epsilon, const Curves &in, co
centers.push_back(best_candidate);

if (with_assignment) {
result.assignment = getClusterAssignment(in, centers, distances, eps);
result.assignment = cluster_assignment(in, centers, distances, eps, round);
}

auto end = boost::chrono::process_real_cpu_clock::now();
result.centers = centers;
result.value = center_cost_sum(in, centers, distances, eps);
result.value = center_cost_sum(in, centers, distances, eps, round);
result.running_time = (end-start).count() / 1000000000.0;
return result;
}

Clustering_Result one_median_exhaustive(const Curves &in, const distance_t eps, const bool with_assignment = false) {
Clustering_Result one_median_exhaustive(const Curves &in, const distance_t eps, const bool round = true, const bool with_assignment = false) {
const auto start = boost::chrono::process_real_cpu_clock::now();
Clustering_Result result;
Centers centers;
Expand All @@ -286,10 +292,10 @@ Clustering_Result one_median_exhaustive(const Curves &in, const distance_t eps,

for (curve_size_t i = 0; i < in.size(); ++i) {

double objective = 0;
distance_t objective = 0;

for (curve_size_t j = 0; j < in.size(); ++j) {
cheap_dist(i, j, in, distances, eps);
cheap_dist(i, j, in, distances, eps, round);
objective += distances[i][j];
}

Expand All @@ -301,7 +307,7 @@ Clustering_Result one_median_exhaustive(const Curves &in, const distance_t eps,
centers.push_back(best_candidate);

if (with_assignment) {
result.assignment = getClusterAssignment(in, centers, distances, eps);
result.assignment = cluster_assignment(in, centers, distances, eps, round);
}

auto end = boost::chrono::process_real_cpu_clock::now();
Expand Down
Loading

0 comments on commit f907ab5

Please sign in to comment.