Skip to content

Commit

Permalink
Merge pull request #17 from ornlneutronimaging/Improve_abs_efficiency
Browse files Browse the repository at this point in the history
Improve abs efficiency
  • Loading branch information
suannchong authored Aug 29, 2023
2 parents 4032448 + bac1584 commit 6c46d99
Show file tree
Hide file tree
Showing 7 changed files with 291 additions and 21 deletions.
2 changes: 1 addition & 1 deletion sophiread/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ include(GoogleTest)
file(COPY ${CMAKE_SOURCE_DIR}/resources/data DESTINATION ${CMAKE_BINARY_DIR})

# Add compiler flags
add_compile_options(-O3 -std=c++14 -pthread -Wall -Wextra ${OpenMP_CXX_FLAGS})
add_compile_options(-O3 -std=c++14 -march=native -ffast-math -pthread -Wall -Wextra ${OpenMP_CXX_FLAGS})

# Add Sophiread library
# NOTE: this will take care of the testing as well
Expand Down
33 changes: 33 additions & 0 deletions sophiread/SophireadLib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,36 @@ gtest_discover_tests(SophireadTests_CLUSTER)
add_executable(SophireadTests_PEAKFITTING tests/test_peakfitting.cpp)
target_link_libraries(SophireadTests_PEAKFITTING SophireadLib GTest::GTest GTest::Main hdf5 hdf5_cpp OpenMP::OpenMP_CXX)
gtest_discover_tests(SophireadTests_PEAKFITTING)

# ------------------ Benchmarks ------------------ #
# ABS
add_executable(
SophireadBenchmarks_ABS
benchmarks/benchmark_abs.cpp
)
target_link_libraries(
SophireadBenchmarks_ABS
SophireadLib
)
# symlink executable to the build directory
add_custom_command(TARGET SophireadLib POST_BUILD
COMMAND ${CMAKE_COMMAND} -E create_symlink
${PROJECT_BINARY_DIR}/SophireadLib/SophireadBenchmarks_ABS
${PROJECT_BINARY_DIR}/SophireadBenchmarks_ABS.app
)

add_executable(
SophireadBenchmarks_ABS_Thread
benchmarks/benchmark_abs_thread.cpp
)
target_link_libraries(
SophireadBenchmarks_ABS_Thread
SophireadLib
pthread
)
# symlink executable to the build directory
add_custom_command(TARGET SophireadLib POST_BUILD
COMMAND ${CMAKE_COMMAND} -E create_symlink
${PROJECT_BINARY_DIR}/SophireadLib/SophireadBenchmarks_ABS_Thread
${PROJECT_BINARY_DIR}/SophireadBenchmarks_ABS_Thread.app
)
102 changes: 102 additions & 0 deletions sophiread/SophireadLib/benchmarks/benchmark_abs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/**
* @file abs.cpp
* @author Chen Zhang ([email protected])
* @brief Benchmark the performance of abs clustering method
* @version 0.1
* @date 2023-08-25
*
* @copyright Copyright (c) 2023
*
*/
#include <algorithm>
#include <chrono>
#include <cmath>
#include <iostream>
#include <random>
#include <vector>

#include "abs.h"

using namespace std;
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> pos(0, 2);
std::uniform_real_distribution<> tot(0, 100);
std::uniform_real_distribution<> toa(0, 1000);
std::uniform_real_distribution<> ftoa(0, 255);
std::uniform_real_distribution<> tof(0, 2000);
std::uniform_real_distribution<> spidertime(-1, 1);

/*
Target processing speed: 120,000,000 hits / sec -> 120 hits/us
-> 12 clusters, 10 hits each == 120 hits -> 1 us
-> 12000 clusters, 10 hits each == 120000 hits -> 1000 us
*/

std::vector<Hit> fake_hits() {
std::vector<Hit> hits;
// generate 12000 clusters of 10 hits each
for (int i = 0; i < 12000; i++) {
// cluster center
int x = 10 * i + pos(gen);
int y = 10 * i + pos(gen);
int stime = 10 * i + spidertime(gen);
// cluster
for (int j = 0; j < 10; j++) {
hits.push_back(Hit(x, y, tot(gen), toa(gen), ftoa(gen), tof(gen), stime));
}
}
return hits;
}

double run_single_test(std::vector<Hit> hits, double &fit_time,
double &events_time) {
// create ABS algorithm
ABS abs_alg(5.0, 1, 75);

// fit hits into clusters
auto start_fit = chrono::high_resolution_clock::now();
abs_alg.fit(hits);
auto end_fit = chrono::high_resolution_clock::now();
auto duration_fit =
chrono::duration_cast<chrono::microseconds>(end_fit - start_fit).count();
cout << "abs::fit " << duration_fit << " us" << endl;

// convert to neutron events
auto start_events = chrono::high_resolution_clock::now();
abs_alg.set_method("centroid");
auto events = abs_alg.get_events(hits);
auto end_events = chrono::high_resolution_clock::now();
auto duration_events =
chrono::duration_cast<chrono::microseconds>(end_events - start_events)
.count();
cout << "abs::get_events " << duration_events << " us" << endl;

fit_time += duration_fit;
events_time += duration_events;

// release memory
abs_alg.reset();

return duration_fit + duration_events;
}

int main() {
// generate fake hits
auto hits = fake_hits();

// run 100 tests and get the average
const int num_tests = 100;
double total_time = 0;
double fit_time = 0;
double events_time = 0;
for (int i = 0; i < num_tests; i++) {
total_time += run_single_test(hits, fit_time, events_time);
}
cout << "For 120,000 hits (ref time cap: 1000 us):" << endl
<< "Average total time: " << total_time / num_tests << " us" << endl
<< "Average fit time: " << fit_time / num_tests << " us" << endl
<< "Average events time: " << events_time / num_tests << " us" << endl;

return 0;
}
121 changes: 121 additions & 0 deletions sophiread/SophireadLib/benchmarks/benchmark_abs_thread.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/**
* @file benchmark_abs_pthread.cpp
* @author Chen Zhang ([email protected])
* @brief benchmark abs clustering method with std::thread
* @version 0.1
* @date 2023-08-25
*
* @copyright Copyright (c) 2023
*
*/
#include <algorithm>
#include <chrono>
#include <cmath>
#include <iostream>
#include <random>
#include <thread>
#include <vector>

#include "abs.h"

using namespace std;
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> pos(0, 2);
std::uniform_real_distribution<> tot(0, 100);
std::uniform_real_distribution<> toa(0, 1000);
std::uniform_real_distribution<> ftoa(0, 255);
std::uniform_real_distribution<> tof(0, 2000);
std::uniform_real_distribution<> spidertime(-1, 1);

/*
Target processing speed: 120,000,000 hits / sec -> 120 hits/us
-> 12 clusters, 10 hits each == 120 hits -> 1 us
-> 12000 clusters, 10 hits each == 120000 hits -> 1000 us
*/

std::vector<Hit> fake_hits() {
std::vector<Hit> hits;
const int num_clusters = 12000000;
const int num_hits_per_cluster = 10;
hits.reserve(num_clusters * num_hits_per_cluster);

// generate
for (int i = 0; i < num_clusters; i++) {
// cluster center
int x = 10 * i + pos(gen);
int y = 10 * i + pos(gen);
int stime = 10 * i + spidertime(gen);
// cluster
for (int j = 0; j < num_hits_per_cluster; j++) {
hits.emplace_back(
Hit(x, y, tot(gen), toa(gen), ftoa(gen), tof(gen), stime));
}
}
return hits;
}

struct thread_data {
std::vector<Hit>::const_iterator begin;
std::vector<Hit>::const_iterator end;

void run() {
ABS abs_alg(5.0, 1, 75);
// fit hits into clusters
abs_alg.fit(std::vector<Hit>(begin, end));

// get neutron events
abs_alg.get_events(std::vector<Hit>(begin, end));
}
};

double single_test(const std::vector<Hit>& hits, int num_thread) {
// record time
auto start = std::chrono::high_resolution_clock::now();

// chunk size
size_t chunk_size = hits.size() / num_thread;

std::vector<thread_data> thread_data_list(num_thread);
std::vector<std::thread> threads(num_thread);

// start threads
for (int i = 0; i < num_thread; ++i) {
thread_data_list[i].begin = hits.begin() + i * chunk_size;
thread_data_list[i].end = (i == num_thread - 1)
? hits.end()
: hits.begin() + (i + 1) * chunk_size;
threads[i] = std::thread(&thread_data::run, std::ref(thread_data_list[i]));
}

// join threads
for (int i = 0; i < num_thread; ++i) {
threads[i].join();
}

// record time
auto end = std::chrono::high_resolution_clock::now();

auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - start)
.count() /
1e6;
cout << "[user]total " << duration << " sec" << endl;

return duration;
}

int main() {
// create fake hits
auto hits = fake_hits();
size_t num_threads = 64;

// run 100 tests and get the average
const int num_tests = 1000;
double total_time = 0;
for (int i = 0; i < num_tests; i++) {
total_time += single_test(hits, num_threads);
}
cout << "average " << total_time / num_tests << " sec" << endl;
return 0;
}
2 changes: 1 addition & 1 deletion sophiread/SophireadLib/include/abs.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class ABS : public ClusteringAlgorithm {
std::vector<int> clusterLabels_; // The cluster labels for each hit
std::vector<std::vector<int>> clusterIndices_; // The cluster indices for
// each cluster
const int numClusters_ = 128; // The number of clusters use in runtime
const int numClusters_ = 4; // The number of clusters use in runtime
unsigned long int m_min_cluster_size = 1; // The maximum cluster size
unsigned long int spiderTimeRange_ = 75; // The spider time range (in ns)
PeakFittingAlgorithm* peakFittingAlgorithm_; // The clustering algorithm
Expand Down
29 changes: 18 additions & 11 deletions sophiread/SophireadLib/src/abs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,28 +131,35 @@ std::vector<NeutronEvent> ABS::get_events(const std::vector<Hit>& data) {
std::max_element(clusterLabels_.begin(), clusterLabels_.end());
int max_label = *max_label_it;

// loop over all clusterIndices_
#pragma omp parallel for
// determine fitting algorithm
std::unique_ptr<PeakFittingAlgorithm> alg;
if (m_method == "centroid") {
alg = std::make_unique<Centroid>(true);
} else if (m_method == "fast_gaussian") {
alg = std::make_unique<FastGaussian>();
} else {
throw std::runtime_error("ERROR: peak fitting method not supported!");
}

// pre-allocate memory for events
events.reserve(max_label + 1);

// loop over all clusterIndices_
for (int label = 0; label <= max_label; label++) {
std::vector<Hit> cluster;
cluster.reserve(clusterIndices_[label].size());

for (auto& index : clusterIndices_[label]) {
cluster.push_back(data[index]);
}
if (cluster.size() < m_min_cluster_size) {
continue;
}

// get the neutron event
PeakFittingAlgorithm* alg;
if (m_method == "centroid") {
alg = new Centroid(true);
} else if (m_method == "fast_gaussian") {
alg = new FastGaussian();
} else {
throw std::runtime_error("ERROR: peak fitting method not supported!");
}
auto event = alg->fit(cluster);

// Add the event to the list
#pragma omp critical
if (event.getX() >= 0.0 && event.getY() >= 0.0) {
// x, y = -1 means a failed fit
events.push_back(event);
Expand Down
23 changes: 15 additions & 8 deletions sophiread/SophireadLib/src/centroid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,29 @@ NeutronEvent Centroid::fit(const std::vector<Hit>& data) {

if (weighted_by_tot) {
for (const auto& hit : data) {
x += DSCALE * hit.getX() * hit.getTOT();
y += DSCALE * hit.getY() * hit.getTOT();
tof += hit.getTOF();
tot += hit.getTOT();
const auto hit_x = hit.getX();
const auto hit_y = hit.getY();
const auto hit_tot = hit.getTOT();
const auto hit_tof = hit.getTOF();

x += DSCALE * hit_x * hit_tot;
y += DSCALE * hit_y * hit_tot;
tof += hit_tof;
tot += hit_tot;
}
x /= tot;
y /= tot;
const auto tot_inv = 1.0 / tot;
x *= tot_inv;
y *= tot_inv;
} else {
for (const auto& hit : data) {
x += DSCALE * hit.getX();
y += DSCALE * hit.getY();
tof += hit.getTOF();
tot += hit.getTOT();
}
x /= data.size();
y /= data.size();
const auto data_size_inv = 1.0 / data.size();
x *= data_size_inv;
y *= data_size_inv;
}

tof /= data.size();
Expand Down

0 comments on commit 6c46d99

Please sign in to comment.