-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from ornlneutronimaging/Improve_abs_efficiency
Improve abs efficiency
- Loading branch information
Showing
7 changed files
with
291 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/** | ||
* @file abs.cpp | ||
* @author Chen Zhang ([email protected]) | ||
* @brief Benchmark the performance of abs clustering method | ||
* @version 0.1 | ||
* @date 2023-08-25 | ||
* | ||
* @copyright Copyright (c) 2023 | ||
* | ||
*/ | ||
#include <algorithm> | ||
#include <chrono> | ||
#include <cmath> | ||
#include <iostream> | ||
#include <random> | ||
#include <vector> | ||
|
||
#include "abs.h" | ||
|
||
using namespace std;

// Random engine shared by all distributions below; seeded once from
// std::random_device during static initialisation.
std::random_device rd;
std::mt19937 gen(rd());

// Uniform ranges sampled by fake_hits() while building synthetic hits.
std::uniform_real_distribution<double> pos(0.0, 2.0);          // offset added to the cluster centre (x, y)
std::uniform_real_distribution<double> tot(0.0, 100.0);        // per-hit value, 3rd Hit constructor argument
std::uniform_real_distribution<double> toa(0.0, 1000.0);       // per-hit value, 4th Hit constructor argument
std::uniform_real_distribution<double> ftoa(0.0, 255.0);       // per-hit value, 5th Hit constructor argument
std::uniform_real_distribution<double> tof(0.0, 2000.0);       // per-hit value, 6th Hit constructor argument
std::uniform_real_distribution<double> spidertime(-1.0, 1.0);  // offset added to the cluster's stime
|
||
/* | ||
Target processing speed: 120,000,000 hits / sec -> 120 hits/us | ||
-> 12 clusters, 10 hits each == 120 hits -> 1 us | ||
-> 12000 clusters, 10 hits each == 120000 hits -> 1000 us | ||
*/ | ||
|
||
std::vector<Hit> fake_hits() { | ||
std::vector<Hit> hits; | ||
// generate 12000 clusters of 10 hits each | ||
for (int i = 0; i < 12000; i++) { | ||
// cluster center | ||
int x = 10 * i + pos(gen); | ||
int y = 10 * i + pos(gen); | ||
int stime = 10 * i + spidertime(gen); | ||
// cluster | ||
for (int j = 0; j < 10; j++) { | ||
hits.push_back(Hit(x, y, tot(gen), toa(gen), ftoa(gen), tof(gen), stime)); | ||
} | ||
} | ||
return hits; | ||
} | ||
|
||
double run_single_test(std::vector<Hit> hits, double &fit_time, | ||
double &events_time) { | ||
// create ABS algorithm | ||
ABS abs_alg(5.0, 1, 75); | ||
|
||
// fit hits into clusters | ||
auto start_fit = chrono::high_resolution_clock::now(); | ||
abs_alg.fit(hits); | ||
auto end_fit = chrono::high_resolution_clock::now(); | ||
auto duration_fit = | ||
chrono::duration_cast<chrono::microseconds>(end_fit - start_fit).count(); | ||
cout << "abs::fit " << duration_fit << " us" << endl; | ||
|
||
// convert to neutron events | ||
auto start_events = chrono::high_resolution_clock::now(); | ||
abs_alg.set_method("centroid"); | ||
auto events = abs_alg.get_events(hits); | ||
auto end_events = chrono::high_resolution_clock::now(); | ||
auto duration_events = | ||
chrono::duration_cast<chrono::microseconds>(end_events - start_events) | ||
.count(); | ||
cout << "abs::get_events " << duration_events << " us" << endl; | ||
|
||
fit_time += duration_fit; | ||
events_time += duration_events; | ||
|
||
// release memory | ||
abs_alg.reset(); | ||
|
||
return duration_fit + duration_events; | ||
} | ||
|
||
int main() { | ||
// generate fake hits | ||
auto hits = fake_hits(); | ||
|
||
// run 100 tests and get the average | ||
const int num_tests = 100; | ||
double total_time = 0; | ||
double fit_time = 0; | ||
double events_time = 0; | ||
for (int i = 0; i < num_tests; i++) { | ||
total_time += run_single_test(hits, fit_time, events_time); | ||
} | ||
cout << "For 120,000 hits (ref time cap: 1000 us):" << endl | ||
<< "Average total time: " << total_time / num_tests << " us" << endl | ||
<< "Average fit time: " << fit_time / num_tests << " us" << endl | ||
<< "Average events time: " << events_time / num_tests << " us" << endl; | ||
|
||
return 0; | ||
} |
121 changes: 121 additions & 0 deletions
121
sophiread/SophireadLib/benchmarks/benchmark_abs_thread.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
/** | ||
* @file benchmark_abs_thread.cpp | ||
* @author Chen Zhang ([email protected]) | ||
* @brief benchmark abs clustering method with std::thread | ||
* @version 0.1 | ||
* @date 2023-08-25 | ||
* | ||
* @copyright Copyright (c) 2023 | ||
* | ||
*/ | ||
#include <algorithm> | ||
#include <chrono> | ||
#include <cmath> | ||
#include <iostream> | ||
#include <random> | ||
#include <thread> | ||
#include <vector> | ||
|
||
#include "abs.h" | ||
|
||
using namespace std;

// Random engine shared by all distributions below; seeded once from
// std::random_device during static initialisation.
std::random_device rd;
std::mt19937 gen(rd());

// Uniform ranges sampled by fake_hits() while building synthetic hits.
std::uniform_real_distribution<double> pos(0.0, 2.0);          // offset added to the cluster centre (x, y)
std::uniform_real_distribution<double> tot(0.0, 100.0);        // per-hit value, 3rd Hit constructor argument
std::uniform_real_distribution<double> toa(0.0, 1000.0);       // per-hit value, 4th Hit constructor argument
std::uniform_real_distribution<double> ftoa(0.0, 255.0);       // per-hit value, 5th Hit constructor argument
std::uniform_real_distribution<double> tof(0.0, 2000.0);       // per-hit value, 6th Hit constructor argument
std::uniform_real_distribution<double> spidertime(-1.0, 1.0);  // offset added to the cluster's stime
|
||
/* | ||
Target processing speed: 120,000,000 hits / sec -> 120 hits/us | ||
-> 12 clusters, 10 hits each == 120 hits -> 1 us | ||
-> 12000 clusters, 10 hits each == 120000 hits -> 1000 us | ||
*/ | ||
|
||
std::vector<Hit> fake_hits() { | ||
std::vector<Hit> hits; | ||
const int num_clusters = 12000000; | ||
const int num_hits_per_cluster = 10; | ||
hits.reserve(num_clusters * num_hits_per_cluster); | ||
|
||
// generate | ||
for (int i = 0; i < num_clusters; i++) { | ||
// cluster center | ||
int x = 10 * i + pos(gen); | ||
int y = 10 * i + pos(gen); | ||
int stime = 10 * i + spidertime(gen); | ||
// cluster | ||
for (int j = 0; j < num_hits_per_cluster; j++) { | ||
hits.emplace_back( | ||
Hit(x, y, tot(gen), toa(gen), ftoa(gen), tof(gen), stime)); | ||
} | ||
} | ||
return hits; | ||
} | ||
|
||
struct thread_data { | ||
std::vector<Hit>::const_iterator begin; | ||
std::vector<Hit>::const_iterator end; | ||
|
||
void run() { | ||
ABS abs_alg(5.0, 1, 75); | ||
// fit hits into clusters | ||
abs_alg.fit(std::vector<Hit>(begin, end)); | ||
|
||
// get neutron events | ||
abs_alg.get_events(std::vector<Hit>(begin, end)); | ||
} | ||
}; | ||
|
||
double single_test(const std::vector<Hit>& hits, int num_thread) { | ||
// record time | ||
auto start = std::chrono::high_resolution_clock::now(); | ||
|
||
// chunk size | ||
size_t chunk_size = hits.size() / num_thread; | ||
|
||
std::vector<thread_data> thread_data_list(num_thread); | ||
std::vector<std::thread> threads(num_thread); | ||
|
||
// start threads | ||
for (int i = 0; i < num_thread; ++i) { | ||
thread_data_list[i].begin = hits.begin() + i * chunk_size; | ||
thread_data_list[i].end = (i == num_thread - 1) | ||
? hits.end() | ||
: hits.begin() + (i + 1) * chunk_size; | ||
threads[i] = std::thread(&thread_data::run, std::ref(thread_data_list[i])); | ||
} | ||
|
||
// join threads | ||
for (int i = 0; i < num_thread; ++i) { | ||
threads[i].join(); | ||
} | ||
|
||
// record time | ||
auto end = std::chrono::high_resolution_clock::now(); | ||
|
||
auto duration = | ||
std::chrono::duration_cast<std::chrono::microseconds>(end - start) | ||
.count() / | ||
1e6; | ||
cout << "[user]total " << duration << " sec" << endl; | ||
|
||
return duration; | ||
} | ||
|
||
int main() { | ||
// create fake hits | ||
auto hits = fake_hits(); | ||
size_t num_threads = 64; | ||
|
||
// run 100 tests and get the average | ||
const int num_tests = 1000; | ||
double total_time = 0; | ||
for (int i = 0; i < num_tests; i++) { | ||
total_time += single_test(hits, num_threads); | ||
} | ||
cout << "average " << total_time / num_tests << " sec" << endl; | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters