Merge pull request #74 from soumyasen1809/rayon_test

[Proof of Concept] Initial support for parallelization using Rayon
Axect · Oct 16, 2024 · fc41e3c · fc41e3c
2 parents 8d7816b + 863ad6a
commit fc41e3c
Show file tree

Hide file tree

Showing 15 changed files with 948 additions and 202 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -18,6 +18,7 @@ maintenance = { status = "actively-developed" }
 
 [dev-dependencies]
 float-cmp = "0.9"
+criterion = { version = "0.5.1", features = ["html_reports"] }
 
 [dependencies]
 csv = { version = "1.3", optional = true, default-features = false }
@@ -40,6 +41,7 @@ json = { version = "0.12", optional = true }
 arrow2 = { version = "0.18", features = ["io_parquet", "io_parquet_compression"], optional = true }
 num-complex = { version = "0.4", optional = true }
 lambert_w = { version = "0.3.0", default-features = false, features = ["24bits", "50bits"] }
+rayon = "1.10"
 
 [package.metadata.docs.rs]
 rustdoc-args = [ "--html-in-header", "katex-header.html", "--cfg", "docsrs"]
@@ -51,3 +53,8 @@ plot = ["pyo3"]
 nc = ["netcdf"]
 parquet = ["arrow2"]
 complex = ["num-complex", "matrixmultiply/cgemm"]
+
+[[bench]]
+path = "benches/parallel_rayon/matrix_benchmark.rs"
+name = "matrix_benchmark"
+harness = false
diff --git a/benches/data/rayon_matrix_benchmark_results.md b/benches/data/rayon_matrix_benchmark_results.md
@@ -0,0 +1,194 @@
+Lib used for benchmarking: Criterion
+Matrix size: 1000x1000
+
+Running benches/parallel_rayon/matrix_benchmark.rs
+
+ser_matrix_bench        time:   [535.12 µs 544.51 µs 556.68 µs]
+Found 11 outliers among 100 measurements (11.00%)
+  4 (4.00%) high mild
+  7 (7.00%) high severe
+
+par_matrix_bench        time:   [5.0912 ms 5.1431 ms 5.1995 ms]
+Found 7 outliers among 100 measurements (7.00%)
+  1 (1.00%) low mild
+  5 (5.00%) high mild
+  1 (1.00%) high severe
+
+ser_py_matrix_bench     time:   [4.3100 ms 4.3309 ms 4.3544 ms]
+Found 7 outliers among 100 measurements (7.00%)
+  2 (2.00%) high mild
+  5 (5.00%) high severe
+
+par_py_matrix_bench     time:   [11.667 ms 11.789 ms 11.920 ms]
+Found 10 outliers among 100 measurements (10.00%)
+  6 (6.00%) high mild
+  4 (4.00%) high severe
+
+ser_matrix_change_shape_bench
+                        time:   [7.3630 ms 7.4075 ms 7.4608 ms]
+Found 5 outliers among 100 measurements (5.00%)
+  1 (1.00%) high mild
+  4 (4.00%) high severe
+
+par_matrix_change_shape_bench
+                        time:   [10.276 ms 10.385 ms 10.499 ms]
+Found 3 outliers among 100 measurements (3.00%)
+  2 (2.00%) high mild
+  1 (1.00%) high severe
+
+ser_matrix_extract_row_bench
+                        time:   [613.39 µs 622.44 µs 633.72 µs]
+Found 7 outliers among 100 measurements (7.00%)
+  7 (7.00%) high severe
+
+par_matrix_extract_row_bench
+                        time:   [5.4321 ms 5.4851 ms 5.5399 ms]
+Found 4 outliers among 100 measurements (4.00%)
+  4 (4.00%) high mild
+
+ser_matrix_from_index_bench
+                        time:   [2.4174 ms 2.4490 ms 2.4851 ms]
+Found 14 outliers among 100 measurements (14.00%)
+  1 (1.00%) high mild
+  13 (13.00%) high severe
+
+par_matrix_from_index_bench
+                        time:   [2.3912 ms 2.4090 ms 2.4304 ms]
+Found 9 outliers among 100 measurements (9.00%)
+  2 (2.00%) high mild
+  7 (7.00%) high severe
+
+ser_matrix_to_vec_bench time:   [2.4800 ms 2.5082 ms 2.5423 ms]
+Found 10 outliers among 100 measurements (10.00%)
+  4 (4.00%) high mild
+  6 (6.00%) high severe
+
+par_matrix_to_vec_bench time:   [6.4041 ms 6.4618 ms 6.5250 ms]
+Found 6 outliers among 100 measurements (6.00%)
+  5 (5.00%) high mild
+  1 (1.00%) high severe
+
+ser_matrix_to_diag_bench
+                        time:   [2.4335 ms 2.4526 ms 2.4750 ms]
+Found 14 outliers among 100 measurements (14.00%)
+  6 (6.00%) high mild
+  8 (8.00%) high severe
+
+par_matrix_to_diag_bench
+                        time:   [13.514 ms 13.684 ms 13.868 ms]
+Found 10 outliers among 100 measurements (10.00%)
+  7 (7.00%) high mild
+  3 (3.00%) high severe
+
+Benchmarking ser_matrix_submat_bench: Warming up for 3.0000 s
+Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 8.3s, enable flat sampling, or reduce sample count to 50.
+ser_matrix_submat_bench time:   [1.6077 ms 1.6243 ms 1.6451 ms]
+Found 16 outliers among 100 measurements (16.00%)
+  3 (3.00%) high mild
+  13 (13.00%) high severe
+
+par_matrix_submat_bench time:   [10.611 ms 10.761 ms 10.942 ms]
+Found 5 outliers among 100 measurements (5.00%)
+  3 (3.00%) high mild
+  2 (2.00%) high severe
+
+ser_matrix_add_vec_bench
+                        time:   [7.3077 ms 7.3485 ms 7.3946 ms]
+Found 12 outliers among 100 measurements (12.00%)
+  2 (2.00%) high mild
+  10 (10.00%) high severe
+
+par_matrix_add_vec_bench
+                        time:   [11.331 ms 11.480 ms 11.636 ms]
+Found 2 outliers among 100 measurements (2.00%)
+  2 (2.00%) high mild
+
+ser_matrix_norm_bench   time:   [5.1600 ms 5.1864 ms 5.2165 ms]
+Found 7 outliers among 100 measurements (7.00%)
+  1 (1.00%) high mild
+  6 (6.00%) high severe
+
+par_matrix_norm_bench   time:   [2.6565 ms 2.6810 ms 2.7091 ms]
+Found 5 outliers among 100 measurements (5.00%)
+  2 (2.00%) high mild
+  3 (3.00%) high severe
+
+Benchmarking ser_matrix_norm_bench #2: Warming up for 3.0000 s
+Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 8.9s, enable flat sampling, or reduce sample count to 50.
+ser_matrix_norm_bench #2
+                        time:   [1.7262 ms 1.7391 ms 1.7541 ms]
+Found 15 outliers among 100 measurements (15.00%)
+  10 (10.00%) high mild
+  5 (5.00%) high severe
+
+par_matrix_norm_bench #2
+                        time:   [6.7071 ms 6.7883 ms 6.8703 ms]
+Found 1 outliers among 100 measurements (1.00%)
+  1 (1.00%) high mild
+
+ser_matrix_norm_bench #3
+                        time:   [9.7582 ms 9.9006 ms 10.057 ms]
+Found 12 outliers among 100 measurements (12.00%)
+  5 (5.00%) high mild
+  7 (7.00%) high severe
+
+par_matrix_norm_bench #3
+                        time:   [9.3004 ms 9.4088 ms 9.5239 ms]
+Found 1 outliers among 100 measurements (1.00%)
+  1 (1.00%) high mild
+
+ser_matrix_inner_prod_bench
+                        time:   [5.2730 ms 5.3590 ms 5.4583 ms]
+Found 14 outliers among 100 measurements (14.00%)
+  3 (3.00%) high mild
+  11 (11.00%) high severe
+
+par_matrix_inner_prod_bench
+                        time:   [5.0987 ms 5.1644 ms 5.2402 ms]
+Found 7 outliers among 100 measurements (7.00%)
+  3 (3.00%) high mild
+  4 (4.00%) high severe
+
+ser_matrix_hadamard_bench
+                        time:   [5.6521 ms 5.6870 ms 5.7262 ms]
+Found 12 outliers among 100 measurements (12.00%)
+  3 (3.00%) high mild
+  9 (9.00%) high severe
+
+par_matrix_hadamard_bench
+                        time:   [14.155 ms 14.335 ms 14.527 ms]
+Found 4 outliers among 100 measurements (4.00%)
+  3 (3.00%) high mild
+  1 (1.00%) high severe
+
+ser_matrix_take_row_bench
+                        time:   [3.7894 ms 3.8234 ms 3.8613 ms]
+Found 15 outliers among 100 measurements (15.00%)
+  7 (7.00%) high mild
+  8 (8.00%) high severe
+
+par_matrix_take_row_bench
+                        time:   [8.4008 ms 8.5171 ms 8.6523 ms]
+Found 9 outliers among 100 measurements (9.00%)
+  6 (6.00%) high mild
+  3 (3.00%) high severe
+
+ser_matrix_fpmap_bench  time:   [3.2526 ms 3.2739 ms 3.2977 ms]
+Found 12 outliers among 100 measurements (12.00%)
+  2 (2.00%) high mild
+  10 (10.00%) high severe
+
+par_matrix_fpmap_bench  time:   [10.604 ms 10.765 ms 10.937 ms]
+Found 11 outliers among 100 measurements (11.00%)
+  8 (8.00%) high mild
+  3 (3.00%) high severe
+
+ser_matrix_reduce_bench time:   [2.6748 ms 2.6964 ms 2.7201 ms]
+Found 9 outliers among 100 measurements (9.00%)
+  6 (6.00%) high mild
+  3 (3.00%) high severe
+
+par_matrix_reduce_bench time:   [6.2453 ms 6.3198 ms 6.4034 ms]
+Found 6 outliers among 100 measurements (6.00%)
+  4 (4.00%) high mild
+  2 (2.00%) high severe
diff --git a/benches/parallel_rayon/matrix_benchmark.rs b/benches/parallel_rayon/matrix_benchmark.rs
@@ -0,0 +1,103 @@
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use peroxide::{
+    fuga::*,
+    traits::math::{ParallelInnerProduct, ParallelNormed},
+};
+
+/// Benchmarks building a 1000x1000 matrix from an index closure via `Matrix::from_index`.
+///
+/// Registers two Criterion entries: `ser_matrix_from_index_bench` and
+/// `par_matrix_from_index_bench`.
+///
+/// NOTE(review): both entries execute the exact same `Matrix::from_index` call, so the
+/// "par" entry does not exercise a different code path here. Presumably it was meant to
+/// call a Rayon-backed constructor -- confirm and switch once one is available.
+pub fn par_matrix_from_index_benchmark(cr: &mut Criterion) {
+    let dims: (usize, usize) = (1000, 1000);
+    let fill = |row: usize, col: usize| 2.0 * (row as f64) * (col as f64);
+
+    // Recorded result, 1000x1000 matrix: 2.3662 ms
+    cr.bench_function("ser_matrix_from_index_bench", |bencher| {
+        bencher.iter(|| {
+            let built = Matrix::from_index(fill, dims);
+            black_box(built)
+        })
+    });
+
+    // Recorded result, 1000x1000 matrix: 2.3355 ms
+    cr.bench_function("par_matrix_from_index_bench", |bencher| {
+        bencher.iter(|| {
+            let built = Matrix::from_index(fill, dims);
+            black_box(built)
+        })
+    });
+}
+
+/// Benchmarks the `Norm::Lpq(4.0, 2.0)` norm of a 1000x1000 matrix, comparing `norm`
+/// against its `par_norm` counterpart.
+///
+/// The source vector is cloned and wrapped into a matrix inside the timed closure, so
+/// every sample of both entries includes that setup cost.
+///
+/// Recorded observation: the parallel variant gave better results (test ran 6 times).
+pub fn par_matrix_norm_lpq_benchmark(cr: &mut Criterion) {
+    let data: Vec<f64> = (0..1000000).map(|i: i32| 2.0 * (i as f64)).collect();
+
+    // Recorded results, 1000x1000 matrix:
+    // [5.5969 ms 5.7555 ms 5.9515 ms 6.0843 ms 6.3072 ms 6.5636 ms]
+    cr.bench_function("ser_matrix_norm_bench", |bencher| {
+        bencher.iter(|| {
+            let mat = matrix(data.clone(), 1000, 1000, Shape::Row);
+            black_box(mat.norm(Norm::Lpq(4.0, 2.0)))
+        })
+    });
+
+    // Recorded results, 1000x1000 matrix:
+    // [3.1796 ms 3.2714 ms 3.3714 ms 3.6123 ms 3.7398 ms 3.8761 ms]
+    cr.bench_function("par_matrix_norm_bench", |bencher| {
+        bencher.iter(|| {
+            let mat = matrix(data.clone(), 1000, 1000, Shape::Row);
+            black_box(mat.par_norm(Norm::Lpq(4.0, 2.0)))
+        })
+    });
+}
+
+/// Benchmarks the `Norm::L1` norm of a 1000x1000 matrix, comparing `norm` against its
+/// `par_norm` counterpart.
+///
+/// The benchmark IDs carry an `_l1_` infix so they do not collide with the IDs
+/// registered by the Lpq norm benchmark. With identical IDs Criterion disambiguates by
+/// appending ` #2`, ` #3`, ... which makes the report impossible to map back to a norm
+/// kind without knowing the registration order.
+///
+/// The source vector is cloned and wrapped into a matrix inside the timed closure, so
+/// every sample of both entries includes that setup cost.
+pub fn par_matrix_norm_l1_benchmark(cr: &mut Criterion) {
+    // A range is already an iterator, so no `.into_iter()` is needed before `map`.
+    let v: Vec<f64> = (0..1000000).map(|i: i32| 2.0 * (i as f64)).collect();
+
+    // Result: 1000x1000 matrix: 9.0287 ms
+    cr.bench_function("ser_matrix_norm_l1_bench", |b| {
+        b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).norm(Norm::L1)))
+    });
+
+    // Result: 1000x1000 matrix: 10.393 ms
+    cr.bench_function("par_matrix_norm_l1_bench", |b| {
+        b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_norm(Norm::L1)))
+    });
+}
+
+/// Benchmarks the inner product of two 1000x1000 matrices, comparing `dot` against its
+/// `par_dot` counterpart.
+///
+/// Both operands are rebuilt from cloned vectors inside the timed closure, so the
+/// clone/construction cost is part of every sample for both entries.
+///
+/// Recorded observation: the parallel variant gave better results (test ran 6 times).
+pub fn par_matrix_inner_prod_benchmark(cr: &mut Criterion) {
+    let lhs_data: Vec<f64> = (0..1000000).map(|i: i32| 2.0 * (i as f64)).collect();
+    let rhs_data: Vec<f64> = (0..1000000).map(|i: i32| 3.0 * (i as f64)).collect();
+
+    // Recorded results, 1000x1000 matrix:
+    // [5.1075 ms 5.1505 ms 5.2013 ms 5.7617 ms 6.0196 ms 6.3009 ms]
+    cr.bench_function("ser_matrix_inner_prod_bench", |bencher| {
+        bencher.iter(|| {
+            let lhs = matrix(lhs_data.clone(), 1000, 1000, Shape::Row);
+            let rhs = matrix(rhs_data.clone(), 1000, 1000, Shape::Row);
+            black_box(lhs.dot(&rhs))
+        })
+    });
+
+    // Recorded results, 1000x1000 matrix:
+    // [4.9931 ms 5.0244 ms 5.0642 ms 5.0322 ms 5.0819 ms 5.1404 ms]
+    cr.bench_function("par_matrix_inner_prod_bench", |bencher| {
+        bencher.iter(|| {
+            let lhs = matrix(lhs_data.clone(), 1000, 1000, Shape::Row);
+            let rhs = matrix(rhs_data.clone(), 1000, 1000, Shape::Row);
+            black_box(lhs.par_dot(&rhs))
+        })
+    });
+}
+
+// Benchmark group `benches`: every serial-vs-parallel matrix benchmark in this file,
+// run with Criterion's default configuration.
+criterion_group! {
+    name = benches;
+    config = Criterion::default();
+    targets =
+        par_matrix_from_index_benchmark,
+        par_matrix_norm_lpq_benchmark,
+        par_matrix_norm_l1_benchmark,
+        par_matrix_inner_prod_benchmark,
+}
+// Generates the `main` entry point (the bench target sets `harness = false`).
+criterion_main!(benches);
diff --git a/src/lib.rs b/src/lib.rs
@@ -208,3 +208,5 @@ pub mod util;
 
 #[cfg(feature = "complex")]
 pub mod complex;
+
+extern crate rayon;