Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Update Ancestry type alias from vector of tuples to vector of arrays #39

Merged
merged 3 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions phylo2vec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ rand = "*"
[dev-dependencies]
rstest = "0.23.0"
criterion = { version = "0.5", features = ["html_reports"] }
ndarray = "*"


[[bench]]
Expand Down
183 changes: 183 additions & 0 deletions phylo2vec/benches/benchmarks/get_ancestry_dtype.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
use std::ops::Range;
use std::time::Duration;

use criterion::{criterion_group, BenchmarkId, Criterion};
use phylo2vec::tree_vec::ops;
use phylo2vec::utils::{is_unordered, sample};

/// Ancestry stored as a vector of `(usize, usize, usize)` tuples.
pub type AncestryTuple = Vec<(usize, usize, usize)>;
/// Ancestry stored as a vector of fixed-size `[usize; 3]` arrays.
pub type AncestryVec = Vec<[usize; 3]>;
/// Ancestry stored as a two-dimensional `ndarray` array of `usize`.
pub type AncestryNDArray = ndarray::Array2<usize>;

/// Exponents `j` swept by the benchmark; each input size is `2^j` (256 up to 131072).
const RANGE: Range<u32> = 8..18;

/// Benchmarks the three ancestry representations (tuple, fixed-size array, ndarray)
/// on the same sampled input, once for every size `2^j` with `j` in `RANGE`.
///
/// Each routine's result is returned from the `iter` closure rather than dropped
/// with a trailing `;`. Criterion passes the closure's return value through
/// `black_box`, which keeps the optimizer from eliminating the measured call.
fn compare_get_ancestry_datatypes(c: &mut Criterion) {
    let mut group = c.benchmark_group("get_ancestry_datatypes");
    for j in RANGE {
        // Size handed to `sample` for this round: 2^j, computed directly in `usize`.
        let i = 2_usize
            .checked_pow(j)
            .expect("2^j fits in usize for every j in RANGE");
        let v = sample(i, true);
        group.bench_with_input(BenchmarkId::new("tuple", i), &v, |b, v| {
            b.iter(|| get_ancestry_tuple(v));
        });
        group.bench_with_input(BenchmarkId::new("vector", i), &v, |b, v| {
            b.iter(|| get_ancestry_vec(v));
        });
        group.bench_with_input(BenchmarkId::new("ndarray", i), &v, |b, v| {
            b.iter(|| get_ancestry_ndarray(v));
        });
    }
    group.finish();
}

/// Builds the ancestry of the tree vector `v` as a list of
/// `(child1, child2, parent)` tuples.
///
/// Pairs come from `ops::get_pairs_avl` when `is_unordered(v)` is true and from
/// `ops::get_pairs` otherwise. The i-th pair receives the parent index
/// `v.len() + i + 1`. A child that already has a recorded parent is replaced by
/// that most recently recorded parent.
pub fn get_ancestry_tuple(v: &Vec<usize>) -> AncestryTuple {
    let pairs: ops::vector::PairsVec = if is_unordered(v) {
        ops::get_pairs_avl(v)
    } else {
        ops::get_pairs(v)
    };

    let n = v.len();
    let mut rows: AncestryTuple = Vec::with_capacity(n);
    // latest_parent[node] is the newest parent assigned to `node`, or -1 if none yet.
    let mut latest_parent: Vec<isize> = vec![-1; 2 * n + 1];

    // Slicing to `..n` keeps the original requirement that at least `n` pairs exist.
    for (step, &(c1, c2)) in pairs[..n].iter().enumerate() {
        // Both lookups happen before either entry is overwritten below.
        let top1 = match latest_parent[c1] {
            -1 => c1,
            p => p as usize,
        };
        let top2 = match latest_parent[c2] {
            -1 => c2,
            p => p as usize,
        };

        let parent = n + step + 1;
        rows.push((top1, top2, parent));

        latest_parent[c1] = parent as isize;
        latest_parent[c2] = parent as isize;
    }

    rows
}

/// Builds the ancestry of the tree vector `v` as a list of
/// `[child1, child2, parent]` fixed-size arrays.
///
/// Pairs come from `ops::get_pairs_avl` when `is_unordered(v)` is true and from
/// `ops::get_pairs` otherwise. The i-th pair receives the parent index
/// `v.len() + i + 1`. A child that already has a recorded parent is replaced by
/// that most recently recorded parent.
pub fn get_ancestry_vec(v: &Vec<usize>) -> AncestryVec {
    let pairs: ops::vector::PairsVec = if is_unordered(v) {
        ops::get_pairs_avl(v)
    } else {
        ops::get_pairs(v)
    };

    let n = v.len();
    let mut rows: AncestryVec = Vec::with_capacity(n);
    // latest_parent[node] is the newest parent assigned to `node`, or -1 if none yet.
    let mut latest_parent: Vec<isize> = vec![-1; 2 * n + 1];

    // Slicing to `..n` keeps the original requirement that at least `n` pairs exist.
    for (step, &(c1, c2)) in pairs[..n].iter().enumerate() {
        // Both lookups happen before either entry is overwritten below.
        let top1 = match latest_parent[c1] {
            -1 => c1,
            p => p as usize,
        };
        let top2 = match latest_parent[c2] {
            -1 => c2,
            p => p as usize,
        };

        let parent = n + step + 1;
        rows.push([top1, top2, parent]);

        latest_parent[c1] = parent as isize;
        latest_parent[c2] = parent as isize;
    }

    rows
}

/// Builds the ancestry of the tree vector `v` as a `v.len() x 3` array whose
/// row `i` holds `[child1, child2, parent]`.
///
/// Pairs come from `ops::get_pairs_avl` when `is_unordered(v)` is true and from
/// `ops::get_pairs` otherwise. The i-th pair receives the parent index
/// `v.len() + i + 1`. A child that already has a recorded parent is replaced by
/// that most recently recorded parent.
pub fn get_ancestry_ndarray(v: &Vec<usize>) -> AncestryNDArray {
    let pairs: ops::vector::PairsVec = if is_unordered(v) {
        ops::get_pairs_avl(v)
    } else {
        ops::get_pairs(v)
    };

    let n = v.len();
    // Zero-filled up front; every row is overwritten in the loop below.
    let mut table: AncestryNDArray = ndarray::Array2::zeros((n, 3));
    // latest_parent[node] is the newest parent assigned to `node`, or -1 if none yet.
    let mut latest_parent: Vec<isize> = vec![-1; 2 * n + 1];

    // Slicing to `..n` keeps the original requirement that at least `n` pairs exist.
    for (row, &(c1, c2)) in pairs[..n].iter().enumerate() {
        // Both lookups happen before either entry is overwritten below.
        let top1 = match latest_parent[c1] {
            -1 => c1,
            p => p as usize,
        };
        let top2 = match latest_parent[c2] {
            -1 => c2,
            p => p as usize,
        };

        let parent = n + row + 1;
        table[[row, 0]] = top1;
        table[[row, 1]] = top2;
        table[[row, 2]] = parent;

        latest_parent[c1] = parent as isize;
        latest_parent[c2] = parent as isize;
    }

    table
}

// Registers the `get_ancestry_datatypes` benchmark group, which runs
// `compare_get_ancestry_datatypes` with 10 samples per benchmark, a 1000 ms
// warm-up time and a 1000 ms measurement time.
criterion_group! {
name = get_ancestry_datatypes;
config = Criterion::default().sample_size(10).measurement_time(Duration::from_millis(1000)).warm_up_time(Duration::from_millis(1000));
targets = compare_get_ancestry_datatypes
}
1 change: 1 addition & 0 deletions phylo2vec/benches/benchmarks/mod.rs
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pub mod get_ancestry_dtype;
pub mod get_pairs;
28 changes: 14 additions & 14 deletions phylo2vec/src/tree_vec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ impl TreeVec {
return ops::to_newick(&self.data);
}

pub fn get_ancestry(&self) -> Vec<(usize, usize, usize)> {
pub fn get_ancestry(&self) -> Vec<[usize; 3]> {
return ops::get_ancestry(&self.data);
}

Expand Down Expand Up @@ -93,19 +93,19 @@ mod tests {
}

#[rstest]
#[case(vec![0, 0, 0, 1, 3], vec![( 3, 5, 6),
( 1, 4, 7),
( 0, 6, 8),
( 8, 2, 9),
( 9, 7, 10)])]
#[case(vec![0, 1, 2, 3], vec![(3, 4, 5),
(2, 5, 6),
(1, 6, 7),
(0, 7, 8)])]
#[case(vec![0, 0, 1], vec![(1, 3, 4),
(0, 2, 5),
(5, 4, 6)])]
fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: Vec<(usize, usize, usize)>) {
#[case(vec![0, 0, 0, 1, 3], vec![[3, 5, 6],
[1, 4, 7],
[0, 6, 8],
[8, 2, 9],
[9, 7, 10]])]
#[case(vec![0, 1, 2, 3], vec![[3, 4, 5],
[2, 5, 6],
[1, 6, 7],
[0, 7, 8]])]
#[case(vec![0, 0, 1], vec![[1, 3, 4],
[0, 2, 5],
[5, 4, 6]])]
fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: Vec<[usize; 3]>) {
let tree = TreeVec::new(v, None, None);
let ancestry = tree.get_ancestry();
assert_eq!(ancestry, expected);
Expand Down
11 changes: 6 additions & 5 deletions phylo2vec/src/tree_vec/ops/vector.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::tree_vec::ops::avl::{AVLTree, Pair};
use crate::utils::is_unordered;

/// A type alias for the Ancestry type, which is a vector of tuples representing (child1, child2, parent)
pub type Ancestry = Vec<(usize, usize, usize)>;
/// A type alias for the Ancestry type, which is a vector of fixed-size arrays representing [child1, child2, parent]
pub type Ancestry = Vec<[usize; 3]>;

/// A type alias for the PairsVec type, which is a vector of tuples representing (child1, child2)
pub type PairsVec = Vec<Pair>;
Expand Down Expand Up @@ -139,7 +139,7 @@ pub fn get_ancestry(v: &Vec<usize>) -> Ancestry {

// Next parent
let next_parent = (num_of_leaves + i + 1) as isize;
ancestry.push((parent_of_child1, parent_of_child2, next_parent as usize));
ancestry.push([parent_of_child1, parent_of_child2, next_parent as usize]);

// Update the parents of current children
parents[c1] = next_parent;
Expand All @@ -154,7 +154,8 @@ fn _build_newick_recursive_inner(p: usize, ancestry: &Ancestry) -> String {
let leaf_max = ancestry.len();

// Extract the children (c1, c2) and ignore the parent from the ancestry entry
let (c1, c2, _) = ancestry[p - leaf_max - 1];
let c1 = ancestry[p - leaf_max - 1][0];
let c2 = ancestry[p - leaf_max - 1][1];

// Recursive calls for left and right children, checking if they are leaves or internal nodes
let left = if c1 > leaf_max {
Expand All @@ -176,7 +177,7 @@ fn _build_newick_recursive_inner(p: usize, ancestry: &Ancestry) -> String {
/// Build newick string from the ancestry matrix
pub fn build_newick(ancestry: &Ancestry) -> String {
// Get the root node, which is the parent value of the last ancestry element
let root = ancestry.last().unwrap().2;
let root = ancestry.last().unwrap()[2];

// Build the Newick string starting from the root, and append a semicolon
format!("{};", _build_newick_recursive_inner(root, ancestry))
Expand Down
6 changes: 3 additions & 3 deletions py-phylo2vec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ fn to_newick(input_vector: Vec<usize>) -> PyResult<String> {
}

#[pyfunction]
fn get_ancestry(input_vector: Vec<usize>) -> Vec<(usize, usize, usize)> {
let ancestry: Vec<(usize, usize, usize)> = ops::get_ancestry(&input_vector);
fn get_ancestry(input_vector: Vec<usize>) -> Vec<[usize; 3]> {
let ancestry: Vec<[usize; 3]> = ops::get_ancestry(&input_vector);

ancestry
}
Expand All @@ -31,7 +31,7 @@ fn get_pairs_avl(input_vector: Vec<usize>) -> Vec<(usize, usize)> {
}

#[pyfunction]
fn build_newick(input_ancestry: Vec<(usize, usize, usize)>) -> String {
fn build_newick(input_ancestry: Vec<[usize; 3]>) -> String {
let newick_string: String = ops::vector::build_newick(&input_ancestry);
newick_string
}
Expand Down
Loading