Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add 'add_leaf' and 'remove_leaf' methods #38

Merged
merged 9 commits into from
Dec 10, 2024
176 changes: 170 additions & 6 deletions phylo2vec/src/tree_vec/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
use crate::utils::sample;

pub mod ops;
use ops::{
build_vector, find_coords_of_first_leaf, order_cherries, order_cherries_no_parents, Ancestry,
};

/// A vector representation of a phylogenetic tree
///
/// Contains the tree structure, branch lengths, taxa, and rootedness
#[derive(Debug, PartialEq, Clone)]
pub struct TreeVec {
n_leaf: usize,
Expand All @@ -11,7 +17,17 @@ pub struct TreeVec {
is_rooted: bool,
}

/// Implementation of the `TreeVec` struct
impl TreeVec {
/// Creates a new `TreeVec` instance
///
/// # Arguments
/// * `data` - Vector containing the tree structure
/// * `branch_lengths` - Optional vector of branch length tuples (start, end)
/// * `taxa` - Optional vector of taxon names
///
/// # Returns
/// A new `TreeVec` instance with the specified data and properties
pub fn new(
data: Vec<usize>,
branch_lengths: Option<Vec<(f64, f64)>>,
Expand All @@ -27,25 +43,127 @@ impl TreeVec {
}
}

/// Creates a new random tree with specified number of leaves
///
/// # Arguments
/// * `n_leaves` - Number of leaves in the tree
/// * `ordering` - Whether to maintain ordered structure
///
/// # Returns
/// A new randomly generated `TreeVec` instance
pub fn from_sample(n_leaves: usize, ordering: bool) -> Self {
let v = sample(n_leaves, ordering);
TreeVec::new(v, None, None)
}

/// Converts the tree to Newick format
///
/// # Returns
/// A String containing the Newick representation of the tree
pub fn to_newick(&self) -> String {
return ops::to_newick(&self.data);
}

pub fn get_ancestry(&self) -> Vec<[usize; 3]> {
/// Gets the ancestry matrix representation of the tree
///
/// # Returns
/// An `Ancestry` type containing parent-child relationships
pub fn get_ancestry(&self) -> Ancestry {
return ops::get_ancestry(&self.data);
}

pub fn add_leaf(leaf: usize, branch: usize) -> Self {
unimplemented!();
/// Adds a new leaf to the tree
///
/// # Arguments
/// * `leaf` - Index of the new leaf to add
/// * `branch` - Index of the branch to attach the leaf to
///
/// # Side effects
/// Modifies the tree structure by adding the new leaf and updating indices
pub fn add_leaf(&mut self, leaf: usize, branch: usize) {
self.data.push(branch);

let mut ancestry_add = self.get_ancestry();

println!("{:?}", ancestry_add);
let mut found_first_leaf = false;
for r in 0..ancestry_add.len() {
for c in 0..3 {
if !found_first_leaf && ancestry_add[r][c] == self.data.len() {
// Find the indices of the first leaf
// and then set the value to the new leaf
ancestry_add[r][c] = leaf;
found_first_leaf = true;
} else if ancestry_add[r][c] >= leaf {
ancestry_add[r][c] += 1;
}
}
}

// ancestry_add[leaf_coords][leaf_col] = leaf as isize;
// let ancestry_add_ref = &mut ancestry_add;
order_cherries(&mut ancestry_add);
order_cherries_no_parents(&mut ancestry_add);
self.data = build_vector(ancestry_add);
}

pub fn remove_leaf(leaf: usize) -> Self {
unimplemented!();
/// Removes a leaf from the tree
///
/// # Arguments
/// * `leaf` - Index of the leaf to remove
///
/// # Returns
/// The index of the sister node of the removed leaf
///
/// # Side effects
/// Modifies the tree structure by removing the leaf and updating indices
pub fn remove_leaf(&mut self, leaf: usize) -> usize {
let ancestry = self.get_ancestry();
let leaf_coords = find_coords_of_first_leaf(&ancestry, leaf);
let leaf_row = leaf_coords.0;
let leaf_col = leaf_coords.1;

// Find the parent of the leaf to remove
let parent = ancestry[leaf_row][2];
let sister = ancestry[leaf_row][1 - leaf_col];
let num_cherries = ancestry.len();

let mut ancestry_rm = Vec::with_capacity(num_cherries - 1);

for r in 0..num_cherries - 1 {
let mut new_row = if r < leaf_row {
ancestry[r].clone()
} else {
ancestry[r + 1].clone()
};

for c in 0..3 {
let mut node = new_row[c];

if node == parent {
node = sister;
}

// Subtract 1 for leaves > "leaf"
// (so that the vector is still valid)
if node > leaf {
node -= 1;
if node >= parent {
node -= 1;
}
}

new_row[c] = node;
}

ancestry_rm.push(new_row);
}

order_cherries(&mut ancestry_rm);
order_cherries_no_parents(&mut ancestry_rm);
self.data = build_vector(ancestry_rm);

return sister;
}
}

Expand All @@ -71,6 +189,9 @@ mod tests {
assert_eq!(tree.taxa, None);
}

/// Test the creation of a new tree from a sample
///
/// Tests are using 50 leaf tree with ordering and no ordering
#[rstest]
#[case(50, true)]
#[case(50, false)]
Expand All @@ -82,6 +203,9 @@ mod tests {
assert_eq!(tree.taxa, None);
}

/// Test the conversion of a tree to Newick format
///
/// Tests are using 5 or less leaf tree with different structures
#[rstest]
#[case(vec![0, 0, 0, 1, 3], "(((0,(3,5)6)8,2)9,(1,4)7)10;")]
#[case(vec![0, 1, 2, 3, 4], "(0,(1,(2,(3,(4,5)6)7)8)9)10;")]
Expand All @@ -92,6 +216,9 @@ mod tests {
assert_eq!(newick, expected);
}

/// Test the retrieval of the ancestry matrix
///
/// Tests are using 5 or less leaf tree with different structures
#[rstest]
#[case(vec![0, 0, 0, 1, 3], vec![[3, 5, 6],
[1, 4, 7],
Expand All @@ -105,9 +232,46 @@ mod tests {
#[case(vec![0, 0, 1], vec![[1, 3, 4],
[0, 2, 5],
[5, 4, 6]])]
fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: Vec<[usize; 3]>) {
fn test_get_ancestry(#[case] v: Vec<usize>, #[case] expected: Ancestry) {
let tree = TreeVec::new(v, None, None);
let ancestry = tree.get_ancestry();
assert_eq!(ancestry, expected);
}

/// Test the addition of a new leaf to the tree
///
/// Tests are using 6 leaf tree with different leaf and branch indices
#[rstest]
#[case(vec![0, 1, 2, 5, 4, 2], 5, 3, vec![0, 1, 2, 5, 3, 4, 2])]
#[case(vec![0, 1, 2, 5, 4, 2], 7, 0, vec![0, 1, 2, 5, 4, 2, 0])]
#[case(vec![0, 1, 2, 5, 4, 2], 7, 2, vec![0, 1, 2, 5, 4, 2, 2])]
fn test_add_leaf(
#[case] v: Vec<usize>,
#[case] leaf: usize,
#[case] branch: usize,
#[case] expected: Vec<usize>,
) {
let mut tree = TreeVec::new(v, None, None);
tree.add_leaf(leaf, branch);
assert_eq!(tree.data, expected);
}

/// Test the removal of a leaf from the tree
///
/// Tests are using 6 leaf tree with different leaf and sister branch indices
#[rstest]
#[case(vec![0, 1, 2, 5, 4, 2], 5, 4, vec![0, 1, 2, 5, 2])]
#[case(vec![0, 1, 2, 5, 4, 2], 6, 2, vec![0, 1, 2, 5, 4])]
#[case(vec![0, 1, 2, 5, 4, 2], 0, 11, vec![0, 1, 4, 3, 1])]
fn test_remove_leaf(
#[case] v: Vec<usize>,
#[case] leaf: usize,
#[case] branch: usize,
#[case] expected: Vec<usize>,
) {
let mut tree = TreeVec::new(v, None, None);
let sister = tree.remove_leaf(leaf);
assert_eq!(tree.data, expected);
assert_eq!(sister, branch);
}
}
5 changes: 4 additions & 1 deletion phylo2vec/src/tree_vec/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ pub mod avl;
pub mod vector;

#[allow(unused_imports)]
pub use vector::{build_newick, get_ancestry, get_pairs, get_pairs_avl, to_newick, Ancestry};
pub use vector::{
build_newick, build_vector, find_coords_of_first_leaf, get_ancestry, get_pairs, get_pairs_avl,
order_cherries, order_cherries_no_parents, to_newick, Ancestry,
};
Loading
Loading