Skip to content

Commit

Permalink
chore: Sync the current main branch (#35)
Browse files Browse the repository at this point in the history
* feat: add get_common_ancestor between two nodes

Co-authored-by: molkjar <[email protected]>

* bump numpy version; add integer conversion in get_common_ancestor for numba

* chore: Update dependencies to reflect main

---------

Co-authored-by: Neclow <[email protected]>
Co-authored-by: molkjar <[email protected]>
  • Loading branch information
3 people authored Nov 16, 2024
1 parent 01b3783 commit 31ce60c
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ default-members = ["phylo2vec"]

[workspace.package]
edition = "2021"
version = "0.1.0"
version = "0.2.0"
authors = ["Neil Scheidwasser <[email protected]>"]
description = "Phylo2Vec: integer vector representation of binary (phylogenetic) trees"
license = "LGPL-3.0"
Expand Down
21 changes: 20 additions & 1 deletion py-phylo2vec/phylo2vec/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
apply_label_mapping,
create_label_mapping,
check_v,
find_num_leaves,
get_common_ancestor,
remove_leaf,
sample,
find_num_leaves,
)


Expand Down Expand Up @@ -103,5 +104,23 @@ def test_remove_and_add(n_leaves):
assert np.array_equal(v, v_add)


@pytest.mark.parametrize("n_leaves", range(MIN_N_LEAVES, MAX_N_LEAVES + 1))
def test_get_common_ancestor(n_leaves):
    """Compare get_common_ancestor with ete3 on random trees and node pairs."""
    # Node labels the pair is drawn from: 0 .. 2 * (n_leaves - 1) - 1
    candidate_nodes = np.arange(2 * (n_leaves - 1))

    for _ in range(N_REPEATS):
        v = sample(n_leaves)

        # Two distinct nodes (drawn without replacement)
        node1, node2 = np.random.choice(candidate_nodes, 2, replace=False)

        observed = get_common_ancestor(v, node1, node2)

        # Reference answer: same tree, parsed by ete3 from its Newick string
        reference_tree = Tree(to_newick(v), format=8)
        reference_node = reference_tree.get_common_ancestor(f"{node1}", f"{node2}")
        expected = int(reference_node.name)

        assert observed == expected


if __name__ == "__main__":
pytest.main()
9 changes: 8 additions & 1 deletion py-phylo2vec/phylo2vec/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@
)
from .random import sample, seed_everything
from .validation import check_v
from .vector import add_leaf, remove_leaf, reorder_v, reroot_at_random
from .vector import (
add_leaf,
get_common_ancestor,
remove_leaf,
reorder_v,
reroot_at_random,
)


__all__ = [
Expand All @@ -20,6 +26,7 @@
"check_v",
"create_label_mapping",
"find_num_leaves",
"get_common_ancestor",
"remove_annotations",
"remove_leaf",
"remove_parent_labels",
Expand Down
76 changes: 70 additions & 6 deletions py-phylo2vec/phylo2vec/utils/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ def remove_leaf(v, leaf):
-------
v_sub : numpy.ndarray
Phylo2Vec vector without `leaf`
sister : int
Sister node of leaf
"""

# get the triplets from v
Expand Down Expand Up @@ -288,16 +290,19 @@ def remove_leaf(v, leaf):

# We now have a correct ancestry without "leaf"
# So we build a vector from it
cherries = _find_cherries(ancestry_sub)

# Cherries have to be ordered according to the scheme presented in Fig. 2
# NOTE: not 100% sure why I need both orderings?
cherries = _find_cherries(ancestry_sub)
cherries_no_parents = _order_cherries_no_parents(cherries)

# Build the new vector
v_sub = _build_vector(_order_cherries_no_parents(cherries))
v_sub = _build_vector(cherries_no_parents)

return v_sub, sister


@nb.njit
@nb.njit(cache=True)
def add_leaf(v, leaf, pos):
"""Add a leaf to a Phylo2Vec vector v
Expand All @@ -307,7 +312,7 @@ def add_leaf(v, leaf, pos):
Phylo2Vec vector
leaf : int >= 0
A leaf node to add
leaf : int >= 0
pos : int >= 0
A branch from where the leaf will be added
Returns
Expand Down Expand Up @@ -340,9 +345,68 @@ def add_leaf(v, leaf, pos):
ancestry_add[r_leaf, c_leaf] = leaf

# Find the cherries
cherries = _order_cherries_no_parents(_find_cherries(ancestry_add))
# NOTE: not 100% sure why I need both orderings?
cherries = _find_cherries(ancestry_add)
cherries_no_parents = _order_cherries_no_parents(cherries)

# Build the new vector
v_add = _build_vector(cherries)
v_add = _build_vector(cherries_no_parents)

return v_add


@nb.njit(cache=True)
def get_ancestry_paths(v):
    """
    Get the ancestry paths for each node in the Phylo2Vec vector.

    Each path is ordered from the root down to the node itself:
    ``path[0]`` is the root (label ``2 * len(v)``) and ``path[-1]`` is the
    node. The root itself has no entry in the returned list.

    Parameters
    ----------
    v : numpy.ndarray
        Phylo2Vec vector

    Returns
    -------
    ancestry_paths : list of list of int
        ``ancestry_paths[i]`` is the root-to-node path of node ``i``,
        for ``i`` in ``range(2 * len(v))``
    """
    ancestry = _get_ancestry(v)

    # Label of the root; computed once instead of on every loop iteration
    root = 2 * len(v)

    # parent_vec[node] = parent of node (one slot per non-root node)
    parent_vec = np.zeros(root, dtype=np.uint64)

    for i in range(len(ancestry)):
        # Columns 0 and 1 of a row are used as the children, column 2 as their parent
        parent_vec[ancestry[i, :2]] = ancestry[i, 2]

    ancestry_paths = []
    for i in range(root):
        path = [i]
        # Parents are prepended, so only path[0] can be the root:
        # checking it directly avoids scanning the whole path each time
        while path[0] != root:
            path.insert(0, parent_vec[int(path[0])])
        ancestry_paths.append(path)

    return ancestry_paths


@nb.njit(cache=True)
def get_common_ancestor(v, node1, node2):
    """Get the most recent common ancestor (MRCA) of two nodes in a Phylo2Vec tree

    Parameters
    ----------
    v : numpy.ndarray
        Phylo2Vec vector
    node1 : int
        A node in the tree
    node2 : int
        A node in the tree

    Returns
    -------
    mrca : int
        Most recent common ancestor node between node1 and node2
    """
    ancestry_paths = get_ancestry_paths(v)

    # np.intersect1d yields the sorted, unique labels present in both paths
    shared_nodes = np.intersect1d(ancestry_paths[node1], ancestry_paths[node2])

    # The smallest shared label is returned as the MRCA.
    # NOTE(review): presumably relies on every parent having a larger label
    # than its descendants -- confirm against _get_ancestry.
    return shared_nodes[0]
2 changes: 1 addition & 1 deletion py-phylo2vec/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ classifiers = [
]
dependencies = [
"numba>=0.56.4",
"numpy==1.24",
"numpy>=1.22,<2.1",
"biopython==1.80.0",
"joblib>=1.2.0",
"ete3==3.1.3",
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setup(
name="phylo2vec",
version="0.1.10",
version="0.1.12",
description="Phylo2Vec: integer vector representation of binary (phylogenetic) trees",
long_description=LONG_DESCRIPTION,
long_description_content_type="text/markdown", # This is important!
Expand All @@ -22,7 +22,7 @@
python_requires=">=3.9",
install_requires=[
"numba>=0.56.4",
"numpy==1.23.5",
"numpy>=1.22,<2.1",
"biopython==1.80.0",
"joblib>=1.2.0",
"ete3==3.1.3",
Expand Down

0 comments on commit 31ce60c

Please sign in to comment.