From 82b3e670963cd5fd952cab71e796a639e29aa74a Mon Sep 17 00:00:00 2001 From: Fabian Egli Date: Thu, 17 Oct 2024 06:07:35 +0200 Subject: [PATCH] Fix calculating height of clades when building UPGMA tree version 2 (#3951) * Fixed a bug in calculating the height of clades when building a UPGMA tree * added my name to contrib.rst alphabetically * minor corrections * formatting changes * Add tests --------- Co-authored-by: She Zhang Co-authored-by: Fabian Egli Co-authored-by: Eric Talevich --- Bio/Phylo/TreeConstruction.py | 17 +++++------------ CONTRIB.rst | 1 + Tests/test_TreeConstruction.py | 9 ++++++++- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/Bio/Phylo/TreeConstruction.py b/Bio/Phylo/TreeConstruction.py index 317da040eb9..ba0c4996048 100644 --- a/Bio/Phylo/TreeConstruction.py +++ b/Bio/Phylo/TreeConstruction.py @@ -745,15 +745,8 @@ def upgma(self, distance_matrix): inner_clade.clades.append(clade1) inner_clade.clades.append(clade2) # assign branch length - if clade1.is_terminal(): - clade1.branch_length = min_dist / 2 - else: - clade1.branch_length = min_dist / 2 - self._height_of(clade1) - - if clade2.is_terminal(): - clade2.branch_length = min_dist / 2 - else: - clade2.branch_length = min_dist / 2 - self._height_of(clade2) + clade1.branch_length = min_dist * 1.0 / 2 - self._height_of(clade1) + clade2.branch_length = min_dist * 1.0 / 2 - self._height_of(clade2) # update node list clades[min_j] = inner_clade @@ -876,11 +869,11 @@ def nj(self, distance_matrix): def _height_of(self, clade): """Calculate clade height -- the longest path to any terminal (PRIVATE).""" - height = 0 if clade.is_terminal(): - height = clade.branch_length + height = 0 else: - height = height + max(self._height_of(c) for c in clade.clades) + height = max(self._height_of(c) + c.branch_length for c in clade.clades) + return height diff --git a/CONTRIB.rst b/CONTRIB.rst index e1314eb1d01..c40deaa317b 100644 --- a/CONTRIB.rst +++ b/CONTRIB.rst @@ -300,6 +300,7 @@ please open an issue on GitHub or mention it on the mailing list. - Sergei Lebedev - Sergio Valqui - Seth Sims +- She Zhang - Shoichiro Kawauchi - Shuichiro MAKIGAKI - Shyam Saladi diff --git a/Tests/test_TreeConstruction.py b/Tests/test_TreeConstruction.py index e65ebe4a428..a2bcf9203ac 100644 --- a/Tests/test_TreeConstruction.py +++ b/Tests/test_TreeConstruction.py @@ -9,6 +9,7 @@ import tempfile import unittest from io import StringIO +from itertools import combinations from Bio import Align from Bio import AlignIO @@ -232,7 +233,13 @@ def test_upgma(self): # Phylo.write(tree, tree_file, 'newick') ref_tree = Phylo.read("./TreeConstruction/upgma.tre", "newick") self.assertTrue(Consensus._equal_topology(tree, ref_tree)) - # ref_tree.close() + # check for equal distance of all terminal nodes from the root + ref_tree.root_at_midpoint() + for len1, len2 in combinations( + [depth for node, depth in ref_tree.depths().items() if node.is_terminal()], + 2, + ): + self.assertAlmostEqual(len1, len2) def test_nj_msa(self): tree = self.constructor.nj(self.dm_msa)