From 39b4725734ecf0f6ca77aeb22ca1839701dc7bc7 Mon Sep 17 00:00:00 2001 From: Jordan Cook Date: Tue, 13 Aug 2024 12:17:19 -0500 Subject: [PATCH 1/2] Update Taxon.flatten(hide_root=True) to only hide root if it was automatically inserted by make_tree() Add a Taxon._artificial internal attribute for tracking this --- HISTORY.md | 1 + pyinaturalist/models/taxon.py | 41 +++++++++---- test/test_models.py | 112 ++++++++++++++++++++++------------ 3 files changed, 103 insertions(+), 51 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 6bd8fc7c..6768b1e5 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,6 +10,7 @@ * Add `root_id` filter to `taxon.make_tree()` to explicitly set the root taxon instead of determining it automatically * Fix `taxon.make_tree()` rank filtering to allow skipping any number of rank levels * `taxon.make_tree()` now returns copies of original taxon objects instead of modifying them in-place +* `Taxon.flatten(hide_root=True)` now only hides the root taxon if it was automatically inserted by `make_tree()` * Add shortcut properties to `Taxon` for ancestors of common ranks: `Taxon.kingdom`, `phylum`, `class_` (note the `_`; 'class' is a reserved keyword), `order`, `family`, `genus` * Update `Observation.taxon.ancestors` based on identification data, if available diff --git a/pyinaturalist/models/taxon.py b/pyinaturalist/models/taxon.py index 194bb58a..a46818fd 100644 --- a/pyinaturalist/models/taxon.py +++ b/pyinaturalist/models/taxon.py @@ -140,9 +140,10 @@ class Taxon(BaseModel): Photo.from_json_list, type=List[Photo], doc='All taxon photos shown on taxon info page' ) + # Indicates this was inserted as an artificial tree root + _artificial: bool = field(default=False, repr=False) # Indicates this is a partial record (e.g. from nested Taxon.ancestors or children) _partial: bool = field(default=False, repr=False) - # Used for tree formatting _indent_level: int = field(default=None, repr=False) @@ -253,7 +254,7 @@ def _full_name(self, markup: bool = False) -> str: else self.name ) common_name = ( - f' ({title(self.preferred_common_name)})' if self.preferred_common_name else '' + f' ({_title(self.preferred_common_name)})' if self.preferred_common_name else '' ) return f'{rank}{name}{common_name}' @@ -302,7 +303,8 @@ def flatten(self, hide_root: bool = False) -> List['Taxon']: ``Taxon.indent_level`` is set to indicate the tree depth of each taxon. Args: - hide_root: If True, exclude the current taxon from the list and from indentation level. + hide_root: Exclude the current taxon from the list if it was automatically inserted by + :py:func:`make_tree` """ def flatten_tree(taxon: Taxon, level: int = 0) -> List[Taxon]: @@ -315,7 +317,7 @@ def flatten_tree(taxon: Taxon, level: int = 0) -> List[Taxon]: ) ) - return flatten_tree(self, level=-1 if hide_root else 0) + return flatten_tree(self, level=-1 if hide_root and self._artificial else 0) @property def _row(self) -> TableRow: @@ -427,12 +429,14 @@ def get_count(self, taxon_id: int, count_field='descendant_obs_count') -> int: return super().get_count(taxon_id, count_field=count_field) -def title(value: str) -> str: - """Title case a string, with handling for apostrophes - - Borrowed/modified from ``django.template.defaultfilters.title()`` - """ - return re.sub("([a-z])['’]([A-Z])", lambda m: m[0].lower(), value.title()) +DEFAULT_ROOT = TaxonCount( + id=ROOT_TAXON_ID, + name='Life', + rank='stateofmatter', + rank_level=100, + is_active=True, + artificial=True, +) # type: ignore def make_tree( @@ -529,11 +533,14 @@ def _find_and_graft_root(taxa: Iterable[Taxon], include_ranks: Optional[List[str ] root_taxa = list({t.id: t for t in root_taxa + ungrafted}.values()) + # Single branch: return the root taxon if len(root_taxa) == 1: - return root_taxa[0] + root = root_taxa[0] + root._artificial = True + return root - # If there are multiple branches, we need to insert a 'Life' root above them - root = TaxonCount(id=ROOT_TAXON_ID, name='Life', rank='stateofmatter', is_active=True) # type: ignore + # Multiple branches: we need to insert a 'Life' root above them + root = deepcopy(DEFAULT_ROOT) root.children = root_taxa for t in root.children: t.ancestors = [root] @@ -550,3 +557,11 @@ def _sort_groupby(values, key): def _sort_rank_name(taxon): """Get a sort key by rank (descending) and name""" return (taxon.rank_level or 0) * -1, taxon.name + + +def _title(value: str) -> str: + """Title case a string, with handling for apostrophes + + Borrowed/modified from ``django.template.defaultfilters.title()`` + """ + return re.sub("([a-z])['’]([A-Z])", lambda m: m[0].lower(), value.title()) diff --git a/test/test_models.py b/test/test_models.py index 73c3e438..3261e7a4 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -1217,44 +1217,6 @@ def test_make_tree__preserves_originals(): assert animalia.children[0].name == 'Arthropoda' -def test_make_tree__flattened(): - flat_list = make_tree(Taxon.from_json_list(j_life_list_1)).flatten() - assert [t.id for t in flat_list] == [48460, 1, 2, 3, 573, 574, 889, 890, 980, 981] - assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 4, 5, 6, 7, 6, 7] - - assert flat_list[0].ancestors == [] - assert [t.id for t in flat_list[5].ancestors] == [48460, 1, 2, 3, 573] - assert [t.id for t in flat_list[9].ancestors] == [48460, 1, 2, 3, 573, 574, 980] - - -def test_make_tree__flattened_without_root(): - taxa = Taxon.from_json_list(j_life_list_1) - flat_list = make_tree(taxa).flatten(hide_root=True) - assert [t.id for t in flat_list] == [1, 2, 3, 573, 574, 889, 890, 980, 981] - assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 4, 5, 6, 5, 6] - - -def test_make_tree__flattened_filtered(): - flat_list = make_tree( - Taxon.from_json_list(j_life_list_2), - include_ranks=['kingdom', 'family', 'genus', 'subgenus'], - ).flatten() - assert [t.id for t in flat_list] == [ - 1, - 47221, - 52775, - 538903, - 538893, - 538900, - 415027, - 538902, - ] - assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 3, 3, 3, 3] - - assert flat_list[0].ancestors == [] - assert [t.id for t in flat_list[1].ancestors] == [1] - - def test_make_tree__find_root(): """With 'Life' root node removed, the next highest rank should be used as root""" taxa = Taxon.from_json_list(j_life_list_2)[1:] @@ -1314,6 +1276,80 @@ def test_make_tree__explicit_root_filtered_out(): assert len(root.children) == 1 +def test_flatten(): + flat_list = make_tree(Taxon.from_json_list(j_life_list_1)).flatten() + assert [t.id for t in flat_list] == [48460, 1, 2, 3, 573, 574, 889, 890, 980, 981] + assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 4, 5, 6, 7, 6, 7] + + assert flat_list[0].ancestors == [] + assert [t.id for t in flat_list[5].ancestors] == [48460, 1, 2, 3, 573] + assert [t.id for t in flat_list[9].ancestors] == [48460, 1, 2, 3, 573, 574, 980] + + +def test_flatten__filtered(): + flat_list = make_tree( + Taxon.from_json_list(j_life_list_2), + include_ranks=['kingdom', 'family', 'genus', 'subgenus'], + ).flatten() + assert [t.id for t in flat_list] == [ + 1, + 47221, + 52775, + 538903, + 538893, + 538900, + 415027, + 538902, + ] + assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 3, 3, 3, 3] + + assert flat_list[0].ancestors == [] + assert [t.id for t in flat_list[1].ancestors] == [1] + + +def test_flatten__hide_root__noop(): + """hide_root with no automatically inserted root should have no effect""" + taxa = Taxon.from_json_list(j_life_list_1) + flat_list = make_tree(taxa).flatten(hide_root=True) + assert [t.id for t in flat_list] == [48460, 1, 2, 3, 573, 574, 889, 890, 980, 981] + assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 4, 5, 6, 7, 6, 7] + + +def test_flatten__hide_root__artificial(): + """hide_root with an automatically inserted root should remove root taxon""" + taxa = Taxon.from_json_list(j_life_list_1) + tree = make_tree(taxa) + tree._artificial = True + flat_list = tree.flatten(hide_root=True) + assert [t.id for t in flat_list] == [1, 2, 3, 573, 574, 889, 890, 980, 981] + assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 4, 5, 6, 5, 6] + + +def test_flatten__hide_root__multiple_roots(): + """hide_root with an automatically inserted root (due to multiple roots) should remove root taxon""" + fungi = Taxon(id=47170, name='Fungi', rank='kingdom', parent_id=ROOT_TAXON_ID) + taxa = Taxon.from_json_list(j_life_list_2) + [fungi] + flat_list = make_tree(taxa, include_ranks=COMMON_RANKS).flatten(hide_root=True) + assert [t.id for t in flat_list] == [ + 1, + 47120, + 47158, + 47201, + 47221, + 52775, + 52779, + 143854, + 52774, + 541839, + 118970, + 155085, + 127905, + 121517, + 128670, + 47170, + ] + + # Users # -------------------- From 171d1800f405b49fbc97af16d3c7f9222efff72b Mon Sep 17 00:00:00 2001 From: Jordan Cook Date: Tue, 13 Aug 2024 13:10:09 -0500 Subject: [PATCH 2/2] Handle case when all available ranks are filtered out in make_tree() --- HISTORY.md | 5 +++-- pyinaturalist/models/taxon.py | 7 +++++-- test/test_models.py | 9 +++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 6768b1e5..fd6ba70e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -9,8 +9,9 @@ * Add `User.annotated_observations_count` field * Add `root_id` filter to `taxon.make_tree()` to explicitly set the root taxon instead of determining it automatically * Fix `taxon.make_tree()` rank filtering to allow skipping any number of rank levels -* `taxon.make_tree()` now returns copies of original taxon objects instead of modifying them in-place -* `Taxon.flatten(hide_root=True)` now only hides the root taxon if it was automatically inserted by `make_tree()` +* Fix `taxon.make_tree()` rank filtering to handle all available ranks filtered out +* Update `taxon.make_tree()` to return copies of original taxon objects instead of modifying them in-place +* Update `Taxon.flatten(hide_root=True)` to only hide the root taxon if it was automatically inserted by `make_tree()` * Add shortcut properties to `Taxon` for ancestors of common ranks: `Taxon.kingdom`, `phylum`, `class_` (note the `_`; 'class' is a reserved keyword), `order`, `family`, `genus` * Update `Observation.taxon.ancestors` based on identification data, if available diff --git a/pyinaturalist/models/taxon.py b/pyinaturalist/models/taxon.py index a46818fd..e569cc33 100644 --- a/pyinaturalist/models/taxon.py +++ b/pyinaturalist/models/taxon.py @@ -522,8 +522,11 @@ def _find_root( def _find_and_graft_root(taxa: Iterable[Taxon], include_ranks: Optional[List[str]] = None) -> Taxon: taxa_by_id = {t.id: t for t in taxa} - max_rank = max(t.rank_level for t in taxa if not include_ranks or t.rank in include_ranks) - root_taxa = [t for t in taxa if t.rank_level == max_rank] + rank_levels = [t.rank_level for t in taxa if not include_ranks or t.rank in include_ranks] + if not rank_levels: + logger.warning('All taxon ranks excluded; returning default root') + return deepcopy(DEFAULT_ROOT) + root_taxa = [t for t in taxa if t.rank_level == max(rank_levels)] # Add any ungrafted taxa and deduplicate ungrafted = [ diff --git a/test/test_models.py b/test/test_models.py index 3261e7a4..b0d035cd 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -1205,6 +1205,15 @@ def test_make_tree__filtered(): ] +def test_make_tree__all_filtered(): + """When all available ranks are filtered out, a single root node should be created""" + root = make_tree( + Taxon.from_json_list(j_life_list_2), + include_ranks=['infraclass'], + ) + assert root.name == 'Life' and not root.children + + def test_make_tree__preserves_originals(): """Children/ancestors of original taxon objects should be preserved""" taxa = Taxon.from_json_list(j_life_list_2)