Skip to content

Commit

Permalink
Merge pull request #587 from pyinat/taxonomy
Browse files Browse the repository at this point in the history
Taxon tree updates
  • Loading branch information
JWCook authored Aug 15, 2024
2 parents e7407d3 + 171d180 commit 187ce38
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 54 deletions.
4 changes: 3 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
* Add `User.annotated_observations_count` field
* Add `root_id` filter to `taxon.make_tree()` to explicitly set the root taxon instead of determining it automatically
* Fix `taxon.make_tree()` rank filtering to allow skipping any number of rank levels
* `taxon.make_tree()` now returns copies of original taxon objects instead of modifying them in-place
* Fix `taxon.make_tree()` rank filtering to handle all available ranks filtered out
* Update `taxon.make_tree()` to return copies of original taxon objects instead of modifying them in-place
* Update `Taxon.flatten(hide_root=True)` to only hide the root taxon if it was automatically inserted by `make_tree()`
* Add shortcut properties to `Taxon` for ancestors of common ranks:
`Taxon.kingdom`, `phylum`, `class_` (note the `_`; 'class' is a reserved keyword), `order`, `family`, `genus`
* Update `Observation.taxon.ancestors` based on identification data, if available
Expand Down
48 changes: 33 additions & 15 deletions pyinaturalist/models/taxon.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,10 @@ class Taxon(BaseModel):
Photo.from_json_list, type=List[Photo], doc='All taxon photos shown on taxon info page'
)

# Indicates this was inserted as an artificial tree root
_artificial: bool = field(default=False, repr=False)
# Indicates this is a partial record (e.g. from nested Taxon.ancestors or children)
_partial: bool = field(default=False, repr=False)

# Used for tree formatting
_indent_level: int = field(default=None, repr=False)

Expand Down Expand Up @@ -253,7 +254,7 @@ def _full_name(self, markup: bool = False) -> str:
else self.name
)
common_name = (
f' ({title(self.preferred_common_name)})' if self.preferred_common_name else ''
f' ({_title(self.preferred_common_name)})' if self.preferred_common_name else ''
)
return f'{rank}{name}{common_name}'

Expand Down Expand Up @@ -302,7 +303,8 @@ def flatten(self, hide_root: bool = False) -> List['Taxon']:
``Taxon.indent_level`` is set to indicate the tree depth of each taxon.
Args:
hide_root: If True, exclude the current taxon from the list and from indentation level.
hide_root: Exclude the current taxon from the list if it was automatically inserted by
:py:func:`make_tree`
"""

def flatten_tree(taxon: Taxon, level: int = 0) -> List[Taxon]:
Expand All @@ -315,7 +317,7 @@ def flatten_tree(taxon: Taxon, level: int = 0) -> List[Taxon]:
)
)

return flatten_tree(self, level=-1 if hide_root else 0)
return flatten_tree(self, level=-1 if hide_root and self._artificial else 0)

@property
def _row(self) -> TableRow:
Expand Down Expand Up @@ -427,12 +429,14 @@ def get_count(self, taxon_id: int, count_field='descendant_obs_count') -> int:
return super().get_count(taxon_id, count_field=count_field)


def title(value: str) -> str:
"""Title case a string, with handling for apostrophes
Borrowed/modified from ``django.template.defaultfilters.title()``
"""
return re.sub("([a-z])['’]([A-Z])", lambda m: m[0].lower(), value.title())
DEFAULT_ROOT = TaxonCount(
id=ROOT_TAXON_ID,
name='Life',
rank='stateofmatter',
rank_level=100,
is_active=True,
artificial=True,
) # type: ignore


def make_tree(
Expand Down Expand Up @@ -518,8 +522,11 @@ def _find_root(

def _find_and_graft_root(taxa: Iterable[Taxon], include_ranks: Optional[List[str]] = None) -> Taxon:
taxa_by_id = {t.id: t for t in taxa}
max_rank = max(t.rank_level for t in taxa if not include_ranks or t.rank in include_ranks)
root_taxa = [t for t in taxa if t.rank_level == max_rank]
rank_levels = [t.rank_level for t in taxa if not include_ranks or t.rank in include_ranks]
if not rank_levels:
logger.warning('All taxon ranks excluded; returning default root')
return deepcopy(DEFAULT_ROOT)
root_taxa = [t for t in taxa if t.rank_level == max(rank_levels)]

# Add any ungrafted taxa and deduplicate
ungrafted = [
Expand All @@ -529,11 +536,14 @@ def _find_and_graft_root(taxa: Iterable[Taxon], include_ranks: Optional[List[str
]
root_taxa = list({t.id: t for t in root_taxa + ungrafted}.values())

# Single branch: return the root taxon
if len(root_taxa) == 1:
return root_taxa[0]
root = root_taxa[0]
root._artificial = True
return root

# If there are multiple branches, we need to insert a 'Life' root above them
root = TaxonCount(id=ROOT_TAXON_ID, name='Life', rank='stateofmatter', is_active=True) # type: ignore
# Multiple branches: we need to insert a 'Life' root above them
root = deepcopy(DEFAULT_ROOT)
root.children = root_taxa
for t in root.children:
t.ancestors = [root]
Expand All @@ -550,3 +560,11 @@ def _sort_groupby(values, key):
def _sort_rank_name(taxon):
"""Get a sort key by rank (descending) and name"""
return (taxon.rank_level or 0) * -1, taxon.name


def _title(value: str) -> str:
"""Title case a string, with handling for apostrophes
Borrowed/modified from ``django.template.defaultfilters.title()``
"""
return re.sub("([a-z])['’]([A-Z])", lambda m: m[0].lower(), value.title())
121 changes: 83 additions & 38 deletions test/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,15 @@ def test_make_tree__filtered():
]


def test_make_tree__all_filtered():
"""When all available ranks are filtered out, a single root node should be created"""
root = make_tree(
Taxon.from_json_list(j_life_list_2),
include_ranks=['infraclass'],
)
assert root.name == 'Life' and not root.children


def test_make_tree__preserves_originals():
"""Children/ancestors of original taxon objects should be preserved"""
taxa = Taxon.from_json_list(j_life_list_2)
Expand All @@ -1217,44 +1226,6 @@ def test_make_tree__preserves_originals():
assert animalia.children[0].name == 'Arthropoda'


def test_make_tree__flattened():
flat_list = make_tree(Taxon.from_json_list(j_life_list_1)).flatten()
assert [t.id for t in flat_list] == [48460, 1, 2, 3, 573, 574, 889, 890, 980, 981]
assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 4, 5, 6, 7, 6, 7]

assert flat_list[0].ancestors == []
assert [t.id for t in flat_list[5].ancestors] == [48460, 1, 2, 3, 573]
assert [t.id for t in flat_list[9].ancestors] == [48460, 1, 2, 3, 573, 574, 980]


def test_make_tree__flattened_without_root():
taxa = Taxon.from_json_list(j_life_list_1)
flat_list = make_tree(taxa).flatten(hide_root=True)
assert [t.id for t in flat_list] == [1, 2, 3, 573, 574, 889, 890, 980, 981]
assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 4, 5, 6, 5, 6]


def test_make_tree__flattened_filtered():
flat_list = make_tree(
Taxon.from_json_list(j_life_list_2),
include_ranks=['kingdom', 'family', 'genus', 'subgenus'],
).flatten()
assert [t.id for t in flat_list] == [
1,
47221,
52775,
538903,
538893,
538900,
415027,
538902,
]
assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 3, 3, 3, 3]

assert flat_list[0].ancestors == []
assert [t.id for t in flat_list[1].ancestors] == [1]


def test_make_tree__find_root():
"""With 'Life' root node removed, the next highest rank should be used as root"""
taxa = Taxon.from_json_list(j_life_list_2)[1:]
Expand Down Expand Up @@ -1314,6 +1285,80 @@ def test_make_tree__explicit_root_filtered_out():
assert len(root.children) == 1


def test_flatten():
flat_list = make_tree(Taxon.from_json_list(j_life_list_1)).flatten()
assert [t.id for t in flat_list] == [48460, 1, 2, 3, 573, 574, 889, 890, 980, 981]
assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 4, 5, 6, 7, 6, 7]

assert flat_list[0].ancestors == []
assert [t.id for t in flat_list[5].ancestors] == [48460, 1, 2, 3, 573]
assert [t.id for t in flat_list[9].ancestors] == [48460, 1, 2, 3, 573, 574, 980]


def test_flatten__filtered():
flat_list = make_tree(
Taxon.from_json_list(j_life_list_2),
include_ranks=['kingdom', 'family', 'genus', 'subgenus'],
).flatten()
assert [t.id for t in flat_list] == [
1,
47221,
52775,
538903,
538893,
538900,
415027,
538902,
]
assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 3, 3, 3, 3]

assert flat_list[0].ancestors == []
assert [t.id for t in flat_list[1].ancestors] == [1]


def test_flatten__hide_root__noop():
"""hide_root with no automatically inserted root should have no effect"""
taxa = Taxon.from_json_list(j_life_list_1)
flat_list = make_tree(taxa).flatten(hide_root=True)
assert [t.id for t in flat_list] == [48460, 1, 2, 3, 573, 574, 889, 890, 980, 981]
assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 4, 5, 6, 7, 6, 7]


def test_flatten__hide_root__artificial():
"""hide_root with an automatically inserted root should remove root taxon"""
taxa = Taxon.from_json_list(j_life_list_1)
tree = make_tree(taxa)
tree._artificial = True
flat_list = tree.flatten(hide_root=True)
assert [t.id for t in flat_list] == [1, 2, 3, 573, 574, 889, 890, 980, 981]
assert [t.indent_level for t in flat_list] == [0, 1, 2, 3, 4, 5, 6, 5, 6]


def test_flatten__hide_root__multiple_roots():
"""hide_root with an automatically inserted root (due to multiple roots) should remove root taxon"""
fungi = Taxon(id=47170, name='Fungi', rank='kingdom', parent_id=ROOT_TAXON_ID)
taxa = Taxon.from_json_list(j_life_list_2) + [fungi]
flat_list = make_tree(taxa, include_ranks=COMMON_RANKS).flatten(hide_root=True)
assert [t.id for t in flat_list] == [
1,
47120,
47158,
47201,
47221,
52775,
52779,
143854,
52774,
541839,
118970,
155085,
127905,
121517,
128670,
47170,
]


# Users
# --------------------

Expand Down

0 comments on commit 187ce38

Please sign in to comment.