Skip to content

Commit

Permalink
Avoid filtering graph again when getting root node ids
Browse files Browse the repository at this point in the history
The Ontobio get_roots() function generates a filtered graph before getting the root node ids. Since the GD code called get_roots() in multiple places, this was taking up extra time. Now the code extracts the root nodes directly, one time only, without generating a subgraph, to save time.
  • Loading branch information
valearna committed Jan 3, 2024
1 parent f05998c commit 60285de
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 12 deletions.
7 changes: 5 additions & 2 deletions genedescriptions/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,13 @@ def set_ontology(self, ontology_type: DataType, ontology: Ontology, config: Gene
terms_replacement_regex = config.get_module_property(module=module, prop=ConfigModuleProperty.RENAME_TERMS)
if terms_replacement_regex:
self.rename_ontology_terms(ontology=ontology, terms_replacement_regex=terms_replacement_regex)
set_all_depths(ontology=ontology, relations=self.get_relations(ontology_type))
root_nodes = [n for n in ontology.nodes() if len(
list(g.predecessors(n))) == 0 and len(list(ontology.successors(n))) > 0]
set_all_depths(ontology=ontology, root_node_ids=root_nodes, relations=self.get_relations(ontology_type))
if config.get_module_property(module=module,
prop=ConfigModuleProperty.TRIMMING_ALGORITHM) == "ic":
set_ic_ontology_struct(ontology=ontology, relations=self.get_relations(ontology_type))
set_ic_ontology_struct(ontology=ontology, relations=self.get_relations(ontology_type),
root_node_ids=root_nodes)
if slim_cache_path:
slim_url = config.get_module_property(module=module, prop=ConfigModuleProperty.SLIM_URL)
self.load_slim(module=module, slim_url=slim_url, slim_cache_path=slim_cache_path)
Expand Down
19 changes: 9 additions & 10 deletions genedescriptions/ontology_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ def get_all_common_ancestors(node_ids: List[str], ontology: Ontology, min_distan
ancestors.items() if len(covered_nodes) > 1 or ancestor == covered_nodes[0]]


def set_all_depths(ontology: Ontology, relations: List[str] = None, comparison_func=max):
def set_all_depths(ontology: Ontology, root_node_ids: List[str], relations: List[str] = None, comparison_func=max):
logger.info("Setting depth for all nodes")
start_time = time.time()
for root_id in ontology.get_roots():
for root_id in root_node_ids:
if "type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS":
set_all_depths_in_subgraph(ontology=ontology, root_id=root_id, relations=relations,
comparison_func=comparison_func)
Expand Down Expand Up @@ -111,31 +111,30 @@ def set_all_depths_in_subgraph(ontology: Ontology, root_id: str, relations: List
stack.extend([(child_id, current_depth + 1) for child_id in children])


def set_ic_ontology_struct(ontology: Ontology, relations: List[str] = None):
def set_ic_ontology_struct(ontology: Ontology, root_node_ids: List[str], relations: List[str] = None):
logger.info("Setting information content values based on ontology structure")
start_time = time.time()
roots = ontology.get_roots(relations=relations)
for root_id in roots:
for root_id in root_node_ids:
if "num_subsumers" not in ontology.node(root_id) and ("type" not in ontology.node(root_id) or
ontology.node_type(root_id) == "CLASS"):
set_num_subsumers(ontology=ontology, root_id=root_id, relations=relations)
for root_id in roots:
for root_id in root_node_ids:
if "num_leaves" not in ontology.node(root_id) and ("type" not in ontology.node(root_id) or
ontology.node_type(root_id) == "CLASS"):
set_leaf_sets(ontology=ontology, root_id=root_id, relations=relations)
set_num_leaves(ontology=ontology, root_id=root_id, relations=relations)
for root_id in roots:
for root_id in root_node_ids:
if "depth" not in ontology.node(root_id) and ("type" not in ontology.node(root_id) or
ontology.node_type(root_id) == "CLASS"):
set_all_depths_in_subgraph(ontology=ontology, root_id=root_id, relations=relations)
for root_id in roots:
for root_id in root_node_ids:
if "type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS":
set_information_content_in_subgraph(ontology=ontology, root_id=root_id,
maxleaves=ontology.node(root_id)["num_leaves"], relations=relations)
logger.info(f"setting information content values based on ic took {time.time() - start_time} seconds")


def set_ic_annot_freq(ontology: Ontology, annotations: AssociationSet):
def set_ic_annot_freq(ontology: Ontology, annotations: AssociationSet, root_node_ids: List[str]):
logger.info("Setting information content values based on annotation frequency")
for node_id in ontology.nodes():
node_prop = ontology.node(node_id)
Expand All @@ -145,7 +144,7 @@ def set_ic_annot_freq(ontology: Ontology, annotations: AssociationSet):
del node_prop["tot_annot_genes"]
if "IC" in node_prop:
del node_prop["IC"]
for root_id in ontology.get_roots():
for root_id in root_node_ids:
if "depth" not in ontology.node(root_id) and ("type" not in ontology.node(root_id) or
ontology.node_type(root_id) == "CLASS"):
set_all_depths_in_subgraph(ontology=ontology, root_id=root_id)
Expand Down

0 comments on commit 60285de

Please sign in to comment.