From 60285de7c7f19ff65c95655f318d7e2a024e3073 Mon Sep 17 00:00:00 2001 From: Valerio Arnaboldi Date: Wed, 3 Jan 2024 08:59:48 -0800 Subject: [PATCH] Avoid filtering graph again when getting root node ids Ontobio function generates a filtered graph before getting root node ids. Since the GD code used ontobio get_roots() function in multiple places, this was taking up extra time. Now the code directly extracts root nodes one time only without generating a subgraph to save time. --- genedescriptions/data_manager.py | 7 +++++-- genedescriptions/ontology_tools.py | 19 +++++++++---------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/genedescriptions/data_manager.py b/genedescriptions/data_manager.py index f7018c2..3785aad 100644 --- a/genedescriptions/data_manager.py +++ b/genedescriptions/data_manager.py @@ -203,10 +203,13 @@ def set_ontology(self, ontology_type: DataType, ontology: Ontology, config: Gene terms_replacement_regex = config.get_module_property(module=module, prop=ConfigModuleProperty.RENAME_TERMS) if terms_replacement_regex: self.rename_ontology_terms(ontology=ontology, terms_replacement_regex=terms_replacement_regex) - set_all_depths(ontology=ontology, relations=self.get_relations(ontology_type)) + root_nodes = [n for n in ontology.nodes() if len( + list(g.predecessors(n))) == 0 and len(list(ontology.successors(n))) > 0] + set_all_depths(ontology=ontology, root_node_ids=root_nodes, relations=self.get_relations(ontology_type)) if config.get_module_property(module=module, prop=ConfigModuleProperty.TRIMMING_ALGORITHM) == "ic": - set_ic_ontology_struct(ontology=ontology, relations=self.get_relations(ontology_type)) + set_ic_ontology_struct(ontology=ontology, relations=self.get_relations(ontology_type), + root_node_ids=root_nodes) if slim_cache_path: slim_url = config.get_module_property(module=module, prop=ConfigModuleProperty.SLIM_URL) self.load_slim(module=module, slim_url=slim_url, slim_cache_path=slim_cache_path) diff --git 
a/genedescriptions/ontology_tools.py b/genedescriptions/ontology_tools.py index 16d5c06..16ad04b 100644 --- a/genedescriptions/ontology_tools.py +++ b/genedescriptions/ontology_tools.py @@ -72,10 +72,10 @@ def get_all_common_ancestors(node_ids: List[str], ontology: Ontology, min_distan ancestors.items() if len(covered_nodes) > 1 or ancestor == covered_nodes[0]] -def set_all_depths(ontology: Ontology, relations: List[str] = None, comparison_func=max): +def set_all_depths(ontology: Ontology, root_node_ids: List[str], relations: List[str] = None, comparison_func=max): logger.info("Setting depth for all nodes") start_time = time.time() - for root_id in ontology.get_roots(): + for root_id in root_node_ids: if "type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS": set_all_depths_in_subgraph(ontology=ontology, root_id=root_id, relations=relations, comparison_func=comparison_func) @@ -111,31 +111,30 @@ def set_all_depths_in_subgraph(ontology: Ontology, root_id: str, relations: List stack.extend([(child_id, current_depth + 1) for child_id in children]) -def set_ic_ontology_struct(ontology: Ontology, relations: List[str] = None): +def set_ic_ontology_struct(ontology: Ontology, root_node_ids: List[str], relations: List[str] = None): logger.info("Setting information content values based on ontology structure") start_time = time.time() - roots = ontology.get_roots(relations=relations) - for root_id in roots: + for root_id in root_node_ids: if "num_subsumers" not in ontology.node(root_id) and ("type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS"): set_num_subsumers(ontology=ontology, root_id=root_id, relations=relations) - for root_id in roots: + for root_id in root_node_ids: if "num_leaves" not in ontology.node(root_id) and ("type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS"): set_leaf_sets(ontology=ontology, root_id=root_id, relations=relations) set_num_leaves(ontology=ontology, root_id=root_id, 
relations=relations) - for root_id in roots: + for root_id in root_node_ids: if "depth" not in ontology.node(root_id) and ("type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS"): set_all_depths_in_subgraph(ontology=ontology, root_id=root_id, relations=relations) - for root_id in roots: + for root_id in root_node_ids: if "type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS": set_information_content_in_subgraph(ontology=ontology, root_id=root_id, maxleaves=ontology.node(root_id)["num_leaves"], relations=relations) logger.info(f"setting information content values based on ic took {time.time() - start_time} seconds") -def set_ic_annot_freq(ontology: Ontology, annotations: AssociationSet): +def set_ic_annot_freq(ontology: Ontology, annotations: AssociationSet, root_node_ids: List[str]): logger.info("Setting information content values based on annotation frequency") for node_id in ontology.nodes(): node_prop = ontology.node(node_id) @@ -145,7 +144,7 @@ def set_ic_annot_freq(ontology: Ontology, annotations: AssociationSet): del node_prop["tot_annot_genes"] if "IC" in node_prop: del node_prop["IC"] - for root_id in ontology.get_roots(): + for root_id in root_node_ids: if "depth" not in ontology.node(root_id) and ("type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS"): set_all_depths_in_subgraph(ontology=ontology, root_id=root_id)