From c6c4eff981022cb22a9cf5ba3b36df27e4ec7027 Mon Sep 17 00:00:00 2001 From: Valerio Arnaboldi Date: Fri, 15 Nov 2019 17:04:40 -0800 Subject: [PATCH 1/2] fix: do not count parents as covered if children present in final set --- genedescriptions/stats.py | 3 +-- tests/test_trimming.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/genedescriptions/stats.py b/genedescriptions/stats.py index bd989e8..b5d3191 100644 --- a/genedescriptions/stats.py +++ b/genedescriptions/stats.py @@ -38,11 +38,10 @@ def __init__(self): @staticmethod def _get_num_covered_nodes(set_initial_terms, set_final_terms, ontology): num_covered_nodes = 0 - final_t_ancestors = {final_term: ontology.ancestors(final_term) for final_term in set_final_terms} for initial_term in set_initial_terms: initial_t_ancestors = set(ontology.ancestors(initial_term, reflexive=True)) for final_term in set_final_terms: - if final_term in initial_t_ancestors or initial_term in final_t_ancestors[final_term]: + if final_term in initial_t_ancestors: num_covered_nodes += 1 break return num_covered_nodes diff --git a/tests/test_trimming.py b/tests/test_trimming.py index f122a3b..bd0cac3 100644 --- a/tests/test_trimming.py +++ b/tests/test_trimming.py @@ -24,8 +24,8 @@ def setUp(self): self.df = DataManager(do_relations=None, go_relations=["subClassOf", "BFO:0000050"]) logging.basicConfig(filename=None, level="ERROR", format='%(asctime)s - %(name)s - %(levelname)s: %(message)s') self.df.load_ontology_from_file(ontology_type=DataType.GO, ontology_url="file://" + os.path.join( - self.this_dir, "data", "go_gd_test.obo"), - ontology_cache_path=os.path.join(self.this_dir, "cache", "go_gd_test.obo"), + self.this_dir, "data", "go.obo"), + ontology_cache_path=os.path.join(self.this_dir, "cache", "go_test.obo"), config=self.conf_parser) logger.info("Loading go associations from file") self.df.load_associations_from_file(associations_type=DataType.GO, associations_url="file://" + os.path.join( @@ -124,18 +124,20 @@ def test_trimming_lca(self): self.conf_parser.config["go_sentences_options"]["trimming_algorithm"] = "lca" self.conf_parser.config["expression_sentences_options"]["trimming_algorithm"] = "lca" - gene = Gene(id="SGD:S000007393", name="acr-5", dead=False, pseudo=False) - associations = self.get_associations(gene.id, ['GO:0003723', 'GO:0003887', 'GO:0003964', 'GO:0004540', - 'GO:0008233'], [""], "F", "ISS") + gene = Gene(id='HGNC:10301', name="RPL11", dead=False, pseudo=False) + associations = self.get_associations(gene.id, ['GO:1904667', 'GO:2000059', 'GO:0032092', 'GO:2000435', + 'GO:0010628', 'GO:0000027', 'GO:1901796', 'GO:0050821', + 'GO:0006364', 'GO:0002181', 'GO:0042273', 'GO:0006605'], [""], + "P", "EXP") self.df.go_associations = AssociationSetFactory().create_from_assocs( assocs=associations, ontology=self.df.go_ontology) - gene_desc_lca = GeneDescription(gene_id=gene.id, config=self.conf_parser, gene_name="YDR210W-B", + gene_desc_lca = GeneDescription(gene_id=gene.id, config=self.conf_parser, gene_name="RPL11", add_gene_name=False) set_gene_ontology_module(dm=self.df, conf_parser=self.conf_parser, gene_desc=gene_desc_lca, gene=gene) gene_desc_lca.stats.calculate_stats(data_manager=self.df) self.conf_parser.config["go_sentences_options"]["trimming_algorithm"] = "ic" self.conf_parser.config["expression_sentences_options"]["trimming_algorithm"] = "ic" - gene_desc_ic = GeneDescription(gene_id=gene.id, config=self.conf_parser, gene_name="YDR210W-B", + gene_desc_ic = GeneDescription(gene_id=gene.id, config=self.conf_parser, gene_name="RPL11", add_gene_name=False) set_gene_ontology_module(dm=self.df, conf_parser=self.conf_parser, gene_desc=gene_desc_ic, gene=gene) gene_desc_ic.stats.calculate_stats(data_manager=self.df) From ab01677858a90759a1723a91d7615c4c6ff8f0d4 Mon Sep 17 00:00:00 2001 From: Valerio Arnaboldi Date: Fri, 15 Nov 2019 17:06:15 -0800 Subject: [PATCH 2/2] improvement: added log lines for ic_struct --- genedescriptions/ontology_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/genedescriptions/ontology_tools.py b/genedescriptions/ontology_tools.py index 1ac9ed5..0f8b996 100644 --- a/genedescriptions/ontology_tools.py +++ b/genedescriptions/ontology_tools.py @@ -104,6 +104,7 @@ def set_all_depths_in_subgraph(ontology: Ontology, root_id: str, relations: List def set_ic_ontology_struct(ontology: Ontology, relations: List[str] = None): + logger.info("Setting information content values based on ontology structure") roots = ontology.get_roots(relations=relations) for root_id in roots: if "num_subsumers" not in ontology.node(root_id) and ("type" not in ontology.node(root_id) or @@ -121,6 +122,7 @@ def set_ic_ontology_struct(ontology: Ontology, relations: List[str] = None): if "type" not in ontology.node(root_id) or ontology.node_type(root_id) == "CLASS": _set_information_content_in_subgraph(ontology=ontology, root_id=root_id, maxleaves=ontology.node(root_id)["num_leaves"], relations=relations) + logger.info("Finished setting information content values") def set_ic_annot_freq(ontology: Ontology, annotations: AssociationSet):