Skip to content

Commit

Permalink
Add "(multiple)" when children are removed because their parents are present
Browse files: browse the repository at this point in the history
  • Loading branch information
valearna committed Feb 27, 2019
1 parent b814af5 commit 2f73fa1
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 8 deletions.
30 changes: 22 additions & 8 deletions genedescriptions/descriptions_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ def reduce_terms(self, terms, max_num_terms, aspect, config: GenedescConfigParse
del_overlap: bool = False, terms_already_covered: Set[str] = None, exclude_terms: List[str] = None,
remove_parents: bool = False, remove_children: bool = False,
high_priority_term_ids: List[str] = None):
add_mul_common_anc = config.get_module_property(module=self.module,
prop=ConfigModuleProperty.ADD_MULTIPLE_TO_COMMON_ANCEST)
if not terms_already_covered:
terms_already_covered = set()
if not exclude_terms:
Expand All @@ -178,8 +180,11 @@ def reduce_terms(self, terms, max_num_terms, aspect, config: GenedescConfigParse
else:
terms_already_covered.update(terms)
if remove_children:
terms = self.remove_children_if_parents_present(terms, self.ontology, terms_already_covered,
high_priority_term_ids)
terms = self.remove_children_if_parents_present(
terms=terms, ontology=self.ontology, terms_already_covered=terms_already_covered,
high_priority_terms=high_priority_term_ids,
ancestors_covering_multiple_children=ancestors_covering_multiple_children if add_mul_common_anc else
None)
return terms, trimmed, add_others, ancestors_covering_multiple_children

def get_trimmed_terms_by_common_ancestor(self, terms: Set[str], terms_already_covered, aspect: str,
Expand All @@ -202,8 +207,10 @@ def get_trimmed_terms_by_common_ancestor(self, terms: Set[str], terms_already_co
if terms_high_priority is None:
terms_high_priority = []
if len(terms_high_priority) > max_terms:
terms_high_priority = self.remove_children_if_parents_present(terms_high_priority, self.ontology,
terms_already_covered)
terms_high_priority = self.remove_children_if_parents_present(
terms=terms_high_priority, ontology=self.ontology, terms_already_covered=terms_already_covered,
ancestors_covering_multiple_children=ancestors_covering_multiple_children if add_mul_common_anc else
None)
if len(terms_high_priority) > max_terms:
logger.debug("Reached maximum number of terms. Applying trimming to high priority terms")
terms_high_priority, add_others_highp = get_best_nodes(
Expand Down Expand Up @@ -231,7 +238,8 @@ def get_trimmed_terms_by_common_ancestor(self, terms: Set[str], terms_already_co
# remove possible children of terms in the high priority list
terms_low_priority = list(set(terms_low_priority) | set(terms_high_priority))
terms_low_priority = OntologySentenceGenerator.remove_children_if_parents_present(
terms_low_priority, self.ontology)
terms=terms_low_priority, ontology=self.ontology,
ancestors_covering_multiple_children=ancestors_covering_multiple_children if add_mul_common_anc else None)
# remove possible parents of terms in the high priority list
terms_low_priority = list(set(terms_low_priority) | set(terms_high_priority))
terms_low_priority = OntologySentenceGenerator.remove_parents_if_child_present(
Expand All @@ -246,9 +254,15 @@ def get_trimmed_terms_by_common_ancestor(self, terms: Set[str], terms_already_co

@staticmethod
def remove_children_if_parents_present(terms, ontology, terms_already_covered: Set[str] = None,
high_priority_terms: List[str] = None):
terms_nochildren = [term for term in terms if len(set(ontology.ancestors(term)).intersection(set(terms))) == 0
or (high_priority_terms and term in high_priority_terms)]
high_priority_terms: List[str] = None,
ancestors_covering_multiple_children: Set[str] = None):
terms_nochildren = []
for term in terms:
if len(set(ontology.ancestors(term)).intersection(set(terms))) == 0 or (high_priority_terms and
term in high_priority_terms):
terms_nochildren.append(term)
elif ancestors_covering_multiple_children is not None:
ancestors_covering_multiple_children.update(set(ontology.ancestors(term)).intersection(set(terms)))
if len(terms_nochildren) < len(terms):
if terms_already_covered is not None:
terms_already_covered.update(set(terms) - set(terms_nochildren))
Expand Down
39 changes: 39 additions & 0 deletions tests/test_descriptions_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,45 @@ def test_disease_trimming(self):
keep_only_best_group=True)
self.assertTrue("chronic fatigue syndrome" in sentences.get_description())

associations = [DataManager.create_annotation_record(source_line="", gene_id="MGI:107718",
gene_symbol="", gene_type="gene", taxon_id="",
object_id="DOID:0050144", qualifiers="", aspect="D",
ecode="TAS", references="", prvdr="MGI", date=""),
DataManager.create_annotation_record(source_line="", gene_id="MGI:107718",
gene_symbol="", gene_type="gene", taxon_id="",
object_id="DOID:10754", qualifiers="", aspect="D",
ecode="TAS", references="", prvdr="MGI", date=""),
DataManager.create_annotation_record(source_line="", gene_id="MGI:107718",
gene_symbol="", gene_type="gene", taxon_id="",
object_id="DOID:0110609", qualifiers="", aspect="D",
ecode="TAS", references="", prvdr="MGI", date=""),
DataManager.create_annotation_record(source_line="", gene_id="MGI:107718",
gene_symbol="", gene_type="gene", taxon_id="",
object_id="DOID:0110599", qualifiers="", aspect="D",
ecode="TAS", references="", prvdr="MGI", date=""),
DataManager.create_annotation_record(source_line="", gene_id="MGI:107718",
gene_symbol="", gene_type="gene", taxon_id="",
object_id="DOID:9562", qualifiers="", aspect="D",
ecode="TAS", references="", prvdr="MGI", date=""),
DataManager.create_annotation_record(source_line="", gene_id="MGI:107718",
gene_symbol="", gene_type="gene", taxon_id="",
object_id="DOID:6419", qualifiers="", aspect="D",
ecode="TAS", references="", prvdr="MGI", date=""),
DataManager.create_annotation_record(source_line="", gene_id="MGI:107718",
gene_symbol="", gene_type="gene", taxon_id="",
object_id="DOID:0050545", qualifiers="", aspect="D",
ecode="TAS", references="", prvdr="MGI", date=""),
]
self.df.do_associations = AssociationSetFactory().create_from_assocs(assocs=associations,
ontology=self.df.do_ontology)
self.conf_parser.config["do_exp_sentences_options"]["trimming_algorithm"] = "naive"
generator = OntologySentenceGenerator(gene_id="MGI:107718", module=Module.DO_EXPERIMENTAL,
data_manager=self.df, config=self.conf_parser, humans=True)
sentences = generator.get_module_sentences(
config=self.conf_parser, aspect='D', qualifier='', merge_groups_with_same_prefix=True,
keep_only_best_group=True)
self.assertTrue("(multiple)" in sentences.get_description())

def test_remove_mixed_functions_processes(self):
generator = OntologySentenceGenerator(gene_id="WB:WBGene00000105", module=Module.GO,
data_manager=self.df, config=self.conf_parser)
Expand Down

0 comments on commit 2f73fa1

Please sign in to comment.