Skip to content

Commit

Permalink
Merge pull request #674 from biolink/go-site-676-gorule-0000022-check…
Browse files Browse the repository at this point in the history
…-for-retracted-publications

Go site 676 gorule 0000022 check for retracted publications
  • Loading branch information
mugitty authored May 16, 2024
2 parents 169b864 + cd830b9 commit 682e2f5
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 9 deletions.
13 changes: 11 additions & 2 deletions bin/ontobio-parse-assocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ def main():
parser.add_argument("-g", "--gpi", type=str, required=False, default=None,
help="GPI file")
parser.add_argument("-m", "--metadata_dir", type=dir_path, required=False,
help="Path to metadata directory")
help="Path to metadata directory")
parser.add_argument("--retracted_pub_set", type=argparse.FileType('r'), required=False,
help="Path to retracted publications file")
parser.add_argument("-l", "--rule", action="append", required=None, default=[], dest="rule_set",
help="Set of rules to be run. Default is no rules to be run, with the exception \
of gorule-0000027 and gorule-0000020. See command line documentation in the \
Expand Down Expand Up @@ -143,11 +145,17 @@ def main():
rule_set = assocparser.RuleSet.ALL

goref_metadata = None
ref_species_metadata = None
ref_species_metadata = None
if args.metadata_dir:
absolute_metadata = os.path.abspath(args.metadata_dir)
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
ref_species_metadata = metadata.yaml_set(absolute_metadata, "go-reference-species.yaml", "taxon_id")

retracted_pub_set = None
if args.retracted_pub_set:
retracted_pub_set = metadata.retracted_pub_set(args.retracted_pub_set.name)
elif args.metadata_dir:
retracted_pub_set = metadata.retracted_pub_set_from_meta(absolute_metadata)

# set configuration
filtered_evidence_file = open(args.filtered_file, "w") if args.filtered_file else None
Expand All @@ -164,6 +172,7 @@ def main():
gpi_authority_path=args.gpi,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pub_set,
rule_set=rule_set
)
p = None
Expand Down
29 changes: 24 additions & 5 deletions bin/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from ontobio.validation import tools
from ontobio.validation import rules


from typing import Dict, Set

# logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s: %(message)s", level=logging.WARNING)
Expand Down Expand Up @@ -210,7 +211,7 @@ def create_parser(config, group, dataset, format="gaf"):
"""

@tools.gzips
def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None, goref_metadata=None, ref_species_metadata=None, db_entities=None, group_idspace=None, format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None, extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2", rule_set=assocparser.RuleSet.ALL):
def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None, goref_metadata=None, ref_species_metadata=None, retracted_pub_set=None,db_entities=None, group_idspace=None, format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None, extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2", rule_set=assocparser.RuleSet.ALL):
filtered_associations = open(os.path.join(os.path.split(source_gaf)[0], "{}_noiea.gaf".format(dataset)), "w")
config = assocparser.AssocParserConfig(
ontology=ontology_graph,
Expand All @@ -221,6 +222,7 @@ def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False,
rule_metadata=rule_metadata,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pub_set,
entity_idspaces=db_entities,
group_idspace=group_idspace,
suppress_rule_reporting_tags=suppress_rule_reporting_tags,
Expand Down Expand Up @@ -493,7 +495,8 @@ def cli(ctx, verbose):
@click.option("--only-dataset", default=None)
@click.option("--gaf-output-version", default="2.2", type=click.Choice(["2.1", "2.2"]))
@click.option("--rule-set", "-l", "rule_set", default=[assocparser.RuleSet.ALL], multiple=True)
def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base_download_url, suppress_rule_reporting_tag, skip_existing_files, gaferencer_file, only_dataset, gaf_output_version, rule_set):
@click.option("--retracted_pub_set", type=click.Path(exists=True), default=None, required=False, help="Path to retracted publications file")
def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base_download_url, suppress_rule_reporting_tag, skip_existing_files, gaferencer_file, only_dataset, gaf_output_version, rule_set, retracted_pub_set):

logger.info("Logging is verbose")
products = {
Expand Down Expand Up @@ -529,7 +532,7 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base

db_entities = metadata.database_entities(absolute_metadata)
group_ids = metadata.groups(absolute_metadata)
extensions_constraints = metadata.extensions_constraints_file(absolute_metadata)
extensions_constraints = metadata.extensions_constraints_file(absolute_metadata)

gaferences = None
if gaferencer_file:
Expand All @@ -539,6 +542,12 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base
if rule_set == (assocparser.RuleSet.ALL,):
rule_set = assocparser.RuleSet.ALL

retracted_pubs = None
if retracted_pub_set:
retracted_pubs = metadata.retracted_pub_set(retracted_pub_set)
else:
retracted_pubs = metadata.retracted_pub_set_from_meta(absolute_metadata)

for dataset_metadata, source_gaf in downloaded_gaf_sources:
dataset = dataset_metadata["dataset"]
# Set paint to True when the group is "paint".
Expand All @@ -550,6 +559,7 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base
rule_metadata=rule_metadata,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pubs,
db_entities=db_entities,
group_idspace=group_ids,
suppress_rule_reporting_tags=suppress_rule_reporting_tag,
Expand Down Expand Up @@ -634,13 +644,14 @@ def paint(group, dataset, metadata, target, ontology):
absolute_target = os.path.abspath(target)
os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")

paint_src_gaf = check_and_download_mixin_source(paint_metadata, dataset, absolute_target)

click.echo("Loading ontology: {}...".format(ontology))
ontology_graph = OntologyFactory().create(ontology)

gpi_path = os.path.join(absolute_target, "groups", dataset, "{}.gpi".format(dataset))
click.echo("Using GPI at {}".format(gpi_path))
click.echo("Using GPI at {}".format(gpi_path))
paint_gaf = produce_gaf("paint_{}".format(dataset), paint_src_gaf, ontology_graph, gpipath=gpi_path)


Expand All @@ -650,7 +661,8 @@ def paint(group, dataset, metadata, target, ontology):
@click.option("--ontology", type=click.Path(), required=True)
@click.option("--gaferencer-file", "-I", type=click.Path(exists=True), default=None, required=False,
help="Path to Gaferencer output to be used for inferences")
def rule(metadata_dir, out, ontology, gaferencer_file):
@click.option("--retracted_pub_set", type=click.Path(exists=True), default=None, required=False, help="Path to retracted publications file")
def rule(metadata_dir, out, ontology, gaferencer_file, retracted_pub_set):
absolute_metadata = os.path.abspath(metadata_dir)

click.echo("Loading ontology: {}...".format(ontology))
Expand All @@ -659,6 +671,12 @@ def rule(metadata_dir, out, ontology, gaferencer_file):
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
gorule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
ref_species_metadata = metadata.yaml_set(absolute_metadata, "go-reference-species.yaml", "taxon_id")
retracted_pubs = None
if retracted_pub_set:
retracted_pubs = metadata.retracted_pub_set(retracted_pub_set)
else:
retracted_pubs = metadata.retracted_pub_set_from_meta(absolute_metadata)


click.echo("Found {} GO Rules".format(len(gorule_metadata.keys())))

Expand All @@ -673,6 +691,7 @@ def rule(metadata_dir, out, ontology, gaferencer_file):
ontology=ontology_graph,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pubs,
entity_idspaces=db_entities,
group_idspace=group_ids,
annotation_inferences=gaferences,
Expand Down
2 changes: 2 additions & 0 deletions ontobio/io/assocparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ def __init__(self,
ref_species_metadata=None,
group_metadata=None,
dbxrefs=None,
retracted_pub_set=None,
suppress_rule_reporting_tags=[],
annotation_inferences=None,
extensions_constraints=None,
Expand All @@ -258,6 +259,7 @@ def __init__(self,
self.goref_metadata = goref_metadata
self.ref_species_metadata = ref_species_metadata
self.group_metadata = group_metadata
self.retracted_pub_set = retracted_pub_set
self.suppress_rule_reporting_tags = suppress_rule_reporting_tags
self.annotation_inferences = annotation_inferences
self.entity_idspaces = entity_idspaces
Expand Down
15 changes: 15 additions & 0 deletions ontobio/io/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,20 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
return self._result(bool(withfrom))
else:
return self._result(True)

class GoRule22(GoRule):
    """GORULE:0000022: flag annotations whose references are retracted publications."""

    def __init__(self):
        super().__init__("GORULE:0000022", "Check for, and filter, annotations made to retracted publications", FailMode.HARD)

    def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
        """Fail when any supporting reference of the annotation is in config.retracted_pub_set.

        When the configuration carries no retracted publication set (None),
        the rule passes unconditionally.
        """
        if config.retracted_pub_set is None:
            return self._result(True)

        # References are compared by their string form against the configured set.
        cites_retracted = any(
            str(reference) in config.retracted_pub_set
            for reference in annotation.evidence.has_supporting_reference
        )
        return self._result(not cites_retracted)


class GoRule26(GoRule):
Expand Down Expand Up @@ -952,6 +966,7 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
"GoRule16": GoRule16(),
"GoRule17": GoRule17(),
"GoRule18": GoRule18(),
"GoRule22": GoRule22(),
"GoRule26": GoRule26(),
"GoRule28": GoRule28(),
"GoRule29": GoRule29(),
Expand Down
27 changes: 26 additions & 1 deletion ontobio/validation/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,32 @@ def yaml_set(metadata, yaml_file_name, field) -> Set[str]:
except Exception as e:
raise click.ClickException("Could not find or read {}: {}".format(yaml_path, str(e)))

return set([yaml[field] for yaml in yaml_list])
return set([yaml[field] for yaml in yaml_list])

def retracted_pub_set_from_meta(metadata) -> Set:
    """Load "retracted-publications.txt" from the given metadata directory.

    :param metadata: path of the metadata directory
    :return: the set read from the file, or an empty set when the file is
        not readable at that location
    """
    candidate = os.path.join(metadata, "retracted-publications.txt")
    # A missing or unreadable file is not an error here: fall back to an empty set.
    if not os.access(candidate, os.R_OK):
        return set()
    return retracted_pub_set_use_abspath(candidate)

def retracted_pub_set(abspath_retracted_file) -> Set:
    """Read a retracted-publications file given by path.

    The path is made absolute with os.path.abspath before being handed to
    retracted_pub_set_use_abspath, whose result is returned unchanged.
    """
    resolved_path = os.path.abspath(abspath_retracted_file)
    return retracted_pub_set_use_abspath(resolved_path)

def retracted_pub_set_use_abspath(abspath_retracted_file) -> Set:
    """Load the identifiers listed in a retracted-publications file.

    Each line is stripped of surrounding whitespace. Lines starting with "!"
    are treated as comments and skipped, as are blank lines. When a line
    contains a comma, only the text before the first comma is kept (with any
    whitespace around it removed).

    :param abspath_retracted_file: path of the file to read
    :return: set of identifier strings found in the file
    :raises click.ClickException: if the file cannot be opened or read
    """
    try:
        retracted_pubs = set()
        with open(abspath_retracted_file, "r") as f:
            for line in f:
                li = line.strip()
                # Blank lines carry no identifier; "!" marks a comment line.
                if not li or li.startswith("!"):
                    continue
                # Keep only the leading field. Strip again so "ID , note" yields
                # "ID" rather than "ID ", which would never match a reference.
                pub_id = li.partition(",")[0].strip()
                if pub_id:
                    retracted_pubs.add(pub_id)
        return retracted_pubs
    except Exception as e:
        raise click.ClickException("Could not find or read {}: {}".format(abspath_retracted_file, str(e))) from e



16 changes: 15 additions & 1 deletion tests/test_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,20 @@ def test_go_rule_18():
test_result = qc.GoRule18().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.PASS

def test_go_rule22():
    """GoRule22 passes for an unlisted reference and errors for a retracted one."""
    config = assocparser.AssocParserConfig(
        ontology=ontology,
        retracted_pub_set={"RETRACTED:1234","PMID:37772366"},
        rule_set=assocparser.RuleSet.ALL
    )

    # (reference on the annotation, expected rule outcome)
    expectations = (
        ("PMID:12345", qc.ResultType.PASS),
        ("PMID:37772366", qc.ResultType.ERROR),
    )
    for reference, expected in expectations:
        assoc = make_annotation(goid="GO:1234567", evidence="IBA", references=reference).associations[0]
        test_result = qc.GoRule22().test(assoc, config)
        assert test_result.result_type == expected

def test_go_rule26():

config = assocparser.AssocParserConfig(
Expand Down Expand Up @@ -819,7 +833,7 @@ def test_all_rules():
assoc = gafparser.to_association(a).associations[0]

test_results = qc.test_go_rules(assoc, config).all_results
assert len(test_results.keys()) == 26
assert len(test_results.keys()) == 27
assert test_results[qc.GoRules.GoRule26.value].result_type == qc.ResultType.PASS
assert test_results[qc.GoRules.GoRule29.value].result_type == qc.ResultType.PASS

Expand Down

0 comments on commit 682e2f5

Please sign in to comment.