From 7494f324c8b1fa698c4f773cd95d43cb70ac104f Mon Sep 17 00:00:00 2001 From: dustine32 Date: Mon, 22 Mar 2021 15:23:16 -0700 Subject: [PATCH] Gocamgen reports: Logical def errors and GO rules --- bin/validate.py | 11 ++++++++--- ontobio/rdfgen/gocamgen/errors.py | 14 +++++++++++++- ontobio/rdfgen/gocamgen/gocam_builder.py | 6 ++++-- ontobio/rdfgen/gocamgen/gocamgen.py | 5 ++++- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/bin/validate.py b/bin/validate.py index 1c51541d..fabbdeee 100644 --- a/bin/validate.py +++ b/bin/validate.py @@ -566,9 +566,11 @@ def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology, ttl): gpad_basename = os.path.basename(gpad_path) gpad_basename_root, gpad_ext = os.path.splitext(gpad_basename) output_basename = "{}.nq".format(gpad_basename_root) - report_basename = "{}.gocamgen.report".format(gpad_basename_root) + parse_report_basename = "{}.parser.report".format(gpad_basename_root) + model_report_basename = "{}.gocamgen.report".format(gpad_basename_root) output_path = os.path.join(absolute_target, output_basename) - report_path = os.path.join(absolute_target, report_basename) + parse_report_path = os.path.join(absolute_target, parse_report_basename) + model_report_path = os.path.join(absolute_target, model_report_basename) builder = GoCamBuilder(parser_config=parser_config) @@ -580,7 +582,10 @@ def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology, ttl): if not ttl: builder.write_out_store_to_nquads(filepath=output_path) - builder.write_report(report_filepath=report_path) + # Reports + builder.write_report(report_filepath=model_report_path) + with open(parse_report_path, 'w') as prp: + prp.write(extractor.gpad_parser.report.to_markdown()) @cli.command() diff --git a/ontobio/rdfgen/gocamgen/errors.py b/ontobio/rdfgen/gocamgen/errors.py index 2aa2cda3..70d281d2 100644 --- a/ontobio/rdfgen/gocamgen/errors.py +++ b/ontobio/rdfgen/gocamgen/errors.py @@ -1,5 +1,8 @@ +from ontobio.rdfgen.gocamgen import collapsed_assoc + + class GocamgenException(Exception): - def __init__(self, message): + def __init__(self, message: str): self.message = message def __str__(self): @@ -14,6 +17,15 @@ class ShexException(GocamgenException): pass +class CollapsedAssocGocamgenException(GocamgenException): + def __init__(self, message: str, assoc: collapsed_assoc.CollapsedAssociation): + self.message = message + self.assoc = assoc + + def __str__(self): + return "{}\n{}".format(self.message, "\n".join([l.source_line for l in self.assoc.lines])) + + class GeneErrorSet: def __init__(self): self.errors = {} diff --git a/ontobio/rdfgen/gocamgen/gocam_builder.py b/ontobio/rdfgen/gocamgen/gocam_builder.py index 629450b4..ebd1d878 100644 --- a/ontobio/rdfgen/gocamgen/gocam_builder.py +++ b/ontobio/rdfgen/gocamgen/gocam_builder.py @@ -76,6 +76,8 @@ def make_model(self, gene, annotations: List[GoAssociation], output_directory=No model.write(out_filename) logger.info("Model for {} written to {} in {} sec".format(gene, out_filename, (time.time() - start_time))) + for err in model.errors: + self.errors.add_error(gene, err) return model except GocamgenException as ex: self.errors.add_error(gene, ex) @@ -156,13 +158,13 @@ def parse_gpi(gpi_file): class AssocExtractor: def __init__(self, gpad_file, parser_config: AssocParserConfig): self.assocs = [] - gpad_parser = gpadparser.GpadParser(config=parser_config) + self.gpad_parser = gpadparser.GpadParser(config=parser_config) with open(gpad_file) as sg: lines = sum(1 for line in sg) with open(gpad_file) as gf: click.echo("Making products...") - with click.progressbar(iterable=gpad_parser.association_generator(file=gf, skipheader=True), + with click.progressbar(iterable=self.gpad_parser.association_generator(file=gf, skipheader=True), length=lines) as associations: self.assocs = list(associations) diff --git a/ontobio/rdfgen/gocamgen/gocamgen.py b/ontobio/rdfgen/gocamgen/gocamgen.py index ffb07026..222830bc 100644 --- a/ontobio/rdfgen/gocamgen/gocamgen.py +++ b/ontobio/rdfgen/gocamgen/gocamgen.py @@ -463,6 +463,7 @@ def __init__(self, modeltitle, assocs: List[GoAssociation], config: AssocParserC self.provided_bys = set() self.graph.bind("GOREL", GOREL) # Because GOREL isn't in context.jsonld's self.gpi_entities = gpi_entities + self.errors: List[errors.GocamgenException] = [] ncbi_taxon = self.taxon_id_from_entity(str(assocs[0].subject.id)) # Emit model-level in_taxon triple from ncbi_taxon if ncbi_taxon: @@ -615,7 +616,9 @@ def translate(self): ext_target_n = annot_subgraph.add_instance_of_class(ext_target) annot_subgraph.add_edge(regulated_term_n, chained_rel, ext_target_n) else: - logger.warning("Couldn't get regulates relation from LD of: {}".format(term)) + err_msg = "Couldn't get regulates relation from LD of: {}".format(term) + logger.warning(err_msg) + self.errors.append(errors.CollapsedAssocGocamgenException(err_msg, a)) elif ext_relation in HAS_REGULATION_TARGET_RELATIONS: if aspect == 'P': # For BP annotations, translate 'has regulation target' to 'has input'.