From c7d18385846406752c61c570a2c2a4728c046001 Mon Sep 17 00:00:00 2001 From: joeflack4 Date: Mon, 3 Apr 2023 18:09:19 -0400 Subject: [PATCH] FHIR: Output as NPM package - Rename: OboGraphToFHIRConverter --> OboGraphToFhirJsonConverter - Add: OboGraphToFhirNpmConverter: Saves in FHIR NPM package format. - Add: New CLI output_type option: fhirnpm - Add: StreamingFhirNpmWriter (WIP) - Add: Test file: tests/input/fhir_npm_manifest_so.json - Add: Test helper function: _load_and_convert_npm() - Add: Unit test: test_convert_so_package() - Update: .gitignore: tests/input/*_conf.json --- .gitignore | 10 ++- .../packages/converters/obo-graph-to-fhir.rst | 2 +- src/oaklib/cli.py | 10 ++- .../converters/obo_graph_to_fhir_converter.py | 83 ++++++++++++++++- src/oaklib/interfaces/dumper_interface.py | 8 +- src/oaklib/io/streaming_fhir_writer.py | 27 +++++- tests/input/fhir_npm_manifest_so.json | 24 +++++ .../test_converters/test_obo_graph_to_fhir.py | 89 ++++++++++++++++--- 8 files changed, 225 insertions(+), 28 deletions(-) create mode 100644 tests/input/fhir_npm_manifest_so.json diff --git a/.gitignore b/.gitignore index e54ff20dd..bb9a4e1d9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,6 @@ .tox/ __pycache__/ .ipynb_checkpoints/ -tests/output/ dist/ db/ @@ -11,6 +10,10 @@ db/ .vscode .venv +.coverage.* +.coverage +coverage.* + docs/_build/ docs/src/ docs/datamodels/*/*.md @@ -22,6 +25,5 @@ notebooks/*/output/*json notebooks/*/output/*tsv notebooks/api-key.txt -.coverage.* -.coverage -coverage.* +tests/output/ +tests/input/*_conf.json diff --git a/docs/packages/converters/obo-graph-to-fhir.rst b/docs/packages/converters/obo-graph-to-fhir.rst index a44dcefca..c77b0e78a 100644 --- a/docs/packages/converters/obo-graph-to-fhir.rst +++ b/docs/packages/converters/obo-graph-to-fhir.rst @@ -5,5 +5,5 @@ OBO Graph to FHIR Converter .. currentmodule:: oaklib.converters.obo_graph_to_fhir_converter -.. autoclass:: OboGraphToFHIRConverter +.. 
autoclass:: OboGraphToFhirJsonConverter :members: diff --git a/src/oaklib/cli.py b/src/oaklib/cli.py index c3518d956..21c3d31d2 100644 --- a/src/oaklib/cli.py +++ b/src/oaklib/cli.py @@ -121,7 +121,10 @@ from oaklib.io.rollup_report_writer import write_report from oaklib.io.streaming_axiom_writer import StreamingAxiomWriter from oaklib.io.streaming_csv_writer import StreamingCsvWriter -from oaklib.io.streaming_fhir_writer import StreamingFHIRWriter +from oaklib.io.streaming_fhir_writer import ( + StreamingFhirJsonWriter, + StreamingFhirNpmWriter, +) from oaklib.io.streaming_info_writer import StreamingInfoWriter from oaklib.io.streaming_json_writer import StreamingJsonWriter from oaklib.io.streaming_kgcl_writer import StreamingKGCLWriter @@ -197,6 +200,7 @@ NL_FORMAT = "nl" KGCL_FORMAT = "kgcl" FHIR_JSON_FORMAT = "fhirjson" +FHIR_NPM_FORMAT = "fhirnpm" HEATMAP_FORMAT = "heatmap" ONT_FORMATS = [ @@ -207,6 +211,7 @@ JSON_FORMAT, YAML_FORMAT, FHIR_JSON_FORMAT, + FHIR_NPM_FORMAT, CSV_FORMAT, NL_FORMAT, ] @@ -223,7 +228,8 @@ JSONL_FORMAT: StreamingJsonWriter, YAML_FORMAT: StreamingYamlWriter, SSSOM_FORMAT: StreamingSssomWriter, - FHIR_JSON_FORMAT: StreamingFHIRWriter, + FHIR_JSON_FORMAT: StreamingFhirJsonWriter, + FHIR_NPM_FORMAT: StreamingFhirNpmWriter, INFO_FORMAT: StreamingInfoWriter, NL_FORMAT: StreamingNaturalLanguageWriter, KGCL_FORMAT: StreamingKGCLWriter, diff --git a/src/oaklib/converters/obo_graph_to_fhir_converter.py b/src/oaklib/converters/obo_graph_to_fhir_converter.py index fe7ea4080..29302097e 100644 --- a/src/oaklib/converters/obo_graph_to_fhir_converter.py +++ b/src/oaklib/converters/obo_graph_to_fhir_converter.py @@ -4,7 +4,12 @@ - Updates issue: https://github.com/INCATools/ontology-access-kit/issues/369 - Conversion examples: https://drive.google.com/drive/folders/1lwGQ63_fedfWlGlRemq8OeZhZsvIXN01 """ +import json import logging +import os +import shutil +import tarfile +import tempfile from dataclasses import dataclass from typing import Any, Dict, 
List, Tuple, Union @@ -52,7 +57,7 @@ @dataclass -class OboGraphToFHIRConverter(DataModelConverter): +class OboGraphToFhirJsonConverter(DataModelConverter): """Converts from OboGraph to FHIR. - An ontology is mapped to a FHIR `CodeSystem `_. @@ -86,7 +91,9 @@ def dump( Dump an OBO Graph Document to a FHIR CodeSystem. :param source: Source serialization. - :param target: Target serialization. + :param target: Target outpath. + :param output_as_npm: Output as a FHIR NPM package. + See: https://confluence.hl7.org/display/FHIR/NPM+Package+Specification :param kwargs: Additional keyword arguments passed to :ref:`convert`. """ cs = self.convert( @@ -119,11 +126,11 @@ def convert( To use: - >>> from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFHIRConverter + >>> from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFhirJsonConverter >>> from oaklib.datamodels.obograph import GraphDocument >>> from linkml_runtime.dumpers import json_dumper >>> from linkml_runtime.loaders import json_loader - >>> converter = OboGraphToFHIRConverter() + >>> converter = OboGraphToFhirJsonConverter() >>> graph = json_loader.load("tests/input/hp_test.json", target_class=GraphDocument) >>> code_system = converter.convert(graph) >>> print(json_dumper.dumps(code_system)) @@ -209,6 +216,7 @@ def _convert_graph( predicate_period_replacement: bool = False, ) -> CodeSystem: target.id = source.id + target.version = source.meta.version edges_by_subject = index_graph_edges_by_subject(source) logging.info(f"Converting graph to obo: {source.id}, nodes={len(source.nodes)}") self.predicates_to_export = set() @@ -290,3 +298,70 @@ def _convert_meta(self, source: Node, concept: Concept): value=synonym.val, ) ) + + +@dataclass +class OboGraphToFhirNpmConverter(OboGraphToFhirJsonConverter): + """Converts an OBO Graph to a FHIR NPM package. + + Plays the same role as OboGraphToFhirJsonConverter, but also packages the outputs. 
+ """ + + def dump( + self, + source: GraphDocument, + target: str, + manifest_path: str, + **kwargs, + ) -> str: + """ + Dump an OBO Graph Document to a FHIR NPM package (.tgz) containing a CodeSystem. + + :param source: Source serialization. + :param target: Target directory to save the output. + :param manifest_path: Path to a manifest JSON. Required fields: 'name', 'version', 'description', and 'author'. + See: https://confluence.hl7.org/display/FHIR/NPM+Package+Specification + :param kwargs: Additional keyword arguments passed to :ref:`convert`. + """ + cs = self.convert( + source, + **kwargs, + ) + cs_filename = "CodeSystem-" + kwargs["code_system_id"] + ".json" + outpath = os.path.join(target, cs_filename.replace(".json", ".tgz")) + + # Create directory structure + temp_dir = tempfile.mkdtemp() + package_dir = os.path.join(temp_dir, "package") + os.mkdir(package_dir) + + # Save FHIR resources + cs_str = json_dumper.dumps(cs, inject_type=False) + with open(os.path.join(package_dir, cs_filename), "w", encoding="UTF-8") as f: + f.write(cs_str) + + # Save manifest package.json + shutil.copyfile(manifest_path, os.path.join(package_dir, "package.json")) + + # Create and save .index.json + package_index = { + "index-version": 1, + "files": [ + { + "filename": cs_filename, + "resourceType": "CodeSystem", + "id": kwargs["code_system_id"], + "url": kwargs["code_system_url"], + "version": cs.version, + }, + ], + } + with open(os.path.join(package_dir, ".index.json"), "w", encoding="UTF-8") as f: + json.dump(package_index, f) + + # Save gzipped tarball (.tgz) and remove temp dir + with tarfile.open(outpath, "w:gz") as tar: + tar.add(package_dir, arcname="package") + shutil.rmtree(temp_dir) + + return outpath diff --git a/src/oaklib/interfaces/dumper_interface.py b/src/oaklib/interfaces/dumper_interface.py index 75aada944..117b83355 100644 --- a/src/oaklib/interfaces/dumper_interface.py +++ b/src/oaklib/interfaces/dumper_interface.py @@ -5,7 +5,10 @@ from linkml_runtime.dumpers import json_dumper from 
oaklib.converters.obo_graph_to_cx_converter import OboGraphToCXConverter -from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFHIRConverter +from oaklib.converters.obo_graph_to_fhir_converter import ( + OboGraphToFhirJsonConverter, + OboGraphToFhirNpmConverter, +) from oaklib.converters.obo_graph_to_obo_format_converter import ( OboGraphToOboFormatConverter, ) @@ -18,7 +21,8 @@ OBOGRAPH_CONVERTERS = { "obo": OboGraphToOboFormatConverter, - "fhirjson": OboGraphToFHIRConverter, + "fhirjson": OboGraphToFhirJsonConverter, + "fhirnpm": OboGraphToFhirNpmConverter, "owl": OboGraphToRdfOwlConverter, "turtle": OboGraphToRdfOwlConverter, "rdf": OboGraphToRdfOwlConverter, diff --git a/src/oaklib/io/streaming_fhir_writer.py b/src/oaklib/io/streaming_fhir_writer.py index da79d8543..908f08fe8 100644 --- a/src/oaklib/io/streaming_fhir_writer.py +++ b/src/oaklib/io/streaming_fhir_writer.py @@ -4,7 +4,7 @@ from linkml_runtime.dumpers import json_dumper -from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFHIRConverter +from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFhirJsonConverter from oaklib.datamodels.obograph import GraphDocument from oaklib.interfaces.obograph_interface import OboGraphInterface from oaklib.io.streaming_writer import StreamingWriter @@ -12,7 +12,7 @@ @dataclass -class StreamingFHIRWriter(StreamingWriter): +class StreamingFhirJsonWriter(StreamingWriter): """ A writer that emits FHIR CodeSystem objects or Concept objects """ @@ -24,10 +24,31 @@ def emit_multiple(self, entities: Iterable[CURIE], **kwargs): g = oi.extract_graph(list(entities), include_metadata=True) gd = GraphDocument(graphs=[g]) logging.info(f"Converting {len(g.nodes)} nodes to OBO") - converter = OboGraphToFHIRConverter() + converter = OboGraphToFhirJsonConverter() converter.curie_converter = oi.converter code_system = converter.convert(gd) logging.info(f"Writing {len(code_system.concept)} Concepts") + # TODO: Should not this call 
OboGraphToFhirJsonConverter.dump()? self.file.write(json_dumper.dumps(code_system)) else: super().emit_multiple(entities, **kwargs) + + +# TODO: Implement; conversion to a FHIR NPM package is currently a stub. +@dataclass +class StreamingFhirNpmWriter(StreamingWriter): + """ + A writer intended to emit a FHIR NPM package (WIP: not yet implemented) + """ + + def emit_multiple(self, entities: Iterable[CURIE], **kwargs): + oi = self.ontology_interface + if isinstance(oi, OboGraphInterface): + logging.info("Extracting graph") + g = oi.extract_graph(list(entities), include_metadata=True) + gd = GraphDocument(graphs=[g]) + logging.info(f"Converting {len(g.nodes)} nodes to OBO") + converter = None + print(gd, converter) + else: + super().emit_multiple(entities, **kwargs) diff --git a/tests/input/fhir_npm_manifest_so.json b/tests/input/fhir_npm_manifest_so.json new file mode 100644 index 000000000..51329257b --- /dev/null +++ b/tests/input/fhir_npm_manifest_so.json @@ -0,0 +1,24 @@ +{ + "name": "sequence-ontology", + "version": "0.1.0", + "canonical": "http://purl.obolibrary.org/obo/so.owl", + "title": "Sequence Ontology", + "description": "The Sequence Ontology is a set of terms and relationships used to describe the features and attributes of biological sequence.", + "homepage": "http://www.sequenceontology.org/", + "keywords": [ + "SO", + "Sequence Ontology" + ], + "author": "TIMS", + "maintainers": [ + { + "name": "Joe Flack", + "email": "jflack@jhu.edu" + }, + { + "name": "Shahim Essaid", + "email": "shahim@essaid.com" + } + ], + "license": "MIT" +} \ No newline at end of file diff --git a/tests/test_converters/test_obo_graph_to_fhir.py b/tests/test_converters/test_obo_graph_to_fhir.py index 0056a4ed6..254e15ee8 100644 --- a/tests/test_converters/test_obo_graph_to_fhir.py +++ b/tests/test_converters/test_obo_graph_to_fhir.py @@ -1,5 +1,8 @@ """Tests for: Obographs to FHIR converter""" +import json import os +import shutil +import tarfile import unittest from typing import List @@ -7,7 +10,10 @@ import requests from linkml_runtime.loaders 
import json_loader -from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFHIRConverter +from oaklib.converters.obo_graph_to_fhir_converter import ( + OboGraphToFhirJsonConverter, + OboGraphToFhirNpmConverter, +) from oaklib.datamodels.fhir import CodeSystem from oaklib.datamodels.obograph import GraphDocument from oaklib.interfaces.basic_ontology_interface import get_default_prefix_map @@ -34,7 +40,33 @@ def _load_ontology(url: str, download_path: str, use_cache: bool = True) -> Grap return json_loader.load(str(download_path), target_class=GraphDocument) return json_loader.load(url, target_class=GraphDocument) - def _load_and_convert( + def _load_and_convert_npm( + self, + outdir: str, + obograph_path: str, + dl_url: str = None, + code_system_url: str = None, + code_system_id: str = None, + native_uri_stems: List[str] = None, + manifest_path: str = None, + ) -> str: + """Loads and converts an ontology.""" + if dl_url: + gd: GraphDocument = self._load_ontology(dl_url, obograph_path) + else: + gd: GraphDocument = json_loader.load(str(obograph_path), target_class=GraphDocument) + outpath: str = self.npm_converter.dump( + gd, + outdir, + code_system_id=code_system_id, + code_system_url=code_system_url, + include_all_predicates=True, + native_uri_stems=native_uri_stems, + manifest_path=manifest_path, + ) + return outpath + + def _load_and_convert_json( self, outpath: str, obograph_path: str, @@ -48,7 +80,7 @@ def _load_and_convert( gd: GraphDocument = self._load_ontology(dl_url, obograph_path) else: gd: GraphDocument = json_loader.load(str(obograph_path), target_class=GraphDocument) - self.converter.dump( + self.json_converter.dump( gd, outpath, code_system_id=code_system_id, @@ -60,8 +92,14 @@ def _load_and_convert( def setUp(self): """Set up tests""" - self.converter = OboGraphToFHIRConverter() - self.converter.curie_converter = curies.Converter.from_prefix_map(get_default_prefix_map()) + self.json_converter = OboGraphToFhirJsonConverter() + 
self.json_converter.curie_converter = curies.Converter.from_prefix_map( + get_default_prefix_map() + ) + self.npm_converter = OboGraphToFhirNpmConverter() + self.npm_converter.curie_converter = curies.Converter.from_prefix_map( + get_default_prefix_map() + ) self.compliance_tester = ComplianceTester(self) def test_convert_go_nucleus(self): @@ -69,7 +107,7 @@ def test_convert_go_nucleus(self): filename = "CodeSystem-go-nucleus" ont = INPUT_DIR / "go-nucleus.json" out = OUTPUT_DIR / f"{filename}.json" - cs: CodeSystem = self._load_and_convert( + cs: CodeSystem = self._load_and_convert_json( out, ont, code_system_id=filename.replace("CodeSystem-", ""), @@ -89,7 +127,7 @@ def test_convert_hp_subset(self): filename = "CodeSystem-hp_test" ont = INPUT_DIR / "hp_test.json" out = OUTPUT_DIR / f"{filename}.json" - cs: CodeSystem = self._load_and_convert( + cs: CodeSystem = self._load_and_convert_json( out, ont, code_system_id=filename.replace("CodeSystem-", ""), @@ -111,7 +149,7 @@ def test_convert_mondo(self): ) dl_path = OUTPUT_DIR / "mondo.owl.obographs.json" out = OUTPUT_DIR / f"{filename}.json" - cs: CodeSystem = self._load_and_convert( + cs: CodeSystem = self._load_and_convert_json( out, dl_path, dl_url=dl_url, @@ -134,7 +172,7 @@ def test_convert_hpo(self): ) dl_path = OUTPUT_DIR / "hpo.owl.obographs.json" out = OUTPUT_DIR / f"{filename}.json" - cs: CodeSystem = self._load_and_convert( + cs: CodeSystem = self._load_and_convert_json( out, dl_path, dl_url=dl_url, @@ -157,7 +195,7 @@ def test_convert_comploinc(self): ) dl_path = OUTPUT_DIR / "comploinc.owl.obographs.json" out = OUTPUT_DIR / f"{filename}.json" - cs: CodeSystem = self._load_and_convert( + cs: CodeSystem = self._load_and_convert_json( out, dl_path, dl_url=dl_url, @@ -181,7 +219,7 @@ def test_convert_rxnorm(self): ) dl_path = OUTPUT_DIR / "RXNORM-fixed.ttl.obographs.json" out = OUTPUT_DIR / f"{filename}.json" - cs: CodeSystem = self._load_and_convert( + cs: CodeSystem = self._load_and_convert_json( out, 
dl_path, dl_url=dl_url, @@ -206,7 +244,7 @@ def test_convert_so(self): ) dl_path = OUTPUT_DIR / "so.owl.obographs.json" out = OUTPUT_DIR / f"{filename}.json" - cs: CodeSystem = self._load_and_convert( + cs: CodeSystem = self._load_and_convert_json( out, dl_path, dl_url=dl_url, @@ -219,3 +257,30 @@ def test_convert_so(self): # TODO: choose a property to assert # prop_uris: List[str] = [p.uri for p in cs.property] # self.assertIn("", prop_uris) + + @unittest.skip("TODO: change to an integration test") + def test_convert_so_package(self): + """Tests specific to Sequence Ontology (SO).""" + if DOWNLOAD_TESTS_ON: + filename = "CodeSystem-sequence-ontology" + dl_url = ( + "https://github.com/" + "HOT-Ecosystem/owl-on-fhir-content/releases/download/2023-01-13/so.owl.obographs.json" + ) + dl_path = OUTPUT_DIR / "so.owl.obographs.json" + zip_outpath: str = self._load_and_convert_npm( + OUTPUT_DIR, + dl_path, + dl_url=dl_url, + code_system_id=filename.replace("CodeSystem-", ""), + code_system_url="http://purl.obolibrary.org/obo/so.owl", + native_uri_stems=["http://purl.obolibrary.org/obo/SO_"], + manifest_path=INPUT_DIR / "fhir_npm_manifest_so.json", + ) + unzip_path = os.path.join(OUTPUT_DIR, os.path.basename(zip_outpath).replace(".tgz", "")) + with tarfile.open(zip_outpath, "r:gz") as tar: + tar.extractall(unzip_path) + with open(os.path.join(unzip_path, "package", filename + ".json")) as f: + cs = json.load(f) + shutil.rmtree(unzip_path) + self.assertGreater(len(cs["concept"]), 100)