Skip to content

Commit

Permalink
FHIR: Output as NPM package
Browse files Browse the repository at this point in the history
- Rename: OboGraphToFHIRConverter --> OboGraphToFhirJsonConverter
- Add: OboGraphToFhirNpmConverter: Saves in FHIR NPM package format.
- Add: New CLI output_type option: fhirnpm
- Add: StreamingFhirNpmWriter (WIP)
- Add: Test file: tests/input/fhir_npm_manifest_so.json
- Add: Test helper function: _load_and_convert_npm()
- Add: Unit test: test_convert_so_package()
- Update: .gitignore: tests/input/*_conf.json

FHIR: Output ConceptMap JSONs
- Update: OboGraphToFhirJsonConverter: Now also saves ConceptMaps
  • Loading branch information
joeflack4 committed Sep 5, 2023
1 parent f7d3738 commit b1c486a
Show file tree
Hide file tree
Showing 8 changed files with 301 additions and 29 deletions.
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
.tox/
__pycache__/
.ipynb_checkpoints/
tests/output/
dist/
db/

Expand All @@ -11,6 +10,10 @@ db/
.vscode
.venv

.coverage.*
.coverage
coverage.*

docs/_build/
docs/src/
docs/datamodels/*/*.md
Expand All @@ -25,7 +28,9 @@ notebooks/api-key.txt
.coverage.*
.coverage
coverage.*
tests/input/*_conf.json
tests/input/fhirjson_conf.json
tests/output/

oak_hp.profile
oak_semsimian_hp.profile
Expand Down
2 changes: 1 addition & 1 deletion docs/packages/converters/obo-graph-to-fhir.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ OBO Graph to FHIR Converter

.. currentmodule:: oaklib.converters.obo_graph_to_fhir_converter

.. autoclass:: OboGraphToFHIRConverter
.. autoclass:: OboGraphToFhirJsonConverter
:members:
10 changes: 8 additions & 2 deletions src/oaklib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,10 @@
from oaklib.io.rollup_report_writer import write_report
from oaklib.io.streaming_axiom_writer import StreamingAxiomWriter
from oaklib.io.streaming_csv_writer import StreamingCsvWriter
from oaklib.io.streaming_fhir_writer import StreamingFHIRWriter
from oaklib.io.streaming_fhir_writer import (
StreamingFhirJsonWriter,
StreamingFhirNpmWriter,
)
from oaklib.io.streaming_info_writer import StreamingInfoWriter
from oaklib.io.streaming_json_writer import StreamingJsonWriter
from oaklib.io.streaming_kgcl_writer import StreamingKGCLWriter
Expand Down Expand Up @@ -208,6 +211,7 @@
NL_FORMAT = "nl"
KGCL_FORMAT = "kgcl"
FHIR_JSON_FORMAT = "fhirjson"
FHIR_NPM_FORMAT = "fhirnpm"
HEATMAP_FORMAT = "heatmap"

ONT_FORMATS = [
Expand All @@ -218,6 +222,7 @@
JSON_FORMAT,
YAML_FORMAT,
FHIR_JSON_FORMAT,
FHIR_NPM_FORMAT,
CSV_FORMAT,
NL_FORMAT,
]
Expand All @@ -234,7 +239,8 @@
JSONL_FORMAT: StreamingJsonWriter,
YAML_FORMAT: StreamingYamlWriter,
SSSOM_FORMAT: StreamingSssomWriter,
FHIR_JSON_FORMAT: StreamingFHIRWriter,
FHIR_JSON_FORMAT: StreamingFhirJsonWriter,
FHIR_NPM_FORMAT: StreamingFhirNpmWriter,
INFO_FORMAT: StreamingInfoWriter,
NL_FORMAT: StreamingNaturalLanguageWriter,
KGCL_FORMAT: StreamingKGCLWriter,
Expand Down
153 changes: 149 additions & 4 deletions src/oaklib/converters/obo_graph_to_fhir_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,21 @@
- Updates issue: https://github.com/INCATools/ontology-access-kit/issues/369
- Conversion examples: https://drive.google.com/drive/folders/1lwGQ63_fedfWlGlRemq8OeZhZsvIXN01
"""
import json
import logging
import os
import shutil
import subprocess
import sys
import tarfile
import tempfile
from dataclasses import dataclass
from typing import Any, Dict, List, Tuple, Union

import rdflib
from linkml_runtime.dumpers import json_dumper
from sssom.parsers import parse_sssom_table
from sssom.writers import write_fhir_json

from oaklib.converters.data_model_converter import DataModelConverter
from oaklib.datamodels.fhir import (
Expand Down Expand Up @@ -51,8 +60,9 @@
}


# TODO: Change target to be an 'outdir': (i) docs, (ii) dump(), (iii) usages
@dataclass
class OboGraphToFHIRConverter(DataModelConverter):
class OboGraphToFhirJsonConverter(DataModelConverter):
"""Converts from OboGraph to FHIR.
- An ontology is mapped to a FHIR `CodeSystem <https://build.fhir.org/codesystem.html>`_.
Expand Down Expand Up @@ -80,15 +90,18 @@ def dump(
self,
source: GraphDocument,
target: str = None,
obograph_path: str = None,
**kwargs,
) -> None:
"""
Dump an OBO Graph Document to a FHIR CodeSystem.
:param source: Source serialization.
:param target: Target serialization.
:param target: Target outpath.
:param obograph_path: Path to an OBO Graph JSON file.
:param kwargs: Additional keyword arguments passed to :ref:`convert`.
"""
# CodeSystem
cs = self.convert(
source,
**kwargs,
Expand All @@ -99,6 +112,8 @@ def dump(
else:
with open(target, "w", encoding="UTF-8") as f:
f.write(json_str)
# ConceptMaps
self.write_concept_maps(obograph_path, target, kwargs.get("code_system_id", ""))

# todo: id/url: any way to try to ascertain ID or URL if not passed? and warn if not determined?
def convert(
Expand All @@ -119,11 +134,11 @@ def convert(
To use:
>>> from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFHIRConverter
>>> from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFhirJsonConverter
>>> from oaklib.datamodels.obograph import GraphDocument
>>> from linkml_runtime.dumpers import json_dumper
>>> from linkml_runtime.loaders import json_loader
>>> converter = OboGraphToFHIRConverter()
>>> converter = OboGraphToFhirJsonConverter()
>>> graph = json_loader.load("tests/input/hp_test.json", target_class=GraphDocument)
>>> code_system = converter.convert(graph)
>>> print(json_dumper.dumps(code_system))
Expand Down Expand Up @@ -209,6 +224,7 @@ def _convert_graph(
predicate_period_replacement: bool = False,
) -> CodeSystem:
target.id = source.id
target.version = source.meta.version
edges_by_subject = index_graph_edges_by_subject(source)
logging.info(f"Converting graph to obo: {source.id}, nodes={len(source.nodes)}")
self.predicates_to_export = set()
Expand Down Expand Up @@ -290,3 +306,132 @@ def _convert_meta(self, source: Node, concept: Concept):
value=synonym.val,
)
)

@staticmethod
def write_concept_maps(obograph_path: str, outdir: str = None, code_system_id: str = None, verbose=True):
""""From an Obograph JSON, convert to SSSOM, then convert to 1+ ConceptMap JSON"""
# Vars
outdir = outdir or obograph_path
outdir = outdir if os.path.isdir(outdir) else os.path.dirname(outdir)
# todo: could be more combinations like - or _ obographs
code_system_id = code_system_id or os.path.basename(obograph_path)\
.replace(".obographs", "").replace(".obograph", "").replace(".json", "")
outpath_sssom = os.path.join(outdir, f"{code_system_id}.sssom.tsv")

# TODO obograph -> sssom
# - needs path to be passed to dump
# - before i continue with this, check how owl-on-fhir does. does it use OAK CLI or Python API?
# ...if CLI, then I need to somehow get the CLI to pass the path down here. maybe not difficult
# ...or maybe i should just do that regardless
# ...how did I call the CLI again? maybe i ned to check my notes or save better notes...

# todo: is there a way to do this via Python API? would be better
# todo: -m metadata.sssom.yml: can I create this file on the fly and pass?
command_str = f'sssom parse {obograph_path} -I obographs-json -o {outpath_sssom}'
print('Converting: Obographs -> SSSSOM')
result = subprocess.run(command_str.split(), capture_output=True, text=True)
stderr, stdout = result.stderr, result.stdout
if verbose:
print(stdout)
print(stderr, file=sys.stderr)

# TODO: sssom -> ConceptMap (I can try Python API)
# - pass outdir
# todo: outpath_concept_map: this is temporary. in next sssom update, will pass outdir, cuz 2+ maps
print('Converting: SSSOM -> ConceptMaps')
outpath_concept_map = os.path.join(outdir, f'ConceptMap-{code_system_id}.json')
df = parse_sssom_table(outpath_sssom)
with open(outpath_concept_map, "w") as file:
write_fhir_json(df, file)

# todo: Repurpose / reuse this test from sssom-py
# def test_write_sssom_fhir(self):
# """Test writing as FHIR ConceptMap JSON."""
# path = os.path.join(test_out_dir, "test_write_sssom_fhir.json")
# with open(path, "w") as file:
# write_fhir_json(self.msdf, file)
# # todo: @Joe: after implementing reader/importer, change this to `msdf = parse_sssom_fhir_json()`
# with open(path, "r") as file:
# d: JsonObj = json.load(file)
# # todo: @Joe: What else is worth checking?
# self.assertEqual(
# len(d["group"][0]["element"]),
# self.mapping_count,
# f"{path} has the wrong number of mappings.",

print()


# TODO: also needs conceptmaps
@dataclass
class OboGraphToFhirNpmConverter(OboGraphToFhirJsonConverter):
    """Converts an OBO Graph to a FHIR NPM package.

    Plays the same role as OboGraphToFhirJsonConverter, but also packages the outputs.
    """

    def dump(
        self,
        source: GraphDocument,
        target: str,
        manifest_path: str,
        obograph_path: str = None,
        **kwargs,
    ) -> str:
        """
        Dump an OBO Graph Document to a FHIR NPM package (a gzipped tarball).

        The archive contains a ``package/`` directory holding the CodeSystem JSON, the manifest copied to
        ``package.json``, and a generated ``.index.json``.

        :param source: Source serialization.
        :param target: Target directory to save the output. Created if it does not exist.
        :param manifest_path: Path to a manifest JSON. Required fields:'name', 'version', 'description', and 'author'.
          See: https://confluence.hl7.org/display/FHIR/NPM+Package+Specification
        :param obograph_path: Path to an OBO Graph JSON file. If omitted, no ConceptMaps are written.
        :param kwargs: Additional keyword arguments passed to :ref:`convert`. Must include
          ``code_system_id`` and ``code_system_url``, which are also used for filenames and the index.
        :return: Path to the written ``.tgz`` package.
        :raises KeyError: If ``code_system_id`` or ``code_system_url`` is not passed.
        """
        # Fail fast: both are needed for filenames / the package index, so check before converting.
        missing = [key for key in ("code_system_id", "code_system_url") if key not in kwargs]
        if missing:
            raise KeyError(f"dump() requires keyword argument(s): {', '.join(missing)}")
        code_system_id = kwargs["code_system_id"]
        code_system_url = kwargs["code_system_url"]

        # Create CodeSystem JSON
        cs = self.convert(
            source,
            **kwargs,
        )
        cs_filename = f"CodeSystem-{code_system_id}.json"
        os.makedirs(target, exist_ok=True)

        # Create ConceptMap JSONs
        # TODO: make sure these conceptmaps get bundled too
        # obograph_path is optional; without it there is no file for the ConceptMap step to read.
        if obograph_path:
            self.write_concept_maps(obograph_path, target, code_system_id)

        outpath = os.path.join(target, f"CodeSystem-{code_system_id}.tgz")

        # The context manager removes the temp dir even if writing or archiving fails part-way.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create directory structure
            package_dir = os.path.join(temp_dir, "package")
            os.mkdir(package_dir)

            # Save FHIR resources
            cs_str = json_dumper.dumps(cs, inject_type=False)
            with open(os.path.join(package_dir, cs_filename), "w", encoding="UTF-8") as f:
                f.write(cs_str)

            # Save manifest package.json
            shutil.copyfile(manifest_path, os.path.join(package_dir, "package.json"))

            # Create and save .index.json
            package_index = {
                "index-version": 1,
                "files": [
                    {
                        "filename": cs_filename,
                        "resourceType": "CodeSystem",
                        "id": code_system_id,
                        "url": code_system_url,
                        "version": cs.version,
                    },
                ],
            }
            with open(os.path.join(package_dir, ".index.json"), "w", encoding="UTF-8") as f:
                json.dump(package_index, f)

            # Save the gzipped tarball
            with tarfile.open(outpath, "w:gz") as tar:
                tar.add(package_dir, arcname="package")

        return outpath
8 changes: 6 additions & 2 deletions src/oaklib/interfaces/dumper_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from linkml_runtime.dumpers import json_dumper

from oaklib.converters.obo_graph_to_cx_converter import OboGraphToCXConverter
from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFHIRConverter
from oaklib.converters.obo_graph_to_fhir_converter import (
OboGraphToFhirJsonConverter,
OboGraphToFhirNpmConverter,
)
from oaklib.converters.obo_graph_to_obo_format_converter import (
OboGraphToOboFormatConverter,
)
Expand All @@ -18,7 +21,8 @@

OBOGRAPH_CONVERTERS = {
"obo": OboGraphToOboFormatConverter,
"fhirjson": OboGraphToFHIRConverter,
"fhirjson": OboGraphToFhirJsonConverter,
"fhirnpm": OboGraphToFhirNpmConverter,
"owl": OboGraphToRdfOwlConverter,
"turtle": OboGraphToRdfOwlConverter,
"rdf": OboGraphToRdfOwlConverter,
Expand Down
27 changes: 24 additions & 3 deletions src/oaklib/io/streaming_fhir_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

from linkml_runtime.dumpers import json_dumper

from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFHIRConverter
from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFhirJsonConverter
from oaklib.datamodels.obograph import GraphDocument
from oaklib.interfaces.obograph_interface import OboGraphInterface
from oaklib.io.streaming_writer import StreamingWriter
from oaklib.types import CURIE


@dataclass
class StreamingFHIRWriter(StreamingWriter):
class StreamingFhirJsonWriter(StreamingWriter):
"""
A writer that emits FHIR CodeSystem objects or Concept objects
"""
Expand All @@ -24,10 +24,31 @@ def emit_multiple(self, entities: Iterable[CURIE], **kwargs):
g = oi.extract_graph(list(entities), include_metadata=True)
gd = GraphDocument(graphs=[g])
logging.info(f"Converting {len(g.nodes)} nodes to OBO")
converter = OboGraphToFHIRConverter()
converter = OboGraphToFhirJsonConverter()
converter.curie_converter = oi.converter
code_system = converter.convert(gd)
logging.info(f"Writing {len(code_system.concept)} Concepts")
# TODO: Should not this call OboGraphToFhirJsonConverter.dump()?
self.file.write(json_dumper.dumps(code_system))
else:
super().emit_multiple(entities, **kwargs)


# TODO:
@dataclass
class StreamingFhirNpmWriter(StreamingWriter):
    """
    Work-in-progress writer for the FHIR NPM output format.

    For OboGraph-capable interfaces it currently only extracts the graph for the requested entities and
    prints the resulting GraphDocument together with a placeholder converter; no package is written yet.
    Any other interface is delegated to the parent writer.
    """

    def emit_multiple(self, entities: Iterable[CURIE], **kwargs):
        interface = self.ontology_interface
        # Interfaces without OboGraph support fall back to the generic streaming behaviour.
        if not isinstance(interface, OboGraphInterface):
            super().emit_multiple(entities, **kwargs)
            return

        logging.info("Extracting graph")
        graph = interface.extract_graph(list(entities), include_metadata=True)
        graph_doc = GraphDocument(graphs=[graph])
        logging.info(f"Converting {len(graph.nodes)} nodes to OBO")
        # Placeholder until an NPM converter is wired in here.
        placeholder_converter = None
        print(graph_doc, placeholder_converter)
24 changes: 24 additions & 0 deletions tests/input/fhir_npm_manifest_so.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"name": "sequence-ontology",
"version": "0.1.0",
"canonical": "http://purl.obolibrary.org/obo/so.owl",
"title": "Sequence Ontology",
"description": "The Sequence Ontology is a set of terms and relationships used to describe the features and attributes of biological sequence.",
"homepage": "http://www.sequenceontology.org/",
"keywords": [
"SO",
"Sequence Ontology"
],
"author": "TIMS",
"maintainers": [
{
"name": "Joe Flack",
"email": "[email protected]"
},
{
"name": "Shahim Essaid",
"email": "[email protected]"
}
],
"license": "MIT"
}
Loading

0 comments on commit b1c486a

Please sign in to comment.