Skip to content

Commit

Permalink
Adding ability to output d3viz json.
Browse files Browse the repository at this point in the history
Also added graph transformers to allow for dynamic generation of
SEP-style grouping classes.

Fixes #696
  • Loading branch information
cmungall committed Jan 27, 2024
1 parent a805ace commit a7f9eb0
Show file tree
Hide file tree
Showing 13 changed files with 566 additions and 29 deletions.
86 changes: 72 additions & 14 deletions src/oaklib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@
from oaklib.parsers.association_parser_factory import get_association_parser
from oaklib.resource import OntologyResource
from oaklib.selector import get_adapter, get_resource_from_shorthand
from oaklib.transformers.transformers_factory import (
apply_ontology_transformation,
get_ontology_transformer,
)
from oaklib.types import CURIE, PRED_CURIE
from oaklib.utilities import table_filler
from oaklib.utilities.apikey_manager import set_apikey_value
Expand Down Expand Up @@ -179,10 +183,11 @@
from oaklib.utilities.obograph_utils import (
ancestors_with_stats,
default_stylemap_path,
graph_to_d3viz_objects,
graph_to_image,
graph_to_tree_display,
shortest_paths,
trim_graph, graph_to_d3viz_objects,
trim_graph,
)
from oaklib.utilities.semsim.similarity_utils import load_information_content_map
from oaklib.utilities.subsets.slimmer_utils import (
Expand Down Expand Up @@ -2010,7 +2015,6 @@ def tree(
)



@main.command()
@click.argument("terms", nargs=-1)
@predicates_option
Expand Down Expand Up @@ -2433,11 +2437,6 @@ def dump(terms, output, output_type: str, config_file: str = None, **kwargs):
"""
Exports (dumps) the entire contents of an ontology.
:param terms: A list of terms to dump. If not specified, the entire ontology will be dumped.
:param output: Path to output file
:param output_type: The output format. One of: obo, obojson, ofn, rdf, json, yaml, fhirjson, csv, nl
:param config_file: Path to a configuration JSON file for additional params (which may be required for some formats)
Example:
runoak -i pato.obo dump -o pato.json -O json
Expand Down Expand Up @@ -2468,14 +2467,73 @@ def dump(terms, output, output_type: str, config_file: str = None, **kwargs):
if terms:
raise NotImplementedError("Currently dump for a subset of terms is not supported")
impl = settings.impl
if isinstance(impl, BasicOntologyInterface):
logging.info(f"Out={output} syntax={output_type}")
if config_file:
with open(config_file) as file:
kwargs |= json.load(file)
impl.dump(output, syntax=output_type, **kwargs)
else:
if not isinstance(impl, BasicOntologyInterface):
raise NotImplementedError
logging.info(f"Out={output} syntax={output_type}")
if config_file:
with open(config_file) as file:
kwargs |= json.load(file)
impl.dump(output, syntax=output_type, **kwargs)


@main.command()
@click.argument("terms", nargs=-1)
@click.option("-o", "--output", help="Path to output file")
@output_type_option
@click.option(
    "-c",
    "--config-file",
    help="""Config file for additional transform params.""",
)
@click.option(
    "-t",
    "--transform",
    required=True,
    help="""Name of transformation to apply.""",
)
def transform(terms, transform, output, output_type: str, config_file: str = None, **kwargs):
    """
    Transforms an ontology

    The transformation named by --transform is looked up, applied to the
    input ontology, and the transformed ontology is then written to the
    output path in the requested output format.

    Example:

        runoak -i go-nucleus.obo transform -t EdgeFilterTransformer -c config.yaml -o out.json -O json

    Parameters for the transformer can be supplied in a configuration file
    (--config-file). The file is parsed as YAML, and its top-level keys are
    passed as keyword arguments when the transformer is created; an empty
    file is treated as "no extra parameters".

    Transforming a subset of terms is not yet supported; passing any terms
    raises an error.
    """
    if terms:
        raise NotImplementedError("Currently transform for a subset of terms is not supported")
    impl = settings.impl
    if not isinstance(impl, BasicOntologyInterface):
        raise NotImplementedError
    logging.info(f"Out={output} syntax={output_type}")
    if config_file:
        with open(config_file) as file:
            # yaml.safe_load returns None for an empty document; fall back to
            # an empty mapping so the in-place dict merge does not raise.
            kwargs |= yaml.safe_load(file) or {}
    transformer = get_ontology_transformer(transform, **kwargs)
    new_impl = apply_ontology_transformation(impl, transformer)
    new_impl.dump(output, syntax=output_type)


@main.command()
Expand Down
8 changes: 8 additions & 0 deletions src/oaklib/implementations/sqldb/sql_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,13 @@ def _is_quoted_url(curie: CURIE):
return curie.startswith("<")


def _remove_uri_quotes(curie: CURIE):
    """
    Strip the surrounding quote characters from a quoted URL.

    :param curie: identifier that may be a quoted URL
    :return: the identifier minus its first and last characters when
        ``_is_quoted_url`` reports it as quoted; otherwise the identifier unchanged
    """
    return curie[1:-1] if _is_quoted_url(curie) else curie


@dataclass
class SqlImplementation(
RelationGraphInterface,
Expand Down Expand Up @@ -2689,6 +2696,7 @@ def _filter(select_expr, filter_expr=None):
f"Ad-hoc repair of literal value for contributor: {contributor_id}"
)
contributor_id = string_as_base64_curie(contributor_id)
contributor_id = _remove_uri_quotes(contributor_id)
if contributor_id not in ssc.contributor_summary:
ssc.contributor_summary[contributor_id] = ContributorStatistics(
contributor_id=contributor_id, contributor_name=contributor_name
Expand Down
Empty file.
18 changes: 18 additions & 0 deletions src/oaklib/transformers/chained_ontology_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from dataclasses import dataclass
from typing import Any, Collection

from oaklib.transformers.ontology_transformer import OntologyTransformer


@dataclass
class ChainedOntologyTransformer(OntologyTransformer):
    """
    An ontology transformer that runs a sequence of other transformers,
    feeding the output of each one into the next.
    """

    chained_transformers: Collection[OntologyTransformer]

    def transform(self, source_ontology: Any, **kwargs) -> Any:
        """
        Apply every chained transformer in iteration order.

        :param source_ontology: the ontology handed to the first transformer
        :param kwargs: forwarded unchanged to each transformer's ``transform``
        :return: the result of the last transformer, or ``source_ontology``
            itself when there are no chained transformers
        """
        result = source_ontology
        for step in self.chained_transformers:
            result = step.transform(result, **kwargs)
        return result
75 changes: 75 additions & 0 deletions src/oaklib/transformers/edge_filter_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from dataclasses import dataclass
from typing import Collection, Optional

from oaklib.datamodels.obograph import Graph
from oaklib.datamodels.vocabulary import IS_A
from oaklib.transformers.graph_transformer import GraphTransformer
from oaklib.types import PRED_CURIE


@dataclass
class EdgeFilterTransformer(GraphTransformer):
    """
    An ontology graph transformer that filters edges.

    An edge is kept only when it passes every configured criterion:
    its predicate is in ``include_predicates`` (if set), its predicate is
    not in ``exclude_predicates`` (if set), and ``filter_function`` returns
    a true value for it (if set).
    """

    include_predicates: Optional[Collection[PRED_CURIE]] = None
    """A collection of predicates to include"""

    exclude_predicates: Optional[Collection[PRED_CURIE]] = None
    """A collection of predicates to exclude"""

    filter_function: Optional[callable] = None
    """A function that takes an edge and returns True if it should be included"""

    def transform(self, source_ontology: Graph, **kwargs) -> Graph:
        """
        Filters edges from a graph.

        Example:

            >>> from oaklib import get_adapter
            >>> from oaklib.transformers.transformers_factory import get_ontology_transformer
            >>> from oaklib.datamodels.vocabulary import IS_A
            >>> adapter = get_adapter("tests/input/go-nucleus.obo")
            >>> graph = adapter.as_obograph()
            >>> transformer = get_ontology_transformer("EdgeFilterTransformer", include_predicates=[IS_A])
            >>> filtered_graph = transformer.transform(graph)
            >>> set([e.pred for e in filtered_graph.edges])
            {'is_a'}

        :param source_ontology: the graph whose edges are to be filtered
        :param kwargs: accepted for interface compatibility; not used here
        :return: a graph with the same id and nodes and only the retained edges.
            When no filter criterion is configured the input graph is passed
            straight to post-processing without being rebuilt.
        """
        include_predicates = self.include_predicates
        exclude_predicates = self.exclude_predicates
        filter_function = self.filter_function

        # Nothing to filter on: skip the rebuild. filter_function is part of
        # this check so that a function-only filter is not silently ignored,
        # and post-processing still runs so remove_dangling_edges is honoured.
        if (
            include_predicates is None
            and exclude_predicates is None
            and filter_function is None
        ):
            return self._post_process(source_ontology)

        def _normalize_id(pred: PRED_CURIE) -> PRED_CURIE:
            # The IS_A vocabulary constant is compared against edges as the
            # literal "is_a"; every other predicate is used as given.
            return "is_a" if pred == IS_A else pred

        if include_predicates is not None:
            include_predicates = {_normalize_id(pred) for pred in include_predicates}

        if exclude_predicates is not None:
            exclude_predicates = {_normalize_id(pred) for pred in exclude_predicates}

        # Criteria are evaluated in the order include -> exclude -> function,
        # so filter_function is only called for edges that pass the predicate checks.
        new_edges = [
            edge
            for edge in source_ontology.edges
            if (include_predicates is None or edge.pred in include_predicates)
            and (exclude_predicates is None or edge.pred not in exclude_predicates)
            and (filter_function is None or filter_function(edge))
        ]
        new_graph = Graph(id=source_ontology.id, nodes=source_ontology.nodes, edges=new_edges)
        return self._post_process(new_graph)
43 changes: 43 additions & 0 deletions src/oaklib/transformers/graph_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from dataclasses import dataclass

from oaklib.datamodels.obograph import Graph
from oaklib.transformers.ontology_transformer import OntologyTransformer


@dataclass
class GraphTransformer(OntologyTransformer):
    """
    Base class for ontology transformers whose input and output are graphs.
    """

    remove_dangling_edges: bool = False
    """If true, removes edges that point to nodes that are not in the graph"""

    def transform(self, source_ontology: Graph, **kwargs) -> Graph:
        """
        Transform one graph into another; subclasses must override this.

        :param source_ontology: the graph to transform
        :param kwargs: additional configuration arguments
        :return: the transformed graph
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def apply_remove_dangling_edges(self, graph: Graph):
        """
        Build a copy of the graph without dangling edges.

        An edge is retained only when both its subject and its object are
        ids of nodes present in the graph.

        :param graph: the graph to clean up
        :return: a new graph with the same id and nodes and the retained edges
        """
        known_ids = {node.id for node in graph.nodes}
        kept_edges = [
            edge for edge in graph.edges if edge.sub in known_ids and edge.obj in known_ids
        ]
        return Graph(id=graph.id, nodes=graph.nodes, edges=kept_edges)

    def _post_process(self, graph: Graph):
        """
        Apply optional clean-up steps to a transformed graph.

        :param graph: the graph produced by a transformation
        :return: the graph unchanged unless ``remove_dangling_edges`` is set,
            in which case the result of ``apply_remove_dangling_edges``
        """
        if not self.remove_dangling_edges:
            return graph
        return self.apply_remove_dangling_edges(graph)
49 changes: 49 additions & 0 deletions src/oaklib/transformers/node_filter_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from dataclasses import dataclass
from typing import Optional

from oaklib.datamodels.obograph import Graph
from oaklib.transformers.graph_transformer import GraphTransformer


@dataclass
class NodeFilterTransformer(GraphTransformer):
    """
    An ontology graph transformer that keeps only the nodes accepted by a
    filter function.
    """

    filter_function: Optional[callable] = None
    """A function that takes an Node and returns True if it should be included"""

    remove_dangling_edges: bool = False
    """If true, removes edges that point to nodes that are not in the graph"""

    def transform(self, source_ontology: Graph, **kwargs) -> Graph:
        """
        Filters Nodes from a graph.

        Example:

            >>> from oaklib import get_adapter
            >>> from oaklib.transformers.node_filter_transformer import NodeFilterTransformer
            >>> from oaklib.datamodels.vocabulary import IS_A
            >>> adapter = get_adapter("tests/input/go-nucleus.obo")
            >>> graph = adapter.as_obograph()
            >>> transformer = NodeFilterTransformer(
            ...     filter_function=lambda node: node.lbl.startswith("nuclear"),
            ...     remove_dangling_edges=True)
            >>> filtered_graph = transformer.transform(graph)
            >>> sorted([n.lbl for n in filtered_graph.nodes])
            ['nuclear envelope', 'nuclear membrane', 'nuclear particle']

        :param source_ontology: the graph whose nodes are to be filtered
        :param kwargs: accepted for interface compatibility; not used here
        :return: a graph with the same id and edges and only the accepted
            nodes, after post-processing
        """
        accept = self.filter_function
        # With no filter function every node is kept.
        kept_nodes = [node for node in source_ontology.nodes if accept is None or accept(node)]
        filtered = Graph(id=source_ontology.id, nodes=kept_nodes, edges=source_ontology.edges)
        return self._post_process(filtered)
20 changes: 20 additions & 0 deletions src/oaklib/transformers/ontology_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from abc import ABC
from dataclasses import dataclass
from typing import Any


@dataclass
class OntologyTransformer(ABC):
    """
    Common base class for objects that turn one ontology into another.
    """

    def transform(self, source_ontology: Any, **kwargs) -> Any:
        """
        Produce a transformed ontology; subclasses must override this.

        :param source_ontology: the ontology to transform
        :param kwargs: additional configuration arguments
        :return: the transformed ontology
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
Loading

0 comments on commit a7f9eb0

Please sign in to comment.