Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enh/sparql update #4

Merged
merged 5 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions dsms/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@
import re
from typing import Any
from urllib.parse import urljoin
from uuid import UUID

import requests
from requests import Response


def _kitem_id2uri(kitem_id: str) -> str:
def _kitem_id2uri(kitem_id: UUID) -> str:
"Convert a kitem id in the DSMS to the full resolvable URI"
from dsms import Context

return f"{Context.dsms.config.host_url}/{kitem_id}"
return urljoin(str(Context.dsms.config.host_url), str(kitem_id))


def _uri2kitem_idi(uri: str) -> str:
Expand Down
4 changes: 3 additions & 1 deletion dsms/knowledge/kitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,9 @@ def dsms(cls, value: "DSMS") -> None:
@property
def subgraph(cls) -> Optional[Graph]:
"""Getter for Subgraph"""
return _get_subgraph(cls.id, cls.dsms.config.kitem_repo)
return _get_subgraph(
cls.id, cls.dsms.config.kitem_repo, is_kitem_id=True
)

@property
def context(cls) -> "Context":
Expand Down
29 changes: 23 additions & 6 deletions dsms/knowledge/sparql_interface/sparql_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

from typing import TYPE_CHECKING

from dsms.knowledge.sparql_interface.subgraph import Subgraph
from dsms.knowledge.sparql_interface.utils import (
_add_rdf,
_sparql_query,
_sparql_update,
)

if TYPE_CHECKING:
from typing import Any, Dict
from typing import Any, Dict, TextIO, Union

from dsms.core.dsms import DSMS

Expand All @@ -21,17 +22,33 @@ class SparqlInterface:
def __init__(self, dsms):
"""Initalize the Sparql interface"""
self._dsms: "DSMS" = dsms
self._subgraph = Subgraph(dsms)

def query(
self, query: str, repository: str = "knowledge"
) -> "Dict[str, Any]":
"""Perform Sparql Query"""
return _sparql_query(query, repository)

def update(self, filepath: str, repository: str = "knowledge") -> None:
def update(
self,
file_or_pathlike: "Union[str, TextIO]",
repository: str = "knowledge",
) -> None:
"""Perform update query from local file"""
_sparql_update(filepath, self._dsms.config.encoding, repository)

def add_rdf(self, filepath: str, repository: str = "knowledge") -> None:
_sparql_update(
file_or_pathlike, self._dsms.config.encoding, repository
)

def insert(
self,
file_or_pathlike: "Union[str, TextIO]",
repository: str = "knowledge",
) -> None:
"""Upload RDF to triplestore from local file"""
_add_rdf(filepath, self._dsms.config.encoding, repository)
_add_rdf(file_or_pathlike, self._dsms.config.encoding, repository)

@property
def subgraph(cls) -> Subgraph:
"""Subgraph interface for DSMS"""
return cls._subgraph
44 changes: 44 additions & 0 deletions dsms/knowledge/sparql_interface/subgraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""DSMS Subgraph interface"""

from typing import TYPE_CHECKING

from dsms.knowledge.sparql_interface.utils import (
_create_subgraph,
_delete_subgraph,
_get_subgraph,
_update_subgraph,
)

if TYPE_CHECKING:
from rdflib import Graph

from dsms import DSMS


class Subgraph:
"""Subgraph interface for DSMS"""

def __init__(self, dsms):
"""Initalize the Sparql interface"""
self._dsms: "DSMS" = dsms

def update(self, graph: "Graph", repository: str = "knowledge") -> None:
"""Update a subgraph in the DSMS"""
_update_subgraph(graph, self._dsms.config.encoding, repository)

def create(self, graph: "Graph", repository: str = "knowledge") -> None:
"""Create a subgraph in the DSMS"""
_create_subgraph(graph, self._dsms.config.encoding, repository)

def delete(self, identifier: str, repository: str = "knowledge") -> None:
"""Delete a subgraph in the DSMS"""
_delete_subgraph(identifier, repository)

def get(
self,
identifier: str,
repository: str = "knowledge",
is_kitem_id: bool = False,
) -> "Graph":
"""Get a subgraph from the DSMS"""
return _get_subgraph(identifier, repository, is_kitem_id)
109 changes: 69 additions & 40 deletions dsms/knowledge/sparql_interface/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
import io
from typing import TYPE_CHECKING

from rdflib import Graph
from rdflib.plugins.sparql.results.jsonresults import JSONResult

from dsms.core.utils import _kitem_id2uri, _perform_request

if TYPE_CHECKING:
from typing import Any, Dict
from typing import Any, Dict, Optional, TextIO, Union

from rdflib import Graph


def _sparql_query(query: str, repository: str) -> "Dict[str, Any]":
Expand All @@ -28,44 +30,64 @@ def _sparql_query(query: str, repository: str) -> "Dict[str, Any]":
\n
`{query}`"""
) from excep
return response["results"]["bindings"]
return response


def _sparql_update(
filepath: str,
file_or_pathlike: "Union[str, TextIO]",
encoding: str,
repository: str,
) -> None:
"""Submit plain SPARQL-query to the DSMS instance."""

with open(filepath, mode="r+", encoding=encoding) as file:
response = _perform_request(
"api/knowledge/update-query",
"post",
files={"file": file},
params={"repository": repository},
)
response = _perform_request(
"api/knowledge/update-query",
"post",
files=_get_file_or_pathlike(file_or_pathlike, encoding),
params={"repository": repository},
)
if not response.ok:
raise RuntimeError(f"Sparql was not successful: {response.text}")


def _add_rdf(filepath: str, encoding: str, repository: str) -> None:
"""Create the subgraph in the remote backend"""
def _get_file_or_pathlike(
file_or_pathlike: "Union[str, TextIO]", encoding: str
) -> "TextIO":
if isinstance(file_or_pathlike, str):
with open(file_or_pathlike, mode="r+", encoding=encoding) as file:
files = {"file", file}
else:
if "read" not in dir(file_or_pathlike):
raise TypeError(
f"{file_or_pathlike} is neither a path"
f"or a file-like object."
)
files = {"file": file_or_pathlike}
return files

with open(filepath, mode="r+", encoding=encoding) as file:
response = _perform_request(
"api/knowledge/add-rdf",
"post",
files={"file": file},
params={"repository": repository},
)

def _add_rdf(
file_or_pathlike: "Union[str, TextIO]",
encoding: str,
repository: str,
context: "Optional[str]" = None,
) -> None:
"""Create the subgraph in the remote backend"""
params = {"repository": repository}
if context:
params["context"] = context
response = _perform_request(
"api/knowledge/add-rdf",
"post",
files=_get_file_or_pathlike(file_or_pathlike, encoding),
params=params,
)
if not response.ok:
raise RuntimeError(
f"Not able to create subgraph in backend: {response.text}"
)


def _delete_subgraph(identifier: str, encoding: str, repository: str) -> None:
def _delete_subgraph(identifier: str, repository: str) -> None:
"""Get subgraph related to a certain dataset id."""
query = f"""
DELETE {{
Expand All @@ -79,42 +101,49 @@ def _delete_subgraph(identifier: str, encoding: str, repository: str) -> None:
GRAPH ?g {{ ?s ?p ?o . }}
}}
}}"""
_sparql_update(query, encoding, repository)
response = _sparql_query(query, repository)
if not response.get("boolean"):
raise RuntimeError(
f"Deleteing subgraph was not successful: {response}"
)


def _create_subgraph(graph: "Graph", encoding: str, respository: str) -> None:
"""Create the subgraph in the remote backend"""
upload_file = io.BytesIO(graph.serialize(encoding=encoding))
_add_rdf(
upload_file, encoding, respository, context=f"<{graph.identifier}>"
)

def _update_subgraph(graph: Graph, encoding: str, repository: str) -> None:

def _update_subgraph(graph: "Graph", encoding: str, repository: str) -> None:
"""Update the subgraph in the remote backend"""
_delete_subgraph(graph.identifier, encoding, repository)
_add_rdf(graph, encoding, repository)
_delete_subgraph(graph.identifier, repository)
_create_subgraph(graph, encoding, repository)


def _get_subgraph(kitem_id: str, repository: str) -> Graph:
def _get_subgraph(
identifier: str, repository: str, is_kitem_id: bool = False
) -> "Graph":
"""Get subgraph related to a certain dataset id."""
uri = _kitem_id2uri(kitem_id)
if is_kitem_id:
identifier = _kitem_id2uri(identifier)
query = f"""
SELECT DISTINCT
?s ?p ?o
WHERE {{
BIND(
<{uri}> as ?g
<{identifier}> as ?g
)
{{
GRAPH ?g {{ ?s ?p ?o . }}
}}
}}"""
data = _sparql_query(query, repository)

buffer = io.StringIO()
buffer.writelines(
f"<{row['s']['value']}> <{row['p']['value']}> <{row['o']['value']}> ."
for row in data
)
buffer.seek(0)

graph = Graph(identifier=uri)
graph.parse(buffer, format="n3")
graph = JSONResult(data)
graph.identifier = identifier

if len(graph) == 0:
raise ValueError(f"Subgraph for id `{kitem_id}` does not exist.")
raise ValueError(f"Subgraph for id `{identifier}` does not exist.")

return graph
Loading