Skip to content

Commit

Permalink
Graph expansion using inoculation or SHACL TripleRules will now expan…
Browse files Browse the repository at this point in the history
…d into a separate named graph if you are working with an RDFLib Dataset instead of a Graph.
  • Loading branch information
ashleysommer committed Nov 1, 2024
1 parent d7241ca commit 83684fe
Show file tree
Hide file tree
Showing 13 changed files with 351 additions and 139 deletions.
21 changes: 20 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,24 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d
## [Unreleased]
- Nothing yet

## [0.29.0] - 2024-11-01

### Added
- When validating a Dataset instead of a bare Graph, PySHACL will now expand RDFS and OWL-RL inferences into
a separate named graph, to avoid polluting the datagraph.
- When using SHACL Triple Rules from the SHACL-AF spec, PySHACL will now add the expressed triples into
a separate named graph. This allows you to more easily get the expanded triples back out again afterward.

### Changed
- PySHACL no longer supports older RDFLib versions
- PySHACL relies on the latest OWL-RL version, which in turn relies on the latest RDFLib version
- Therefore PySHACL now requires RDFLib v7.1.1 or newer
- Dropped Python 3.8 support.
- Python developers discontinued Python 3.8 last month
- The next version of RDFLib and OWL-RL will not support Python 3.8
- Removed Python 3.8 from the RDFLib test suite
- Python 3.9-specific typing changes will be incrementally introduced

## [0.28.1] - 2024-10-25

### Fixed
Expand Down Expand Up @@ -1182,7 +1200,8 @@ just leaves the files open. Now it is up to the command-line client to close the

- Initial version, limited functionality

[Unreleased]: https://github.com/RDFLib/pySHACL/compare/v0.28.1...HEAD
[Unreleased]: https://github.com/RDFLib/pySHACL/compare/v0.29.0...HEAD
[0.29.0]: https://github.com/RDFLib/pySHACL/compare/v0.28.1...v0.29.0
[0.28.1]: https://github.com/RDFLib/pySHACL/compare/v0.28.0...v0.28.1
[0.28.0]: https://github.com/RDFLib/pySHACL/compare/v0.27.0...v0.28.0
[0.27.0]: https://github.com/RDFLib/pySHACL/compare/v0.26.0...v0.27.0
Expand Down
11 changes: 8 additions & 3 deletions pyshacl/inference/custom_rdfs_closure.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
from typing import Optional, TYPE_CHECKING

try:
from owlrl import OWL

Expand All @@ -25,6 +27,9 @@
from owlrl.OWLRL import OWLRL_Semantics
from owlrl.RDFSClosure import RDFS_Semantics as OrigRDFSSemantics

if TYPE_CHECKING:
from rdflib.graph import Graph


class CustomRDFSSemantics(OrigRDFSSemantics):
def one_time_rules(self):
Expand All @@ -49,9 +54,9 @@ class CustomRDFSOWLRLSemantics(CustomRDFSSemantics, OWLRL_Semantics):
(OWL.DataRange, OWL.equivalentClass, RDFS.Datatype),
]

def __init__(self, graph, axioms, daxioms, rdfs=True):
OWLRL_Semantics.__init__(self, graph, axioms, daxioms, rdfs)
CustomRDFSSemantics.__init__(self, graph, axioms, daxioms, rdfs)
def __init__(self, graph, axioms, daxioms, rdfs: bool = True, destination: Optional['Graph'] = None):
OWLRL_Semantics.__init__(self, graph, axioms, daxioms, rdfs=rdfs, destination=destination)
CustomRDFSSemantics.__init__(self, graph, axioms, daxioms, rdfs=rdfs, destination=destination)
self.rdfs = True

# noinspection PyMethodMayBeStatic
Expand Down
118 changes: 22 additions & 96 deletions pyshacl/rdfutil/inoculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,20 @@
from typing import TYPE_CHECKING, Dict, Optional, Union

import rdflib
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
from rdflib.namespace import NamespaceManager

from .clone import clone_blank_node, clone_graph, clone_node
from .clone import clone_blank_node, clone_node, clone_dataset
from .consts import OWL, RDF, ConjunctiveLike, GraphLike, OWL_classes, OWL_properties, RDFS_classes, RDFS_properties

if TYPE_CHECKING:
from rdflib import BNode
from rdflib.term import IdentifiedNode
from rdflib.term import URIRef

from .consts import RDFNode

OWLNamedIndividual = OWL.NamedIndividual


def inoculate(data_graph: rdflib.Graph, ontology: rdflib.Graph) -> rdflib.Graph:
def inoculate(data_graph: rdflib.Graph, ontology: GraphLike) -> rdflib.Graph:
"""
Copies all RDFS and OWL axioms (classes, relationship definitions, and properties)
from the ontology graph into the data_graph.
Expand All @@ -33,6 +31,9 @@ def inoculate(data_graph: rdflib.Graph, ontology: rdflib.Graph) -> rdflib.Graph:
ontology_ns = ontology.namespace_manager
data_graph_ns = data_graph.namespace_manager

if isinstance(ontology, (rdflib.ConjunctiveGraph, rdflib.Dataset)):
# always set default context true on the ontology DS
ontology.default_context = True
# Bind any missing ontology namespaces in the DataGraph NS manager.
if ontology_ns is not data_graph_ns:
data_graph_prefixes = {p: n for (p, n) in data_graph_ns.namespaces()}
Expand Down Expand Up @@ -108,119 +109,44 @@ def inoculate(data_graph: rdflib.Graph, ontology: rdflib.Graph) -> rdflib.Graph:


def inoculate_dataset(
base_ds: ConjunctiveLike, ontology_ds: GraphLike, target_ds: Optional[Union[ConjunctiveLike, str]] = None
base_ds: ConjunctiveLike, ontology_ds: GraphLike, target_ds: Optional[Union[ConjunctiveLike, str]] = None,
target_graph_identifier: Optional['URIRef'] = None,
):
"""
Make a clone of base_ds (dataset) and add RDFS and OWL triples from ontology_ds
:param base_ds:
:type base_ds: rdflib.Dataset
:param ontology_ds:
:type ontology_ds: rdflib.Dataset
:type ontology_ds: rdflib.Dataset|rdflib.ConjunctiveGraph|rdflib.Graph
:param target_ds:
:type target_ds: rdflib.Dataset|str|NoneType
:param target_graph_identifier:
:type target_graph_identifier: rdflib.URIRef | None
:return: The cloned Dataset with ontology triples from ontology_ds
:rtype: rdflib.Dataset
"""

# TODO: Decide whether we need to clone base_ds before calling this,
# or we clone base_ds as part of this function
default_union: bool = base_ds.default_union
base_named_graphs = [
(
rdflib.Graph(base_ds.store, i, namespace_manager=base_ds.namespace_manager) # type: ignore[arg-type]
if not isinstance(i, rdflib.Graph)
else i
)
for i in base_ds.store.contexts(None)
]
if isinstance(base_ds, rdflib.Dataset) and len(base_named_graphs) < 1:
base_named_graphs = [
rdflib.Graph(base_ds.store, DATASET_DEFAULT_GRAPH_ID, namespace_manager=base_ds.namespace_manager)
]
base_default_context_id = base_ds.default_context.identifier
if target_ds is None:
target_ds = rdflib.Dataset(default_union=default_union)
target_ds.namespace_manager = NamespaceManager(target_ds, 'core')
target_ds.default_context.namespace_manager = target_ds.namespace_manager
target_ds = clone_dataset(base_ds)
elif target_ds is base_ds:
pass
elif target_ds == "inplace" or target_ds == "base":
target_ds = base_ds
elif isinstance(target_ds, str):
raise RuntimeError("target_ds cannot be a string (unless it is 'inplace' or 'base')")

if isinstance(target_ds, (rdflib.ConjunctiveGraph, rdflib.Dataset)):
if not isinstance(target_ds, rdflib.Dataset):
raise RuntimeError("Cannot inoculate ConjunctiveGraph, use Dataset instead.")
else:
raise RuntimeError("Cannot inoculate datasets if target_ds passed in is not a Dataset itself.")
ont_default_context_id: Union[IdentifiedNode, str, None]
if isinstance(ontology_ds, (rdflib.Dataset, rdflib.ConjunctiveGraph)):
ont_graphs = [
(
rdflib.Graph(ontology_ds.store, i, namespace_manager=ontology_ds.namespace_manager) # type: ignore[arg-type]
if not isinstance(i, rdflib.Graph)
else i
)
for i in ontology_ds.store.contexts(None)
]
ont_default_context_id = ontology_ds.default_context.identifier
else:
ont_graphs = [ontology_ds]
ont_default_context_id = None
if target_ds is base_ds or target_ds == "inplace" or target_ds == "base":
target_ds = base_ds
for bg in base_named_graphs:
if len(base_named_graphs) > 1 and bg.identifier == base_default_context_id and len(bg) < 1:
# skip empty default named graph in base_graph
continue
for og in ont_graphs:
if len(ont_graphs) > 1 and og.identifier == ont_default_context_id and len(og) < 1:
# skip empty default named graph in ontology_graph
continue
inoculate(bg, og)

if target_graph_identifier:
dest_graph = target_ds.get_context(target_graph_identifier)
else:
inoculated_graphs = {}
for bg in base_named_graphs:
if len(base_named_graphs) > 1 and bg.identifier == base_default_context_id and len(bg) < 1:
# skip empty default named graph in base_graph
continue
target_g = rdflib.Graph(store=target_ds.store, identifier=bg.identifier)
clone_g = clone_graph(bg, target_graph=target_g)
for og in ont_graphs:
if len(ont_graphs) > 1 and og.identifier == ont_default_context_id and len(og) < 1:
# skip empty default named graph in ontology_graph
continue
inoculate(clone_g, og)
inoculated_graphs[bg.identifier] = clone_g

base_graph_identifiers = [bg.identifier for bg in base_named_graphs]
base_default_context_id = base_ds.default_context.identifier
target_default_context_id = target_ds.default_context.identifier
if base_default_context_id != target_default_context_id:
old_target_default_context = target_ds.default_context
old_target_default_context_id = old_target_default_context.identifier
if isinstance(target_ds, rdflib.Dataset):
new_target_default_context = target_ds.graph(base_default_context_id)
else:
new_target_default_context = target_ds.get_context(base_default_context_id)
target_ds.store.add_graph(new_target_default_context)
target_ds.default_context = new_target_default_context
if old_target_default_context_id not in base_graph_identifiers:
if isinstance(target_ds, rdflib.Dataset):
target_ds.remove_graph(old_target_default_context)
else:
target_ds.store.remove_graph(old_target_default_context)
target_default_context_id = new_target_default_context.identifier
else:
if isinstance(target_ds, rdflib.Dataset):
_ = target_ds.graph(target_default_context_id)
else:
t_default = target_ds.get_context(target_default_context_id)
target_ds.store.add_graph(t_default)
for i, ig in inoculated_graphs.items():
if ig == target_ds.default_context or i == target_default_context_id:
continue
if isinstance(target_ds, rdflib.Dataset):
_ = target_ds.graph(ig) # alias to Dataset.add_graph()
else:
target_ds.store.add_graph(ig)
dest_graph = target_ds.default_context

# inoculate() routine will set default_union on the ontology_ds if it is a Dataset
inoculate(dest_graph, ontology_ds)

return target_ds
4 changes: 2 additions & 2 deletions pyshacl/rule_expand_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def mix_in_ontology(self):
else:
to_graph = clone_graph(self.data_graph, identifier=self.data_graph.identifier)
return inoculate(to_graph, self.ont_graph)
return inoculate_dataset(self.data_graph, self.ont_graph, self.data_graph if self.inplace else None)
return inoculate_dataset(self.data_graph, self.ont_graph, self.data_graph if self.inplace else None, URIRef("urn:pyshacl:inoculation"))

def make_executor(self) -> SHACLExecutor:
return SHACLExecutor(
Expand Down Expand Up @@ -134,7 +134,7 @@ def run(self) -> GraphLike:
datagraph = clone_graph(datagraph)
has_cloned = True
self.logger.debug(f"Running pre-inferencing with option='{inference_option}'.")
self._run_pre_inference(datagraph, inference_option, logger=self.logger)
self._run_pre_inference(datagraph, inference_option, URIRef("urn:pyshacl:inference"), logger=self.logger)
self.pre_inferenced = True
if not has_cloned and not self.inplace:
# We still need to clone in advanced mode, because of triple rules
Expand Down
11 changes: 7 additions & 4 deletions pyshacl/rules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def gather_rules(
ret_rules[shape].append(rule)
return ret_rules

RULES_ITERATE_LIMIT = 100

def apply_rules(
executor: SHACLExecutor,
Expand All @@ -98,11 +99,13 @@ def apply_rules(
for shape, rules in sorted_shapes_rules:
# sort the rules by the sh:order before execution
rules = sorted(rules, key=lambda x: x.order)
iterate_limit = 100
_iterate_limit = int(RULES_ITERATE_LIMIT)
while True:
if iterate_limit < 1:
raise ReportableRuntimeError("SHACL Shape Rule iteration exceeded iteration limit of 100.")
iterate_limit -= 1
if _iterate_limit < 1:
raise ReportableRuntimeError(
f"SHACL Shape Rule iteration exceeded iteration limit of {RULES_ITERATE_LIMIT}."
)
_iterate_limit -= 1
this_modified = 0
for r in rules:
if r.deactivated:
Expand Down
13 changes: 9 additions & 4 deletions pyshacl/rules/shacl_rule.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
# -*- coding: utf-8 -*-
from decimal import Decimal
from typing import Sequence, Union
from typing import Sequence, TYPE_CHECKING, Optional

from rdflib import RDF, Literal

from pyshacl.consts import SH_condition, SH_deactivated, SH_order
from pyshacl.errors import RuleLoadError
from pyshacl.pytypes import RDFNode, SHACLExecutor

if TYPE_CHECKING:
from pyshacl.pytypes import GraphLike
from rdflib.term import URIRef

RDF_first = RDF.first


Expand Down Expand Up @@ -41,7 +45,7 @@ def __init__(self, executor: SHACLExecutor, shape, rule_node, iterate=False):
self.executor = executor
self.shape = shape
self.node = rule_node
self.iterate = False
self.iterate = iterate

deactivated_nodes = list(self.shape.sg.objects(self.node, SH_deactivated))
self._deactivated = len(deactivated_nodes) > 0 and bool(deactivated_nodes[0])
Expand Down Expand Up @@ -111,7 +115,8 @@ def filter_conditions(self, focus_nodes: Sequence[RDFNode], data_graph):

def apply(
self,
data_graph,
focus_nodes: Union[Sequence[RDFNode], None] = None,
data_graph: 'GraphLike',
focus_nodes: Optional[Sequence[RDFNode]] = None,
target_graph_identifier: Optional['URIRef'] = None,
):
raise NotImplementedError()
22 changes: 17 additions & 5 deletions pyshacl/rules/sparql/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
from typing import TYPE_CHECKING, List, Sequence, Union
from typing import TYPE_CHECKING, List, Sequence, Union, Optional

import rdflib
from rdflib import Literal
Expand All @@ -13,11 +13,13 @@
from ..shacl_rule import SHACLRule

if TYPE_CHECKING:
from rdflib.term import URIRef
from pyshacl.pytypes import GraphLike, RDFNode, SHACLExecutor
from pyshacl.shape import Shape

XSD_string = XSD.string

SPARQL_RULE_ITERATE_LIMIT = 100

class SPARQLRule(SHACLRule):
__slots__ = ("_constructs", "_qh")
Expand Down Expand Up @@ -52,7 +54,8 @@ def __init__(self, executor: 'SHACLExecutor', shape: 'Shape', rule_node: 'rdflib
def apply(
self,
data_graph: 'GraphLike',
focus_nodes: Union[Sequence['RDFNode'], None] = None,
focus_nodes: Optional[Sequence['RDFNode']] = None,
target_graph_identifier: Optional['URIRef'] = None,
) -> int:
focus_list: Sequence['RDFNode']
if focus_nodes is not None:
Expand All @@ -70,10 +73,12 @@ def apply(
focus_list = filtered_focus_nodes
all_added = 0
SPARQLQueryHelper = get_query_helper_cls()
iterate_limit = 100
iterate_limit = int(SPARQL_RULE_ITERATE_LIMIT)
while True:
if iterate_limit < 1:
raise ReportableRuntimeError("Local SPARQLRule iteration exceeded iteration limit of 100.")
raise ReportableRuntimeError(
f"Local SPARQLRule iteration exceeded iteration limit of {SPARQL_RULE_ITERATE_LIMIT}."
)
iterate_limit -= 1
added = 0
applicable_nodes = self.filter_conditions(focus_list, data_graph)
Expand Down Expand Up @@ -101,8 +106,15 @@ def apply(
added += 1
construct_graphs.add(result_graph)
if added > 0:
if isinstance(data_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)):
if target_graph_identifier is not None:
target_graph = data_graph.get_context(target_graph_identifier)
else:
target_graph = data_graph.default_context
else:
target_graph = data_graph
for g in construct_graphs:
data_graph = clone_graph(g, target_graph=data_graph)
data_graph = clone_graph(g, target_graph=target_graph)
all_added += added
if self.iterate:
continue # Jump up to iterate
Expand Down
Loading

0 comments on commit 83684fe

Please sign in to comment.