Skip to content

Commit

Permalink
GH-3950 SHACL validation against data in an existing Sail (#3951)
Browse files Browse the repository at this point in the history
  • Loading branch information
hmottestad authored May 15, 2023
2 parents 8980a78 + 098fb86 commit 74158c7
Show file tree
Hide file tree
Showing 640 changed files with 3,426 additions and 1,339 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;

import org.eclipse.rdf4j.common.iterator.EmptyIterator;
import org.eclipse.rdf4j.common.iterator.SingletonIterator;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.ModelFactory;
Expand Down Expand Up @@ -334,9 +333,9 @@ public Iterable<Statement> getStatements(Resource subject, IRI predicate, Value
.createStatement(subject, predicate, object, contexts[0]);
Statement foundStatement = statements.get(statement);
if (foundStatement == null) {
return EmptyIterator::new;
return List.of();
}
return () -> new SingletonIterator<>(foundStatement);
return List.of(foundStatement);
} else if (model == null && subject == null && predicate == null && object == null && contexts != null
&& contexts.length == 0) {
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ public static List<IRI> getSupportedShaclPredicates() {
SHACL.PREFIXES,
SHACL.PREFIX_PROP,
SHACL.NAMESPACE_PROP,
SHACL.SEVERITY_PROP,
DASH.hasValueIn,
RSX.targetShape
);
Expand Down Expand Up @@ -398,7 +399,8 @@ public List<ContextWithShapes> getShapes(RepositoryConnection shapesRepoConnecti

try (ShapeSource shapeSource = new CombinedShapeSource(shapesRepoConnection, sailConnection)
.withContext(shapesGraphs)) {
return Shape.Factory.getShapes(shapeSource, this);
return Shape.Factory.getShapes(shapeSource,
new Shape.ParseSettings(isEclipseRdf4jShaclExtensions(), isDashDataShapes()));
}

}
Expand All @@ -408,7 +410,8 @@ public List<ContextWithShapes> getShapes(RepositoryConnection shapesRepoConnecti
throws SailException {

try (ShapeSource shapeSource = new ForwardChainingShapeSource(shapesRepoConnection).withContext(shapesGraphs)) {
return Shape.Factory.getShapes(shapeSource, this);
return Shape.Factory.getShapes(shapeSource,
new Shape.ParseSettings(isEclipseRdf4jShaclExtensions(), isDashDataShapes()));
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
import org.eclipse.rdf4j.sail.shacl.ShaclSail.TransactionSettings.ValidationApproach;
import org.eclipse.rdf4j.sail.shacl.ast.ContextWithShapes;
import org.eclipse.rdf4j.sail.shacl.ast.Shape;
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.EmptyNode;
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.PlanNode;
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.SingleCloseablePlanNode;
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.ValidationExecutionLogger;
Expand Down Expand Up @@ -482,12 +481,12 @@ private ValidationReport validate(List<ContextWithShapes> shapes, boolean valida

}

void prepareValidation() throws InterruptedException {
void prepareValidation(ValidationSettings validationSettings) throws InterruptedException {

assert isValidationEnabled();

if (sail.isRdfsSubClassReasoning()) {
rdfsSubClassOfReasoner = RdfsSubClassOfReasoner.createReasoner(this);
rdfsSubClassOfReasoner = RdfsSubClassOfReasoner.createReasoner(this, validationSettings);
}

if (!isBulkValidation()) {
Expand Down Expand Up @@ -864,7 +863,8 @@ public void prepare() throws SailException {

stats.setEmptyIncludingCurrentTransaction(ConnectionHelper.isEmpty(this));

prepareValidation();
prepareValidation(
new ValidationSettings(null, sail.isLogValidationPlans(), false, sail.isPerformanceLogging()));

ValidationReport invalidTuples = null;
if (useSerializableValidation) {
Expand Down Expand Up @@ -1080,7 +1080,7 @@ private long getTimeStamp() {
return 0;
}

private class ValidationContainer {
public class ValidationContainer {
private final Shape shape;
private final PlanNode planNode;
private final ValidationExecutionLogger validationExecutionLogger;
Expand All @@ -1089,10 +1089,10 @@ public ValidationContainer(Shape shape, PlanNode planNode) {
this.shape = shape;
this.validationExecutionLogger = ValidationExecutionLogger
.getInstance(sail.isGlobalLogValidationExecution());
if (!(planNode instanceof EmptyNode)) {
this.planNode = new SingleCloseablePlanNode(planNode);
this.planNode.receiveLogger(validationExecutionLogger);

if (!(planNode.isGuaranteedEmpty())) {
assert planNode instanceof SingleCloseablePlanNode;
planNode.receiveLogger(validationExecutionLogger);
this.planNode = planNode;
} else {
this.planNode = planNode;
}
Expand All @@ -1103,7 +1103,7 @@ public Shape getShape() {
}

public boolean hasPlanNode() {
return !(planNode instanceof EmptyNode);
return !(planNode.isGuaranteedEmpty());
}

public ValidationResultIterator performValidation() {
Expand All @@ -1115,7 +1115,7 @@ public ValidationResultIterator performValidation() {

try (CloseableIteration<? extends ValidationTuple, SailException> iterator = planNode.iterator()) {
validationResults = new ValidationResultIterator(iterator,
sail.getEffectiveValidationResultsLimitPerConstraint());
sail.getEffectiveValidationResultsLimitPerConstraint(), shape.getSeverity());
return validationResults;
} finally {
handlePostLogging(before, validationResults);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*******************************************************************************
* Copyright (c) 2023 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
******************************************************************************/

package org.eclipse.rdf4j.sail.shacl;

import java.util.List;
import java.util.stream.Collectors;

import org.eclipse.rdf4j.common.annotation.Experimental;
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
import org.eclipse.rdf4j.common.transaction.IsolationLevels;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.sail.Sail;
import org.eclipse.rdf4j.sail.SailConnection;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.shacl.ast.ContextWithShapes;
import org.eclipse.rdf4j.sail.shacl.ast.Shape;
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.SingleCloseablePlanNode;
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.ValidationExecutionLogger;
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.ValidationTuple;
import org.eclipse.rdf4j.sail.shacl.results.ValidationReport;
import org.eclipse.rdf4j.sail.shacl.results.lazy.LazyValidationReport;
import org.eclipse.rdf4j.sail.shacl.results.lazy.ValidationResultIterator;
import org.eclipse.rdf4j.sail.shacl.wrapper.data.ConnectionsGroup;
import org.eclipse.rdf4j.sail.shacl.wrapper.data.RdfsSubClassOfReasoner;
import org.eclipse.rdf4j.sail.shacl.wrapper.data.VerySimpleRdfsBackwardsChainingConnection;
import org.eclipse.rdf4j.sail.shacl.wrapper.shape.CombinedShapeSource;
import org.eclipse.rdf4j.sail.shacl.wrapper.shape.ShapeSource;

@Experimental
public class ShaclValidator {

private static final Resource[] ALL_CONTEXTS = {};

// protected so that tests can override
protected static Resource[] CONTEXTS = {};

public static ValidationReport validate(Sail dataRepo, Sail shapesRepo) {

List<ContextWithShapes> shapes;
try (SailConnection shapesConnection = shapesRepo.getConnection()) {
shapesConnection.begin(IsolationLevels.NONE);
try (ShapeSource shapeSource = new CombinedShapeSource(shapesConnection,
shapesConnection)) {
shapes = Shape.Factory.getShapes(shapeSource.withContext(CONTEXTS),
new Shape.ParseSettings(true, true));
}
shapesConnection.commit();
}

try (SailConnection dataRepoConnection = dataRepo.getConnection()) {

RdfsSubClassOfReasoner reasoner;

try (SailConnection shapesConnection = shapesRepo.getConnection()) {
reasoner = RdfsSubClassOfReasoner.createReasoner(
dataRepoConnection, shapesConnection,
new ValidationSettings(ALL_CONTEXTS, false, true, false));
}

VerySimpleRdfsBackwardsChainingConnection verySimpleRdfsBackwardsChainingConnection = new VerySimpleRdfsBackwardsChainingConnection(
dataRepoConnection, reasoner);

return performValidation(shapes, new ConnectionsGroup(verySimpleRdfsBackwardsChainingConnection, null,
null, null, new Stats(), () -> reasoner,
new ShaclSailConnection.Settings(true, true, true, IsolationLevels.NONE), true));
}

}

private static ValidationReport performValidation(List<ContextWithShapes> shapes,
ConnectionsGroup connectionsGroup) {

List<ValidationResultIterator> collect = shapes
.stream()
.flatMap(contextWithShapes -> {
return contextWithShapes
.getShapes()
.stream()
.map(shape -> shape.generatePlans(connectionsGroup,
new ValidationSettings(contextWithShapes.getDataGraph(), false, true, false)));
}
)
.map(planNode -> {
assert planNode instanceof SingleCloseablePlanNode;
planNode.receiveLogger(ValidationExecutionLogger.getInstance(false));
return (SingleCloseablePlanNode) planNode;
})

.map(planNode -> {
try (CloseableIteration<? extends ValidationTuple, SailException> iterator = planNode.iterator()) {
return new ValidationResultIterator(iterator, 1000, planNode.getShape().getSeverity());
}
})
.collect(Collectors.toList());

return new LazyValidationReport(collect, 10000);

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.SHACL;
import org.eclipse.rdf4j.sail.shacl.ShaclSail;
import org.eclipse.rdf4j.sail.shacl.SourceConstraintComponent;
import org.eclipse.rdf4j.sail.shacl.ValidationSettings;
import org.eclipse.rdf4j.sail.shacl.ast.StatementMatcher.Variable;
Expand All @@ -47,22 +46,22 @@ public NodeShape(NodeShape nodeShape) {
}

public static NodeShape getInstance(ShaclProperties properties,
ShapeSource shapeSource, Cache cache, ShaclSail shaclSail) {
ShapeSource shapeSource, ParseSettings parseSettings, Cache cache) {

NodeShape shape = (NodeShape) cache.get(properties.getId());
if (shape == null) {
shape = new NodeShape();
cache.put(properties.getId(), shape);
shape.populate(properties, shapeSource, cache, shaclSail);
shape.populate(properties, shapeSource, parseSettings, cache);
}

return shape;
}

@Override
public void populate(ShaclProperties properties, ShapeSource connection,
Cache cache, ShaclSail shaclSail) {
super.populate(properties, connection, cache, shaclSail);
ParseSettings parseSettings, Cache cache) {
super.populate(properties, connection, parseSettings, cache);

if (properties.getMinCount() != null) {
throw new IllegalStateException("NodeShapes do not support sh:MinCount in " + getId());
Expand All @@ -80,7 +79,7 @@ public void populate(ShaclProperties properties, ShapeSource connection,
* Also not supported here is: - sh:lessThan - sh:lessThanOrEquals - sh:qualifiedValueShape
*/

constraintComponents = getConstraintComponents(properties, connection, cache, shaclSail);
constraintComponents = getConstraintComponents(properties, connection, parseSettings, cache);

}

Expand Down Expand Up @@ -138,7 +137,7 @@ public ValidationQuery generateSparqlValidationQuery(ConnectionsGroup connection
assert !(constraintComponents.get(0) instanceof PropertyShape);

validationQuery = validationQuery.withShape(this);
validationQuery = validationQuery.withSeverity(severity);
validationQuery = validationQuery.withSeverity(Severity.orDefault(severity));
validationQuery.makeCurrentStateValidationReport();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.SHACL;
import org.eclipse.rdf4j.sail.shacl.ShaclSail;
import org.eclipse.rdf4j.sail.shacl.SourceConstraintComponent;
import org.eclipse.rdf4j.sail.shacl.ValidationSettings;
import org.eclipse.rdf4j.sail.shacl.ast.StatementMatcher.Variable;
Expand Down Expand Up @@ -64,13 +63,15 @@ public PropertyShape(PropertyShape propertyShape) {
this.path = propertyShape.path;
}

public static PropertyShape getInstance(ShaclProperties properties, ShapeSource shapeSource, Cache cache,
ShaclSail shaclSail) {
public static PropertyShape getInstance(ShaclProperties properties, ShapeSource shapeSource,
ParseSettings parseSettings, Cache cache) {

Shape shape = cache.get(properties.getId());

if (shape == null) {
shape = new PropertyShape();
cache.put(properties.getId(), shape);
shape.populate(properties, shapeSource, cache, shaclSail);
shape.populate(properties, shapeSource, parseSettings, cache);
}

if (shape.constraintComponents.isEmpty()) {
Expand All @@ -81,18 +82,17 @@ public static PropertyShape getInstance(ShaclProperties properties, ShapeSource
}

@Override
public void populate(ShaclProperties properties, ShapeSource connection,
Cache cache, ShaclSail shaclSail) {
super.populate(properties, connection, cache, shaclSail);
public void populate(ShaclProperties properties, ShapeSource connection, ParseSettings parseSettings, Cache cache) {

super.populate(properties, connection, parseSettings, cache);

this.path = Path.buildPath(connection, properties.getPath());

if (this.path == null) {
throw new IllegalStateException(properties.getId() + " is a sh:PropertyShape without a sh:path!");
}

constraintComponents = getConstraintComponents(properties, connection, cache, shaclSail
);
constraintComponents = getConstraintComponents(properties, connection, parseSettings, cache);
}

@Override
Expand Down
Loading

0 comments on commit 74158c7

Please sign in to comment.