From 6bd8fffa38bb2fd8db362104395d140e3d9399c4 Mon Sep 17 00:00:00 2001 From: Jerven Bolleman Date: Sun, 5 May 2024 22:24:54 +0200 Subject: [PATCH 1/3] GH-4920 SPARQLConnection.size() method should not fetch every statement in the repository. Just send a count query instead. Signed-off-by: Jerven Bolleman --- .../repository/sparql/SPARQLConnection.java | 42 ++++++++++++++++--- .../sparql/SPARQLConnectionTest.java | 34 +++++++++++++++ 2 files changed, 70 insertions(+), 6 deletions(-) diff --git a/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java b/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java index 0a17338e64d..6150ef296b2 100644 --- a/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java +++ b/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java @@ -17,7 +17,9 @@ import java.io.InputStream; import java.io.Reader; import java.net.URL; +import java.util.Arrays; import java.util.Objects; +import java.util.stream.Collectors; import org.apache.http.client.HttpClient; import org.eclipse.rdf4j.common.iteration.CloseableIteration; @@ -80,6 +82,8 @@ */ public class SPARQLConnection extends AbstractRepositoryConnection implements HttpClientDependent { + private static final String COUNT_EVERYTHING = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }"; + private static final String EVERYTHING = "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }"; private static final String EVERYTHING_WITH_GRAPH = "SELECT * WHERE { ?s ?p ?o . OPTIONAL { GRAPH ?ctx { ?s ?p ?o } } }"; @@ -288,14 +292,40 @@ public boolean isEmpty() throws RepositoryException { @Override public long size(Resource... contexts) throws RepositoryException { - try (RepositoryResult stmts = getStatements(null, null, null, true, contexts)) { - long i = 0; - while (stmts.hasNext()) { - stmts.next(); - i++; + String query = sizeAsTupleQuery(contexts); + TupleQuery tq = prepareTupleQuery(SPARQL, query); + try (TupleQueryResult res = tq.evaluate()) { + if (res.hasNext()) { + + Value value = res.next().getBinding("count").getValue(); + if (value instanceof Literal) { + return ((Literal) value).longValue(); + } else { + return 0; + } + } + } catch (QueryEvaluationException e) { + throw new RepositoryException(e); + } + return 0; + } + + String sizeAsTupleQuery(Resource... contexts) { + String query = COUNT_EVERYTHING; + if (contexts != null && isQuadMode()) { + if (contexts.length == 1 && contexts[0].isIRI()) { + query = "SELECT (COUNT(*) AS ?count) WHERE { GRAPH <" + ((IRI) contexts[0]).stringValue() + + "> { ?s ?p ?o}}"; + } else if (contexts.length > 0) { + String graphs = Arrays.stream(contexts) + .filter(Resource::isIRI) + .map(Resource::stringValue) + .map(s -> "FROM <" + s + ">") + .collect(Collectors.joining(" ")); + query = "SELECT (COUNT(*) AS ?count) " + graphs + "WHERE { ?s ?p ?o}"; } - return i; } + return query; } @Override diff --git a/core/repository/sparql/src/test/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnectionTest.java b/core/repository/sparql/src/test/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnectionTest.java index e799ffb264a..92dee89907c 100644 --- a/core/repository/sparql/src/test/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnectionTest.java +++ b/core/repository/sparql/src/test/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnectionTest.java @@ -12,13 +12,19 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.eclipse.rdf4j.model.util.Values.iri; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.lang.ref.WeakReference; import org.eclipse.rdf4j.http.client.SPARQLProtocolSession; import org.eclipse.rdf4j.model.IRI; @@ -27,10 +33,18 @@ import org.eclipse.rdf4j.model.vocabulary.FOAF; import org.eclipse.rdf4j.model.vocabulary.RDF; import org.eclipse.rdf4j.model.vocabulary.RDFS; +import org.eclipse.rdf4j.query.impl.MapBindingSet; +import org.eclipse.rdf4j.query.impl.SimpleBinding; +import org.eclipse.rdf4j.query.impl.TupleQueryResultBuilder; +import org.eclipse.rdf4j.query.parser.ParsedQuery; +import org.eclipse.rdf4j.query.parser.sparql.SPARQLParser; +import org.eclipse.rdf4j.query.parser.sparql.SPARQLParserFactory; import org.eclipse.rdf4j.rio.ParserConfig; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.invocation.InvocationOnMock; public class SPARQLConnectionTest { @@ -100,6 +114,26 @@ public void testAddSingleContextHandling() throws Exception { assertThat(sparqlUpdate).containsPattern(expectedAddPattern).containsPattern(expectedRemovePattern); } + @Test + public void testSizeQuery() throws Exception { + + String sizeAsTupleQuery = subject.sizeAsTupleQuery(); + ParsedQuery query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/"); + assertNotNull(query); + + sizeAsTupleQuery = subject.sizeAsTupleQuery(vf.createIRI("urn:g1")); + query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/"); + assertNotNull(query); + + sizeAsTupleQuery = subject.sizeAsTupleQuery(vf.createIRI("urn:g1"), vf.createIRI("urn:g2")); + query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/"); + assertNotNull(query); + + sizeAsTupleQuery = subject.sizeAsTupleQuery(vf.createIRI("urn:g1"), vf.createBNode()); + query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/"); + assertNotNull(query); + } + @Test public void testAddMultipleContextHandling() throws Exception { ArgumentCaptor sparqlUpdateCaptor = ArgumentCaptor.forClass(String.class); From 9650d551697ca683696c20b3466fd9ed6b62dda2 Mon Sep 17 00:00:00 2001 From: Jerven Bolleman Date: Sat, 11 May 2024 16:24:05 +0200 Subject: [PATCH 2/3] GH-4920 When sending the remote size query make sure we don't send null, or RDF4J nill --- .../repository/sparql/SPARQLConnection.java | 20 ++++++++++++++++--- .../sparql/SPARQLConnectionTest.java | 12 +++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java b/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java index 6150ef296b2..ecb010d52e2 100644 --- a/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java +++ b/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java @@ -42,6 +42,8 @@ import org.eclipse.rdf4j.model.impl.DynamicModelFactory; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.util.Literals; +import org.eclipse.rdf4j.model.vocabulary.RDF4J; +import org.eclipse.rdf4j.model.vocabulary.SESAME; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.BooleanQuery; import org.eclipse.rdf4j.query.GraphQuery; @@ -313,12 +315,13 @@ public long size(Resource... contexts) throws RepositoryException { String sizeAsTupleQuery(Resource... contexts) { String query = COUNT_EVERYTHING; if (contexts != null && isQuadMode()) { - if (contexts.length == 1 && contexts[0].isIRI()) { - query = "SELECT (COUNT(*) AS ?count) WHERE { GRAPH <" + ((IRI) contexts[0]).stringValue() + if (contexts.length == 1 && isExposableGraphIri(contexts[0])) { // in case the context is null we want the + // default graph. + query = "SELECT (COUNT(*) AS ?count) WHERE { GRAPH <" + contexts[0].stringValue() + "> { ?s ?p ?o}}"; } else if (contexts.length > 0) { String graphs = Arrays.stream(contexts) - .filter(Resource::isIRI) + .filter(SPARQLConnection::isExposableGraphIri) .map(Resource::stringValue) .map(s -> "FROM <" + s + ">") .collect(Collectors.joining(" ")); @@ -328,6 +331,17 @@ String sizeAsTupleQuery(Resource... contexts) { return query; } + /** + * For the sparql protocol a context must be an IRI However we can't send out the RDF4j intenral default graph IRIs + * + * @param resource to test if it can be the IRI for a named graph + * @return true if it the input can be a foreign named graph. + */ + private static boolean isExposableGraphIri(Resource resource) { + // We use the instanceof test to avoid any issue with a null pointer. + return resource instanceof IRI && RDF4J.NIL != resource && SESAME.NIL != resource; + } + @Override public RepositoryResult getStatements(Resource subj, IRI pred, Value obj, boolean includeInferred, Resource... contexts) throws RepositoryException { diff --git a/core/repository/sparql/src/test/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnectionTest.java b/core/repository/sparql/src/test/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnectionTest.java index 92dee89907c..5201c4402c1 100644 --- a/core/repository/sparql/src/test/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnectionTest.java +++ b/core/repository/sparql/src/test/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnectionTest.java @@ -13,6 +13,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.eclipse.rdf4j.model.util.Values.iri; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; @@ -32,6 +33,7 @@ import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.FOAF; import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.vocabulary.RDF4J; import org.eclipse.rdf4j.model.vocabulary.RDFS; import org.eclipse.rdf4j.query.impl.MapBindingSet; import org.eclipse.rdf4j.query.impl.SimpleBinding; @@ -132,6 +134,16 @@ public void testSizeQuery() throws Exception { sizeAsTupleQuery = subject.sizeAsTupleQuery(vf.createIRI("urn:g1"), vf.createBNode()); query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/"); assertNotNull(query); + + sizeAsTupleQuery = subject.sizeAsTupleQuery(RDF4J.NIL); + query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/"); + assertNotNull(query); + assertFalse(sizeAsTupleQuery.contains("nil")); + + sizeAsTupleQuery = subject.sizeAsTupleQuery(null); + query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/"); + + assertNotNull(query); } @Test From a01fab90c4b9227d5d1f5fb6aeea83b5740e6629 Mon Sep 17 00:00:00 2001 From: Jerven Bolleman Date: Fri, 31 May 2024 17:08:05 +0200 Subject: [PATCH 3/3] GH-4920 Make the logic that distinguises between counting from the remote default graph, or a dataset clearer --- .../repository/sparql/SPARQLConnection.java | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java b/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java index ecb010d52e2..05e8901fef1 100644 --- a/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java +++ b/core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java @@ -313,33 +313,40 @@ public long size(Resource... contexts) throws RepositoryException { } String sizeAsTupleQuery(Resource... contexts) { - String query = COUNT_EVERYTHING; - if (contexts != null && isQuadMode()) { - if (contexts.length == 1 && isExposableGraphIri(contexts[0])) { // in case the context is null we want the - // default graph. - query = "SELECT (COUNT(*) AS ?count) WHERE { GRAPH <" + contexts[0].stringValue() + + // in case the context is null we want the + // default graph of the remote store i.e. ask without graph/from. + if (contexts != null && isQuadMode() && contexts.length > 0) { + // this is an optimization for the case that we can use a GRAPH instead of a FROM. + if (contexts.length == 1 && isExposableGraphIri(contexts[0])) { + return "SELECT (COUNT(*) AS ?count) WHERE { GRAPH <" + contexts[0].stringValue() + "> { ?s ?p ?o}}"; - } else if (contexts.length > 0) { + } else { + // If we had an default graph setting that is sesame/rdf4j specific + // we must drop it before sending it over the wire. Otherwise + // gather up the given contexts and send them as a from clauses + // to make the matching dataset. String graphs = Arrays.stream(contexts) .filter(SPARQLConnection::isExposableGraphIri) .map(Resource::stringValue) .map(s -> "FROM <" + s + ">") .collect(Collectors.joining(" ")); - query = "SELECT (COUNT(*) AS ?count) " + graphs + "WHERE { ?s ?p ?o}"; + return "SELECT (COUNT(*) AS ?count) " + graphs + "WHERE { ?s ?p ?o}"; } + } else { + return COUNT_EVERYTHING; } - return query; } /** - * For the sparql protocol a context must be an IRI However we can't send out the RDF4j intenral default graph IRIs + * For the sparql protocol a context must be an IRI However we can't send out the RDF4j internal default graph IRIs * * @param resource to test if it can be the IRI for a named graph * @return true if it the input can be a foreign named graph. */ private static boolean isExposableGraphIri(Resource resource) { // We use the instanceof test to avoid any issue with a null pointer. - return resource instanceof IRI && RDF4J.NIL != resource && SESAME.NIL != resource; + return resource instanceof IRI && !RDF4J.NIL.equals(resource) && !SESAME.NIL.equals(resource); } @Override