GH-4920 SPARQLConnection.size() now uses count query (#4972)
hmottestad authored Nov 10, 2024
2 parents 08a518a + a01fab9 commit 517353e
Showing 2 changed files with 103 additions and 6 deletions.
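
The practical effect of the change: size() no longer streams every statement from the endpoint just to count it client-side, but sends a single SELECT (COUNT(*) AS ?count) query instead. A minimal caller-side sketch of the new behaviour; the class name, endpoint URL, and graph IRI are illustrative and not part of this commit:

import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.sparql.SPARQLRepository;

public class SizeExample {

    public static void main(String[] args) {
        // Illustrative endpoint URL; replace with a real SPARQL service.
        SPARQLRepository repo = new SPARQLRepository("http://example.org/sparql");
        // Enable quad mode so that context arguments are honoured via GRAPH/FROM
        // (see sizeAsTupleQuery in the diff below).
        repo.enableQuadMode(true);
        repo.init();
        try (RepositoryConnection conn = repo.getConnection()) {
            // Previously this fetched all statements and counted them client-side;
            // with this commit it issues "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }".
            long total = conn.size();

            // With a single named graph the count is scoped with a GRAPH clause.
            long inGraph = conn.size(Values.iri("urn:g1"));

            System.out.println(total + " statements in total, " + inGraph + " in <urn:g1>");
        } finally {
            repo.shutDown();
        }
    }
}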
SPARQLConnection.java
@@ -17,7 +17,9 @@
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
import java.util.Arrays;
import java.util.Objects;
import java.util.stream.Collectors;

import org.apache.http.client.HttpClient;
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
@@ -39,6 +41,8 @@
import org.eclipse.rdf4j.model.impl.DynamicModelFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.util.Literals;
import org.eclipse.rdf4j.model.vocabulary.RDF4J;
import org.eclipse.rdf4j.model.vocabulary.SESAME;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.BooleanQuery;
import org.eclipse.rdf4j.query.GraphQuery;
@@ -79,6 +83,8 @@
*/
public class SPARQLConnection extends AbstractRepositoryConnection implements HttpClientDependent {

private static final String COUNT_EVERYTHING = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }";

private static final String EVERYTHING = "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }";

private static final String EVERYTHING_WITH_GRAPH = "SELECT * WHERE { ?s ?p ?o . OPTIONAL { GRAPH ?ctx { ?s ?p ?o } } }";
@@ -281,16 +287,61 @@ public boolean isEmpty() throws RepositoryException {

@Override
public long size(Resource... contexts) throws RepositoryException {
- try (RepositoryResult<Statement> stmts = getStatements(null, null, null, true, contexts)) {
- long i = 0;
- while (stmts.hasNext()) {
- stmts.next();
- i++;
String query = sizeAsTupleQuery(contexts);
TupleQuery tq = prepareTupleQuery(SPARQL, query);
try (TupleQueryResult res = tq.evaluate()) {
if (res.hasNext()) {

Value value = res.next().getBinding("count").getValue();
if (value instanceof Literal) {
return ((Literal) value).longValue();
} else {
return 0;
}
}
} catch (QueryEvaluationException e) {
throw new RepositoryException(e);
}
return 0;
}

String sizeAsTupleQuery(Resource... contexts) {

// In case contexts is null we want the
// default graph of the remote store, i.e. ask without GRAPH/FROM.
if (contexts != null && isQuadMode() && contexts.length > 0) {
// this is an optimization for the case that we can use a GRAPH instead of a FROM.
if (contexts.length == 1 && isExposableGraphIri(contexts[0])) {
return "SELECT (COUNT(*) AS ?count) WHERE { GRAPH <" + contexts[0].stringValue()
+ "> { ?s ?p ?o}}";
} else {
// If we were given a default graph setting that is sesame/rdf4j-specific,
// we must drop it before sending the query over the wire. Otherwise,
// gather up the given contexts and send them as FROM clauses
// to build the matching dataset.
String graphs = Arrays.stream(contexts)
.filter(SPARQLConnection::isExposableGraphIri)
.map(Resource::stringValue)
.map(s -> "FROM <" + s + ">")
.collect(Collectors.joining(" "));
return "SELECT (COUNT(*) AS ?count) " + graphs + "WHERE { ?s ?p ?o}";
}
- return i;
} else {
return COUNT_EVERYTHING;
}
}

/**
* For the SPARQL protocol a context must be an IRI. However, we can't send out the RDF4J-internal default graph IRIs.
*
* @param resource to test if it can be the IRI for a named graph
* @return true if the input can be used as a foreign named graph.
*/
private static boolean isExposableGraphIri(Resource resource) {
// We use the instanceof test to avoid any issue with a null pointer.
return resource instanceof IRI && !RDF4J.NIL.equals(resource) && !SESAME.NIL.equals(resource);
}

@Override
public RepositoryResult<Statement> getStatements(Resource subj, IRI pred, Value obj, boolean includeInferred,
Resource... contexts) throws RepositoryException {
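For reference, the new sizeAsTupleQuery helper above produces roughly three query shapes: a plain COUNT when no usable contexts are given (or the connection is not in quad mode), a GRAPH-scoped COUNT for a single exposable IRI, and FROM clauses for several. A small standalone sketch that checks hand-written equivalents of those shapes with the same SPARQL parser the test below uses; the graph IRIs and class name are illustrative:

import org.eclipse.rdf4j.query.parser.sparql.SPARQLParser;

public class CountQueryShapes {

    public static void main(String[] args) {
        // Hand-written equivalents of the query shapes built by sizeAsTupleQuery.
        String everything = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }";
        String singleGraph = "SELECT (COUNT(*) AS ?count) WHERE { GRAPH <urn:g1> { ?s ?p ?o } }";
        String multipleGraphs = "SELECT (COUNT(*) AS ?count) FROM <urn:g1> FROM <urn:g2> WHERE { ?s ?p ?o }";

        SPARQLParser parser = new SPARQLParser();
        for (String query : new String[] { everything, singleGraph, multipleGraphs }) {
            // parseQuery throws MalformedQueryException (unchecked) if the query is invalid.
            parser.parseQuery(query, "http://example.org/");
            System.out.println("valid: " + query);
        }
    }
}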
SPARQLConnectionTest.java
@@ -12,25 +12,41 @@

import static org.assertj.core.api.Assertions.assertThat;
import static org.eclipse.rdf4j.model.util.Values.iri;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.Mockito.atLeastOnce;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.lang.ref.WeakReference;

import org.eclipse.rdf4j.http.client.SPARQLProtocolSession;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.vocabulary.FOAF;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.RDF4J;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.query.impl.MapBindingSet;
import org.eclipse.rdf4j.query.impl.SimpleBinding;
import org.eclipse.rdf4j.query.impl.TupleQueryResultBuilder;
import org.eclipse.rdf4j.query.parser.ParsedQuery;
import org.eclipse.rdf4j.query.parser.sparql.SPARQLParser;
import org.eclipse.rdf4j.query.parser.sparql.SPARQLParserFactory;
import org.eclipse.rdf4j.rio.ParserConfig;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.Mock;
import org.mockito.invocation.InvocationOnMock;

public class SPARQLConnectionTest {

@@ -100,6 +116,36 @@ public void testAddSingleContextHandling() throws Exception {
assertThat(sparqlUpdate).containsPattern(expectedAddPattern).containsPattern(expectedRemovePattern);
}

@Test
public void testSizeQuery() throws Exception {

String sizeAsTupleQuery = subject.sizeAsTupleQuery();
ParsedQuery query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
assertNotNull(query);

sizeAsTupleQuery = subject.sizeAsTupleQuery(vf.createIRI("urn:g1"));
query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
assertNotNull(query);

sizeAsTupleQuery = subject.sizeAsTupleQuery(vf.createIRI("urn:g1"), vf.createIRI("urn:g2"));
query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
assertNotNull(query);

sizeAsTupleQuery = subject.sizeAsTupleQuery(vf.createIRI("urn:g1"), vf.createBNode());
query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
assertNotNull(query);

sizeAsTupleQuery = subject.sizeAsTupleQuery(RDF4J.NIL);
query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
assertNotNull(query);
assertFalse(sizeAsTupleQuery.contains("nil"));

sizeAsTupleQuery = subject.sizeAsTupleQuery(null);
query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");

assertNotNull(query);
}

@Test
public void testAddMultipleContextHandling() throws Exception {
ArgumentCaptor<String> sparqlUpdateCaptor = ArgumentCaptor.forClass(String.class);
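The result handling in the new size() comes down to reading the ?count binding of a single-row tuple result as a long. A minimal sketch of that pattern, run here against an in-memory MemoryStore only so the snippet works without a remote endpoint (this is not the repository type the commit touches):

import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.query.QueryLanguage;
import org.eclipse.rdf4j.query.TupleQueryResult;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.sail.SailRepository;
import org.eclipse.rdf4j.sail.memory.MemoryStore;

public class CountBindingExample {

    public static void main(String[] args) {
        SailRepository repo = new SailRepository(new MemoryStore());
        repo.init();
        try (RepositoryConnection conn = repo.getConnection()) {
            try (TupleQueryResult res = conn
                    .prepareTupleQuery(QueryLanguage.SPARQL, "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }")
                    .evaluate()) {
                // Same pattern as the new size(): take the single result row and read ?count as a long.
                long size = 0;
                if (res.hasNext()) {
                    Literal count = (Literal) res.next().getBinding("count").getValue();
                    size = count.longValue();
                }
                System.out.println("size = " + size);
            }
        } finally {
            repo.shutDown();
        }
    }
}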
