Skip to content

Commit

Permalink
GH-5149 Add search:numDocs property and maxQueryDocuments param in LuceneSail query
Browse files Browse the repository at this point in the history
  • Loading branch information
ate47 committed Oct 29, 2024
1 parent 3e4f94f commit 41d4869
Show file tree
Hide file tree
Showing 10 changed files with 281 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -577,10 +577,19 @@ protected Iterable<? extends DocumentScore> query(Resource subject, QuerySpec sp
}

SearchHits hits;
Integer numDocs = spec.getNumDocs();
if (subject != null) {
hits = search(subject, request, qb);
if (numDocs != null) {
hits = search(subject, request, qb, numDocs);
} else {
hits = search(subject, request, qb);
}
} else {
hits = search(request, qb);
if (numDocs != null) {
hits = search(request, qb, numDocs);
} else {
hits = search(request, qb);
}
}
return Iterables.transform(hits, new Function<>() {

Expand All @@ -600,11 +609,24 @@ public DocumentScore apply(SearchHit hit) {
* @return search hits
*/
public SearchHits search(Resource resource, SearchRequestBuilder request, QueryBuilder query) {
// No per-query document count was requested: delegate with -1, a non-positive numDocs, so the
// numDocs-aware overload does not apply a query-specific limit.
return search(resource, request, query, -1);
}

/**
* Evaluates the given query only for the given resource.
*
* @param resource the resource whose ID is used to restrict the search
* @param request  the search request builder, passed on unchanged
* @param query    the query to evaluate; it is combined with a term query on the resource ID so that both must
*                 match
* @param numDocs  the number of documents requested for this query, passed on unchanged
* @return search hits
*/
public SearchHits search(Resource resource, SearchRequestBuilder request, QueryBuilder query, int numDocs) {
// rewrite the query
QueryBuilder idQuery = QueryBuilders.termQuery(SearchFields.URI_FIELD_NAME,
SearchFields.getResourceID(resource));
QueryBuilder combinedQuery = QueryBuilders.boolQuery().must(idQuery).must(query);
// NOTE(review): the next two lines are consecutive returns; the first ignores numDocs and leaves the
// second unreachable. This looks like the pre-change and post-change line of the same diff hunk --
// confirm that only the numDocs-forwarding call is meant to remain.
return search(request, combinedQuery);
return search(request, combinedQuery, numDocs);
}

@Override
Expand Down Expand Up @@ -712,9 +734,22 @@ private ShapeRelation toSpatialOp(String relation) {
* Evaluates the given query and returns the results as SearchHits.
*/
public SearchHits search(SearchRequestBuilder request, QueryBuilder query) {
// No per-query document count was requested: delegate with -1, a non-positive numDocs, so the
// numDocs-aware overload does not apply a query-specific limit.
return search(request, query, -1);
}

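/**
* Evaluates the given query and returns the results as SearchHits. A positive numDocs is used as the number of
* documents to request, capped at maxQueryDocs when that limit is positive and smaller than numDocs; otherwise
* the index-wide maxDocs setting applies when it is positive.
*/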
public SearchHits search(SearchRequestBuilder request, QueryBuilder query, int numDocs) {
String[] types = getTypes();
int nDocs;
if (maxDocs > 0) {
if (numDocs > 0) {
if (maxQueryDocs > 0 && maxQueryDocs < numDocs) {
nDocs = maxQueryDocs;
} else {
nDocs = numDocs;
}
} else if (maxDocs > 0) {
nDocs = maxDocs;
} else {
long docCount = client.prepareSearch(indexName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public abstract class AbstractSearchIndex implements SearchIndex {
}

/** Value of the LuceneSail.MAX_DOCUMENTS_KEY parameter read in initialize, or -1 when it is not set. */
protected int maxDocs;
/**
* Value of the LuceneSail.MAX_QUERY_DOCUMENTS_KEY parameter read in initialize; takes the value of maxDocs when
* the parameter is not set.
*/
protected int maxQueryDocs;

protected Set<String> wktFields = Collections.singleton(SearchFields.getPropertyField(GEO.AS_WKT));

Expand All @@ -77,6 +78,8 @@ public abstract class AbstractSearchIndex implements SearchIndex {
public void initialize(Properties parameters) throws Exception {
String maxDocParam = parameters.getProperty(LuceneSail.MAX_DOCUMENTS_KEY);
maxDocs = (maxDocParam != null) ? Integer.parseInt(maxDocParam) : -1;
String maxQueryDocParam = parameters.getProperty(LuceneSail.MAX_QUERY_DOCUMENTS_KEY);
maxQueryDocs = (maxQueryDocParam != null) ? Integer.parseInt(maxQueryDocParam) : maxDocs;

String wktFieldParam = parameters.getProperty(LuceneSail.WKT_FIELDS);
if (wktFieldParam != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,13 @@ public class LuceneSail extends NotifyingSailWrapper {
*/
public static final String MAX_DOCUMENTS_KEY = "maxDocuments";

/**
* Set the key "maxQueryDocuments=&lt;n&gt;" as sail parameter to limit the maximum number of documents a single
* search query is allowed to request. The default is the value of the {@link #MAX_DOCUMENTS_KEY} parameter.
*/
public static final String MAX_QUERY_DOCUMENTS_KEY = "maxQueryDocuments";

/**
* Set this key to configure which fields contain WKT and should be spatially indexed. The value should be a
* space-separated list of URIs. Default is http://www.opengis.net/ont/geosparql#asWKT.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ public class LuceneSailSchema {

public static final IRI CONTEXT;

/** IRI of the numDocs predicate; initialized in the static block as {@code NAMESPACE + "numDocs"}. */
public static final IRI NUM_DOCS;

static {
ValueFactory factory = SimpleValueFactory.getInstance(); // compatible with beta4:
// creating a new factory
Expand All @@ -73,5 +75,6 @@ public class LuceneSailSchema {
WITHIN_DISTANCE = factory.createIRI(NAMESPACE + "withinDistance");
DISTANCE = factory.createIRI(NAMESPACE + "distance");
CONTEXT = factory.createIRI(NAMESPACE + "context");
NUM_DOCS = factory.createIRI(NAMESPACE + "numDocs");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
import java.util.stream.Collectors;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.query.algebra.QueryModelNode;
import org.eclipse.rdf4j.query.algebra.SingletonSet;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
Expand Down Expand Up @@ -67,21 +69,43 @@ private static void append(Var var, StringBuilder buffer) {

private final StatementPattern idPattern;

private final StatementPattern numDocsPattern;

private final Resource subject;

private final String matchesVarName;

private final String scoreVarName;

private final Integer numDocs;

/**
* Creates a query spec without a numDocs pattern by delegating to the seven-argument constructor with
* {@code null} as the {@code numDocsPattern}.
*/
public QuerySpec(StatementPattern matchesPattern, Collection<QueryParam> queryPatterns,
StatementPattern scorePattern, StatementPattern typePattern,
StatementPattern idPattern, Resource subject) {
this(matchesPattern, queryPatterns, scorePattern, typePattern, idPattern, null, subject);
}

public QuerySpec(StatementPattern matchesPattern, Collection<QueryParam> queryPatterns,
StatementPattern scorePattern, StatementPattern typePattern,
StatementPattern idPattern, StatementPattern numDocsPattern, Resource subject) {
this.matchesPattern = matchesPattern;
this.queryPatterns = queryPatterns;
this.scorePattern = scorePattern;
this.typePattern = typePattern;
this.idPattern = idPattern;
this.numDocsPattern = numDocsPattern;
this.subject = subject;
if (numDocsPattern != null) {
Value val = numDocsPattern.getObjectVar().getValue();
if (val != null && val.isLiteral()) {
this.numDocs = ((Literal) val).intValue();
} else {
throw new IllegalArgumentException("numDocs should be constant literal value");
}
} else {
this.numDocs = null;
}

if (matchesPattern != null) {
this.matchesVarName = matchesPattern.getSubjectVar().getName();
} else {
Expand All @@ -101,9 +125,11 @@ public QuerySpec(String matchesVarName, String propertyVarName, String scoreVarN
this.matchesPattern = null;
this.scorePattern = null;
this.typePattern = null;
this.numDocsPattern = null;
this.queryPatterns = Set.of();
this.idPattern = null;
this.subject = subject;
this.numDocs = null;
}

@Override
Expand All @@ -121,6 +147,7 @@ public QueryModelNode removeQueryPatterns() {
replace(getScorePattern(), replacement);
replace(getTypePattern(), replacement);
replace(getIdPattern(), replacement);
replace(getNumDocsPattern(), replacement);

final QueryModelNode placeholder = new SingletonSet();

Expand Down Expand Up @@ -154,6 +181,10 @@ public StatementPattern getScorePattern() {
return scorePattern;
}

/**
* The statement pattern carrying the numDocs value for this query.
*
* @return the numDocs pattern, or {@code null} if this spec was created without one
*/
public StatementPattern getNumDocsPattern() {
return numDocsPattern;
}

/**
* The variable name associated with the query score
*
Expand All @@ -163,6 +194,10 @@ public String getScoreVariableName() {
return scoreVarName;
}

/**
* The number of documents requested for this query, read from the literal object of the numDocs pattern.
*
* @return the requested number of documents, or {@code null} if this spec was created without a numDocs pattern
*/
public Integer getNumDocs() {
return numDocs;
}

/**
* The type statement pattern this spec was created with.
*
* @return the type pattern; may be {@code null}
*/
public StatementPattern getTypePattern() {
return typePattern;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.INDEXID;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.LUCENE_QUERY;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.MATCHES;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.NUM_DOCS;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.PROPERTY;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.QUERY;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.SCORE;
Expand Down Expand Up @@ -152,7 +153,7 @@ public void process(TupleExpr tupleExpr, BindingSet bindings, Collection<SearchQ
}

// find the relevant outgoing patterns
StatementPattern typePattern, propertyPattern, scorePattern, snippetPattern;
StatementPattern typePattern, propertyPattern, scorePattern, snippetPattern, numDocsPattern;
List<StatementPattern> queryPatterns;

try {
Expand All @@ -161,6 +162,7 @@ public void process(TupleExpr tupleExpr, BindingSet bindings, Collection<SearchQ
propertyPattern = getPattern(matchesVar, filter.propertyPatterns);
scorePattern = getPattern(matchesVar, filter.scorePatterns);
snippetPattern = getPattern(matchesVar, filter.snippetPatterns);
numDocsPattern = getPattern(matchesVar, filter.numDocsPatterns);
} catch (IllegalArgumentException e) {
failOrWarn(e);
continue;
Expand Down Expand Up @@ -302,7 +304,8 @@ else if (propertyValue != null) {
queryString, propertyURI, null));
}

QuerySpec querySpec = new QuerySpec(matchesPattern, queries, scorePattern, typePattern, idPattern, subject);
QuerySpec querySpec = new QuerySpec(matchesPattern, queries, scorePattern, typePattern, idPattern,
numDocsPattern, subject);

if (querySpec.isEvaluable()) {
// constant optimizer
Expand Down Expand Up @@ -341,6 +344,10 @@ else if (propertyValue != null) {
funcCall.addArg(new ValueConstant(LuceneSailSchema.SNIPPET));
funcCall.addResultVar(snippetVar);
}
if (numDocsPattern != null) {
funcCall.addArg(new ValueConstant(LuceneSailSchema.NUM_DOCS));
funcCall.addArg(numDocsPattern.getObjectVar());
}

Join join = new Join();
matchesPattern.replaceWith(join);
Expand Down Expand Up @@ -465,6 +472,8 @@ private static class PatternFilter extends AbstractQueryModelVisitor<RuntimeExce

public ArrayList<StatementPattern> boostPatterns = new ArrayList<>();

/** Statement patterns whose predicate equals NUM_DOCS, collected by {@code meet(StatementPattern)}. */
public ArrayList<StatementPattern> numDocsPatterns = new ArrayList<>();

/**
* Method implementing the visitor pattern that gathers all statements using a predicate from the LuceneSail's
* namespace.
Expand All @@ -487,6 +496,8 @@ public void meet(StatementPattern node) {
idPatterns.add(node);
} else if (BOOST.equals(predicate)) {
boostPatterns.add(node);
} else if (NUM_DOCS.equals(predicate)) {
numDocsPatterns.add(node);
} else if (TYPE.equals(predicate)) {
Value object = node.getObjectVar().getValue();
if (LUCENE_QUERY.equals(object)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.BOOST;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.LUCENE_QUERY;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.MATCHES;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.NUM_DOCS;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.QUERY;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.SCORE;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.SNIPPET;
Expand Down Expand Up @@ -55,6 +56,7 @@ public void testQueryInterpretation() {
"<" + TYPE + "> <" + LUCENE_QUERY + ">; " +
"<" + QUERY + "> \"my Lucene query\"; " +
"<" + SCORE + "> ?Score; " +
"<" + NUM_DOCS + "> 76; " +
"<" + SNIPPET + "> ?Snippet ]. } ";
ParsedQuery query = parser.parseQuery(buffer, null);
TupleExpr tupleExpr = query.getTupleExpr();
Expand All @@ -69,6 +71,8 @@ public void testQueryInterpretation() {
assertEquals("Score", querySpec.getScorePattern().getObjectVar().getName());
assertEquals("Snippet", param.getSnippetPattern().getObjectVar().getName());
assertEquals(LUCENE_QUERY, querySpec.getTypePattern().getObjectVar().getValue());
assertEquals(76, querySpec.getNumDocs());
assertEquals(76, ((Literal) querySpec.getNumDocsPattern().getObjectVar().getValue()).intValue());
assertEquals("my Lucene query", param.getQuery());
assertNull(querySpec.getSubject());
}
Expand All @@ -80,11 +84,13 @@ public void testMultipleQueriesInterpretation() {
"<" + TYPE + "> <" + LUCENE_QUERY + ">; " +
"<" + QUERY + "> \"my Lucene query\"; " +
"<" + SCORE + "> ?score1; " +
"<" + NUM_DOCS + "> 86; " +
"<" + SNIPPET + "> ?snippet1 ]. " +
" ?sub2 <" + MATCHES + "> [ " +
"<" + TYPE + "> <" + LUCENE_QUERY + ">; " +
"<" + QUERY + "> \"second lucene query\"; " +
"<" + SCORE + "> ?score2; " +
"<" + NUM_DOCS + "> 13; " +
"<" + SNIPPET + "> ?snippet2 ]. " +
// and connect them both via any X in between, just as salt to make the
// parser do something
Expand All @@ -103,6 +109,7 @@ public void testMultipleQueriesInterpretation() {
// Matched the first
assertEquals("sub1", querySpec.getMatchesPattern().getSubjectVar().getName());
assertEquals(1, querySpec.getQueryPatterns().size());
assertEquals(86, querySpec.getNumDocs());
QuerySpec.QueryParam param = querySpec.getQueryPatterns().iterator().next();
assertEquals("my Lucene query",
((Literal) param.getQueryPattern().getObjectVar().getValue()).getLabel());
Expand All @@ -116,6 +123,7 @@ public void testMultipleQueriesInterpretation() {
// and the second
assertEquals("sub2", querySpec.getMatchesPattern().getSubjectVar().getName());
assertEquals(1, querySpec.getQueryPatterns().size());
assertEquals(13, querySpec.getNumDocs());
QuerySpec.QueryParam param = querySpec.getQueryPatterns().iterator().next();
assertEquals("second lucene query",
((Literal) param.getQueryPattern().getObjectVar().getValue()).getLabel());
Expand Down
Loading

0 comments on commit 41d4869

Please sign in to comment.