Skip to content

Commit

Permalink
#13: push limit clauses to BGPs for simple queries
Browse files Browse the repository at this point in the history
For simple queries consisting of a single BGP, we now push LIMIT clauses
into the BGP for early evaluation.
  • Loading branch information
Andreas Schwarte committed Apr 16, 2019
1 parent caf50bd commit f4f8dc4
Show file tree
Hide file tree
Showing 9 changed files with 219 additions and 1 deletion.
1 change: 1 addition & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ See upgrade notes for details.
- make logging backend optional (adapters via SLF4j)
- migrate (optional) logging backend to log4j 2
- redefine logging behavior for CLI
* Improvements to optimizers (LIMIT, FILTER)
* Improved overall robustness and fault tolerance
* Documentation migrated to https://github.com/VeritasOS/fedx/wiki
* Technical Improvements
Expand Down
45 changes: 45 additions & 0 deletions src/com/fluidops/fedx/algebra/FedXStatementPattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.algebra.AbstractQueryModelNode;
import org.eclipse.rdf4j.query.algebra.QueryModelVisitor;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet;
Expand Down Expand Up @@ -50,6 +51,7 @@ public abstract class FedXStatementPattern extends StatementPattern implements S
protected final List<String> freeVars = new ArrayList<String>(3);
protected FilterValueExpr filterExpr = null;
protected QueryBindingSet boundFilters = null; // contains bound filter bindings, that need to be added as additional bindings
protected long upperLimit = -1; // if set to a positive number, this upper limit is applied to any subquery

public FedXStatementPattern(StatementPattern node, QueryInfo queryInfo) {
super(node.getSubjectVar(), node.getPredicateVar(), node.getObjectVar(), node.getContextVar());
Expand All @@ -69,6 +71,10 @@ public <X extends Exception> void visitChildren(QueryModelVisitor<X> visitor)
BoundFiltersNode.visit(visitor, boundFilters);
}

if (upperLimit > 0) {
new UpperLimitNode(upperLimit).visit(visitor);
}

if (filterExpr!=null)
filterExpr.visit(visitor);
}
Expand Down Expand Up @@ -180,6 +186,24 @@ public void addBoundFilter(String varName, Value value) {
// if no free vars AND hasResults => replace by TrueNode to avoid additional remote requests
}

/**
 * Defines the maximum number of results requested from each individual
 * subquery that is generated for this statement expression.
 *
 * @param upperLimit the upper limit to apply; only positive values take
 *                   effect, any other value (the default is -1) means
 *                   that no limit is applied
 */
public void setUpperLimit(final long upperLimit) {
	this.upperLimit = upperLimit;
}

/**
 * Returns the upper limit configured for subqueries of this statement
 * expression.
 *
 * @return the configured upper limit; a value that is not positive (the
 *         default is -1) indicates that no LIMIT is applied
 */
public long getUpperLimit() {
	return upperLimit;
}

private List<StatementSource> sort(List<StatementSource> stmtSources) {
List<StatementSource> res = new ArrayList<StatementSource>(stmtSources);
Collections.sort(res, new Comparator<StatementSource>() {
Expand All @@ -190,4 +214,25 @@ public int compare(StatementSource o1, StatementSource o2) {
});
return res;
}

/**
 * Helper node that carries the upper limit of the enclosing statement
 * pattern. It is handed to visitors via
 * {@link QueryModelVisitor#meetOther(org.eclipse.rdf4j.query.algebra.QueryModelNode)}
 * and exposes the limit through its signature.
 */
static class UpperLimitNode extends AbstractQueryModelNode {

	private static final long serialVersionUID = -1331709574582152474L;

	/** the limit value reported in the signature of this node */
	private final long upperLimit;

	/**
	 * @param upperLimit the upper limit represented by this node
	 */
	public UpperLimitNode(long upperLimit) {
		this.upperLimit = upperLimit;
	}

	/**
	 * @return the textual signature of this node, i.e. the fixed prefix
	 *         followed by the limit value
	 */
	@Override
	public String getSignature() {
		return "Upper Limit: ".concat(Long.toString(upperLimit));
	}

	/**
	 * Dispatches this node to the visitor's generic handler.
	 */
	@Override
	public <X extends Exception> void visit(QueryModelVisitor<X> visitor) throws X {
		visitor.meetOther(this);
	}
}
}
31 changes: 30 additions & 1 deletion src/com/fluidops/fedx/optimizer/GenericInfoOptimizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@

import org.eclipse.rdf4j.query.algebra.Filter;
import org.eclipse.rdf4j.query.algebra.Join;
import org.eclipse.rdf4j.query.algebra.Projection;
import org.eclipse.rdf4j.query.algebra.Service;
import org.eclipse.rdf4j.query.algebra.Slice;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.Union;
Expand All @@ -46,7 +48,11 @@ public class GenericInfoOptimizer extends AbstractQueryModelVisitor<Optimization
protected boolean hasFilter = false;
protected boolean hasUnion = false;
protected boolean hasService = false;
protected List<StatementPattern> stmts = new ArrayList<StatementPattern>();
protected long limit = -1; // set to a positive number if the main query has a limit
protected List<StatementPattern> stmts = new ArrayList<>();

// internal helpers
private boolean seenProjection = false; // whether the main projection has been visited

protected final QueryInfo queryInfo;

Expand All @@ -67,6 +73,14 @@ public List<StatementPattern> getStatements() {
return stmts;
}

/**
 * @return true if a positive limit has been recorded for the main query
 */
public boolean hasLimit() {
	return 0 < limit;
}

/**
 * @return the limit recorded for the main query, or the initial value -1 if
 *         none has been recorded
 */
public long getLimit() {
	return this.limit;
}

@Override
public void optimize(TupleExpr tupleExpr) {

Expand Down Expand Up @@ -119,6 +133,21 @@ public void meet(StatementPattern node) {
stmts.add(node);
}

/**
 * Marks that a projection has been visited and then continues with the
 * default traversal.
 */
@Override
public void meet(Projection node) throws OptimizationException {
	// the flag must be set before descending, so that slices found below
	// this projection are not treated as the limit of the main query
	this.seenProjection = true;
	super.meet(node);
}

/**
 * Records the limit of a slice that is visited before any projection (i.e.
 * the slice of the main query) and then continues with the default
 * traversal.
 */
@Override
public void meet(Slice node) throws OptimizationException {
	// slices encountered after a projection was visited do not belong to
	// the main query and are ignored here
	final boolean belongsToMainQuery = !seenProjection;
	if (belongsToMainQuery) {
		limit = node.getLimit();
	}
	super.meet(node);
}

/**
 * @return the current value of the {@code hasService} flag
 */
public boolean hasService() {
	return this.hasService;
}
Expand Down
93 changes: 93 additions & 0 deletions src/com/fluidops/fedx/optimizer/LimitOptimizer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (C) 2019 Veritas Technologies LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.fluidops.fedx.optimizer;

import org.eclipse.rdf4j.query.algebra.Projection;
import org.eclipse.rdf4j.query.algebra.QueryModelNode;
import org.eclipse.rdf4j.query.algebra.Slice;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;

import com.fluidops.fedx.algebra.FedXStatementPattern;
import com.fluidops.fedx.exception.OptimizationException;

/**
 * An optimizer that attempts to push upper limits into BGPs of the query.
 *
 * Currently upper limits are only pushed for simple queries consisting of a
 * single BGP, i.e. for the shape
 * {@code Slice (without offset) -> Projection -> FedXStatementPattern}.
 *
 * The limit is deliberately not pushed if any other operator sits between
 * the {@link Slice} and the {@link Projection} (for instance a duplicate
 * eliminating operator): in that case the rows delivered by a limited
 * subquery may be reduced further before the slice is applied, and the
 * overall query could return fewer results than requested.
 *
 * @author Andreas Schwarte
 *
 */
public class LimitOptimizer extends AbstractQueryModelVisitor<OptimizationException> implements FedXOptimizer {

	/**
	 * Helper variable that contains an applicable limit for the current scope. Set
	 * to -1 if no limit is applicable.
	 */
	private long applicableLimitInScope = -1;

	/**
	 * Traverses the given expression and pushes the limit where applicable.
	 *
	 * @param tupleExpr the expression to optimize
	 */
	@Override
	public void optimize(TupleExpr tupleExpr) {

		try {
			tupleExpr.visit(this);
		} catch (RuntimeException e) {
			throw e;
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Delegates to the default handling of the super class.
	 */
	@Override
	public void meetOther(QueryModelNode node) throws OptimizationException {

		super.meetOther(node);
	}

	/**
	 * Establishes the limit of the given slice as the applicable limit for the
	 * traversal of its argument, provided that pushing it is safe.
	 */
	@Override
	public void meet(Slice node) throws OptimizationException {
		// The limit may only be pushed if
		// - the slice has no offset (skipped rows would otherwise be missing), and
		// - the slice directly wraps the projection, such that no intermediate
		//   operator can drop rows after the limit was applied in the subquery.
		// In all other cases any limit of an enclosing scope is cleared as well.
		final boolean pushable = !node.hasOffset() && node.getArg() instanceof Projection;
		applicableLimitInScope = pushable ? node.getLimit() : -1;
		try {
			super.meet(node);
		} finally {
			// leave the scope of this slice, also in case of errors
			applicableLimitInScope = -1;
		}
	}

	/**
	 * Pushes the applicable limit (if any) into the argument of the projection
	 * if that argument is a single {@link FedXStatementPattern}.
	 */
	@Override
	public void meet(Projection proj) throws OptimizationException {

		TupleExpr expr = proj.getArg();
		// if the top most element is a statement, i.e. no join, union or
		// any other complex pattern, we can push the limit
		// => this case typically represents a query with a single BGP
		if (expr instanceof FedXStatementPattern && applicableLimitInScope > 0) {
			pushLimit((FedXStatementPattern) expr, applicableLimitInScope);
		}

		// currently no need to traverse further. Might be added if we do further
		// optimizations
	}

	/**
	 * Applies the upper limit to the given statement.
	 *
	 * @param stmt       the statement to which the limit is applied
	 * @param upperLimit the limit to set
	 */
	protected void pushLimit(FedXStatementPattern stmt, long upperLimit) {
		stmt.setUpperLimit(upperLimit);
	}
}
5 changes: 5 additions & 0 deletions src/com/fluidops/fedx/optimizer/Optimizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ public static TupleExpr optimize(TupleExpr parsed, Dataset dataset, BindingSet b
// optimize statement groups and join order
new StatementGroupOptimizer(queryInfo).optimize(query);

// potentially push limits (if applicable)
if (info.hasLimit()) {
new LimitOptimizer().optimize(query);
}

// optimize Filters, if available
// Note: this is done after the join order is determined to ease filter pushing
if (info.hasFilter())
Expand Down
4 changes: 4 additions & 0 deletions src/com/fluidops/fedx/util/QueryStringUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,10 @@ public static String selectQueryString(FedXStatementPattern stmt, BindingSet bin

res.append(" }");

long upperLimit = stmt.getUpperLimit();
if (upperLimit > 0) {
res.append(" LIMIT ").append(upperLimit);
}
return res.toString();
}

Expand Down
24 changes: 24 additions & 0 deletions test/com/fluidops/fedx/BasicTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.Set;

import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.TupleQuery;
import org.eclipse.rdf4j.query.TupleQueryResult;
import org.eclipse.rdf4j.repository.RepositoryConnection;
Expand Down Expand Up @@ -141,4 +142,27 @@ public void testQueryBinding() throws Exception {

compareTupleQueryResults(actual, expected, false);
}

/**
 * Verifies that a simple single-BGP query with LIMIT 1 produces the expected
 * query plan (limit pushed into the statement) and returns exactly one
 * result.
 */
@Test
public void testQueryWithLimit() throws Exception {

	prepareTest(Arrays.asList("/tests/medium/data1.ttl", "/tests/medium/data2.ttl", "/tests/medium/data3.ttl",
			"/tests/medium/data4.ttl"));

	String queryString = readQueryString("/tests/basic/query_limit01.rq");

	evaluateQueryPlan("/tests/basic/query_limit01.rq", "/tests/basic/query_limit01.qp");

	TupleQuery query = QueryManager.prepareTupleQuery(queryString);

	try (TupleQueryResult actual = query.evaluate()) {
		// an empty result must fail the test as well: otherwise a broken limit
		// handling that drops all results would go unnoticed
		if (!actual.hasNext()) {
			throw new AssertionError("Expected single result due to LIMIT 1, but result was empty");
		}
		BindingSet firstResult = actual.next();
		if (!firstResult.hasBinding("person")) {
			throw new AssertionError("Expected binding for 'person' in result: " + firstResult);
		}
		if (actual.hasNext()) {
			throw new AssertionError("Expected single result due to LIMIT 1");
		}
	}
}
}
12 changes: 12 additions & 0 deletions test/tests/basic/query_limit01.qp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
QueryRoot
Slice ( limit=1 )
Projection
ProjectionElemList
ProjectionElem "person"
StatementSourcePattern
Var (name=person)
Var (name=_const_f5e5585a_uri, value=http://www.w3.org/1999/02/22-rdf-syntax-ns#type, anonymous)
Var (name=_const_e1df31e0_uri, value=http://xmlns.com/foaf/0.1/Person, anonymous)
StatementSource (id=endpoint1, type=REMOTE)
StatementSource (id=endpoint2, type=REMOTE)
Upper Limit: 1
5 changes: 5 additions & 0 deletions test/tests/basic/query_limit01.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?person WHERE {
?person a foaf:Person
} LIMIT 1

0 comments on commit f4f8dc4

Please sign in to comment.